/* Subroutines used for code generation on IBM RS/6000.
   Copyright (C) 1991-2016 Free Software Foundation, Inc.
   Contributed by Richard Kenner (kenner@vlsi1.ultra.nyu.edu)

   This file is part of GCC.

   GCC is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published
   by the Free Software Foundation; either version 3, or (at your
   option) any later version.

   GCC is distributed in the hope that it will be useful, but WITHOUT
   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
   or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
   License for more details.

   You should have received a copy of the GNU General Public License
   along with GCC; see the file COPYING3.  If not see
   <http://www.gnu.org/licenses/>.  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "rtl.h"
#include "tree.h"
#include "memmodel.h"
#include "gimple.h"
#include "cfghooks.h"
#include "cfgloop.h"
#include "df.h"
#include "tm_p.h"
#include "stringpool.h"
#include "expmed.h"
#include "optabs.h"
#include "regs.h"
#include "ira.h"
#include "recog.h"
#include "cgraph.h"
#include "diagnostic-core.h"
#include "insn-attr.h"
#include "flags.h"
#include "alias.h"
#include "fold-const.h"
#include "stor-layout.h"
#include "calls.h"
#include "print-tree.h"
#include "varasm.h"
#include "explow.h"
#include "expr.h"
#include "output.h"
#include "dbxout.h"
#include "common/common-target.h"
#include "langhooks.h"
#include "reload.h"
#include "sched-int.h"
#include "gimplify.h"
#include "gimple-iterator.h"
#include "gimple-walk.h"
#include "intl.h"
#include "params.h"
#include "tm-constrs.h"
#include "tree-vectorizer.h"
#include "target-globals.h"
#include "builtins.h"
#include "context.h"
#include "tree-pass.h"
#if TARGET_XCOFF
#include "xcoffout.h"  /* get declarations of xcoff_*_section_name */
#endif
#if TARGET_MACHO
#include "gstab.h"  /* for N_SLINE */
#endif
#include "case-cfn-macros.h"
#include "ppc-auxv.h"

/* This file should be included last.  */
#include "target-def.h"

#ifndef TARGET_NO_PROTOTYPE
#define TARGET_NO_PROTOTYPE 0
#endif

#define min(A,B)  ((A) < (B) ? (A) : (B))
#define max(A,B)  ((A) > (B) ? (A) : (B))

/* Structure used to define the rs6000 stack */
typedef struct rs6000_stack {
  int reload_completed;         /* stack info won't change from here on */
  int first_gp_reg_save;        /* first callee saved GP register used */
  int first_fp_reg_save;        /* first callee saved FP register used */
  int first_altivec_reg_save;   /* first callee saved AltiVec register used */
  int lr_save_p;                /* true if the link reg needs to be saved */
  int cr_save_p;                /* true if the CR reg needs to be saved */
  unsigned int vrsave_mask;     /* mask of vec registers to save */
  int push_p;                   /* true if we need to allocate stack space */
  int calls_p;                  /* true if the function makes any calls */
  int world_save_p;             /* true if we're saving *everything*:
                                   r13-r31, cr, f14-f31, vrsave, v20-v31 */
  enum rs6000_abi abi;          /* which ABI to use */
  int gp_save_offset;           /* offset to save GP regs from initial SP */
  int fp_save_offset;           /* offset to save FP regs from initial SP */
  int altivec_save_offset;      /* offset to save AltiVec regs from initial SP */
  int lr_save_offset;           /* offset to save LR from initial SP */
  int cr_save_offset;           /* offset to save CR from initial SP */
  int vrsave_save_offset;       /* offset to save VRSAVE from initial SP */
  int spe_gp_save_offset;       /* offset to save spe 64-bit gprs */
  int varargs_save_offset;      /* offset to save the varargs registers */
  int ehrd_offset;              /* offset to EH return data */
  int ehcr_offset;              /* offset to EH CR field data */
  int reg_size;                 /* register size (4 or 8) */
  HOST_WIDE_INT vars_size;      /* variable save area size */
  int parm_size;                /* outgoing parameter size */
  int save_size;                /* save area size */
  int fixed_size;               /* fixed size of stack frame */
  int gp_size;                  /* size of saved GP registers */
  int fp_size;                  /* size of saved FP registers */
  int altivec_size;             /* size of saved AltiVec registers */
  int cr_size;                  /* size to hold CR if not in fixed area */
  int vrsave_size;              /* size to hold VRSAVE */
  int altivec_padding_size;     /* size of altivec alignment padding */
  int spe_gp_size;              /* size of 64-bit GPR save size for SPE */
  int spe_padding_size;
  HOST_WIDE_INT total_size;     /* total bytes allocated for stack */
  int spe_64bit_regs_used;
  int savres_strategy;
} rs6000_stack_t;

/* A C structure for machine-specific, per-function data.
   This is added to the cfun structure.  */
typedef struct GTY(()) machine_function
{
  /* Whether the instruction chain has been scanned already.  */
  int spe_insn_chain_scanned_p;
  /* Flags if __builtin_return_address (n) with n >= 1 was used.  */
  int ra_needs_full_frame;
  /* Flags if __builtin_return_address (0) was used.  */
  int ra_need_lr;
  /* Cache lr_save_p after expansion of builtin_eh_return.  */
  int lr_save_state;
  /* Whether we need to save the TOC to the reserved stack location in the
     function prologue.  */
  bool save_toc_in_prologue;
  /* Offset from virtual_stack_vars_rtx to the start of the ABI_V4
     varargs save area.  */
  HOST_WIDE_INT varargs_save_offset;
  /* Temporary stack slot to use for SDmode copies.  This slot is
     64-bits wide and is allocated early enough so that the offset
     does not overflow the 16-bit load/store offset field.  */
  rtx sdmode_stack_slot;
  /* Alternative internal arg pointer for -fsplit-stack.  */
  rtx split_stack_arg_pointer;
  bool split_stack_argp_used;
  /* Flag if r2 setup is needed with ELFv2 ABI.  */
  bool r2_setup_needed;
  /* The components already handled by separate shrink-wrapping, which should
     not be considered by the prologue and epilogue.  */
  bool gpr_is_wrapped_separately[32];
  bool lr_is_wrapped_separately;
} machine_function;

/* Support targetm.vectorize.builtin_mask_for_load.  */
static GTY(()) tree altivec_builtin_mask_for_load;

/* Set to nonzero once AIX common-mode calls have been defined.  */
static GTY(()) int common_mode_defined;

/* Label number of label created for -mrelocatable, to call to so we can
   get the address of the GOT section */
static int rs6000_pic_labelno;

#ifdef USING_ELFOS_H
/* Counter for labels which are to be placed in .fixup.  */
int fixuplabelno = 0;
#endif

/* Whether to use variant of AIX ABI for PowerPC64 Linux.  */
int dot_symbols;

/* Specify the machine mode that pointers have.  After generation of rtl, the
   compiler makes no further distinction between pointers and any other objects
   of this machine mode.  The type is unsigned since not all things that
   include rs6000.h also include machmode.h.  */
unsigned rs6000_pmode;

/* Width in bits of a pointer.  */
unsigned rs6000_pointer_size;

#ifdef HAVE_AS_GNU_ATTRIBUTE
# ifndef HAVE_LD_PPC_GNU_ATTR_LONG_DOUBLE
# define HAVE_LD_PPC_GNU_ATTR_LONG_DOUBLE 0
# endif
/* Flag whether floating point values have been passed/returned.
   Note that this doesn't say whether fprs are used, since the
   Tag_GNU_Power_ABI_FP .gnu.attributes value this flag controls
   should be set for soft-float values passed in gprs and ieee128
   values passed in vsx registers.  */
static bool rs6000_passes_float;
static bool rs6000_passes_long_double;
/* Flag whether vector values have been passed/returned.  */
static bool rs6000_passes_vector;
/* Flag whether small (<= 8 byte) structures have been returned.  */
static bool rs6000_returns_struct;
#endif

/* Value is TRUE if register/mode pair is acceptable.  */
bool rs6000_hard_regno_mode_ok_p[NUM_MACHINE_MODES][FIRST_PSEUDO_REGISTER];

/* Maximum number of registers needed for a given register class and mode.  */
unsigned char rs6000_class_max_nregs[NUM_MACHINE_MODES][LIM_REG_CLASSES];

/* How many registers are needed for a given register and mode.  */
unsigned char rs6000_hard_regno_nregs[NUM_MACHINE_MODES][FIRST_PSEUDO_REGISTER];

/* Map register number to register class.  */
enum reg_class rs6000_regno_regclass[FIRST_PSEUDO_REGISTER];

static int dbg_cost_ctrl;

/* Built in types.  */
tree rs6000_builtin_types[RS6000_BTI_MAX];
tree rs6000_builtin_decls[RS6000_BUILTIN_COUNT];

/* Flag to say the TOC is initialized */
int toc_initialized, need_toc_init;
char toc_label_name[10];

/* Cached value of rs6000_variable_issue.  This is cached in
   rs6000_variable_issue hook and returned from rs6000_sched_reorder2.  */
static short cached_can_issue_more;

static GTY(()) section *read_only_data_section;
static GTY(()) section *private_data_section;
static GTY(()) section *tls_data_section;
static GTY(()) section *tls_private_data_section;
static GTY(()) section *read_only_private_data_section;
static GTY(()) section *sdata2_section;
static GTY(()) section *toc_section;

struct builtin_description
{
  const HOST_WIDE_INT mask;
  const enum insn_code icode;
  const char *const name;
  const enum rs6000_builtins code;
};

/* Describe the vector unit used for modes.  */
enum rs6000_vector rs6000_vector_unit[NUM_MACHINE_MODES];
enum rs6000_vector rs6000_vector_mem[NUM_MACHINE_MODES];

/* Register classes for various constraints that are based on the target
   switches.  */
enum reg_class rs6000_constraints[RS6000_CONSTRAINT_MAX];

/* Describe the alignment of a vector.  */
int rs6000_vector_align[NUM_MACHINE_MODES];

/* Map selected modes to types for builtins.  */
static GTY(()) tree builtin_mode_to_type[MAX_MACHINE_MODE][2];

/* What modes to automatically generate reciprocal divide estimate (fre) and
   reciprocal sqrt (frsqrte) for.  */
unsigned char rs6000_recip_bits[MAX_MACHINE_MODE];

/* Masks to determine which reciprocal estimate instructions to generate
   automatically.  */
enum rs6000_recip_mask {
  RECIP_SF_DIV     = 0x001,   /* Use divide estimate */
  RECIP_DF_DIV     = 0x002,
  RECIP_V4SF_DIV   = 0x004,
  RECIP_V2DF_DIV   = 0x008,

  RECIP_SF_RSQRT   = 0x010,   /* Use reciprocal sqrt estimate.  */
  RECIP_DF_RSQRT   = 0x020,
  RECIP_V4SF_RSQRT = 0x040,
  RECIP_V2DF_RSQRT = 0x080,

  /* Various combination of flags for -mrecip=xxx.  */
  RECIP_NONE           = 0,
  RECIP_ALL            = (RECIP_SF_DIV | RECIP_DF_DIV | RECIP_V4SF_DIV
                          | RECIP_V2DF_DIV | RECIP_SF_RSQRT | RECIP_DF_RSQRT
                          | RECIP_V4SF_RSQRT | RECIP_V2DF_RSQRT),

  RECIP_HIGH_PRECISION = RECIP_ALL,

  /* On low precision machines like the power5, don't enable double precision
     reciprocal square root estimate, since it isn't accurate enough.  */
  RECIP_LOW_PRECISION  = (RECIP_ALL & ~(RECIP_DF_RSQRT | RECIP_V2DF_RSQRT))
};

/* -mrecip options.  */
static struct
{
  const char *string;   /* option name */
  unsigned int mask;    /* mask bits to set */
} recip_options[] = {
  { "all",    RECIP_ALL },
  { "none",   RECIP_NONE },
  { "div",    (RECIP_SF_DIV | RECIP_DF_DIV | RECIP_V4SF_DIV
               | RECIP_V2DF_DIV) },
  { "divf",   (RECIP_SF_DIV | RECIP_V4SF_DIV) },
  { "divd",   (RECIP_DF_DIV | RECIP_V2DF_DIV) },
  { "rsqrt",  (RECIP_SF_RSQRT | RECIP_DF_RSQRT | RECIP_V4SF_RSQRT
               | RECIP_V2DF_RSQRT) },
  { "rsqrtf", (RECIP_SF_RSQRT | RECIP_V4SF_RSQRT) },
  { "rsqrtd", (RECIP_DF_RSQRT | RECIP_V2DF_RSQRT) },
};

/* Used by __builtin_cpu_is(), mapping from PLATFORM names to values.  */
static const struct
{
  const char *cpu;
  unsigned int cpuid;
} cpu_is_info[] = {
  { "power9",      PPC_PLATFORM_POWER9 },
  { "power8",      PPC_PLATFORM_POWER8 },
  { "power7",      PPC_PLATFORM_POWER7 },
  { "power6x",     PPC_PLATFORM_POWER6X },
  { "power6",      PPC_PLATFORM_POWER6 },
  { "power5+",     PPC_PLATFORM_POWER5_PLUS },
  { "power5",      PPC_PLATFORM_POWER5 },
  { "ppc970",      PPC_PLATFORM_PPC970 },
  { "power4",      PPC_PLATFORM_POWER4 },
  { "ppca2",       PPC_PLATFORM_PPCA2 },
  { "ppc476",      PPC_PLATFORM_PPC476 },
  { "ppc464",      PPC_PLATFORM_PPC464 },
  { "ppc440",      PPC_PLATFORM_PPC440 },
  { "ppc405",      PPC_PLATFORM_PPC405 },
  { "ppc-cell-be", PPC_PLATFORM_CELL_BE }
};

/* Used by __builtin_cpu_supports(), mapping from HWCAP names to masks.  */
static const struct
{
  const char *hwcap;
  int mask;
  unsigned int id;
} cpu_supports_info[] = {
  /* AT_HWCAP masks.  */
  { "4xxmac",    PPC_FEATURE_HAS_4xxMAC,     0 },
  { "altivec",   PPC_FEATURE_HAS_ALTIVEC,    0 },
  { "arch_2_05", PPC_FEATURE_ARCH_2_05,      0 },
  { "arch_2_06", PPC_FEATURE_ARCH_2_06,      0 },
  { "archpmu",   PPC_FEATURE_PERFMON_COMPAT, 0 },
  { "booke",     PPC_FEATURE_BOOKE,          0 },
  { "cellbe",    PPC_FEATURE_CELL_BE,        0 },
  { "dfp",       PPC_FEATURE_HAS_DFP,        0 },
  { "efpdouble", PPC_FEATURE_HAS_EFP_DOUBLE, 0 },
  { "efpsingle", PPC_FEATURE_HAS_EFP_SINGLE, 0 },
  { "fpu",       PPC_FEATURE_HAS_FPU,        0 },
  { "ic_snoop",  PPC_FEATURE_ICACHE_SNOOP,   0 },
  { "mmu",       PPC_FEATURE_HAS_MMU,        0 },
  { "notb",      PPC_FEATURE_NO_TB,          0 },
  { "pa6t",      PPC_FEATURE_PA6T,           0 },
  { "power4",    PPC_FEATURE_POWER4,         0 },
  { "power5",    PPC_FEATURE_POWER5,         0 },
  { "power5+",   PPC_FEATURE_POWER5_PLUS,    0 },
  { "power6x",   PPC_FEATURE_POWER6_EXT,     0 },
  { "ppc32",     PPC_FEATURE_32,             0 },
  { "ppc601",    PPC_FEATURE_601_INSTR,      0 },
  { "ppc64",     PPC_FEATURE_64,             0 },
  { "ppcle",     PPC_FEATURE_PPC_LE,         0 },
  { "smt",       PPC_FEATURE_SMT,            0 },
  { "spe",       PPC_FEATURE_HAS_SPE,        0 },
  { "true_le",   PPC_FEATURE_TRUE_LE,        0 },
  { "ucache",    PPC_FEATURE_UNIFIED_CACHE,  0 },
  { "vsx",       PPC_FEATURE_HAS_VSX,        0 },

  /* AT_HWCAP2 masks.  */
  { "arch_2_07", PPC_FEATURE2_ARCH_2_07,      1 },
  { "dscr",      PPC_FEATURE2_HAS_DSCR,       1 },
  { "ebb",       PPC_FEATURE2_HAS_EBB,        1 },
  { "htm",       PPC_FEATURE2_HAS_HTM,        1 },
  { "htm-nosc",  PPC_FEATURE2_HTM_NOSC,       1 },
  { "isel",      PPC_FEATURE2_HAS_ISEL,       1 },
  { "tar",       PPC_FEATURE2_HAS_TAR,        1 },
  { "vcrypto",   PPC_FEATURE2_HAS_VEC_CRYPTO, 1 },
  { "arch_3_00", PPC_FEATURE2_ARCH_3_00,      1 },
  { "ieee128",   PPC_FEATURE2_HAS_IEEE128,    1 }
};

/* Newer LIBCs explicitly export this symbol to declare that they provide
   the AT_PLATFORM and AT_HWCAP/AT_HWCAP2 values in the TCB.  We emit a
   reference to this symbol whenever we expand a CPU builtin, so that
   we never link against an old LIBC.  */
const char *tcb_verification_symbol = "__parse_hwcap_and_convert_at_platform";

/* True if we have expanded a CPU builtin.  */
bool cpu_builtin_p;
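
/* Illustrative user-level usage (editor's sketch; hypothetical user code,
   not compiled as part of this file).  The two tables above back the
   __builtin_cpu_is and __builtin_cpu_supports builtins:

     if (__builtin_cpu_is ("power9") || __builtin_cpu_supports ("vsx"))
       ... select the VSX code path ...

   Expanding either builtin sets cpu_builtin_p and emits a reference to
   tcb_verification_symbol, so the resulting object refuses to link against
   a LIBC that does not populate the TCB fields.  */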

/* Pointer to function (in rs6000-c.c) that can define or undefine target
   macros that have changed.  Languages that don't support the preprocessor
   don't link in rs6000-c.c, so we can't call it directly.  */
void (*rs6000_target_modify_macros_ptr) (bool, HOST_WIDE_INT, HOST_WIDE_INT);

/* Simplify register classes into simpler classifications.  We assume
   GPR_REG_TYPE - FPR_REG_TYPE are ordered so that we can use a simple range
   check for standard register classes (gpr/floating/altivec/vsx) and
   floating/vector classes (float/altivec/vsx).  */

enum rs6000_reg_type {
  NO_REG_TYPE,
  PSEUDO_REG_TYPE,
  GPR_REG_TYPE,
  VSX_REG_TYPE,
  ALTIVEC_REG_TYPE,
  FPR_REG_TYPE,
  SPR_REG_TYPE,
  CR_REG_TYPE,
  SPE_ACC_TYPE,
  SPEFSCR_REG_TYPE
};

/* Map register class to register type.  */
static enum rs6000_reg_type reg_class_to_reg_type[N_REG_CLASSES];

/* First/last register type for the 'normal' register types (i.e. general
   purpose, floating point, altivec, and VSX registers).  */
#define IS_STD_REG_TYPE(RTYPE) IN_RANGE(RTYPE, GPR_REG_TYPE, FPR_REG_TYPE)

#define IS_FP_VECT_REG_TYPE(RTYPE) IN_RANGE(RTYPE, VSX_REG_TYPE, FPR_REG_TYPE)
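
/* Editor's example: with the enum order above,
   IS_FP_VECT_REG_TYPE (ALTIVEC_REG_TYPE) is true, while
   IS_FP_VECT_REG_TYPE (GPR_REG_TYPE) and IS_FP_VECT_REG_TYPE (SPR_REG_TYPE)
   are both false.  */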

/* Register classes we care about in secondary reload or when checking for a
   legitimate address.  We only need to worry about GPR, FPR, and Altivec
   registers here, along with an ANY field that is the OR of the 3 register
   classes.  */

enum rs6000_reload_reg_type {
  RELOAD_REG_GPR,   /* General purpose registers.  */
  RELOAD_REG_FPR,   /* Traditional floating point regs.  */
  RELOAD_REG_VMX,   /* Altivec (VMX) registers.  */
  RELOAD_REG_ANY,   /* OR of GPR, FPR, Altivec masks.  */
  N_RELOAD_REG
};

/* For setting up register classes, loop through the 3 register classes mapping
   into real registers, and skip the ANY class, which is just an OR of the
   bits.  */
#define FIRST_RELOAD_REG_CLASS RELOAD_REG_GPR
#define LAST_RELOAD_REG_CLASS  RELOAD_REG_VMX

/* Map reload register type to a register in the register class.  */
struct reload_reg_map_type {
  const char *name; /* Register class name.  */
  int reg;          /* Register in the register class.  */
};

static const struct reload_reg_map_type reload_reg_map[N_RELOAD_REG] = {
  { "Gpr", FIRST_GPR_REGNO },     /* RELOAD_REG_GPR.  */
  { "Fpr", FIRST_FPR_REGNO },     /* RELOAD_REG_FPR.  */
  { "VMX", FIRST_ALTIVEC_REGNO }, /* RELOAD_REG_VMX.  */
  { "Any", -1 },                  /* RELOAD_REG_ANY.  */
};

/* Mask bits for each register class, indexed per mode.  Historically the
   compiler has been more restrictive about which types can do PRE_MODIFY
   instead of PRE_INC and PRE_DEC, so keep track of separate bits for these
   two.  */
typedef unsigned char addr_mask_type;

#define RELOAD_REG_VALID       0x01  /* Mode valid in register.  */
#define RELOAD_REG_MULTIPLE    0x02  /* Mode takes multiple registers.  */
#define RELOAD_REG_INDEXED     0x04  /* Reg+reg addressing.  */
#define RELOAD_REG_OFFSET      0x08  /* Reg+offset addressing.  */
#define RELOAD_REG_PRE_INCDEC  0x10  /* PRE_INC/PRE_DEC valid.  */
#define RELOAD_REG_PRE_MODIFY  0x20  /* PRE_MODIFY valid.  */
#define RELOAD_REG_AND_M16     0x40  /* AND -16 addressing.  */
#define RELOAD_REG_QUAD_OFFSET 0x80  /* quad offset is limited.  */
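
/* Editor's example: a mode that is valid in a register class and supports
   both reg+reg and reg+offset addressing there would carry
   (RELOAD_REG_VALID | RELOAD_REG_INDEXED | RELOAD_REG_OFFSET) == 0x0d
   in the corresponding addr_mask entry.  */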

/* Masks of valid addressing modes, and reload insns, based on register
   type.  */
struct rs6000_reg_addr {
  enum insn_code reload_load;    /* INSN to reload for loading.  */
  enum insn_code reload_store;   /* INSN to reload for storing.  */
  enum insn_code reload_fpr_gpr; /* INSN to move from FPR to GPR.  */
  enum insn_code reload_gpr_vsx; /* INSN to move from GPR to VSX.  */
  enum insn_code reload_vsx_gpr; /* INSN to move from VSX to GPR.  */
  enum insn_code fusion_gpr_ld;  /* INSN for fusing gpr ADDIS/loads.  */
                                 /* INSNs for fusing addi with loads
                                    or stores for each reg. class.  */
  enum insn_code fusion_addi_ld[(int)N_RELOAD_REG];
  enum insn_code fusion_addi_st[(int)N_RELOAD_REG];
                                 /* INSNs for fusing addis with loads
                                    or stores for each reg. class.  */
  enum insn_code fusion_addis_ld[(int)N_RELOAD_REG];
  enum insn_code fusion_addis_st[(int)N_RELOAD_REG];
  addr_mask_type addr_mask[(int)N_RELOAD_REG]; /* Valid address masks.  */
  bool scalar_in_vmx_p;          /* Scalar value can go in VMX.  */
  bool fused_toc;                /* Mode supports TOC fusion.  */
};

static struct rs6000_reg_addr reg_addr[NUM_MACHINE_MODES];

/* Helper function to say whether a mode supports PRE_INC or PRE_DEC.  */
static inline bool
mode_supports_pre_incdec_p (machine_mode mode)
{
  return ((reg_addr[mode].addr_mask[RELOAD_REG_ANY] & RELOAD_REG_PRE_INCDEC)
          != 0);
}

/* Helper function to say whether a mode supports PRE_MODIFY.  */
static inline bool
mode_supports_pre_modify_p (machine_mode mode)
{
  return ((reg_addr[mode].addr_mask[RELOAD_REG_ANY] & RELOAD_REG_PRE_MODIFY)
          != 0);
}

/* Return true if we have D-form addressing in altivec registers.  */
static inline bool
mode_supports_vmx_dform (machine_mode mode)
{
  return ((reg_addr[mode].addr_mask[RELOAD_REG_VMX] & RELOAD_REG_OFFSET) != 0);
}

/* Return true if we have D-form addressing in VSX registers.  This addressing
   is more limited than normal d-form addressing in that the offset must be
   aligned on a 16-byte boundary.  */
static inline bool
mode_supports_vsx_dform_quad (machine_mode mode)
{
  return ((reg_addr[mode].addr_mask[RELOAD_REG_ANY] & RELOAD_REG_QUAD_OFFSET)
          != 0);
}
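
/* Editor's sketch (illustrative only, not used by the port): the addr_mask
   bits compose naturally, so a combined query for either flavor of
   pre-update addressing can be written just like the helpers above.  */
static inline bool
mode_supports_any_pre_update_p (machine_mode mode)
{
  return ((reg_addr[mode].addr_mask[RELOAD_REG_ANY]
           & (RELOAD_REG_PRE_INCDEC | RELOAD_REG_PRE_MODIFY)) != 0);
}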

/* Target cpu costs.  */

struct processor_costs {
  const int mulsi;        /* cost of SImode multiplication.  */
  const int mulsi_const;  /* cost of SImode multiplication by constant.  */
  const int mulsi_const9; /* cost of SImode mult by short constant.  */
  const int muldi;        /* cost of DImode multiplication.  */
  const int divsi;        /* cost of SImode division.  */
  const int divdi;        /* cost of DImode division.  */
  const int fp;           /* cost of simple SFmode and DFmode insns.  */
  const int dmul;         /* cost of DFmode multiplication (and fmadd).  */
  const int sdiv;         /* cost of SFmode division (fdivs).  */
  const int ddiv;         /* cost of DFmode division (fdiv).  */
  const int cache_line_size;    /* cache line size in bytes.  */
  const int l1_cache_size;      /* size of l1 cache, in kilobytes.  */
  const int l2_cache_size;      /* size of l2 cache, in kilobytes.  */
  const int simultaneous_prefetches; /* number of parallel prefetch
                                        operations.  */
  const int sfdf_convert;       /* cost of SF->DF conversion.  */
};

const struct processor_costs *rs6000_cost;
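
/* Editor's note: COSTS_N_INSNS (from rtl.h) scales an instruction count into
   the units used by the RTX cost hooks, so the tables below read as
   add-equivalents; a table whose sdiv field is COSTS_N_INSNS (17) models
   SFmode division as costing the same as 17 integer adds.  */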

/* Processor costs (relative to an add) */

/* Instruction size costs on 32bit processors.  */
static const
struct processor_costs size32_cost = {
  COSTS_N_INSNS (1),    /* mulsi */
  COSTS_N_INSNS (1),    /* mulsi_const */
  COSTS_N_INSNS (1),    /* mulsi_const9 */
  COSTS_N_INSNS (1),    /* muldi */
  COSTS_N_INSNS (1),    /* divsi */
  COSTS_N_INSNS (1),    /* divdi */
  COSTS_N_INSNS (1),    /* fp */
  COSTS_N_INSNS (1),    /* dmul */
  COSTS_N_INSNS (1),    /* sdiv */
  COSTS_N_INSNS (1),    /* ddiv */
  32,                   /* cache line size */
  0,                    /* l1 cache */
  0,                    /* l2 cache */
  0,                    /* streams */
  0,                    /* SF->DF convert */
};

/* Instruction size costs on 64bit processors.  */
static const
struct processor_costs size64_cost = {
  COSTS_N_INSNS (1),    /* mulsi */
  COSTS_N_INSNS (1),    /* mulsi_const */
  COSTS_N_INSNS (1),    /* mulsi_const9 */
  COSTS_N_INSNS (1),    /* muldi */
  COSTS_N_INSNS (1),    /* divsi */
  COSTS_N_INSNS (1),    /* divdi */
  COSTS_N_INSNS (1),    /* fp */
  COSTS_N_INSNS (1),    /* dmul */
  COSTS_N_INSNS (1),    /* sdiv */
  COSTS_N_INSNS (1),    /* ddiv */
  128,                  /* cache line size */
  0,                    /* l1 cache */
  0,                    /* l2 cache */
  0,                    /* streams */
  0,                    /* SF->DF convert */
};

/* Instruction costs on RS64A processors.  */
static const
struct processor_costs rs64a_cost = {
  COSTS_N_INSNS (20),   /* mulsi */
  COSTS_N_INSNS (12),   /* mulsi_const */
  COSTS_N_INSNS (8),    /* mulsi_const9 */
  COSTS_N_INSNS (34),   /* muldi */
  COSTS_N_INSNS (65),   /* divsi */
  COSTS_N_INSNS (67),   /* divdi */
  COSTS_N_INSNS (4),    /* fp */
  COSTS_N_INSNS (4),    /* dmul */
  COSTS_N_INSNS (31),   /* sdiv */
  COSTS_N_INSNS (31),   /* ddiv */
  128,                  /* cache line size */
  128,                  /* l1 cache */
  2048,                 /* l2 cache */
  1,                    /* streams */
  0,                    /* SF->DF convert */
};

/* Instruction costs on MPCCORE processors.  */
static const
struct processor_costs mpccore_cost = {
  COSTS_N_INSNS (2),    /* mulsi */
  COSTS_N_INSNS (2),    /* mulsi_const */
  COSTS_N_INSNS (2),    /* mulsi_const9 */
  COSTS_N_INSNS (2),    /* muldi */
  COSTS_N_INSNS (6),    /* divsi */
  COSTS_N_INSNS (6),    /* divdi */
  COSTS_N_INSNS (4),    /* fp */
  COSTS_N_INSNS (5),    /* dmul */
  COSTS_N_INSNS (10),   /* sdiv */
  COSTS_N_INSNS (17),   /* ddiv */
  32,                   /* cache line size */
  4,                    /* l1 cache */
  16,                   /* l2 cache */
  1,                    /* streams */
  0,                    /* SF->DF convert */
};

/* Instruction costs on PPC403 processors.  */
static const
struct processor_costs ppc403_cost = {
  COSTS_N_INSNS (4),    /* mulsi */
  COSTS_N_INSNS (4),    /* mulsi_const */
  COSTS_N_INSNS (4),    /* mulsi_const9 */
  COSTS_N_INSNS (4),    /* muldi */
  COSTS_N_INSNS (33),   /* divsi */
  COSTS_N_INSNS (33),   /* divdi */
  COSTS_N_INSNS (11),   /* fp */
  COSTS_N_INSNS (11),   /* dmul */
  COSTS_N_INSNS (11),   /* sdiv */
  COSTS_N_INSNS (11),   /* ddiv */
  32,                   /* cache line size */
  4,                    /* l1 cache */
  16,                   /* l2 cache */
  1,                    /* streams */
  0,                    /* SF->DF convert */
};

/* Instruction costs on PPC405 processors.  */
static const
struct processor_costs ppc405_cost = {
  COSTS_N_INSNS (5),    /* mulsi */
  COSTS_N_INSNS (4),    /* mulsi_const */
  COSTS_N_INSNS (3),    /* mulsi_const9 */
  COSTS_N_INSNS (5),    /* muldi */
  COSTS_N_INSNS (35),   /* divsi */
  COSTS_N_INSNS (35),   /* divdi */
  COSTS_N_INSNS (11),   /* fp */
  COSTS_N_INSNS (11),   /* dmul */
  COSTS_N_INSNS (11),   /* sdiv */
  COSTS_N_INSNS (11),   /* ddiv */
  32,                   /* cache line size */
  16,                   /* l1 cache */
  128,                  /* l2 cache */
  1,                    /* streams */
  0,                    /* SF->DF convert */
};

/* Instruction costs on PPC440 processors.  */
static const
struct processor_costs ppc440_cost = {
  COSTS_N_INSNS (3),    /* mulsi */
  COSTS_N_INSNS (2),    /* mulsi_const */
  COSTS_N_INSNS (2),    /* mulsi_const9 */
  COSTS_N_INSNS (3),    /* muldi */
  COSTS_N_INSNS (34),   /* divsi */
  COSTS_N_INSNS (34),   /* divdi */
  COSTS_N_INSNS (5),    /* fp */
  COSTS_N_INSNS (5),    /* dmul */
  COSTS_N_INSNS (19),   /* sdiv */
  COSTS_N_INSNS (33),   /* ddiv */
  32,                   /* cache line size */
  32,                   /* l1 cache */
  256,                  /* l2 cache */
  1,                    /* streams */
  0,                    /* SF->DF convert */
};

/* Instruction costs on PPC476 processors.  */
static const
struct processor_costs ppc476_cost = {
  COSTS_N_INSNS (4),    /* mulsi */
  COSTS_N_INSNS (4),    /* mulsi_const */
  COSTS_N_INSNS (4),    /* mulsi_const9 */
  COSTS_N_INSNS (4),    /* muldi */
  COSTS_N_INSNS (11),   /* divsi */
  COSTS_N_INSNS (11),   /* divdi */
  COSTS_N_INSNS (6),    /* fp */
  COSTS_N_INSNS (6),    /* dmul */
  COSTS_N_INSNS (19),   /* sdiv */
  COSTS_N_INSNS (33),   /* ddiv */
  32,                   /* l1 cache line size */
  32,                   /* l1 cache */
  512,                  /* l2 cache */
  1,                    /* streams */
  0,                    /* SF->DF convert */
};

/* Instruction costs on PPC601 processors.  */
static const
struct processor_costs ppc601_cost = {
  COSTS_N_INSNS (5),    /* mulsi */
  COSTS_N_INSNS (5),    /* mulsi_const */
  COSTS_N_INSNS (5),    /* mulsi_const9 */
  COSTS_N_INSNS (5),    /* muldi */
  COSTS_N_INSNS (36),   /* divsi */
  COSTS_N_INSNS (36),   /* divdi */
  COSTS_N_INSNS (4),    /* fp */
  COSTS_N_INSNS (5),    /* dmul */
  COSTS_N_INSNS (17),   /* sdiv */
  COSTS_N_INSNS (31),   /* ddiv */
  32,                   /* cache line size */
  32,                   /* l1 cache */
  256,                  /* l2 cache */
  1,                    /* streams */
  0,                    /* SF->DF convert */
};

/* Instruction costs on PPC603 processors.  */
static const
struct processor_costs ppc603_cost = {
  COSTS_N_INSNS (5),    /* mulsi */
  COSTS_N_INSNS (3),    /* mulsi_const */
  COSTS_N_INSNS (2),    /* mulsi_const9 */
  COSTS_N_INSNS (5),    /* muldi */
  COSTS_N_INSNS (37),   /* divsi */
  COSTS_N_INSNS (37),   /* divdi */
  COSTS_N_INSNS (3),    /* fp */
  COSTS_N_INSNS (4),    /* dmul */
  COSTS_N_INSNS (18),   /* sdiv */
  COSTS_N_INSNS (33),   /* ddiv */
  32,                   /* cache line size */
  8,                    /* l1 cache */
  64,                   /* l2 cache */
  1,                    /* streams */
  0,                    /* SF->DF convert */
};

/* Instruction costs on PPC604 processors.  */
static const
struct processor_costs ppc604_cost = {
  COSTS_N_INSNS (4),    /* mulsi */
  COSTS_N_INSNS (4),    /* mulsi_const */
  COSTS_N_INSNS (4),    /* mulsi_const9 */
  COSTS_N_INSNS (4),    /* muldi */
  COSTS_N_INSNS (20),   /* divsi */
  COSTS_N_INSNS (20),   /* divdi */
  COSTS_N_INSNS (3),    /* fp */
  COSTS_N_INSNS (3),    /* dmul */
  COSTS_N_INSNS (18),   /* sdiv */
  COSTS_N_INSNS (32),   /* ddiv */
  32,                   /* cache line size */
  16,                   /* l1 cache */
  512,                  /* l2 cache */
  1,                    /* streams */
  0,                    /* SF->DF convert */
};

/* Instruction costs on PPC604e processors.  */
static const
struct processor_costs ppc604e_cost = {
  COSTS_N_INSNS (2),    /* mulsi */
  COSTS_N_INSNS (2),    /* mulsi_const */
  COSTS_N_INSNS (2),    /* mulsi_const9 */
  COSTS_N_INSNS (2),    /* muldi */
  COSTS_N_INSNS (20),   /* divsi */
  COSTS_N_INSNS (20),   /* divdi */
  COSTS_N_INSNS (3),    /* fp */
  COSTS_N_INSNS (3),    /* dmul */
  COSTS_N_INSNS (18),   /* sdiv */
  COSTS_N_INSNS (32),   /* ddiv */
  32,                   /* cache line size */
  32,                   /* l1 cache */
  1024,                 /* l2 cache */
  1,                    /* streams */
  0,                    /* SF->DF convert */
};

/* Instruction costs on PPC620 processors.  */
static const
struct processor_costs ppc620_cost = {
  COSTS_N_INSNS (5),    /* mulsi */
  COSTS_N_INSNS (4),    /* mulsi_const */
  COSTS_N_INSNS (3),    /* mulsi_const9 */
  COSTS_N_INSNS (7),    /* muldi */
  COSTS_N_INSNS (21),   /* divsi */
  COSTS_N_INSNS (37),   /* divdi */
  COSTS_N_INSNS (3),    /* fp */
  COSTS_N_INSNS (3),    /* dmul */
  COSTS_N_INSNS (18),   /* sdiv */
  COSTS_N_INSNS (32),   /* ddiv */
  128,                  /* cache line size */
  32,                   /* l1 cache */
  1024,                 /* l2 cache */
  1,                    /* streams */
  0,                    /* SF->DF convert */
};

/* Instruction costs on PPC630 processors.  */
static const
struct processor_costs ppc630_cost = {
  COSTS_N_INSNS (5),    /* mulsi */
  COSTS_N_INSNS (4),    /* mulsi_const */
  COSTS_N_INSNS (3),    /* mulsi_const9 */
  COSTS_N_INSNS (7),    /* muldi */
  COSTS_N_INSNS (21),   /* divsi */
  COSTS_N_INSNS (37),   /* divdi */
  COSTS_N_INSNS (3),    /* fp */
  COSTS_N_INSNS (3),    /* dmul */
  COSTS_N_INSNS (17),   /* sdiv */
  COSTS_N_INSNS (21),   /* ddiv */
  128,                  /* cache line size */
  64,                   /* l1 cache */
  1024,                 /* l2 cache */
  1,                    /* streams */
  0,                    /* SF->DF convert */
};

/* Instruction costs on Cell processor.  */
/* COSTS_N_INSNS (1) ~ one add.  */
static const
struct processor_costs ppccell_cost = {
  COSTS_N_INSNS (9/2)+2,    /* mulsi */
  COSTS_N_INSNS (6/2),      /* mulsi_const */
  COSTS_N_INSNS (6/2),      /* mulsi_const9 */
  COSTS_N_INSNS (15/2)+2,   /* muldi */
  COSTS_N_INSNS (38/2),     /* divsi */
  COSTS_N_INSNS (70/2),     /* divdi */
  COSTS_N_INSNS (10/2),     /* fp */
  COSTS_N_INSNS (10/2),     /* dmul */
  COSTS_N_INSNS (74/2),     /* sdiv */
  COSTS_N_INSNS (74/2),     /* ddiv */
  128,                      /* cache line size */
  32,                       /* l1 cache */
  512,                      /* l2 cache */
  6,                        /* streams */
  0,                        /* SF->DF convert */
};

/* Instruction costs on PPC750 and PPC7400 processors.  */
static const
struct processor_costs ppc750_cost = {
  COSTS_N_INSNS (5),    /* mulsi */
  COSTS_N_INSNS (3),    /* mulsi_const */
  COSTS_N_INSNS (2),    /* mulsi_const9 */
  COSTS_N_INSNS (5),    /* muldi */
  COSTS_N_INSNS (17),   /* divsi */
  COSTS_N_INSNS (17),   /* divdi */
  COSTS_N_INSNS (3),    /* fp */
  COSTS_N_INSNS (3),    /* dmul */
  COSTS_N_INSNS (17),   /* sdiv */
  COSTS_N_INSNS (31),   /* ddiv */
  32,                   /* cache line size */
  32,                   /* l1 cache */
  512,                  /* l2 cache */
  1,                    /* streams */
  0,                    /* SF->DF convert */
};

/* Instruction costs on PPC7450 processors.  */
static const
struct processor_costs ppc7450_cost = {
  COSTS_N_INSNS (4),    /* mulsi */
  COSTS_N_INSNS (3),    /* mulsi_const */
  COSTS_N_INSNS (3),    /* mulsi_const9 */
  COSTS_N_INSNS (4),    /* muldi */
  COSTS_N_INSNS (23),   /* divsi */
  COSTS_N_INSNS (23),   /* divdi */
  COSTS_N_INSNS (5),    /* fp */
  COSTS_N_INSNS (5),    /* dmul */
  COSTS_N_INSNS (21),   /* sdiv */
  COSTS_N_INSNS (35),   /* ddiv */
  32,                   /* cache line size */
  32,                   /* l1 cache */
  1024,                 /* l2 cache */
  1,                    /* streams */
  0,                    /* SF->DF convert */
};

/* Instruction costs on PPC8540 processors.  */
static const
struct processor_costs ppc8540_cost = {
  COSTS_N_INSNS (4),    /* mulsi */
  COSTS_N_INSNS (4),    /* mulsi_const */
  COSTS_N_INSNS (4),    /* mulsi_const9 */
  COSTS_N_INSNS (4),    /* muldi */
  COSTS_N_INSNS (19),   /* divsi */
  COSTS_N_INSNS (19),   /* divdi */
  COSTS_N_INSNS (4),    /* fp */
  COSTS_N_INSNS (4),    /* dmul */
  COSTS_N_INSNS (29),   /* sdiv */
  COSTS_N_INSNS (29),   /* ddiv */
  32,                   /* cache line size */
  32,                   /* l1 cache */
  256,                  /* l2 cache */
  1,                    /* prefetch streams */
  0,                    /* SF->DF convert */
};

/* Instruction costs on E300C2 and E300C3 cores.  */
static const
struct processor_costs ppce300c2c3_cost = {
  COSTS_N_INSNS (4),    /* mulsi */
  COSTS_N_INSNS (4),    /* mulsi_const */
  COSTS_N_INSNS (4),    /* mulsi_const9 */
  COSTS_N_INSNS (4),    /* muldi */
  COSTS_N_INSNS (19),   /* divsi */
  COSTS_N_INSNS (19),   /* divdi */
  COSTS_N_INSNS (3),    /* fp */
  COSTS_N_INSNS (4),    /* dmul */
  COSTS_N_INSNS (18),   /* sdiv */
  COSTS_N_INSNS (33),   /* ddiv */
  32,                   /* cache line size */
  16,                   /* l1 cache */
  16,                   /* l2 cache */
  1,                    /* prefetch streams */
  0,                    /* SF->DF convert */
};

/* Instruction costs on PPCE500MC processors.  */
static const
struct processor_costs ppce500mc_cost = {
  COSTS_N_INSNS (4),    /* mulsi */
  COSTS_N_INSNS (4),    /* mulsi_const */
  COSTS_N_INSNS (4),    /* mulsi_const9 */
  COSTS_N_INSNS (4),    /* muldi */
  COSTS_N_INSNS (14),   /* divsi */
  COSTS_N_INSNS (14),   /* divdi */
  COSTS_N_INSNS (8),    /* fp */
  COSTS_N_INSNS (10),   /* dmul */
  COSTS_N_INSNS (36),   /* sdiv */
  COSTS_N_INSNS (66),   /* ddiv */
  64,                   /* cache line size */
  32,                   /* l1 cache */
  128,                  /* l2 cache */
  1,                    /* prefetch streams */
  0,                    /* SF->DF convert */
};

/* Instruction costs on PPCE500MC64 processors.  */
static const
struct processor_costs ppce500mc64_cost = {
  COSTS_N_INSNS (4),    /* mulsi */
  COSTS_N_INSNS (4),    /* mulsi_const */
  COSTS_N_INSNS (4),    /* mulsi_const9 */
  COSTS_N_INSNS (4),    /* muldi */
  COSTS_N_INSNS (14),   /* divsi */
  COSTS_N_INSNS (14),   /* divdi */
  COSTS_N_INSNS (4),    /* fp */
  COSTS_N_INSNS (10),   /* dmul */
  COSTS_N_INSNS (36),   /* sdiv */
  COSTS_N_INSNS (66),   /* ddiv */
  64,                   /* cache line size */
  32,                   /* l1 cache */
  128,                  /* l2 cache */
  1,                    /* prefetch streams */
  0,                    /* SF->DF convert */
};

/* Instruction costs on PPCE5500 processors.  */
static const
struct processor_costs ppce5500_cost = {
  COSTS_N_INSNS (5),    /* mulsi */
  COSTS_N_INSNS (5),    /* mulsi_const */
  COSTS_N_INSNS (4),    /* mulsi_const9 */
  COSTS_N_INSNS (5),    /* muldi */
  COSTS_N_INSNS (14),   /* divsi */
  COSTS_N_INSNS (14),   /* divdi */
  COSTS_N_INSNS (7),    /* fp */
  COSTS_N_INSNS (10),   /* dmul */
  COSTS_N_INSNS (36),   /* sdiv */
  COSTS_N_INSNS (66),   /* ddiv */
  64,                   /* cache line size */
  32,                   /* l1 cache */
  128,                  /* l2 cache */
  1,                    /* prefetch streams */
  0,                    /* SF->DF convert */
};

/* Instruction costs on PPCE6500 processors.  */
static const
struct processor_costs ppce6500_cost = {
  COSTS_N_INSNS (5),    /* mulsi */
  COSTS_N_INSNS (5),    /* mulsi_const */
  COSTS_N_INSNS (4),    /* mulsi_const9 */
  COSTS_N_INSNS (5),    /* muldi */
  COSTS_N_INSNS (14),   /* divsi */
  COSTS_N_INSNS (14),   /* divdi */
  COSTS_N_INSNS (7),    /* fp */
  COSTS_N_INSNS (10),   /* dmul */
  COSTS_N_INSNS (36),   /* sdiv */
  COSTS_N_INSNS (66),   /* ddiv */
  64,                   /* cache line size */
  32,                   /* l1 cache */
  128,                  /* l2 cache */
  1,                    /* prefetch streams */
  0,                    /* SF->DF convert */
};

/* Instruction costs on AppliedMicro Titan processors.  */
static const
struct processor_costs titan_cost = {
  COSTS_N_INSNS (5),    /* mulsi */
  COSTS_N_INSNS (5),    /* mulsi_const */
  COSTS_N_INSNS (5),    /* mulsi_const9 */
  COSTS_N_INSNS (5),    /* muldi */
  COSTS_N_INSNS (18),   /* divsi */
  COSTS_N_INSNS (18),   /* divdi */
  COSTS_N_INSNS (10),   /* fp */
  COSTS_N_INSNS (10),   /* dmul */
  COSTS_N_INSNS (46),   /* sdiv */
  COSTS_N_INSNS (72),   /* ddiv */
  32,                   /* cache line size */
  32,                   /* l1 cache */
  512,                  /* l2 cache */
  1,                    /* prefetch streams */
  0,                    /* SF->DF convert */
};

/* Instruction costs on POWER4 and POWER5 processors.  */
static const
struct processor_costs power4_cost = {
  COSTS_N_INSNS (3),    /* mulsi */
  COSTS_N_INSNS (2),    /* mulsi_const */
  COSTS_N_INSNS (2),    /* mulsi_const9 */
  COSTS_N_INSNS (4),    /* muldi */
  COSTS_N_INSNS (18),   /* divsi */
  COSTS_N_INSNS (34),   /* divdi */
  COSTS_N_INSNS (3),    /* fp */
  COSTS_N_INSNS (3),    /* dmul */
  COSTS_N_INSNS (17),   /* sdiv */
  COSTS_N_INSNS (17),   /* ddiv */
  128,                  /* cache line size */
  32,                   /* l1 cache */
  1024,                 /* l2 cache */
  8,                    /* prefetch streams */
  0,                    /* SF->DF convert */
};

/* Instruction costs on POWER6 processors.  */
static const
struct processor_costs power6_cost = {
  COSTS_N_INSNS (8),    /* mulsi */
  COSTS_N_INSNS (8),    /* mulsi_const */
  COSTS_N_INSNS (8),    /* mulsi_const9 */
  COSTS_N_INSNS (8),    /* muldi */
  COSTS_N_INSNS (22),   /* divsi */
  COSTS_N_INSNS (28),   /* divdi */
  COSTS_N_INSNS (3),    /* fp */
  COSTS_N_INSNS (3),    /* dmul */
  COSTS_N_INSNS (13),   /* sdiv */
  COSTS_N_INSNS (16),   /* ddiv */
  128,                  /* cache line size */
  64,                   /* l1 cache */
  2048,                 /* l2 cache */
  16,                   /* prefetch streams */
  0,                    /* SF->DF convert */
};

/* Instruction costs on POWER7 processors.  */
static const
struct processor_costs power7_cost = {
  COSTS_N_INSNS (2),    /* mulsi */
  COSTS_N_INSNS (2),    /* mulsi_const */
  COSTS_N_INSNS (2),    /* mulsi_const9 */
  COSTS_N_INSNS (2),    /* muldi */
  COSTS_N_INSNS (18),   /* divsi */
  COSTS_N_INSNS (34),   /* divdi */
  COSTS_N_INSNS (3),    /* fp */
  COSTS_N_INSNS (3),    /* dmul */
  COSTS_N_INSNS (13),   /* sdiv */
  COSTS_N_INSNS (16),   /* ddiv */
  128,                  /* cache line size */
  32,                   /* l1 cache */
  256,                  /* l2 cache */
  12,                   /* prefetch streams */
  COSTS_N_INSNS (3),    /* SF->DF convert */
};

/* Instruction costs on POWER8 processors.  */
static const
struct processor_costs power8_cost = {
  COSTS_N_INSNS (3),    /* mulsi */
  COSTS_N_INSNS (3),    /* mulsi_const */
  COSTS_N_INSNS (3),    /* mulsi_const9 */
  COSTS_N_INSNS (3),    /* muldi */
  COSTS_N_INSNS (19),   /* divsi */
  COSTS_N_INSNS (35),   /* divdi */
  COSTS_N_INSNS (3),    /* fp */
  COSTS_N_INSNS (3),    /* dmul */
  COSTS_N_INSNS (14),   /* sdiv */
  COSTS_N_INSNS (17),   /* ddiv */
  128,                  /* cache line size */
  32,                   /* l1 cache */
  256,                  /* l2 cache */
  12,                   /* prefetch streams */
  COSTS_N_INSNS (3),    /* SF->DF convert */
};

/* Instruction costs on POWER9 processors.  */
static const
struct processor_costs power9_cost = {
  COSTS_N_INSNS (3),    /* mulsi */
  COSTS_N_INSNS (3),    /* mulsi_const */
  COSTS_N_INSNS (3),    /* mulsi_const9 */
  COSTS_N_INSNS (3),    /* muldi */
  COSTS_N_INSNS (8),    /* divsi */
  COSTS_N_INSNS (12),   /* divdi */
  COSTS_N_INSNS (3),    /* fp */
  COSTS_N_INSNS (3),    /* dmul */
  COSTS_N_INSNS (13),   /* sdiv */
  COSTS_N_INSNS (18),   /* ddiv */
  128,                  /* cache line size */
  32,                   /* l1 cache */
  512,                  /* l2 cache */
  8,                    /* prefetch streams */
  COSTS_N_INSNS (3),    /* SF->DF convert */
};

/* Instruction costs on POWER A2 processors.  */
static const
struct processor_costs ppca2_cost = {
  COSTS_N_INSNS (16),   /* mulsi */
  COSTS_N_INSNS (16),   /* mulsi_const */
  COSTS_N_INSNS (16),   /* mulsi_const9 */
  COSTS_N_INSNS (16),   /* muldi */
  COSTS_N_INSNS (22),   /* divsi */
  COSTS_N_INSNS (28),   /* divdi */
  COSTS_N_INSNS (3),    /* fp */
  COSTS_N_INSNS (3),    /* dmul */
  COSTS_N_INSNS (59),   /* sdiv */
  COSTS_N_INSNS (72),   /* ddiv */
  64,                   /* cache line size */
  16,                   /* l1 cache */
  2048,                 /* l2 cache */
  16,                   /* prefetch streams */
  0,                    /* SF->DF convert */
};

/* Table that classifies rs6000 builtin functions (pure, const, etc.).  */
#undef RS6000_BUILTIN_0
#undef RS6000_BUILTIN_1
#undef RS6000_BUILTIN_2
#undef RS6000_BUILTIN_3
#undef RS6000_BUILTIN_A
#undef RS6000_BUILTIN_D
#undef RS6000_BUILTIN_E
#undef RS6000_BUILTIN_H
#undef RS6000_BUILTIN_P
#undef RS6000_BUILTIN_Q
#undef RS6000_BUILTIN_S
#undef RS6000_BUILTIN_X

#define RS6000_BUILTIN_0(ENUM, NAME, MASK, ATTR, ICODE) \
  { NAME, ICODE, MASK, ATTR },

#define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE) \
  { NAME, ICODE, MASK, ATTR },

#define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE) \
  { NAME, ICODE, MASK, ATTR },

#define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE) \
  { NAME, ICODE, MASK, ATTR },

#define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE) \
  { NAME, ICODE, MASK, ATTR },

#define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE) \
  { NAME, ICODE, MASK, ATTR },

#define RS6000_BUILTIN_E(ENUM, NAME, MASK, ATTR, ICODE) \
  { NAME, ICODE, MASK, ATTR },

#define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE) \
  { NAME, ICODE, MASK, ATTR },

#define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE) \
  { NAME, ICODE, MASK, ATTR },

#define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE) \
  { NAME, ICODE, MASK, ATTR },

#define RS6000_BUILTIN_S(ENUM, NAME, MASK, ATTR, ICODE) \
  { NAME, ICODE, MASK, ATTR },

#define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE) \
  { NAME, ICODE, MASK, ATTR },

struct rs6000_builtin_info_type {
  const char *name;
  const enum insn_code icode;
  const HOST_WIDE_INT mask;
  const unsigned attr;
};

static const struct rs6000_builtin_info_type rs6000_builtin_info[] =
{
#include "rs6000-builtin.def"
};

#undef RS6000_BUILTIN_0
#undef RS6000_BUILTIN_1
#undef RS6000_BUILTIN_2
#undef RS6000_BUILTIN_3
#undef RS6000_BUILTIN_A
#undef RS6000_BUILTIN_D
#undef RS6000_BUILTIN_E
#undef RS6000_BUILTIN_H
#undef RS6000_BUILTIN_P
#undef RS6000_BUILTIN_Q
#undef RS6000_BUILTIN_S
#undef RS6000_BUILTIN_X
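
/* Editor's example of the X-macro pattern above (hypothetical entry, for
   illustration only): a rs6000-builtin.def line such as

     RS6000_BUILTIN_2 (ALTIVEC_BUILTIN_VADDUBM, "__builtin_altivec_vaddubm",
                       RS6000_BTM_ALTIVEC, RS6000_BTC_BINARY,
                       CODE_FOR_addv16qi3)

   would contribute { "__builtin_altivec_vaddubm", CODE_FOR_addv16qi3,
   RS6000_BTM_ALTIVEC, RS6000_BTC_BINARY } to rs6000_builtin_info, matching
   the { NAME, ICODE, MASK, ATTR } field order of the struct.  */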

/* Support for -mveclibabi=<xxx> to control which vector library to use.  */
static tree (*rs6000_veclib_handler) (combined_fn, tree, tree);

static bool rs6000_debug_legitimate_address_p (machine_mode, rtx, bool);
static bool spe_func_has_64bit_regs_p (void);
static struct machine_function * rs6000_init_machine_status (void);
static int rs6000_ra_ever_killed (void);
static tree rs6000_handle_longcall_attribute (tree *, tree, tree, int, bool *);
static tree rs6000_handle_altivec_attribute (tree *, tree, tree, int, bool *);
static tree rs6000_handle_struct_attribute (tree *, tree, tree, int, bool *);
static tree rs6000_builtin_vectorized_libmass (combined_fn, tree, tree);
static void rs6000_emit_set_long_const (rtx, HOST_WIDE_INT);
static int rs6000_memory_move_cost (machine_mode, reg_class_t, bool);
static bool rs6000_debug_rtx_costs (rtx, machine_mode, int, int, int *, bool);
static int rs6000_debug_address_cost (rtx, machine_mode, addr_space_t, bool);
static int rs6000_debug_adjust_cost (rtx_insn *, int, rtx_insn *, int,
                                     unsigned int);
static bool is_microcoded_insn (rtx_insn *);
static bool is_nonpipeline_insn (rtx_insn *);
static bool is_cracked_insn (rtx_insn *);
static bool is_load_insn (rtx, rtx *);
static bool is_store_insn (rtx, rtx *);
static bool set_to_load_agen (rtx_insn *, rtx_insn *);
static bool insn_terminates_group_p (rtx_insn *, enum group_termination);
static bool insn_must_be_first_in_group (rtx_insn *);
static bool insn_must_be_last_in_group (rtx_insn *);
static void altivec_init_builtins (void);
static tree builtin_function_type (machine_mode, machine_mode,
                                   machine_mode, machine_mode,
                                   enum rs6000_builtins, const char *name);
static void rs6000_common_init_builtins (void);
static void paired_init_builtins (void);
static rtx paired_expand_predicate_builtin (enum insn_code, tree, rtx);
static void spe_init_builtins (void);
static void htm_init_builtins (void);
static rtx spe_expand_predicate_builtin (enum insn_code, tree, rtx);
static rtx spe_expand_evsel_builtin (enum insn_code, tree, rtx);
static int rs6000_emit_int_cmove (rtx, rtx, rtx, rtx);
static rs6000_stack_t *rs6000_stack_info (void);
static void is_altivec_return_reg (rtx, void *);
int easy_vector_constant (rtx, machine_mode);
static rtx rs6000_debug_legitimize_address (rtx, rtx, machine_mode);
static rtx rs6000_legitimize_tls_address (rtx, enum tls_model);
static rtx rs6000_darwin64_record_arg (CUMULATIVE_ARGS *, const_tree,
                                       bool, bool);
#if TARGET_MACHO
static void macho_branch_islands (void);
#endif
static rtx rs6000_legitimize_reload_address (rtx, machine_mode, int, int,
                                             int, int *);
static rtx rs6000_debug_legitimize_reload_address (rtx, machine_mode, int,
                                                   int, int, int *);
static bool rs6000_mode_dependent_address (const_rtx);
static bool rs6000_debug_mode_dependent_address (const_rtx);
static enum reg_class rs6000_secondary_reload_class (enum reg_class,
                                                     machine_mode, rtx);
static enum reg_class rs6000_debug_secondary_reload_class (enum reg_class,
                                                           machine_mode,
                                                           rtx);
static enum reg_class rs6000_preferred_reload_class (rtx, enum reg_class);
static enum reg_class rs6000_debug_preferred_reload_class (rtx,
                                                           enum reg_class);
static bool rs6000_secondary_memory_needed (enum reg_class, enum reg_class,
                                            machine_mode);
static bool rs6000_debug_secondary_memory_needed (enum reg_class,
                                                  enum reg_class,
                                                  machine_mode);
static bool rs6000_cannot_change_mode_class (machine_mode,
                                             machine_mode,
                                             enum reg_class);
static bool rs6000_debug_cannot_change_mode_class (machine_mode,
                                                   machine_mode,
                                                   enum reg_class);
static bool rs6000_save_toc_in_prologue_p (void);
static rtx rs6000_internal_arg_pointer (void);

rtx (*rs6000_legitimize_reload_address_ptr) (rtx, machine_mode, int, int,
                                             int, int *)
  = rs6000_legitimize_reload_address;

static bool (*rs6000_mode_dependent_address_ptr) (const_rtx)
  = rs6000_mode_dependent_address;

enum reg_class (*rs6000_secondary_reload_class_ptr) (enum reg_class,
                                                     machine_mode, rtx)
  = rs6000_secondary_reload_class;

enum reg_class (*rs6000_preferred_reload_class_ptr) (rtx, enum reg_class)
  = rs6000_preferred_reload_class;

bool (*rs6000_secondary_memory_needed_ptr) (enum reg_class, enum reg_class,
                                            machine_mode)
  = rs6000_secondary_memory_needed;

bool (*rs6000_cannot_change_mode_class_ptr) (machine_mode,
                                             machine_mode,
                                             enum reg_class)
  = rs6000_cannot_change_mode_class;

const int INSN_NOT_AVAILABLE = -1;

static void rs6000_print_isa_options (FILE *, int, const char *,
                                      HOST_WIDE_INT);
static void rs6000_print_builtin_options (FILE *, int, const char *,
                                          HOST_WIDE_INT);

static enum rs6000_reg_type register_to_reg_type (rtx, bool *);
static bool rs6000_secondary_reload_move (enum rs6000_reg_type,
                                          enum rs6000_reg_type,
                                          machine_mode,
                                          secondary_reload_info *,
                                          bool);
rtl_opt_pass *make_pass_analyze_swaps (gcc::context*);
static bool rs6000_keep_leaf_when_profiled () __attribute__ ((unused));
static tree rs6000_fold_builtin (tree, int, tree *, bool);

/* Hash table stuff for keeping track of TOC entries.  */

struct GTY((for_user)) toc_hash_struct
{
  /* `key' will satisfy CONSTANT_P; in fact, it will satisfy
     ASM_OUTPUT_SPECIAL_POOL_ENTRY_P.  */
  rtx key;
  machine_mode key_mode;
  int labelno;
};

struct toc_hasher : ggc_ptr_hash<toc_hash_struct>
{
  static hashval_t hash (toc_hash_struct *);
  static bool equal (toc_hash_struct *, toc_hash_struct *);
};

static GTY (()) hash_table<toc_hasher> *toc_hash_table;

/* Hash table to keep track of the argument types for builtin functions.  */

struct GTY((for_user)) builtin_hash_struct
{
  tree type;
  machine_mode mode[4];   /* return value + 3 arguments.  */
  unsigned char uns_p[4]; /* and whether the types are unsigned.  */
};

struct builtin_hasher : ggc_ptr_hash<builtin_hash_struct>
{
  static hashval_t hash (builtin_hash_struct *);
  static bool equal (builtin_hash_struct *, builtin_hash_struct *);
};

static GTY (()) hash_table<builtin_hasher> *builtin_hash_table;

/* Default register names.  */
char rs6000_reg_names[][8] =
{
  "0",  "1",  "2",  "3",  "4",  "5",  "6",  "7",
  "8",  "9",  "10", "11", "12", "13", "14", "15",
  "16", "17", "18", "19", "20", "21", "22", "23",
  "24", "25", "26", "27", "28", "29", "30", "31",
  "0",  "1",  "2",  "3",  "4",  "5",  "6",  "7",
  "8",  "9",  "10", "11", "12", "13", "14", "15",
  "16", "17", "18", "19", "20", "21", "22", "23",
  "24", "25", "26", "27", "28", "29", "30", "31",
  "mq", "lr", "ctr", "ap",
  "0",  "1",  "2",  "3",  "4",  "5",  "6",  "7",
  "ca",
  /* AltiVec registers.  */
  "0",  "1",  "2",  "3",  "4",  "5",  "6",  "7",
  "8",  "9",  "10", "11", "12", "13", "14", "15",
  "16", "17", "18", "19", "20", "21", "22", "23",
  "24", "25", "26", "27", "28", "29", "30", "31",
  "vrsave", "vscr",
  /* SPE registers.  */
  "spe_acc", "spefscr",
  /* Soft frame pointer.  */
  "sfp",
  /* HTM SPR registers.  */
  "tfhar", "tfiar", "texasr",
  /* SPE High registers.  */
  "0",  "1",  "2",  "3",  "4",  "5",  "6",  "7",
  "8",  "9",  "10", "11", "12", "13", "14", "15",
  "16", "17", "18", "19", "20", "21", "22", "23",
  "24", "25", "26", "27", "28", "29", "30", "31"
};

#ifdef TARGET_REGNAMES
static const char alt_reg_names[][8] =
{
  "%r0",  "%r1",  "%r2",  "%r3",  "%r4",  "%r5",  "%r6",  "%r7",
  "%r8",  "%r9",  "%r10", "%r11", "%r12", "%r13", "%r14", "%r15",
  "%r16", "%r17", "%r18", "%r19", "%r20", "%r21", "%r22", "%r23",
  "%r24", "%r25", "%r26", "%r27", "%r28", "%r29", "%r30", "%r31",
  "%f0",  "%f1",  "%f2",  "%f3",  "%f4",  "%f5",  "%f6",  "%f7",
  "%f8",  "%f9",  "%f10", "%f11", "%f12", "%f13", "%f14", "%f15",
  "%f16", "%f17", "%f18", "%f19", "%f20", "%f21", "%f22", "%f23",
  "%f24", "%f25", "%f26", "%f27", "%f28", "%f29", "%f30", "%f31",
  "mq", "lr", "ctr", "ap",
  "%cr0", "%cr1", "%cr2", "%cr3", "%cr4", "%cr5", "%cr6", "%cr7",
  "ca",
  /* AltiVec registers.  */
  "%v0",  "%v1",  "%v2",  "%v3",  "%v4",  "%v5",  "%v6",  "%v7",
  "%v8",  "%v9",  "%v10", "%v11", "%v12", "%v13", "%v14", "%v15",
  "%v16", "%v17", "%v18", "%v19", "%v20", "%v21", "%v22", "%v23",
  "%v24", "%v25", "%v26", "%v27", "%v28", "%v29", "%v30", "%v31",
  "vrsave", "vscr",
  /* SPE registers.  */
  "spe_acc", "spefscr",
  /* Soft frame pointer.  */
  "sfp",
  /* HTM SPR registers.  */
  "tfhar", "tfiar", "texasr",
  /* SPE High registers.  */
  "%rh0",  "%rh1",  "%rh2",  "%rh3",  "%rh4",  "%rh5",  "%rh6",  "%rh7",
  "%rh8",  "%rh9",  "%rh10", "%rh11", "%rh12", "%rh13", "%rh14", "%rh15",
  "%rh16", "%rh17", "%rh18", "%rh19", "%rh20", "%rh21", "%rh22", "%rh23",
  "%rh24", "%rh25", "%rh26", "%rh27", "%rh28", "%rh29", "%rh30", "%rh31"
};
#endif

/* Table of valid machine attributes.  */

static const struct attribute_spec rs6000_attribute_table[] =
{
  /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
       affects_type_identity } */
  { "altivec",    1, 1, false, true,  false, rs6000_handle_altivec_attribute,
    false },
  { "longcall",   0, 0, false, true,  true,  rs6000_handle_longcall_attribute,
    false },
  { "shortcall",  0, 0, false, true,  true,  rs6000_handle_longcall_attribute,
    false },
  { "ms_struct",  0, 0, false, false, false, rs6000_handle_struct_attribute,
    false },
  { "gcc_struct", 0, 0, false, false, false, rs6000_handle_struct_attribute,
    false },
#ifdef SUBTARGET_ATTRIBUTE_TABLE
  SUBTARGET_ATTRIBUTE_TABLE,
#endif
  { NULL,         0, 0, false, false, false, NULL, false }
};

#ifndef TARGET_PROFILE_KERNEL
#define TARGET_PROFILE_KERNEL 0
#endif

/* The VRSAVE bitmask puts bit %v0 as the most significant bit.  */
#define ALTIVEC_REG_BIT(REGNO) (0x80000000 >> ((REGNO) - FIRST_ALTIVEC_REGNO))
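
/* Editor's example: ALTIVEC_REG_BIT (FIRST_ALTIVEC_REGNO + 20)
   == 0x80000000 >> 20 == 0x00000800, the VRSAVE bit for %v20, the first
   callee-saved AltiVec register (cf. world_save_p above, which saves
   v20-v31).  */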

/* Initialize the GCC target structure.  */
#undef TARGET_ATTRIBUTE_TABLE
#define TARGET_ATTRIBUTE_TABLE rs6000_attribute_table
#undef TARGET_SET_DEFAULT_TYPE_ATTRIBUTES
#define TARGET_SET_DEFAULT_TYPE_ATTRIBUTES rs6000_set_default_type_attributes
#undef TARGET_ATTRIBUTE_TAKES_IDENTIFIER_P
#define TARGET_ATTRIBUTE_TAKES_IDENTIFIER_P rs6000_attribute_takes_identifier_p

#undef TARGET_ASM_ALIGNED_DI_OP
#define TARGET_ASM_ALIGNED_DI_OP DOUBLE_INT_ASM_OP

/* Default unaligned ops are only provided for ELF.  Find the ops needed
   for non-ELF systems.  */
#ifndef OBJECT_FORMAT_ELF
#if TARGET_XCOFF
/* For XCOFF.  rs6000_assemble_integer will handle unaligned DIs on
   64-bit targets.  */
#undef TARGET_ASM_UNALIGNED_HI_OP
#define TARGET_ASM_UNALIGNED_HI_OP "\t.vbyte\t2,"
#undef TARGET_ASM_UNALIGNED_SI_OP
#define TARGET_ASM_UNALIGNED_SI_OP "\t.vbyte\t4,"
#undef TARGET_ASM_UNALIGNED_DI_OP
#define TARGET_ASM_UNALIGNED_DI_OP "\t.vbyte\t8,"
#else
/* For Darwin.  */
#undef TARGET_ASM_UNALIGNED_HI_OP
#define TARGET_ASM_UNALIGNED_HI_OP "\t.short\t"
#undef TARGET_ASM_UNALIGNED_SI_OP
#define TARGET_ASM_UNALIGNED_SI_OP "\t.long\t"
#undef TARGET_ASM_UNALIGNED_DI_OP
#define TARGET_ASM_UNALIGNED_DI_OP "\t.quad\t"
#undef TARGET_ASM_ALIGNED_DI_OP
#define TARGET_ASM_ALIGNED_DI_OP "\t.quad\t"
#endif
#endif

/* This hook deals with fixups for relocatable code and DI-mode objects
   in 64-bit code.  */
#undef TARGET_ASM_INTEGER
#define TARGET_ASM_INTEGER rs6000_assemble_integer

#if defined (HAVE_GAS_HIDDEN) && !TARGET_MACHO
#undef TARGET_ASM_ASSEMBLE_VISIBILITY
#define TARGET_ASM_ASSEMBLE_VISIBILITY rs6000_assemble_visibility
#endif

#undef TARGET_SET_UP_BY_PROLOGUE
#define TARGET_SET_UP_BY_PROLOGUE rs6000_set_up_by_prologue

#undef TARGET_SHRINK_WRAP_GET_SEPARATE_COMPONENTS
#define TARGET_SHRINK_WRAP_GET_SEPARATE_COMPONENTS rs6000_get_separate_components
#undef TARGET_SHRINK_WRAP_COMPONENTS_FOR_BB
#define TARGET_SHRINK_WRAP_COMPONENTS_FOR_BB rs6000_components_for_bb
#undef TARGET_SHRINK_WRAP_DISQUALIFY_COMPONENTS
#define TARGET_SHRINK_WRAP_DISQUALIFY_COMPONENTS rs6000_disqualify_components
#undef TARGET_SHRINK_WRAP_EMIT_PROLOGUE_COMPONENTS
#define TARGET_SHRINK_WRAP_EMIT_PROLOGUE_COMPONENTS rs6000_emit_prologue_components
#undef TARGET_SHRINK_WRAP_EMIT_EPILOGUE_COMPONENTS
#define TARGET_SHRINK_WRAP_EMIT_EPILOGUE_COMPONENTS rs6000_emit_epilogue_components
#undef TARGET_SHRINK_WRAP_SET_HANDLED_COMPONENTS
#define TARGET_SHRINK_WRAP_SET_HANDLED_COMPONENTS rs6000_set_handled_components

#undef TARGET_EXTRA_LIVE_ON_ENTRY
#define TARGET_EXTRA_LIVE_ON_ENTRY rs6000_live_on_entry

#undef TARGET_INTERNAL_ARG_POINTER
#define TARGET_INTERNAL_ARG_POINTER rs6000_internal_arg_pointer

#undef TARGET_HAVE_TLS
#define TARGET_HAVE_TLS HAVE_AS_TLS

#undef TARGET_CANNOT_FORCE_CONST_MEM
#define TARGET_CANNOT_FORCE_CONST_MEM rs6000_cannot_force_const_mem

#undef TARGET_DELEGITIMIZE_ADDRESS
#define TARGET_DELEGITIMIZE_ADDRESS rs6000_delegitimize_address

#undef TARGET_CONST_NOT_OK_FOR_DEBUG_P
#define TARGET_CONST_NOT_OK_FOR_DEBUG_P rs6000_const_not_ok_for_debug_p

#undef TARGET_ASM_FUNCTION_PROLOGUE
#define TARGET_ASM_FUNCTION_PROLOGUE rs6000_output_function_prologue
#undef TARGET_ASM_FUNCTION_EPILOGUE
#define TARGET_ASM_FUNCTION_EPILOGUE rs6000_output_function_epilogue

#undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
#define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA rs6000_output_addr_const_extra

#undef TARGET_LEGITIMIZE_ADDRESS
#define TARGET_LEGITIMIZE_ADDRESS rs6000_legitimize_address

#undef TARGET_SCHED_VARIABLE_ISSUE
#define TARGET_SCHED_VARIABLE_ISSUE rs6000_variable_issue

#undef TARGET_SCHED_ISSUE_RATE
#define TARGET_SCHED_ISSUE_RATE rs6000_issue_rate
#undef TARGET_SCHED_ADJUST_COST
#define TARGET_SCHED_ADJUST_COST rs6000_adjust_cost
#undef TARGET_SCHED_ADJUST_PRIORITY
#define TARGET_SCHED_ADJUST_PRIORITY rs6000_adjust_priority
#undef TARGET_SCHED_IS_COSTLY_DEPENDENCE
#define TARGET_SCHED_IS_COSTLY_DEPENDENCE rs6000_is_costly_dependence
#undef TARGET_SCHED_INIT
#define TARGET_SCHED_INIT rs6000_sched_init
#undef TARGET_SCHED_FINISH
#define TARGET_SCHED_FINISH rs6000_sched_finish
#undef TARGET_SCHED_REORDER
#define TARGET_SCHED_REORDER rs6000_sched_reorder
#undef TARGET_SCHED_REORDER2
#define TARGET_SCHED_REORDER2 rs6000_sched_reorder2

#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD rs6000_use_sched_lookahead

#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD
#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD rs6000_use_sched_lookahead_guard

#undef TARGET_SCHED_ALLOC_SCHED_CONTEXT
#define TARGET_SCHED_ALLOC_SCHED_CONTEXT rs6000_alloc_sched_context
#undef TARGET_SCHED_INIT_SCHED_CONTEXT
#define TARGET_SCHED_INIT_SCHED_CONTEXT rs6000_init_sched_context
#undef TARGET_SCHED_SET_SCHED_CONTEXT
#define TARGET_SCHED_SET_SCHED_CONTEXT rs6000_set_sched_context
#undef TARGET_SCHED_FREE_SCHED_CONTEXT
#define TARGET_SCHED_FREE_SCHED_CONTEXT rs6000_free_sched_context

#undef TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD
#define TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD rs6000_builtin_mask_for_load
#undef TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT
#define TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT \
  rs6000_builtin_support_vector_misalignment
#undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
#define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE rs6000_vector_alignment_reachable
#undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
#define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
  rs6000_builtin_vectorization_cost
#undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
#define TARGET_VECTORIZE_PREFERRED_SIMD_MODE \
  rs6000_preferred_simd_mode
#undef TARGET_VECTORIZE_INIT_COST
#define TARGET_VECTORIZE_INIT_COST rs6000_init_cost
#undef TARGET_VECTORIZE_ADD_STMT_COST
#define TARGET_VECTORIZE_ADD_STMT_COST rs6000_add_stmt_cost
#undef TARGET_VECTORIZE_FINISH_COST
#define TARGET_VECTORIZE_FINISH_COST rs6000_finish_cost
#undef TARGET_VECTORIZE_DESTROY_COST_DATA
#define TARGET_VECTORIZE_DESTROY_COST_DATA rs6000_destroy_cost_data

#undef TARGET_INIT_BUILTINS
#define TARGET_INIT_BUILTINS rs6000_init_builtins
#undef TARGET_BUILTIN_DECL
#define TARGET_BUILTIN_DECL rs6000_builtin_decl

#undef TARGET_FOLD_BUILTIN
#define TARGET_FOLD_BUILTIN rs6000_fold_builtin

#undef TARGET_EXPAND_BUILTIN
#define TARGET_EXPAND_BUILTIN rs6000_expand_builtin

#undef TARGET_MANGLE_TYPE
#define TARGET_MANGLE_TYPE rs6000_mangle_type

#undef TARGET_INIT_LIBFUNCS
#define TARGET_INIT_LIBFUNCS rs6000_init_libfuncs

#if TARGET_MACHO
#undef TARGET_BINDS_LOCAL_P
#define TARGET_BINDS_LOCAL_P darwin_binds_local_p
#endif

#undef TARGET_MS_BITFIELD_LAYOUT_P
#define TARGET_MS_BITFIELD_LAYOUT_P rs6000_ms_bitfield_layout_p

#undef TARGET_ASM_OUTPUT_MI_THUNK
#define TARGET_ASM_OUTPUT_MI_THUNK rs6000_output_mi_thunk

#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
#define TARGET_ASM_CAN_OUTPUT_MI_THUNK hook_bool_const_tree_hwi_hwi_const_tree_true

#undef TARGET_FUNCTION_OK_FOR_SIBCALL
#define TARGET_FUNCTION_OK_FOR_SIBCALL rs6000_function_ok_for_sibcall

#undef TARGET_REGISTER_MOVE_COST
#define TARGET_REGISTER_MOVE_COST rs6000_register_move_cost
#undef TARGET_MEMORY_MOVE_COST
#define TARGET_MEMORY_MOVE_COST rs6000_memory_move_cost
#undef TARGET_CANNOT_COPY_INSN_P
#define TARGET_CANNOT_COPY_INSN_P rs6000_cannot_copy_insn_p
#undef TARGET_RTX_COSTS
#define TARGET_RTX_COSTS rs6000_rtx_costs
#undef TARGET_ADDRESS_COST
#define TARGET_ADDRESS_COST hook_int_rtx_mode_as_bool_0
1673 #undef TARGET_DWARF_REGISTER_SPAN
1674 #define TARGET_DWARF_REGISTER_SPAN rs6000_dwarf_register_span
1676 #undef TARGET_INIT_DWARF_REG_SIZES_EXTRA
1677 #define TARGET_INIT_DWARF_REG_SIZES_EXTRA rs6000_init_dwarf_reg_sizes_extra
1679 #undef TARGET_MEMBER_TYPE_FORCES_BLK
1680 #define TARGET_MEMBER_TYPE_FORCES_BLK rs6000_member_type_forces_blk
1682 #undef TARGET_PROMOTE_FUNCTION_MODE
1683 #define TARGET_PROMOTE_FUNCTION_MODE rs6000_promote_function_mode
1685 #undef TARGET_RETURN_IN_MEMORY
1686 #define TARGET_RETURN_IN_MEMORY rs6000_return_in_memory
1688 #undef TARGET_RETURN_IN_MSB
1689 #define TARGET_RETURN_IN_MSB rs6000_return_in_msb
1691 #undef TARGET_SETUP_INCOMING_VARARGS
1692 #define TARGET_SETUP_INCOMING_VARARGS setup_incoming_varargs
1694 /* Always strict argument naming on rs6000. */
1695 #undef TARGET_STRICT_ARGUMENT_NAMING
1696 #define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
1697 #undef TARGET_PRETEND_OUTGOING_VARARGS_NAMED
1698 #define TARGET_PRETEND_OUTGOING_VARARGS_NAMED hook_bool_CUMULATIVE_ARGS_true
1699 #undef TARGET_SPLIT_COMPLEX_ARG
1700 #define TARGET_SPLIT_COMPLEX_ARG hook_bool_const_tree_true
1701 #undef TARGET_MUST_PASS_IN_STACK
1702 #define TARGET_MUST_PASS_IN_STACK rs6000_must_pass_in_stack
1703 #undef TARGET_PASS_BY_REFERENCE
1704 #define TARGET_PASS_BY_REFERENCE rs6000_pass_by_reference
1705 #undef TARGET_ARG_PARTIAL_BYTES
1706 #define TARGET_ARG_PARTIAL_BYTES rs6000_arg_partial_bytes
1707 #undef TARGET_FUNCTION_ARG_ADVANCE
1708 #define TARGET_FUNCTION_ARG_ADVANCE rs6000_function_arg_advance
1709 #undef TARGET_FUNCTION_ARG
1710 #define TARGET_FUNCTION_ARG rs6000_function_arg
1711 #undef TARGET_FUNCTION_ARG_BOUNDARY
1712 #define TARGET_FUNCTION_ARG_BOUNDARY rs6000_function_arg_boundary
1714 #undef TARGET_BUILD_BUILTIN_VA_LIST
1715 #define TARGET_BUILD_BUILTIN_VA_LIST rs6000_build_builtin_va_list
1717 #undef TARGET_EXPAND_BUILTIN_VA_START
1718 #define TARGET_EXPAND_BUILTIN_VA_START rs6000_va_start
1720 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
1721 #define TARGET_GIMPLIFY_VA_ARG_EXPR rs6000_gimplify_va_arg
1723 #undef TARGET_EH_RETURN_FILTER_MODE
1724 #define TARGET_EH_RETURN_FILTER_MODE rs6000_eh_return_filter_mode
1726 #undef TARGET_SCALAR_MODE_SUPPORTED_P
1727 #define TARGET_SCALAR_MODE_SUPPORTED_P rs6000_scalar_mode_supported_p
1729 #undef TARGET_VECTOR_MODE_SUPPORTED_P
1730 #define TARGET_VECTOR_MODE_SUPPORTED_P rs6000_vector_mode_supported_p
1732 #undef TARGET_FLOATN_MODE
1733 #define TARGET_FLOATN_MODE rs6000_floatn_mode
1735 #undef TARGET_INVALID_ARG_FOR_UNPROTOTYPED_FN
1736 #define TARGET_INVALID_ARG_FOR_UNPROTOTYPED_FN invalid_arg_for_unprototyped_fn
1738 #undef TARGET_ASM_LOOP_ALIGN_MAX_SKIP
1739 #define TARGET_ASM_LOOP_ALIGN_MAX_SKIP rs6000_loop_align_max_skip
1741 #undef TARGET_MD_ASM_ADJUST
1742 #define TARGET_MD_ASM_ADJUST rs6000_md_asm_adjust
1744 #undef TARGET_OPTION_OVERRIDE
1745 #define TARGET_OPTION_OVERRIDE rs6000_option_override
1747 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
1748 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
1749 rs6000_builtin_vectorized_function
1751 #undef TARGET_VECTORIZE_BUILTIN_MD_VECTORIZED_FUNCTION
1752 #define TARGET_VECTORIZE_BUILTIN_MD_VECTORIZED_FUNCTION \
1753 rs6000_builtin_md_vectorized_function
1755 #ifdef TARGET_THREAD_SSP_OFFSET
1756 #undef TARGET_STACK_PROTECT_GUARD
1757 #define TARGET_STACK_PROTECT_GUARD hook_tree_void_null
1758 #endif
1760 #if !TARGET_MACHO
1761 #undef TARGET_STACK_PROTECT_FAIL
1762 #define TARGET_STACK_PROTECT_FAIL rs6000_stack_protect_fail
1763 #endif
1765 #ifdef HAVE_AS_TLS
1766 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
1767 #define TARGET_ASM_OUTPUT_DWARF_DTPREL rs6000_output_dwarf_dtprel
1768 #endif
1770 /* Use a 32-bit anchor range. This leads to sequences like:
1772 addis tmp,anchor,high
1773 add dest,tmp,low
1775 where tmp itself acts as an anchor, and can be shared between
1776 accesses to the same 64k page. */
1777 #undef TARGET_MIN_ANCHOR_OFFSET
1778 #define TARGET_MIN_ANCHOR_OFFSET -0x7fffffff - 1
1779 #undef TARGET_MAX_ANCHOR_OFFSET
1780 #define TARGET_MAX_ANCHOR_OFFSET 0x7fffffff
1781 #undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
1782 #define TARGET_USE_BLOCKS_FOR_CONSTANT_P rs6000_use_blocks_for_constant_p
1783 #undef TARGET_USE_BLOCKS_FOR_DECL_P
1784 #define TARGET_USE_BLOCKS_FOR_DECL_P rs6000_use_blocks_for_decl_p
1786 #undef TARGET_BUILTIN_RECIPROCAL
1787 #define TARGET_BUILTIN_RECIPROCAL rs6000_builtin_reciprocal
1789 #undef TARGET_EXPAND_TO_RTL_HOOK
1790 #define TARGET_EXPAND_TO_RTL_HOOK rs6000_alloc_sdmode_stack_slot
1792 #undef TARGET_INSTANTIATE_DECLS
1793 #define TARGET_INSTANTIATE_DECLS rs6000_instantiate_decls
1795 #undef TARGET_SECONDARY_RELOAD
1796 #define TARGET_SECONDARY_RELOAD rs6000_secondary_reload
1798 #undef TARGET_LEGITIMATE_ADDRESS_P
1799 #define TARGET_LEGITIMATE_ADDRESS_P rs6000_legitimate_address_p
1801 #undef TARGET_MODE_DEPENDENT_ADDRESS_P
1802 #define TARGET_MODE_DEPENDENT_ADDRESS_P rs6000_mode_dependent_address_p
1804 #undef TARGET_LRA_P
1805 #define TARGET_LRA_P rs6000_lra_p
1807 #undef TARGET_CAN_ELIMINATE
1808 #define TARGET_CAN_ELIMINATE rs6000_can_eliminate
1810 #undef TARGET_CONDITIONAL_REGISTER_USAGE
1811 #define TARGET_CONDITIONAL_REGISTER_USAGE rs6000_conditional_register_usage
1813 #undef TARGET_SCHED_REASSOCIATION_WIDTH
1814 #define TARGET_SCHED_REASSOCIATION_WIDTH rs6000_reassociation_width
1816 #undef TARGET_TRAMPOLINE_INIT
1817 #define TARGET_TRAMPOLINE_INIT rs6000_trampoline_init
1819 #undef TARGET_FUNCTION_VALUE
1820 #define TARGET_FUNCTION_VALUE rs6000_function_value
1822 #undef TARGET_OPTION_VALID_ATTRIBUTE_P
1823 #define TARGET_OPTION_VALID_ATTRIBUTE_P rs6000_valid_attribute_p
1825 #undef TARGET_OPTION_SAVE
1826 #define TARGET_OPTION_SAVE rs6000_function_specific_save
1828 #undef TARGET_OPTION_RESTORE
1829 #define TARGET_OPTION_RESTORE rs6000_function_specific_restore
1831 #undef TARGET_OPTION_PRINT
1832 #define TARGET_OPTION_PRINT rs6000_function_specific_print
1834 #undef TARGET_CAN_INLINE_P
1835 #define TARGET_CAN_INLINE_P rs6000_can_inline_p
1837 #undef TARGET_SET_CURRENT_FUNCTION
1838 #define TARGET_SET_CURRENT_FUNCTION rs6000_set_current_function
1840 #undef TARGET_LEGITIMATE_CONSTANT_P
1841 #define TARGET_LEGITIMATE_CONSTANT_P rs6000_legitimate_constant_p
1843 #undef TARGET_VECTORIZE_VEC_PERM_CONST_OK
1844 #define TARGET_VECTORIZE_VEC_PERM_CONST_OK rs6000_vectorize_vec_perm_const_ok
1846 #undef TARGET_CAN_USE_DOLOOP_P
1847 #define TARGET_CAN_USE_DOLOOP_P can_use_doloop_if_innermost
1849 #undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
1850 #define TARGET_ATOMIC_ASSIGN_EXPAND_FENV rs6000_atomic_assign_expand_fenv
1852 #undef TARGET_LIBGCC_CMP_RETURN_MODE
1853 #define TARGET_LIBGCC_CMP_RETURN_MODE rs6000_abi_word_mode
1854 #undef TARGET_LIBGCC_SHIFT_COUNT_MODE
1855 #define TARGET_LIBGCC_SHIFT_COUNT_MODE rs6000_abi_word_mode
1856 #undef TARGET_UNWIND_WORD_MODE
1857 #define TARGET_UNWIND_WORD_MODE rs6000_abi_word_mode
1859 #undef TARGET_OFFLOAD_OPTIONS
1860 #define TARGET_OFFLOAD_OPTIONS rs6000_offload_options
1862 #undef TARGET_C_MODE_FOR_SUFFIX
1863 #define TARGET_C_MODE_FOR_SUFFIX rs6000_c_mode_for_suffix
1865 #undef TARGET_INVALID_BINARY_OP
1866 #define TARGET_INVALID_BINARY_OP rs6000_invalid_binary_op
1868 #undef TARGET_OPTAB_SUPPORTED_P
1869 #define TARGET_OPTAB_SUPPORTED_P rs6000_optab_supported_p
1871 #undef TARGET_CUSTOM_FUNCTION_DESCRIPTORS
1872 #define TARGET_CUSTOM_FUNCTION_DESCRIPTORS 1
1875 /* Processor table. */
1876 struct rs6000_ptt
1878 const char *const name; /* Canonical processor name. */
1879 const enum processor_type processor; /* Processor type enum value. */
1880 const HOST_WIDE_INT target_enable; /* Target flags to enable. */
1883 static struct rs6000_ptt const processor_target_table[] =
1885 #define RS6000_CPU(NAME, CPU, FLAGS) { NAME, CPU, FLAGS },
1886 #include "rs6000-cpus.def"
1887 #undef RS6000_CPU
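/* Illustrative expansion (flags abbreviated; see rs6000-cpus.def for the
   real entries): a line such as

     RS6000_CPU ("power8", PROCESSOR_POWER8, MASK_POWERPC64 | ...)

   becomes the table entry

     { "power8", PROCESSOR_POWER8, MASK_POWERPC64 | ... }  */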
1890 /* Look up a processor name for -mcpu=xxx and -mtune=xxx. Return -1 if the
1891 name is invalid. */
1893 static int
1894 rs6000_cpu_name_lookup (const char *name)
1896 size_t i;
1898 if (name != NULL)
1900 for (i = 0; i < ARRAY_SIZE (processor_target_table); i++)
1901 if (! strcmp (name, processor_target_table[i].name))
1902 return (int)i;
1905 return -1;
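/* A hypothetical caller (the real option handling lives elsewhere in this
   file) would use the lookup like this:

     int idx = rs6000_cpu_name_lookup ("power8");
     if (idx >= 0)
       flags = processor_target_table[idx].target_enable;
     else
       ...report the invalid -mcpu=/-mtune= name...  */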
1909 /* Return number of consecutive hard regs needed starting at reg REGNO
1910 to hold something of mode MODE.
1911 This is ordinarily the length in words of a value of mode MODE
1912 but can be less for certain modes in special long registers.
1914 For the SPE, GPRs are 64 bits but only 32 bits are visible in
1915 scalar instructions. The upper 32 bits are only available to the
1916 SIMD instructions.
1918 POWER and PowerPC GPRs hold 32 bits worth;
 1919 PowerPC64 GPRs and FPRs hold 64 bits worth. */
1921 static int
1922 rs6000_hard_regno_nregs_internal (int regno, machine_mode mode)
1924 unsigned HOST_WIDE_INT reg_size;
1926 /* 128-bit floating point usually takes 2 registers, unless it is IEEE
1927 128-bit floating point that can go in vector registers, which has VSX
1928 memory addressing. */
1929 if (FP_REGNO_P (regno))
1930 reg_size = (VECTOR_MEM_VSX_P (mode) || FLOAT128_VECTOR_P (mode)
1931 ? UNITS_PER_VSX_WORD
1932 : UNITS_PER_FP_WORD);
1934 else if (SPE_SIMD_REGNO_P (regno) && TARGET_SPE && SPE_VECTOR_MODE (mode))
1935 reg_size = UNITS_PER_SPE_WORD;
1937 else if (ALTIVEC_REGNO_P (regno))
1938 reg_size = UNITS_PER_ALTIVEC_WORD;
1940 /* The value returned for SCmode in the E500 double case is 2 for
1941 ABI compatibility; storing an SCmode value in a single register
1942 would require function_arg and rs6000_spe_function_arg to handle
1943 SCmode so as to pass the value correctly in a pair of
1944 registers. */
1945 else if (TARGET_E500_DOUBLE && FLOAT_MODE_P (mode) && mode != SCmode
1946 && !DECIMAL_FLOAT_MODE_P (mode) && SPE_SIMD_REGNO_P (regno))
1947 reg_size = UNITS_PER_FP_WORD;
1949 else
1950 reg_size = UNITS_PER_WORD;
1952 return (GET_MODE_SIZE (mode) + reg_size - 1) / reg_size;
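/* Worked examples of the round-up division above, assuming the usual
   UNITS_PER_FP_WORD == 8 and UNITS_PER_ALTIVEC_WORD == 16:

     TFmode (16 bytes) in an FPR:           (16 + 8 - 1) / 8   = 2 regs
     SFmode (4 bytes) in an FPR:            (4 + 8 - 1) / 8    = 1 reg
     V4SImode (16 bytes) in an AltiVec reg: (16 + 16 - 1) / 16 = 1 reg  */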
1955 /* Value is 1 if hard register REGNO can hold a value of machine-mode
1956 MODE. */
1957 static int
1958 rs6000_hard_regno_mode_ok (int regno, machine_mode mode)
1960 int last_regno = regno + rs6000_hard_regno_nregs[mode][regno] - 1;
1962 if (COMPLEX_MODE_P (mode))
1963 mode = GET_MODE_INNER (mode);
1965 /* PTImode can only go in GPRs. Quad word memory operations require even/odd
 1966 register combinations; we use PTImode where we need to deal with quad
1967 word memory operations. Don't allow quad words in the argument or frame
1968 pointer registers, just registers 0..31. */
1969 if (mode == PTImode)
1970 return (IN_RANGE (regno, FIRST_GPR_REGNO, LAST_GPR_REGNO)
1971 && IN_RANGE (last_regno, FIRST_GPR_REGNO, LAST_GPR_REGNO)
1972 && ((regno & 1) == 0));
 1974 /* VSX registers that overlap the FPR registers are larger than FPRs on non-VSX
1975 implementations. Don't allow an item to be split between a FP register
1976 and an Altivec register. Allow TImode in all VSX registers if the user
1977 asked for it. */
1978 if (TARGET_VSX && VSX_REGNO_P (regno)
1979 && (VECTOR_MEM_VSX_P (mode)
1980 || FLOAT128_VECTOR_P (mode)
1981 || reg_addr[mode].scalar_in_vmx_p
1982 || (TARGET_VSX_TIMODE && mode == TImode)
1983 || (TARGET_VADDUQM && mode == V1TImode)
1984 || (TARGET_UPPER_REGS_DI && mode == DImode)))
1986 if (FP_REGNO_P (regno))
1987 return FP_REGNO_P (last_regno);
1989 if (ALTIVEC_REGNO_P (regno))
1991 if (GET_MODE_SIZE (mode) != 16 && !reg_addr[mode].scalar_in_vmx_p)
1992 return 0;
1994 return ALTIVEC_REGNO_P (last_regno);
1998 /* The GPRs can hold any mode, but values bigger than one register
1999 cannot go past R31. */
2000 if (INT_REGNO_P (regno))
2001 return INT_REGNO_P (last_regno);
2003 /* The float registers (except for VSX vector modes) can only hold floating
2004 modes and DImode. */
2005 if (FP_REGNO_P (regno))
2007 if (FLOAT128_VECTOR_P (mode))
2008 return false;
2010 if (SCALAR_FLOAT_MODE_P (mode)
2011 && (mode != TDmode || (regno % 2) == 0)
2012 && FP_REGNO_P (last_regno))
2013 return 1;
2015 if (GET_MODE_CLASS (mode) == MODE_INT
2016 && GET_MODE_SIZE (mode) == UNITS_PER_FP_WORD)
2017 return 1;
2019 if (PAIRED_SIMD_REGNO_P (regno) && TARGET_PAIRED_FLOAT
2020 && PAIRED_VECTOR_MODE (mode))
2021 return 1;
2023 return 0;
2026 /* The CR register can only hold CC modes. */
2027 if (CR_REGNO_P (regno))
2028 return GET_MODE_CLASS (mode) == MODE_CC;
2030 if (CA_REGNO_P (regno))
2031 return mode == Pmode || mode == SImode;
 2033 /* AltiVec modes only in AltiVec registers. */
2034 if (ALTIVEC_REGNO_P (regno))
2035 return (VECTOR_MEM_ALTIVEC_OR_VSX_P (mode)
2036 || mode == V1TImode);
2038 /* ...but GPRs can hold SIMD data on the SPE in one register. */
2039 if (SPE_SIMD_REGNO_P (regno) && TARGET_SPE && SPE_VECTOR_MODE (mode))
2040 return 1;
 2042 /* We cannot put non-VSX TImode or PTImode anywhere except general registers,
2043 and it must be able to fit within the register set. */
2045 return GET_MODE_SIZE (mode) <= UNITS_PER_WORD;
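/* A sketch of the rules above (register numbering per rs6000.h:
   GPRs 0-31, FPRs 32-63):

     rs6000_hard_regno_mode_ok (0, PTImode)  -> nonzero (even GPR pair)
     rs6000_hard_regno_mode_ok (1, PTImode)  -> 0 (odd GPR disallowed)
     rs6000_hard_regno_mode_ok (32, DFmode)  -> nonzero (scalar float in FPR)
     rs6000_hard_regno_mode_ok (32, CCmode)  -> 0 (CC modes live in CRs)  */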
2048 /* Print interesting facts about registers. */
2049 static void
2050 rs6000_debug_reg_print (int first_regno, int last_regno, const char *reg_name)
2052 int r, m;
2054 for (r = first_regno; r <= last_regno; ++r)
2056 const char *comma = "";
2057 int len;
2059 if (first_regno == last_regno)
2060 fprintf (stderr, "%s:\t", reg_name);
2061 else
2062 fprintf (stderr, "%s%d:\t", reg_name, r - first_regno);
2064 len = 8;
2065 for (m = 0; m < NUM_MACHINE_MODES; ++m)
2066 if (rs6000_hard_regno_mode_ok_p[m][r] && rs6000_hard_regno_nregs[m][r])
2068 if (len > 70)
2070 fprintf (stderr, ",\n\t");
2071 len = 8;
2072 comma = "";
2075 if (rs6000_hard_regno_nregs[m][r] > 1)
2076 len += fprintf (stderr, "%s%s/%d", comma, GET_MODE_NAME (m),
2077 rs6000_hard_regno_nregs[m][r]);
2078 else
2079 len += fprintf (stderr, "%s%s", comma, GET_MODE_NAME (m));
2081 comma = ", ";
2084 if (call_used_regs[r])
2086 if (len > 70)
2088 fprintf (stderr, ",\n\t");
2089 len = 8;
2090 comma = "";
2093 len += fprintf (stderr, "%s%s", comma, "call-used");
2094 comma = ", ";
2097 if (fixed_regs[r])
2099 if (len > 70)
2101 fprintf (stderr, ",\n\t");
2102 len = 8;
2103 comma = "";
2106 len += fprintf (stderr, "%s%s", comma, "fixed");
2107 comma = ", ";
2110 if (len > 70)
2112 fprintf (stderr, ",\n\t");
2113 comma = "";
2116 len += fprintf (stderr, "%sreg-class = %s", comma,
2117 reg_class_names[(int)rs6000_regno_regclass[r]]);
2118 comma = ", ";
2120 if (len > 70)
2122 fprintf (stderr, ",\n\t");
2123 comma = "";
2126 fprintf (stderr, "%sregno = %d\n", comma, r);
2130 static const char *
2131 rs6000_debug_vector_unit (enum rs6000_vector v)
2133 const char *ret;
2135 switch (v)
2137 case VECTOR_NONE: ret = "none"; break;
2138 case VECTOR_ALTIVEC: ret = "altivec"; break;
2139 case VECTOR_VSX: ret = "vsx"; break;
2140 case VECTOR_P8_VECTOR: ret = "p8_vector"; break;
2141 case VECTOR_PAIRED: ret = "paired"; break;
2142 case VECTOR_SPE: ret = "spe"; break;
2143 case VECTOR_OTHER: ret = "other"; break;
2144 default: ret = "unknown"; break;
2147 return ret;
2150 /* Inner function printing just the address mask for a particular reload
2151 register class. */
2152 DEBUG_FUNCTION char *
2153 rs6000_debug_addr_mask (addr_mask_type mask, bool keep_spaces)
2155 static char ret[8];
2156 char *p = ret;
2158 if ((mask & RELOAD_REG_VALID) != 0)
2159 *p++ = 'v';
2160 else if (keep_spaces)
2161 *p++ = ' ';
2163 if ((mask & RELOAD_REG_MULTIPLE) != 0)
2164 *p++ = 'm';
2165 else if (keep_spaces)
2166 *p++ = ' ';
2168 if ((mask & RELOAD_REG_INDEXED) != 0)
2169 *p++ = 'i';
2170 else if (keep_spaces)
2171 *p++ = ' ';
2173 if ((mask & RELOAD_REG_QUAD_OFFSET) != 0)
2174 *p++ = 'O';
2175 else if ((mask & RELOAD_REG_OFFSET) != 0)
2176 *p++ = 'o';
2177 else if (keep_spaces)
2178 *p++ = ' ';
2180 if ((mask & RELOAD_REG_PRE_INCDEC) != 0)
2181 *p++ = '+';
2182 else if (keep_spaces)
2183 *p++ = ' ';
2185 if ((mask & RELOAD_REG_PRE_MODIFY) != 0)
2186 *p++ = '+';
2187 else if (keep_spaces)
2188 *p++ = ' ';
2190 if ((mask & RELOAD_REG_AND_M16) != 0)
2191 *p++ = '&';
2192 else if (keep_spaces)
2193 *p++ = ' ';
2195 *p = '\0';
2197 return ret;
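/* For example, a mask of RELOAD_REG_VALID | RELOAD_REG_INDEXED
   | RELOAD_REG_OFFSET prints as "vio" without keep_spaces, or as
   "v io   " with keep_spaces, one column per capability in the fixed
   order v/m/i/o/+/+/&.  */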
 2200 /* Print the address masks in a human readable fashion. */
2201 DEBUG_FUNCTION void
2202 rs6000_debug_print_mode (ssize_t m)
2204 ssize_t rc;
2205 int spaces = 0;
2206 bool fuse_extra_p;
2208 fprintf (stderr, "Mode: %-5s", GET_MODE_NAME (m));
2209 for (rc = 0; rc < N_RELOAD_REG; rc++)
2210 fprintf (stderr, " %s: %s", reload_reg_map[rc].name,
2211 rs6000_debug_addr_mask (reg_addr[m].addr_mask[rc], true));
2213 if ((reg_addr[m].reload_store != CODE_FOR_nothing)
2214 || (reg_addr[m].reload_load != CODE_FOR_nothing))
2215 fprintf (stderr, " Reload=%c%c",
2216 (reg_addr[m].reload_store != CODE_FOR_nothing) ? 's' : '*',
2217 (reg_addr[m].reload_load != CODE_FOR_nothing) ? 'l' : '*');
2218 else
2219 spaces += sizeof (" Reload=sl") - 1;
2221 if (reg_addr[m].scalar_in_vmx_p)
2223 fprintf (stderr, "%*s Upper=y", spaces, "");
2224 spaces = 0;
2226 else
2227 spaces += sizeof (" Upper=y") - 1;
2229 fuse_extra_p = ((reg_addr[m].fusion_gpr_ld != CODE_FOR_nothing)
2230 || reg_addr[m].fused_toc);
2231 if (!fuse_extra_p)
2233 for (rc = 0; rc < N_RELOAD_REG; rc++)
2235 if (rc != RELOAD_REG_ANY)
 2237 if (reg_addr[m].fusion_addi_ld[rc] != CODE_FOR_nothing
2239 || reg_addr[m].fusion_addi_st[rc] != CODE_FOR_nothing
2240 || reg_addr[m].fusion_addis_ld[rc] != CODE_FOR_nothing
2241 || reg_addr[m].fusion_addis_st[rc] != CODE_FOR_nothing)
2243 fuse_extra_p = true;
2244 break;
2250 if (fuse_extra_p)
2252 fprintf (stderr, "%*s Fuse:", spaces, "");
2253 spaces = 0;
2255 for (rc = 0; rc < N_RELOAD_REG; rc++)
2257 if (rc != RELOAD_REG_ANY)
2259 char load, store;
2261 if (reg_addr[m].fusion_addis_ld[rc] != CODE_FOR_nothing)
2262 load = 'l';
2263 else if (reg_addr[m].fusion_addi_ld[rc] != CODE_FOR_nothing)
2264 load = 'L';
2265 else
2266 load = '-';
2268 if (reg_addr[m].fusion_addis_st[rc] != CODE_FOR_nothing)
2269 store = 's';
2270 else if (reg_addr[m].fusion_addi_st[rc] != CODE_FOR_nothing)
2271 store = 'S';
2272 else
2273 store = '-';
2275 if (load == '-' && store == '-')
2276 spaces += 5;
2277 else
2279 fprintf (stderr, "%*s%c=%c%c", (spaces + 1), "",
2280 reload_reg_map[rc].name[0], load, store);
2281 spaces = 0;
2286 if (reg_addr[m].fusion_gpr_ld != CODE_FOR_nothing)
2288 fprintf (stderr, "%*sP8gpr", (spaces + 1), "");
2289 spaces = 0;
2291 else
2292 spaces += sizeof (" P8gpr") - 1;
2294 if (reg_addr[m].fused_toc)
2296 fprintf (stderr, "%*sToc", (spaces + 1), "");
2297 spaces = 0;
2299 else
2300 spaces += sizeof (" Toc") - 1;
2302 else
2303 spaces += sizeof (" Fuse: G=ls F=ls v=ls P8gpr Toc") - 1;
2305 if (rs6000_vector_unit[m] != VECTOR_NONE
2306 || rs6000_vector_mem[m] != VECTOR_NONE)
2308 fprintf (stderr, "%*s vector: arith=%-10s mem=%s",
2309 spaces, "",
2310 rs6000_debug_vector_unit (rs6000_vector_unit[m]),
2311 rs6000_debug_vector_unit (rs6000_vector_mem[m]));
2314 fputs ("\n", stderr);
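/* Schematically, one line of -mdebug=reg output from this function looks
   like (illustrative; the exact columns depend on the configuration):

     Mode: DI     GPR: v io++   FPR: v io     VMX: v i      Any: v io++

   with each field decoded by rs6000_debug_addr_mask above.  */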
2317 #define DEBUG_FMT_ID "%-32s= "
2318 #define DEBUG_FMT_D DEBUG_FMT_ID "%d\n"
2319 #define DEBUG_FMT_WX DEBUG_FMT_ID "%#.12" HOST_WIDE_INT_PRINT "x: "
2320 #define DEBUG_FMT_S DEBUG_FMT_ID "%s\n"
2322 /* Print various interesting information with -mdebug=reg. */
2323 static void
2324 rs6000_debug_reg_global (void)
2326 static const char *const tf[2] = { "false", "true" };
2327 const char *nl = (const char *)0;
2328 int m;
2329 size_t m1, m2, v;
2330 char costly_num[20];
2331 char nop_num[20];
2332 char flags_buffer[40];
2333 const char *costly_str;
2334 const char *nop_str;
2335 const char *trace_str;
2336 const char *abi_str;
2337 const char *cmodel_str;
2338 struct cl_target_option cl_opts;
2340 /* Modes we want tieable information on. */
2341 static const machine_mode print_tieable_modes[] = {
2342 QImode,
2343 HImode,
2344 SImode,
2345 DImode,
2346 TImode,
2347 PTImode,
2348 SFmode,
2349 DFmode,
2350 TFmode,
2351 IFmode,
2352 KFmode,
2353 SDmode,
2354 DDmode,
2355 TDmode,
2356 V8QImode,
2357 V4HImode,
2358 V2SImode,
2359 V16QImode,
2360 V8HImode,
2361 V4SImode,
2362 V2DImode,
2363 V1TImode,
2364 V32QImode,
2365 V16HImode,
2366 V8SImode,
2367 V4DImode,
2368 V2TImode,
2369 V2SFmode,
2370 V4SFmode,
2371 V2DFmode,
2372 V8SFmode,
2373 V4DFmode,
2374 CCmode,
2375 CCUNSmode,
2376 CCEQmode,
2379 /* Virtual regs we are interested in. */
2380 const static struct {
2381 int regno; /* register number. */
2382 const char *name; /* register name. */
2383 } virtual_regs[] = {
2384 { STACK_POINTER_REGNUM, "stack pointer:" },
2385 { TOC_REGNUM, "toc: " },
2386 { STATIC_CHAIN_REGNUM, "static chain: " },
2387 { RS6000_PIC_OFFSET_TABLE_REGNUM, "pic offset: " },
2388 { HARD_FRAME_POINTER_REGNUM, "hard frame: " },
2389 { ARG_POINTER_REGNUM, "arg pointer: " },
2390 { FRAME_POINTER_REGNUM, "frame pointer:" },
2391 { FIRST_PSEUDO_REGISTER, "first pseudo: " },
2392 { FIRST_VIRTUAL_REGISTER, "first virtual:" },
2393 { VIRTUAL_INCOMING_ARGS_REGNUM, "incoming_args:" },
2394 { VIRTUAL_STACK_VARS_REGNUM, "stack_vars: " },
2395 { VIRTUAL_STACK_DYNAMIC_REGNUM, "stack_dynamic:" },
2396 { VIRTUAL_OUTGOING_ARGS_REGNUM, "outgoing_args:" },
2397 { VIRTUAL_CFA_REGNUM, "cfa (frame): " },
 2398 { VIRTUAL_PREFERRED_STACK_BOUNDARY_REGNUM, "stack boundary:" },
2399 { LAST_VIRTUAL_REGISTER, "last virtual: " },
2402 fputs ("\nHard register information:\n", stderr);
2403 rs6000_debug_reg_print (FIRST_GPR_REGNO, LAST_GPR_REGNO, "gr");
2404 rs6000_debug_reg_print (FIRST_FPR_REGNO, LAST_FPR_REGNO, "fp");
2405 rs6000_debug_reg_print (FIRST_ALTIVEC_REGNO,
2406 LAST_ALTIVEC_REGNO,
2407 "vs");
2408 rs6000_debug_reg_print (LR_REGNO, LR_REGNO, "lr");
2409 rs6000_debug_reg_print (CTR_REGNO, CTR_REGNO, "ctr");
2410 rs6000_debug_reg_print (CR0_REGNO, CR7_REGNO, "cr");
2411 rs6000_debug_reg_print (CA_REGNO, CA_REGNO, "ca");
2412 rs6000_debug_reg_print (VRSAVE_REGNO, VRSAVE_REGNO, "vrsave");
2413 rs6000_debug_reg_print (VSCR_REGNO, VSCR_REGNO, "vscr");
2414 rs6000_debug_reg_print (SPE_ACC_REGNO, SPE_ACC_REGNO, "spe_a");
2415 rs6000_debug_reg_print (SPEFSCR_REGNO, SPEFSCR_REGNO, "spe_f");
2417 fputs ("\nVirtual/stack/frame registers:\n", stderr);
2418 for (v = 0; v < ARRAY_SIZE (virtual_regs); v++)
2419 fprintf (stderr, "%s regno = %3d\n", virtual_regs[v].name, virtual_regs[v].regno);
2421 fprintf (stderr,
2422 "\n"
2423 "d reg_class = %s\n"
2424 "f reg_class = %s\n"
2425 "v reg_class = %s\n"
2426 "wa reg_class = %s\n"
2427 "wb reg_class = %s\n"
2428 "wd reg_class = %s\n"
2429 "we reg_class = %s\n"
2430 "wf reg_class = %s\n"
2431 "wg reg_class = %s\n"
2432 "wh reg_class = %s\n"
2433 "wi reg_class = %s\n"
2434 "wj reg_class = %s\n"
2435 "wk reg_class = %s\n"
2436 "wl reg_class = %s\n"
2437 "wm reg_class = %s\n"
2438 "wo reg_class = %s\n"
2439 "wp reg_class = %s\n"
2440 "wq reg_class = %s\n"
2441 "wr reg_class = %s\n"
2442 "ws reg_class = %s\n"
2443 "wt reg_class = %s\n"
2444 "wu reg_class = %s\n"
2445 "wv reg_class = %s\n"
2446 "ww reg_class = %s\n"
2447 "wx reg_class = %s\n"
2448 "wy reg_class = %s\n"
2449 "wz reg_class = %s\n"
2450 "\n",
2451 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_d]],
2452 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_f]],
2453 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_v]],
2454 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wa]],
2455 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wb]],
2456 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wd]],
2457 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_we]],
2458 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wf]],
2459 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wg]],
2460 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wh]],
2461 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wi]],
2462 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wj]],
2463 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wk]],
2464 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wl]],
2465 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wm]],
2466 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wo]],
2467 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wp]],
2468 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wq]],
2469 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wr]],
2470 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_ws]],
2471 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wt]],
2472 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wu]],
2473 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wv]],
2474 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_ww]],
2475 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wx]],
2476 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wy]],
2477 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wz]]);
2479 nl = "\n";
2480 for (m = 0; m < NUM_MACHINE_MODES; ++m)
2481 rs6000_debug_print_mode (m);
2483 fputs ("\n", stderr);
2485 for (m1 = 0; m1 < ARRAY_SIZE (print_tieable_modes); m1++)
2487 machine_mode mode1 = print_tieable_modes[m1];
2488 bool first_time = true;
2490 nl = (const char *)0;
2491 for (m2 = 0; m2 < ARRAY_SIZE (print_tieable_modes); m2++)
2493 machine_mode mode2 = print_tieable_modes[m2];
2494 if (mode1 != mode2 && MODES_TIEABLE_P (mode1, mode2))
2496 if (first_time)
2498 fprintf (stderr, "Tieable modes %s:", GET_MODE_NAME (mode1));
2499 nl = "\n";
2500 first_time = false;
2503 fprintf (stderr, " %s", GET_MODE_NAME (mode2));
2507 if (!first_time)
2508 fputs ("\n", stderr);
2511 if (nl)
2512 fputs (nl, stderr);
2514 if (rs6000_recip_control)
2516 fprintf (stderr, "\nReciprocal mask = 0x%x\n", rs6000_recip_control);
2518 for (m = 0; m < NUM_MACHINE_MODES; ++m)
2519 if (rs6000_recip_bits[m])
2521 fprintf (stderr,
2522 "Reciprocal estimate mode: %-5s divide: %s rsqrt: %s\n",
2523 GET_MODE_NAME (m),
2524 (RS6000_RECIP_AUTO_RE_P (m)
2525 ? "auto"
2526 : (RS6000_RECIP_HAVE_RE_P (m) ? "have" : "none")),
2527 (RS6000_RECIP_AUTO_RSQRTE_P (m)
2528 ? "auto"
2529 : (RS6000_RECIP_HAVE_RSQRTE_P (m) ? "have" : "none")));
2532 fputs ("\n", stderr);
2535 if (rs6000_cpu_index >= 0)
2537 const char *name = processor_target_table[rs6000_cpu_index].name;
2538 HOST_WIDE_INT flags
2539 = processor_target_table[rs6000_cpu_index].target_enable;
2541 sprintf (flags_buffer, "-mcpu=%s flags", name);
2542 rs6000_print_isa_options (stderr, 0, flags_buffer, flags);
2544 else
2545 fprintf (stderr, DEBUG_FMT_S, "cpu", "<none>");
2547 if (rs6000_tune_index >= 0)
2549 const char *name = processor_target_table[rs6000_tune_index].name;
2550 HOST_WIDE_INT flags
2551 = processor_target_table[rs6000_tune_index].target_enable;
2553 sprintf (flags_buffer, "-mtune=%s flags", name);
2554 rs6000_print_isa_options (stderr, 0, flags_buffer, flags);
2556 else
2557 fprintf (stderr, DEBUG_FMT_S, "tune", "<none>");
2559 cl_target_option_save (&cl_opts, &global_options);
2560 rs6000_print_isa_options (stderr, 0, "rs6000_isa_flags",
2561 rs6000_isa_flags);
2563 rs6000_print_isa_options (stderr, 0, "rs6000_isa_flags_explicit",
2564 rs6000_isa_flags_explicit);
2566 rs6000_print_builtin_options (stderr, 0, "rs6000_builtin_mask",
2567 rs6000_builtin_mask);
2569 rs6000_print_isa_options (stderr, 0, "TARGET_DEFAULT", TARGET_DEFAULT);
2571 fprintf (stderr, DEBUG_FMT_S, "--with-cpu default",
2572 OPTION_TARGET_CPU_DEFAULT ? OPTION_TARGET_CPU_DEFAULT : "<none>");
2574 switch (rs6000_sched_costly_dep)
2576 case max_dep_latency:
2577 costly_str = "max_dep_latency";
2578 break;
2580 case no_dep_costly:
2581 costly_str = "no_dep_costly";
2582 break;
2584 case all_deps_costly:
2585 costly_str = "all_deps_costly";
2586 break;
2588 case true_store_to_load_dep_costly:
2589 costly_str = "true_store_to_load_dep_costly";
2590 break;
2592 case store_to_load_dep_costly:
2593 costly_str = "store_to_load_dep_costly";
2594 break;
2596 default:
2597 costly_str = costly_num;
2598 sprintf (costly_num, "%d", (int)rs6000_sched_costly_dep);
2599 break;
2602 fprintf (stderr, DEBUG_FMT_S, "sched_costly_dep", costly_str);
2604 switch (rs6000_sched_insert_nops)
2606 case sched_finish_regroup_exact:
2607 nop_str = "sched_finish_regroup_exact";
2608 break;
2610 case sched_finish_pad_groups:
2611 nop_str = "sched_finish_pad_groups";
2612 break;
2614 case sched_finish_none:
2615 nop_str = "sched_finish_none";
2616 break;
2618 default:
2619 nop_str = nop_num;
2620 sprintf (nop_num, "%d", (int)rs6000_sched_insert_nops);
2621 break;
2624 fprintf (stderr, DEBUG_FMT_S, "sched_insert_nops", nop_str);
2626 switch (rs6000_sdata)
2628 default:
2629 case SDATA_NONE:
2630 break;
2632 case SDATA_DATA:
2633 fprintf (stderr, DEBUG_FMT_S, "sdata", "data");
2634 break;
2636 case SDATA_SYSV:
2637 fprintf (stderr, DEBUG_FMT_S, "sdata", "sysv");
2638 break;
2640 case SDATA_EABI:
2641 fprintf (stderr, DEBUG_FMT_S, "sdata", "eabi");
2642 break;
2646 switch (rs6000_traceback)
2648 case traceback_default: trace_str = "default"; break;
2649 case traceback_none: trace_str = "none"; break;
2650 case traceback_part: trace_str = "part"; break;
2651 case traceback_full: trace_str = "full"; break;
2652 default: trace_str = "unknown"; break;
2655 fprintf (stderr, DEBUG_FMT_S, "traceback", trace_str);
2657 switch (rs6000_current_cmodel)
2659 case CMODEL_SMALL: cmodel_str = "small"; break;
2660 case CMODEL_MEDIUM: cmodel_str = "medium"; break;
2661 case CMODEL_LARGE: cmodel_str = "large"; break;
2662 default: cmodel_str = "unknown"; break;
2665 fprintf (stderr, DEBUG_FMT_S, "cmodel", cmodel_str);
2667 switch (rs6000_current_abi)
2669 case ABI_NONE: abi_str = "none"; break;
2670 case ABI_AIX: abi_str = "aix"; break;
2671 case ABI_ELFv2: abi_str = "ELFv2"; break;
2672 case ABI_V4: abi_str = "V4"; break;
2673 case ABI_DARWIN: abi_str = "darwin"; break;
2674 default: abi_str = "unknown"; break;
2677 fprintf (stderr, DEBUG_FMT_S, "abi", abi_str);
2679 if (rs6000_altivec_abi)
2680 fprintf (stderr, DEBUG_FMT_S, "altivec_abi", "true");
2682 if (rs6000_spe_abi)
2683 fprintf (stderr, DEBUG_FMT_S, "spe_abi", "true");
2685 if (rs6000_darwin64_abi)
2686 fprintf (stderr, DEBUG_FMT_S, "darwin64_abi", "true");
2688 if (rs6000_float_gprs)
2689 fprintf (stderr, DEBUG_FMT_S, "float_gprs", "true");
2691 fprintf (stderr, DEBUG_FMT_S, "fprs",
2692 (TARGET_FPRS ? "true" : "false"));
2694 fprintf (stderr, DEBUG_FMT_S, "single_float",
2695 (TARGET_SINGLE_FLOAT ? "true" : "false"));
2697 fprintf (stderr, DEBUG_FMT_S, "double_float",
2698 (TARGET_DOUBLE_FLOAT ? "true" : "false"));
2700 fprintf (stderr, DEBUG_FMT_S, "soft_float",
2701 (TARGET_SOFT_FLOAT ? "true" : "false"));
2703 fprintf (stderr, DEBUG_FMT_S, "e500_single",
2704 (TARGET_E500_SINGLE ? "true" : "false"));
2706 fprintf (stderr, DEBUG_FMT_S, "e500_double",
2707 (TARGET_E500_DOUBLE ? "true" : "false"));
2709 if (TARGET_LINK_STACK)
2710 fprintf (stderr, DEBUG_FMT_S, "link_stack", "true");
2712 fprintf (stderr, DEBUG_FMT_S, "lra", TARGET_LRA ? "true" : "false");
2714 if (TARGET_P8_FUSION)
2716 char options[80];
2718 strcpy (options, (TARGET_P9_FUSION) ? "power9" : "power8");
2719 if (TARGET_TOC_FUSION)
2720 strcat (options, ", toc");
2722 if (TARGET_P8_FUSION_SIGN)
2723 strcat (options, ", sign");
2725 fprintf (stderr, DEBUG_FMT_S, "fusion", options);
2728 fprintf (stderr, DEBUG_FMT_S, "plt-format",
2729 TARGET_SECURE_PLT ? "secure" : "bss");
2730 fprintf (stderr, DEBUG_FMT_S, "struct-return",
2731 aix_struct_return ? "aix" : "sysv");
2732 fprintf (stderr, DEBUG_FMT_S, "always_hint", tf[!!rs6000_always_hint]);
2733 fprintf (stderr, DEBUG_FMT_S, "sched_groups", tf[!!rs6000_sched_groups]);
2734 fprintf (stderr, DEBUG_FMT_S, "align_branch",
2735 tf[!!rs6000_align_branch_targets]);
2736 fprintf (stderr, DEBUG_FMT_D, "tls_size", rs6000_tls_size);
2737 fprintf (stderr, DEBUG_FMT_D, "long_double_size",
2738 rs6000_long_double_type_size);
2739 fprintf (stderr, DEBUG_FMT_D, "sched_restricted_insns_priority",
2740 (int)rs6000_sched_restricted_insns_priority);
2741 fprintf (stderr, DEBUG_FMT_D, "Number of standard builtins",
2742 (int)END_BUILTINS);
2743 fprintf (stderr, DEBUG_FMT_D, "Number of rs6000 builtins",
2744 (int)RS6000_BUILTIN_COUNT);
2746 fprintf (stderr, DEBUG_FMT_D, "Enable float128 on VSX",
2747 (int)TARGET_FLOAT128_ENABLE_TYPE);
2749 if (TARGET_VSX)
2750 fprintf (stderr, DEBUG_FMT_D, "VSX easy 64-bit scalar element",
2751 (int)VECTOR_ELEMENT_SCALAR_64BIT);
2753 if (TARGET_DIRECT_MOVE_128)
2754 fprintf (stderr, DEBUG_FMT_D, "VSX easy 64-bit mfvsrld element",
2755 (int)VECTOR_ELEMENT_MFVSRLD_64BIT);
 2759 /* Update the addr mask bits in reg_addr to help secondary reload and the
 2760 legitimate address support figure out the appropriate addressing to
 2761 use. */
2763 static void
2764 rs6000_setup_reg_addr_masks (void)
2766 ssize_t rc, reg, m, nregs;
2767 addr_mask_type any_addr_mask, addr_mask;
2769 for (m = 0; m < NUM_MACHINE_MODES; ++m)
2771 machine_mode m2 = (machine_mode) m;
2772 bool complex_p = false;
2773 size_t msize;
2775 if (COMPLEX_MODE_P (m2))
2777 complex_p = true;
2778 m2 = GET_MODE_INNER (m2);
2781 msize = GET_MODE_SIZE (m2);
2783 /* SDmode is special in that we want to access it only via REG+REG
2784 addressing on power7 and above, since we want to use the LFIWZX and
 2785 STFIWX instructions to load and store it. */
2786 bool indexed_only_p = (m == SDmode && TARGET_NO_SDMODE_STACK);
2788 any_addr_mask = 0;
2789 for (rc = FIRST_RELOAD_REG_CLASS; rc <= LAST_RELOAD_REG_CLASS; rc++)
2791 addr_mask = 0;
2792 reg = reload_reg_map[rc].reg;
2794 /* Can mode values go in the GPR/FPR/Altivec registers? */
2795 if (reg >= 0 && rs6000_hard_regno_mode_ok_p[m][reg])
2797 nregs = rs6000_hard_regno_nregs[m][reg];
2798 addr_mask |= RELOAD_REG_VALID;
2800 /* Indicate if the mode takes more than 1 physical register. If
2801 it takes a single register, indicate it can do REG+REG
2802 addressing. */
2803 if (nregs > 1 || m == BLKmode || complex_p)
2804 addr_mask |= RELOAD_REG_MULTIPLE;
2805 else
2806 addr_mask |= RELOAD_REG_INDEXED;
2808 /* Figure out if we can do PRE_INC, PRE_DEC, or PRE_MODIFY
2809 addressing. Restrict addressing on SPE for 64-bit types
2810 because of the SUBREG hackery used to address 64-bit floats in
2811 '32-bit' GPRs. If we allow scalars into Altivec registers,
2812 don't allow PRE_INC, PRE_DEC, or PRE_MODIFY. */
2814 if (TARGET_UPDATE
2815 && (rc == RELOAD_REG_GPR || rc == RELOAD_REG_FPR)
2816 && msize <= 8
2817 && !VECTOR_MODE_P (m2)
2818 && !FLOAT128_VECTOR_P (m2)
2819 && !complex_p
2820 && (m2 != DFmode || !TARGET_UPPER_REGS_DF)
2821 && (m2 != SFmode || !TARGET_UPPER_REGS_SF)
2822 && !(TARGET_E500_DOUBLE && msize == 8))
2824 addr_mask |= RELOAD_REG_PRE_INCDEC;
2826 /* PRE_MODIFY is more restricted than PRE_INC/PRE_DEC in that
2827 we don't allow PRE_MODIFY for some multi-register
2828 operations. */
2829 switch (m)
2831 default:
2832 addr_mask |= RELOAD_REG_PRE_MODIFY;
2833 break;
2835 case DImode:
2836 if (TARGET_POWERPC64)
2837 addr_mask |= RELOAD_REG_PRE_MODIFY;
2838 break;
2840 case DFmode:
2841 case DDmode:
2842 if (TARGET_DF_INSN)
2843 addr_mask |= RELOAD_REG_PRE_MODIFY;
2844 break;
2849 /* GPR and FPR registers can do REG+OFFSET addressing, except
2850 possibly for SDmode. ISA 3.0 (i.e. power9) adds D-form addressing
2851 for 64-bit scalars and 32-bit SFmode to altivec registers. */
2852 if ((addr_mask != 0) && !indexed_only_p
2853 && msize <= 8
2854 && (rc == RELOAD_REG_GPR
2855 || ((msize == 8 || m2 == SFmode)
2856 && (rc == RELOAD_REG_FPR
2857 || (rc == RELOAD_REG_VMX
2858 && TARGET_P9_DFORM_SCALAR)))))
2859 addr_mask |= RELOAD_REG_OFFSET;
 2861 /* VSX registers can do REG+OFFSET addressing if ISA 3.0
2862 instructions are enabled. The offset for 128-bit VSX registers is
2863 only 12-bits. While GPRs can handle the full offset range, VSX
2864 registers can only handle the restricted range. */
2865 else if ((addr_mask != 0) && !indexed_only_p
2866 && msize == 16 && TARGET_P9_DFORM_VECTOR
2867 && (ALTIVEC_OR_VSX_VECTOR_MODE (m2)
2868 || (m2 == TImode && TARGET_VSX_TIMODE)))
2870 addr_mask |= RELOAD_REG_OFFSET;
2871 if (rc == RELOAD_REG_FPR || rc == RELOAD_REG_VMX)
2872 addr_mask |= RELOAD_REG_QUAD_OFFSET;
2875 /* VMX registers can do (REG & -16) and ((REG+REG) & -16)
2876 addressing on 128-bit types. */
2877 if (rc == RELOAD_REG_VMX && msize == 16
2878 && (addr_mask & RELOAD_REG_VALID) != 0)
2879 addr_mask |= RELOAD_REG_AND_M16;
2881 reg_addr[m].addr_mask[rc] = addr_mask;
2882 any_addr_mask |= addr_mask;
2885 reg_addr[m].addr_mask[RELOAD_REG_ANY] = any_addr_mask;
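/* As an illustration, on a 64-bit power8-style configuration SImode in
   the GPR class ends up with

     RELOAD_REG_VALID | RELOAD_REG_INDEXED | RELOAD_REG_OFFSET
     | RELOAD_REG_PRE_INCDEC | RELOAD_REG_PRE_MODIFY

   ("v io++" in -mdebug=reg output), while V2DFmode in the GPR class gets
   only RELOAD_REG_VALID | RELOAD_REG_MULTIPLE: it needs two GPRs, so it
   qualifies for neither indexed nor offset addressing there.  */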
2890 /* Initialize the various global tables that are based on register size. */
2891 static void
2892 rs6000_init_hard_regno_mode_ok (bool global_init_p)
2894 ssize_t r, m, c;
2895 int align64;
2896 int align32;
2898 /* Precalculate REGNO_REG_CLASS. */
2899 rs6000_regno_regclass[0] = GENERAL_REGS;
2900 for (r = 1; r < 32; ++r)
2901 rs6000_regno_regclass[r] = BASE_REGS;
2903 for (r = 32; r < 64; ++r)
2904 rs6000_regno_regclass[r] = FLOAT_REGS;
2906 for (r = 64; r < FIRST_PSEUDO_REGISTER; ++r)
2907 rs6000_regno_regclass[r] = NO_REGS;
2909 for (r = FIRST_ALTIVEC_REGNO; r <= LAST_ALTIVEC_REGNO; ++r)
2910 rs6000_regno_regclass[r] = ALTIVEC_REGS;
2912 rs6000_regno_regclass[CR0_REGNO] = CR0_REGS;
2913 for (r = CR1_REGNO; r <= CR7_REGNO; ++r)
2914 rs6000_regno_regclass[r] = CR_REGS;
2916 rs6000_regno_regclass[LR_REGNO] = LINK_REGS;
2917 rs6000_regno_regclass[CTR_REGNO] = CTR_REGS;
2918 rs6000_regno_regclass[CA_REGNO] = NO_REGS;
2919 rs6000_regno_regclass[VRSAVE_REGNO] = VRSAVE_REGS;
2920 rs6000_regno_regclass[VSCR_REGNO] = VRSAVE_REGS;
2921 rs6000_regno_regclass[SPE_ACC_REGNO] = SPE_ACC_REGS;
2922 rs6000_regno_regclass[SPEFSCR_REGNO] = SPEFSCR_REGS;
2923 rs6000_regno_regclass[TFHAR_REGNO] = SPR_REGS;
2924 rs6000_regno_regclass[TFIAR_REGNO] = SPR_REGS;
2925 rs6000_regno_regclass[TEXASR_REGNO] = SPR_REGS;
2926 rs6000_regno_regclass[ARG_POINTER_REGNUM] = BASE_REGS;
2927 rs6000_regno_regclass[FRAME_POINTER_REGNUM] = BASE_REGS;
2929 /* Precalculate register class to simpler reload register class. We don't
2930 need all of the register classes that are combinations of different
2931 classes, just the simple ones that have constraint letters. */
2932 for (c = 0; c < N_REG_CLASSES; c++)
2933 reg_class_to_reg_type[c] = NO_REG_TYPE;
2935 reg_class_to_reg_type[(int)GENERAL_REGS] = GPR_REG_TYPE;
2936 reg_class_to_reg_type[(int)BASE_REGS] = GPR_REG_TYPE;
2937 reg_class_to_reg_type[(int)VSX_REGS] = VSX_REG_TYPE;
2938 reg_class_to_reg_type[(int)VRSAVE_REGS] = SPR_REG_TYPE;
2939 reg_class_to_reg_type[(int)VSCR_REGS] = SPR_REG_TYPE;
2940 reg_class_to_reg_type[(int)LINK_REGS] = SPR_REG_TYPE;
2941 reg_class_to_reg_type[(int)CTR_REGS] = SPR_REG_TYPE;
2942 reg_class_to_reg_type[(int)LINK_OR_CTR_REGS] = SPR_REG_TYPE;
2943 reg_class_to_reg_type[(int)CR_REGS] = CR_REG_TYPE;
2944 reg_class_to_reg_type[(int)CR0_REGS] = CR_REG_TYPE;
2945 reg_class_to_reg_type[(int)SPE_ACC_REGS] = SPE_ACC_TYPE;
2946 reg_class_to_reg_type[(int)SPEFSCR_REGS] = SPEFSCR_REG_TYPE;
2948 if (TARGET_VSX)
2950 reg_class_to_reg_type[(int)FLOAT_REGS] = VSX_REG_TYPE;
2951 reg_class_to_reg_type[(int)ALTIVEC_REGS] = VSX_REG_TYPE;
2953 else
2955 reg_class_to_reg_type[(int)FLOAT_REGS] = FPR_REG_TYPE;
2956 reg_class_to_reg_type[(int)ALTIVEC_REGS] = ALTIVEC_REG_TYPE;
 2959 /* Precalculate the valid memory formats as well as the vector information;
2960 this must be set up before the rs6000_hard_regno_nregs_internal calls
2961 below. */
2962 gcc_assert ((int)VECTOR_NONE == 0);
2963 memset ((void *) &rs6000_vector_unit[0], '\0', sizeof (rs6000_vector_unit));
2964 memset ((void *) &rs6000_vector_mem[0], '\0', sizeof (rs6000_vector_unit));
2966 gcc_assert ((int)CODE_FOR_nothing == 0);
2967 memset ((void *) &reg_addr[0], '\0', sizeof (reg_addr));
2969 gcc_assert ((int)NO_REGS == 0);
2970 memset ((void *) &rs6000_constraints[0], '\0', sizeof (rs6000_constraints));
 2972 /* The VSX hardware allows native alignment for vectors, but we control whether
 2973 the compiler believes it can use native alignment or must still use 128-bit alignment. */
2974 if (TARGET_VSX && !TARGET_VSX_ALIGN_128)
2976 align64 = 64;
2977 align32 = 32;
2979 else
2981 align64 = 128;
2982 align32 = 128;
2985 /* KF mode (IEEE 128-bit in VSX registers). We do not have arithmetic, so
2986 only set the memory modes. Include TFmode if -mabi=ieeelongdouble. */
2987 if (TARGET_FLOAT128_TYPE)
2989 rs6000_vector_mem[KFmode] = VECTOR_VSX;
2990 rs6000_vector_align[KFmode] = 128;
2992 if (FLOAT128_IEEE_P (TFmode))
2994 rs6000_vector_mem[TFmode] = VECTOR_VSX;
2995 rs6000_vector_align[TFmode] = 128;
2999 /* V2DF mode, VSX only. */
3000 if (TARGET_VSX)
3002 rs6000_vector_unit[V2DFmode] = VECTOR_VSX;
3003 rs6000_vector_mem[V2DFmode] = VECTOR_VSX;
3004 rs6000_vector_align[V2DFmode] = align64;
3007 /* V4SF mode, either VSX or Altivec. */
3008 if (TARGET_VSX)
3010 rs6000_vector_unit[V4SFmode] = VECTOR_VSX;
3011 rs6000_vector_mem[V4SFmode] = VECTOR_VSX;
3012 rs6000_vector_align[V4SFmode] = align32;
3014 else if (TARGET_ALTIVEC)
3016 rs6000_vector_unit[V4SFmode] = VECTOR_ALTIVEC;
3017 rs6000_vector_mem[V4SFmode] = VECTOR_ALTIVEC;
3018 rs6000_vector_align[V4SFmode] = align32;
3021 /* V16QImode, V8HImode, V4SImode are Altivec only, but possibly do VSX loads
3022 and stores. */
3023 if (TARGET_ALTIVEC)
3025 rs6000_vector_unit[V4SImode] = VECTOR_ALTIVEC;
3026 rs6000_vector_unit[V8HImode] = VECTOR_ALTIVEC;
3027 rs6000_vector_unit[V16QImode] = VECTOR_ALTIVEC;
3028 rs6000_vector_align[V4SImode] = align32;
3029 rs6000_vector_align[V8HImode] = align32;
3030 rs6000_vector_align[V16QImode] = align32;
3032 if (TARGET_VSX)
3034 rs6000_vector_mem[V4SImode] = VECTOR_VSX;
3035 rs6000_vector_mem[V8HImode] = VECTOR_VSX;
3036 rs6000_vector_mem[V16QImode] = VECTOR_VSX;
3038 else
3040 rs6000_vector_mem[V4SImode] = VECTOR_ALTIVEC;
3041 rs6000_vector_mem[V8HImode] = VECTOR_ALTIVEC;
3042 rs6000_vector_mem[V16QImode] = VECTOR_ALTIVEC;
3046 /* V2DImode, full mode depends on ISA 2.07 vector mode. Allow under VSX to
3047 do insert/splat/extract. Altivec doesn't have 64-bit integer support. */
3048 if (TARGET_VSX)
3050 rs6000_vector_mem[V2DImode] = VECTOR_VSX;
3051 rs6000_vector_unit[V2DImode]
3052 = (TARGET_P8_VECTOR) ? VECTOR_P8_VECTOR : VECTOR_NONE;
3053 rs6000_vector_align[V2DImode] = align64;
3055 rs6000_vector_mem[V1TImode] = VECTOR_VSX;
3056 rs6000_vector_unit[V1TImode]
3057 = (TARGET_P8_VECTOR) ? VECTOR_P8_VECTOR : VECTOR_NONE;
3058 rs6000_vector_align[V1TImode] = 128;
3061 /* DFmode, see if we want to use the VSX unit. Memory is handled
3062 differently, so don't set rs6000_vector_mem. */
3063 if (TARGET_VSX && TARGET_VSX_SCALAR_DOUBLE)
3065 rs6000_vector_unit[DFmode] = VECTOR_VSX;
3066 rs6000_vector_align[DFmode] = 64;
3069 /* SFmode, see if we want to use the VSX unit. */
3070 if (TARGET_P8_VECTOR && TARGET_VSX_SCALAR_FLOAT)
3072 rs6000_vector_unit[SFmode] = VECTOR_VSX;
3073 rs6000_vector_align[SFmode] = 32;
3076 /* Allow TImode in VSX register and set the VSX memory macros. */
3077 if (TARGET_VSX && TARGET_VSX_TIMODE)
3079 rs6000_vector_mem[TImode] = VECTOR_VSX;
3080 rs6000_vector_align[TImode] = align64;
3083 /* TODO add SPE and paired floating point vector support. */
3085 /* Register class constraints for the constraints that depend on compile
3086 switches. When the VSX code was added, different constraints were added
3087 based on the type (DFmode, V2DFmode, V4SFmode). For the vector types, all
3088 of the VSX registers are used. The register classes for scalar floating
 3089 point types are set based on whether we allow that type into the upper
3090 (Altivec) registers. GCC has register classes to target the Altivec
3091 registers for load/store operations, to select using a VSX memory
3092 operation instead of the traditional floating point operation. The
3093 constraints are:
3095 d - Register class to use with traditional DFmode instructions.
3096 f - Register class to use with traditional SFmode instructions.
3097 v - Altivec register.
3098 wa - Any VSX register.
3099 wc - Reserved to represent individual CR bits (used in LLVM).
3100 wd - Preferred register class for V2DFmode.
3101 wf - Preferred register class for V4SFmode.
3102 wg - Float register for power6x move insns.
3103 wh - FP register for direct move instructions.
3104 wi - FP or VSX register to hold 64-bit integers for VSX insns.
3105 wj - FP or VSX register to hold 64-bit integers for direct moves.
3106 wk - FP or VSX register to hold 64-bit doubles for direct moves.
3107 wl - Float register if we can do 32-bit signed int loads.
3108 wm - VSX register for ISA 2.07 direct move operations.
3109 wn - always NO_REGS.
3110 wr - GPR if 64-bit mode is permitted.
3111 ws - Register class to do ISA 2.06 DF operations.
3112 wt - VSX register for TImode in VSX registers.
3113 wu - Altivec register for ISA 2.07 VSX SF/SI load/stores.
3114 wv - Altivec register for ISA 2.06 VSX DF/DI load/stores.
3115 ww - Register class to do SF conversions in with VSX operations.
3116 wx - Float register if we can do 32-bit int stores.
3117 wy - Register class to do ISA 2.07 SF operations.
3118 wz - Float register if we can do 32-bit unsigned int loads. */
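/* These letters are what the machine description consumes; a hypothetical
   pattern fragment (not from this file) would look like

     (define_insn "*movdf_hardfloat64"
       [(set (match_operand:DF 0 "nonimmediate_operand" "=d,m,wv")
             (match_operand:DF 1 "input_operand"         "m,d,wv"))]
       ...)

   where "d" matches only FPRs and "wv" matches the AltiVec half of the
   VSX registers only when DFmode is allowed there.  */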
3120 if (TARGET_HARD_FLOAT && TARGET_FPRS)
3121 rs6000_constraints[RS6000_CONSTRAINT_f] = FLOAT_REGS; /* SFmode */
3123 if (TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT)
3124 rs6000_constraints[RS6000_CONSTRAINT_d] = FLOAT_REGS; /* DFmode */
3126 if (TARGET_VSX)
3128 rs6000_constraints[RS6000_CONSTRAINT_wa] = VSX_REGS;
3129 rs6000_constraints[RS6000_CONSTRAINT_wd] = VSX_REGS; /* V2DFmode */
3130 rs6000_constraints[RS6000_CONSTRAINT_wf] = VSX_REGS; /* V4SFmode */
3132 if (TARGET_VSX_TIMODE)
3133 rs6000_constraints[RS6000_CONSTRAINT_wt] = VSX_REGS; /* TImode */
3135 if (TARGET_UPPER_REGS_DF) /* DFmode */
3137 rs6000_constraints[RS6000_CONSTRAINT_ws] = VSX_REGS;
3138 rs6000_constraints[RS6000_CONSTRAINT_wv] = ALTIVEC_REGS;
3140 else
3141 rs6000_constraints[RS6000_CONSTRAINT_ws] = FLOAT_REGS;
3143 if (TARGET_UPPER_REGS_DF) /* DImode */
3144 rs6000_constraints[RS6000_CONSTRAINT_wi] = VSX_REGS;
3145 else
3146 rs6000_constraints[RS6000_CONSTRAINT_wi] = FLOAT_REGS;
3149 /* Add conditional constraints based on various options, to allow us to
3150 collapse multiple insn patterns. */
3151 if (TARGET_ALTIVEC)
3152 rs6000_constraints[RS6000_CONSTRAINT_v] = ALTIVEC_REGS;
3154 if (TARGET_MFPGPR) /* DFmode */
3155 rs6000_constraints[RS6000_CONSTRAINT_wg] = FLOAT_REGS;
3157 if (TARGET_LFIWAX)
3158 rs6000_constraints[RS6000_CONSTRAINT_wl] = FLOAT_REGS; /* DImode */
3160 if (TARGET_DIRECT_MOVE)
3162 rs6000_constraints[RS6000_CONSTRAINT_wh] = FLOAT_REGS;
3163 rs6000_constraints[RS6000_CONSTRAINT_wj] /* DImode */
3164 = rs6000_constraints[RS6000_CONSTRAINT_wi];
3165 rs6000_constraints[RS6000_CONSTRAINT_wk] /* DFmode */
3166 = rs6000_constraints[RS6000_CONSTRAINT_ws];
3167 rs6000_constraints[RS6000_CONSTRAINT_wm] = VSX_REGS;
3170 if (TARGET_POWERPC64)
3171 rs6000_constraints[RS6000_CONSTRAINT_wr] = GENERAL_REGS;
3173 if (TARGET_P8_VECTOR && TARGET_UPPER_REGS_SF) /* SFmode */
3175 rs6000_constraints[RS6000_CONSTRAINT_wu] = ALTIVEC_REGS;
3176 rs6000_constraints[RS6000_CONSTRAINT_wy] = VSX_REGS;
3177 rs6000_constraints[RS6000_CONSTRAINT_ww] = VSX_REGS;
3179 else if (TARGET_P8_VECTOR)
3181 rs6000_constraints[RS6000_CONSTRAINT_wy] = FLOAT_REGS;
3182 rs6000_constraints[RS6000_CONSTRAINT_ww] = FLOAT_REGS;
3184 else if (TARGET_VSX)
3185 rs6000_constraints[RS6000_CONSTRAINT_ww] = FLOAT_REGS;
3187 if (TARGET_STFIWX)
3188 rs6000_constraints[RS6000_CONSTRAINT_wx] = FLOAT_REGS; /* DImode */
3190 if (TARGET_LFIWZX)
3191 rs6000_constraints[RS6000_CONSTRAINT_wz] = FLOAT_REGS; /* DImode */
3193 if (TARGET_FLOAT128_TYPE)
3195 rs6000_constraints[RS6000_CONSTRAINT_wq] = VSX_REGS; /* KFmode */
3196 if (FLOAT128_IEEE_P (TFmode))
3197 rs6000_constraints[RS6000_CONSTRAINT_wp] = VSX_REGS; /* TFmode */
3200 /* Support for new D-form instructions. */
3201 if (TARGET_P9_DFORM_SCALAR)
3202 rs6000_constraints[RS6000_CONSTRAINT_wb] = ALTIVEC_REGS;
3204 /* Support for ISA 3.0 (power9) vectors. */
3205 if (TARGET_P9_VECTOR)
3206 rs6000_constraints[RS6000_CONSTRAINT_wo] = VSX_REGS;
3208 /* Support for new direct moves (ISA 3.0 + 64bit). */
3209 if (TARGET_DIRECT_MOVE_128)
3210 rs6000_constraints[RS6000_CONSTRAINT_we] = VSX_REGS;
3212 /* Set up the reload helper and direct move functions. */
3213 if (TARGET_VSX || TARGET_ALTIVEC)
3215 if (TARGET_64BIT)
3217 reg_addr[V16QImode].reload_store = CODE_FOR_reload_v16qi_di_store;
3218 reg_addr[V16QImode].reload_load = CODE_FOR_reload_v16qi_di_load;
3219 reg_addr[V8HImode].reload_store = CODE_FOR_reload_v8hi_di_store;
3220 reg_addr[V8HImode].reload_load = CODE_FOR_reload_v8hi_di_load;
3221 reg_addr[V4SImode].reload_store = CODE_FOR_reload_v4si_di_store;
3222 reg_addr[V4SImode].reload_load = CODE_FOR_reload_v4si_di_load;
3223 reg_addr[V2DImode].reload_store = CODE_FOR_reload_v2di_di_store;
3224 reg_addr[V2DImode].reload_load = CODE_FOR_reload_v2di_di_load;
3225 reg_addr[V1TImode].reload_store = CODE_FOR_reload_v1ti_di_store;
3226 reg_addr[V1TImode].reload_load = CODE_FOR_reload_v1ti_di_load;
3227 reg_addr[V4SFmode].reload_store = CODE_FOR_reload_v4sf_di_store;
3228 reg_addr[V4SFmode].reload_load = CODE_FOR_reload_v4sf_di_load;
3229 reg_addr[V2DFmode].reload_store = CODE_FOR_reload_v2df_di_store;
3230 reg_addr[V2DFmode].reload_load = CODE_FOR_reload_v2df_di_load;
3231 reg_addr[DFmode].reload_store = CODE_FOR_reload_df_di_store;
3232 reg_addr[DFmode].reload_load = CODE_FOR_reload_df_di_load;
3233 reg_addr[DDmode].reload_store = CODE_FOR_reload_dd_di_store;
3234 reg_addr[DDmode].reload_load = CODE_FOR_reload_dd_di_load;
3235 reg_addr[SFmode].reload_store = CODE_FOR_reload_sf_di_store;
3236 reg_addr[SFmode].reload_load = CODE_FOR_reload_sf_di_load;
3238 if (FLOAT128_VECTOR_P (KFmode))
3240 reg_addr[KFmode].reload_store = CODE_FOR_reload_kf_di_store;
3241 reg_addr[KFmode].reload_load = CODE_FOR_reload_kf_di_load;
3244 if (FLOAT128_VECTOR_P (TFmode))
3246 reg_addr[TFmode].reload_store = CODE_FOR_reload_tf_di_store;
3247 reg_addr[TFmode].reload_load = CODE_FOR_reload_tf_di_load;
3250 /* Only provide a reload handler for SDmode if lfiwzx/stfiwx are
3251 available. */
3252 if (TARGET_NO_SDMODE_STACK)
3254 reg_addr[SDmode].reload_store = CODE_FOR_reload_sd_di_store;
3255 reg_addr[SDmode].reload_load = CODE_FOR_reload_sd_di_load;
3258 if (TARGET_VSX_TIMODE)
3260 reg_addr[TImode].reload_store = CODE_FOR_reload_ti_di_store;
3261 reg_addr[TImode].reload_load = CODE_FOR_reload_ti_di_load;
3264 if (TARGET_DIRECT_MOVE && !TARGET_DIRECT_MOVE_128)
3266 reg_addr[TImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxti;
3267 reg_addr[V1TImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv1ti;
3268 reg_addr[V2DFmode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv2df;
3269 reg_addr[V2DImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv2di;
3270 reg_addr[V4SFmode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv4sf;
3271 reg_addr[V4SImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv4si;
3272 reg_addr[V8HImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv8hi;
3273 reg_addr[V16QImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv16qi;
3274 reg_addr[SFmode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxsf;
3276 reg_addr[TImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprti;
3277 reg_addr[V1TImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv1ti;
3278 reg_addr[V2DFmode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv2df;
3279 reg_addr[V2DImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv2di;
3280 reg_addr[V4SFmode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv4sf;
3281 reg_addr[V4SImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv4si;
3282 reg_addr[V8HImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv8hi;
3283 reg_addr[V16QImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv16qi;
3284 reg_addr[SFmode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprsf;
3286 if (FLOAT128_VECTOR_P (KFmode))
3288 reg_addr[KFmode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxkf;
3289 reg_addr[KFmode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprkf;
3292 if (FLOAT128_VECTOR_P (TFmode))
3294 reg_addr[TFmode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxtf;
3295 reg_addr[TFmode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprtf;
3299 else
3301 reg_addr[V16QImode].reload_store = CODE_FOR_reload_v16qi_si_store;
3302 reg_addr[V16QImode].reload_load = CODE_FOR_reload_v16qi_si_load;
3303 reg_addr[V8HImode].reload_store = CODE_FOR_reload_v8hi_si_store;
3304 reg_addr[V8HImode].reload_load = CODE_FOR_reload_v8hi_si_load;
3305 reg_addr[V4SImode].reload_store = CODE_FOR_reload_v4si_si_store;
3306 reg_addr[V4SImode].reload_load = CODE_FOR_reload_v4si_si_load;
3307 reg_addr[V2DImode].reload_store = CODE_FOR_reload_v2di_si_store;
3308 reg_addr[V2DImode].reload_load = CODE_FOR_reload_v2di_si_load;
3309 reg_addr[V1TImode].reload_store = CODE_FOR_reload_v1ti_si_store;
3310 reg_addr[V1TImode].reload_load = CODE_FOR_reload_v1ti_si_load;
3311 reg_addr[V4SFmode].reload_store = CODE_FOR_reload_v4sf_si_store;
3312 reg_addr[V4SFmode].reload_load = CODE_FOR_reload_v4sf_si_load;
3313 reg_addr[V2DFmode].reload_store = CODE_FOR_reload_v2df_si_store;
3314 reg_addr[V2DFmode].reload_load = CODE_FOR_reload_v2df_si_load;
3315 reg_addr[DFmode].reload_store = CODE_FOR_reload_df_si_store;
3316 reg_addr[DFmode].reload_load = CODE_FOR_reload_df_si_load;
3317 reg_addr[DDmode].reload_store = CODE_FOR_reload_dd_si_store;
3318 reg_addr[DDmode].reload_load = CODE_FOR_reload_dd_si_load;
3319 reg_addr[SFmode].reload_store = CODE_FOR_reload_sf_si_store;
3320 reg_addr[SFmode].reload_load = CODE_FOR_reload_sf_si_load;
3322 if (FLOAT128_VECTOR_P (KFmode))
3324 reg_addr[KFmode].reload_store = CODE_FOR_reload_kf_si_store;
3325 reg_addr[KFmode].reload_load = CODE_FOR_reload_kf_si_load;
3328 if (FLOAT128_IEEE_P (TFmode))
3330 reg_addr[TFmode].reload_store = CODE_FOR_reload_tf_si_store;
3331 reg_addr[TFmode].reload_load = CODE_FOR_reload_tf_si_load;
3334 /* Only provide a reload handler for SDmode if lfiwzx/stfiwx are
3335 available. */
3336 if (TARGET_NO_SDMODE_STACK)
3338 reg_addr[SDmode].reload_store = CODE_FOR_reload_sd_si_store;
3339 reg_addr[SDmode].reload_load = CODE_FOR_reload_sd_si_load;
3342 if (TARGET_VSX_TIMODE)
3344 reg_addr[TImode].reload_store = CODE_FOR_reload_ti_si_store;
3345 reg_addr[TImode].reload_load = CODE_FOR_reload_ti_si_load;
3348 if (TARGET_DIRECT_MOVE)
3350 reg_addr[DImode].reload_fpr_gpr = CODE_FOR_reload_fpr_from_gprdi;
3351 reg_addr[DDmode].reload_fpr_gpr = CODE_FOR_reload_fpr_from_gprdd;
3352 reg_addr[DFmode].reload_fpr_gpr = CODE_FOR_reload_fpr_from_gprdf;
3356 if (TARGET_UPPER_REGS_DF)
3357 reg_addr[DFmode].scalar_in_vmx_p = true;
3359 if (TARGET_UPPER_REGS_DI)
3360 reg_addr[DImode].scalar_in_vmx_p = true;
3362 if (TARGET_UPPER_REGS_SF)
3363 reg_addr[SFmode].scalar_in_vmx_p = true;
3366 /* Setup the fusion operations. */
3367 if (TARGET_P8_FUSION)
3369 reg_addr[QImode].fusion_gpr_ld = CODE_FOR_fusion_gpr_load_qi;
3370 reg_addr[HImode].fusion_gpr_ld = CODE_FOR_fusion_gpr_load_hi;
3371 reg_addr[SImode].fusion_gpr_ld = CODE_FOR_fusion_gpr_load_si;
3372 if (TARGET_64BIT)
3373 reg_addr[DImode].fusion_gpr_ld = CODE_FOR_fusion_gpr_load_di;
3376 if (TARGET_P9_FUSION)
3378 struct fuse_insns {
3379 enum machine_mode mode; /* mode of the fused type. */
3380 enum machine_mode pmode; /* pointer mode. */
3381 enum rs6000_reload_reg_type rtype; /* register type. */
3382 enum insn_code load; /* load insn. */
3383 enum insn_code store; /* store insn. */
3386 static const struct fuse_insns addis_insns[] = {
3387 { SFmode, DImode, RELOAD_REG_FPR,
3388 CODE_FOR_fusion_fpr_di_sf_load,
3389 CODE_FOR_fusion_fpr_di_sf_store },
3391 { SFmode, SImode, RELOAD_REG_FPR,
3392 CODE_FOR_fusion_fpr_si_sf_load,
3393 CODE_FOR_fusion_fpr_si_sf_store },
3395 { DFmode, DImode, RELOAD_REG_FPR,
3396 CODE_FOR_fusion_fpr_di_df_load,
3397 CODE_FOR_fusion_fpr_di_df_store },
3399 { DFmode, SImode, RELOAD_REG_FPR,
3400 CODE_FOR_fusion_fpr_si_df_load,
3401 CODE_FOR_fusion_fpr_si_df_store },
3403 { DImode, DImode, RELOAD_REG_FPR,
3404 CODE_FOR_fusion_fpr_di_di_load,
3405 CODE_FOR_fusion_fpr_di_di_store },
3407 { DImode, SImode, RELOAD_REG_FPR,
3408 CODE_FOR_fusion_fpr_si_di_load,
3409 CODE_FOR_fusion_fpr_si_di_store },
3411 { QImode, DImode, RELOAD_REG_GPR,
3412 CODE_FOR_fusion_gpr_di_qi_load,
3413 CODE_FOR_fusion_gpr_di_qi_store },
3415 { QImode, SImode, RELOAD_REG_GPR,
3416 CODE_FOR_fusion_gpr_si_qi_load,
3417 CODE_FOR_fusion_gpr_si_qi_store },
3419 { HImode, DImode, RELOAD_REG_GPR,
3420 CODE_FOR_fusion_gpr_di_hi_load,
3421 CODE_FOR_fusion_gpr_di_hi_store },
3423 { HImode, SImode, RELOAD_REG_GPR,
3424 CODE_FOR_fusion_gpr_si_hi_load,
3425 CODE_FOR_fusion_gpr_si_hi_store },
3427 { SImode, DImode, RELOAD_REG_GPR,
3428 CODE_FOR_fusion_gpr_di_si_load,
3429 CODE_FOR_fusion_gpr_di_si_store },
3431 { SImode, SImode, RELOAD_REG_GPR,
3432 CODE_FOR_fusion_gpr_si_si_load,
3433 CODE_FOR_fusion_gpr_si_si_store },
3435 { SFmode, DImode, RELOAD_REG_GPR,
3436 CODE_FOR_fusion_gpr_di_sf_load,
3437 CODE_FOR_fusion_gpr_di_sf_store },
3439 { SFmode, SImode, RELOAD_REG_GPR,
3440 CODE_FOR_fusion_gpr_si_sf_load,
3441 CODE_FOR_fusion_gpr_si_sf_store },
3443 { DImode, DImode, RELOAD_REG_GPR,
3444 CODE_FOR_fusion_gpr_di_di_load,
3445 CODE_FOR_fusion_gpr_di_di_store },
3447 { DFmode, DImode, RELOAD_REG_GPR,
3448 CODE_FOR_fusion_gpr_di_df_load,
3449 CODE_FOR_fusion_gpr_di_df_store },
3452 enum machine_mode cur_pmode = Pmode;
3453 size_t i;
3455 for (i = 0; i < ARRAY_SIZE (addis_insns); i++)
3457 enum machine_mode xmode = addis_insns[i].mode;
3458 enum rs6000_reload_reg_type rtype = addis_insns[i].rtype;
3460 if (addis_insns[i].pmode != cur_pmode)
3461 continue;
3463 if (rtype == RELOAD_REG_FPR
3464 && (!TARGET_HARD_FLOAT || !TARGET_FPRS))
3465 continue;
3467 reg_addr[xmode].fusion_addis_ld[rtype] = addis_insns[i].load;
3468 reg_addr[xmode].fusion_addis_st[rtype] = addis_insns[i].store;
3472 /* Note which types we support for fusing a TOC setup plus a memory insn. We
3473 only do fused TOCs for medium/large code models. */
3474 if (TARGET_P8_FUSION && TARGET_TOC_FUSION && TARGET_POWERPC64
3475 && (TARGET_CMODEL != CMODEL_SMALL))
3477 reg_addr[QImode].fused_toc = true;
3478 reg_addr[HImode].fused_toc = true;
3479 reg_addr[SImode].fused_toc = true;
3480 reg_addr[DImode].fused_toc = true;
3481 if (TARGET_HARD_FLOAT && TARGET_FPRS)
3483 if (TARGET_SINGLE_FLOAT)
3484 reg_addr[SFmode].fused_toc = true;
3485 if (TARGET_DOUBLE_FLOAT)
3486 reg_addr[DFmode].fused_toc = true;
3490 /* Precalculate HARD_REGNO_NREGS. */
3491 for (r = 0; r < FIRST_PSEUDO_REGISTER; ++r)
3492 for (m = 0; m < NUM_MACHINE_MODES; ++m)
3493 rs6000_hard_regno_nregs[m][r]
3494 = rs6000_hard_regno_nregs_internal (r, (machine_mode)m);
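/* A worked example of the precalculation above (illustrative, not part of
   the original source): with 32-bit GPRs, a 128-bit TImode value needs
   128 / 32 = 4 consecutive registers, so for a GPR regno

     rs6000_hard_regno_nregs[TImode][regno] == 4

   while a 64-bit build would yield 2 for the same lookup.  */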
3496 /* Precalculate HARD_REGNO_MODE_OK. */
3497 for (r = 0; r < FIRST_PSEUDO_REGISTER; ++r)
3498 for (m = 0; m < NUM_MACHINE_MODES; ++m)
3499 if (rs6000_hard_regno_mode_ok (r, (machine_mode)m))
3500 rs6000_hard_regno_mode_ok_p[m][r] = true;
3502 /* Precalculate CLASS_MAX_NREGS sizes. */
3503 for (c = 0; c < LIM_REG_CLASSES; ++c)
3505 int reg_size;
3507 if (TARGET_VSX && VSX_REG_CLASS_P (c))
3508 reg_size = UNITS_PER_VSX_WORD;
3510 else if (c == ALTIVEC_REGS)
3511 reg_size = UNITS_PER_ALTIVEC_WORD;
3513 else if (c == FLOAT_REGS)
3514 reg_size = UNITS_PER_FP_WORD;
3516 else
3517 reg_size = UNITS_PER_WORD;
3519 for (m = 0; m < NUM_MACHINE_MODES; ++m)
3521 machine_mode m2 = (machine_mode)m;
3522 int reg_size2 = reg_size;
3524 /* TDmode & IBM 128-bit floating point always takes 2 registers, even
3525 in VSX. */
3526 if (TARGET_VSX && VSX_REG_CLASS_P (c) && FLOAT128_2REG_P (m))
3527 reg_size2 = UNITS_PER_FP_WORD;
3529 rs6000_class_max_nregs[m][c]
3530 = (GET_MODE_SIZE (m2) + reg_size2 - 1) / reg_size2;
3534 if (TARGET_E500_DOUBLE)
3535 rs6000_class_max_nregs[DFmode][GENERAL_REGS] = 1;
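/* A worked example of the formula above (illustrative): V2DFmode is 16
   bytes, so FLOAT_REGS without VSX needs (16 + 8 - 1) / 8 = 2 registers,
   while a VSX register class with reg_size = 16 needs
   (16 + 16 - 1) / 16 = 1.  TDmode and IBM 128-bit long double stay at
   2 FPRs even under VSX because of the FLOAT128_2REG_P override.  */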
3537 /* Calculate which modes to automatically generate code to use the
3538 reciprocal divide and square root instructions. In the future, possibly
3539 automatically generate the instructions even if the user did not specify
3540 -mrecip. The older machines' double-precision reciprocal sqrt estimate is
3541 not accurate enough. */
3542 memset (rs6000_recip_bits, 0, sizeof (rs6000_recip_bits));
3543 if (TARGET_FRES)
3544 rs6000_recip_bits[SFmode] = RS6000_RECIP_MASK_HAVE_RE;
3545 if (TARGET_FRE)
3546 rs6000_recip_bits[DFmode] = RS6000_RECIP_MASK_HAVE_RE;
3547 if (VECTOR_UNIT_ALTIVEC_OR_VSX_P (V4SFmode))
3548 rs6000_recip_bits[V4SFmode] = RS6000_RECIP_MASK_HAVE_RE;
3549 if (VECTOR_UNIT_VSX_P (V2DFmode))
3550 rs6000_recip_bits[V2DFmode] = RS6000_RECIP_MASK_HAVE_RE;
3552 if (TARGET_FRSQRTES)
3553 rs6000_recip_bits[SFmode] |= RS6000_RECIP_MASK_HAVE_RSQRTE;
3554 if (TARGET_FRSQRTE)
3555 rs6000_recip_bits[DFmode] |= RS6000_RECIP_MASK_HAVE_RSQRTE;
3556 if (VECTOR_UNIT_ALTIVEC_OR_VSX_P (V4SFmode))
3557 rs6000_recip_bits[V4SFmode] |= RS6000_RECIP_MASK_HAVE_RSQRTE;
3558 if (VECTOR_UNIT_VSX_P (V2DFmode))
3559 rs6000_recip_bits[V2DFmode] |= RS6000_RECIP_MASK_HAVE_RSQRTE;
3561 if (rs6000_recip_control)
3563 if (!flag_finite_math_only)
3564 warning (0, "-mrecip requires -ffinite-math or -ffast-math");
3565 if (flag_trapping_math)
3566 warning (0, "-mrecip requires -fno-trapping-math or -ffast-math");
3567 if (!flag_reciprocal_math)
3568 warning (0, "-mrecip requires -freciprocal-math or -ffast-math");
3569 if (flag_finite_math_only && !flag_trapping_math && flag_reciprocal_math)
3571 if (RS6000_RECIP_HAVE_RE_P (SFmode)
3572 && (rs6000_recip_control & RECIP_SF_DIV) != 0)
3573 rs6000_recip_bits[SFmode] |= RS6000_RECIP_MASK_AUTO_RE;
3575 if (RS6000_RECIP_HAVE_RE_P (DFmode)
3576 && (rs6000_recip_control & RECIP_DF_DIV) != 0)
3577 rs6000_recip_bits[DFmode] |= RS6000_RECIP_MASK_AUTO_RE;
3579 if (RS6000_RECIP_HAVE_RE_P (V4SFmode)
3580 && (rs6000_recip_control & RECIP_V4SF_DIV) != 0)
3581 rs6000_recip_bits[V4SFmode] |= RS6000_RECIP_MASK_AUTO_RE;
3583 if (RS6000_RECIP_HAVE_RE_P (V2DFmode)
3584 && (rs6000_recip_control & RECIP_V2DF_DIV) != 0)
3585 rs6000_recip_bits[V2DFmode] |= RS6000_RECIP_MASK_AUTO_RE;
3587 if (RS6000_RECIP_HAVE_RSQRTE_P (SFmode)
3588 && (rs6000_recip_control & RECIP_SF_RSQRT) != 0)
3589 rs6000_recip_bits[SFmode] |= RS6000_RECIP_MASK_AUTO_RSQRTE;
3591 if (RS6000_RECIP_HAVE_RSQRTE_P (DFmode)
3592 && (rs6000_recip_control & RECIP_DF_RSQRT) != 0)
3593 rs6000_recip_bits[DFmode] |= RS6000_RECIP_MASK_AUTO_RSQRTE;
3595 if (RS6000_RECIP_HAVE_RSQRTE_P (V4SFmode)
3596 && (rs6000_recip_control & RECIP_V4SF_RSQRT) != 0)
3597 rs6000_recip_bits[V4SFmode] |= RS6000_RECIP_MASK_AUTO_RSQRTE;
3599 if (RS6000_RECIP_HAVE_RSQRTE_P (V2DFmode)
3600 && (rs6000_recip_control & RECIP_V2DF_RSQRT) != 0)
3601 rs6000_recip_bits[V2DFmode] |= RS6000_RECIP_MASK_AUTO_RSQRTE;
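/* For illustration (assuming a cpu that provides fre/fres/frsqrte/frsqrtes):
   compiling with -O3 -ffast-math -mrecip would set both the HAVE and AUTO
   bits for RE and RSQRTE in rs6000_recip_bits[SFmode] and
   rs6000_recip_bits[DFmode], letting a division such as x/y be expanded as
   a reciprocal estimate refined by Newton-Raphson steps instead of a
   hardware divide.  */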
3605 /* Update the addr mask bits in reg_addr to help secondary reload and the
3606 GO_IF_LEGITIMATE_ADDRESS support figure out the appropriate addressing to
3607 use. */
3608 rs6000_setup_reg_addr_masks ();
3610 if (global_init_p || TARGET_DEBUG_TARGET)
3612 if (TARGET_DEBUG_REG)
3613 rs6000_debug_reg_global ();
3615 if (TARGET_DEBUG_COST || TARGET_DEBUG_REG)
3616 fprintf (stderr,
3617 "SImode variable mult cost = %d\n"
3618 "SImode constant mult cost = %d\n"
3619 "SImode short constant mult cost = %d\n"
3620 "DImode multipliciation cost = %d\n"
3621 "SImode division cost = %d\n"
3622 "DImode division cost = %d\n"
3623 "Simple fp operation cost = %d\n"
3624 "DFmode multiplication cost = %d\n"
3625 "SFmode division cost = %d\n"
3626 "DFmode division cost = %d\n"
3627 "cache line size = %d\n"
3628 "l1 cache size = %d\n"
3629 "l2 cache size = %d\n"
3630 "simultaneous prefetches = %d\n"
3631 "\n",
3632 rs6000_cost->mulsi,
3633 rs6000_cost->mulsi_const,
3634 rs6000_cost->mulsi_const9,
3635 rs6000_cost->muldi,
3636 rs6000_cost->divsi,
3637 rs6000_cost->divdi,
3638 rs6000_cost->fp,
3639 rs6000_cost->dmul,
3640 rs6000_cost->sdiv,
3641 rs6000_cost->ddiv,
3642 rs6000_cost->cache_line_size,
3643 rs6000_cost->l1_cache_size,
3644 rs6000_cost->l2_cache_size,
3645 rs6000_cost->simultaneous_prefetches);
3649 #if TARGET_MACHO
3650 /* The Darwin version of SUBTARGET_OVERRIDE_OPTIONS. */
3652 static void
3653 darwin_rs6000_override_options (void)
3655 /* The Darwin ABI always includes AltiVec; it can't be (validly) turned
3656 off. */
3657 rs6000_altivec_abi = 1;
3658 TARGET_ALTIVEC_VRSAVE = 1;
3659 rs6000_current_abi = ABI_DARWIN;
3661 if (DEFAULT_ABI == ABI_DARWIN
3662 && TARGET_64BIT)
3663 darwin_one_byte_bool = 1;
3665 if (TARGET_64BIT && ! TARGET_POWERPC64)
3667 rs6000_isa_flags |= OPTION_MASK_POWERPC64;
3668 warning (0, "-m64 requires PowerPC64 architecture, enabling");
3670 if (flag_mkernel)
3672 rs6000_default_long_calls = 1;
3673 rs6000_isa_flags |= OPTION_MASK_SOFT_FLOAT;
3676 /* Make -m64 imply -maltivec. Darwin's 64-bit ABI includes
3677 Altivec. */
3678 if (!flag_mkernel && !flag_apple_kext
3679 && TARGET_64BIT
3680 && ! (rs6000_isa_flags_explicit & OPTION_MASK_ALTIVEC))
3681 rs6000_isa_flags |= OPTION_MASK_ALTIVEC;
3683 /* Unless the user (not the configurer) has explicitly overridden
3684 it with -mcpu=G3 or -mno-altivec, 10.5+ targets default to the
3685 G4 unless targeting the kernel. */
3686 if (!flag_mkernel
3687 && !flag_apple_kext
3688 && strverscmp (darwin_macosx_version_min, "10.5") >= 0
3689 && ! (rs6000_isa_flags_explicit & OPTION_MASK_ALTIVEC)
3690 && ! global_options_set.x_rs6000_cpu_index)
3692 rs6000_isa_flags |= OPTION_MASK_ALTIVEC;
3695 #endif
3697 /* If not otherwise specified by a target, make 'long double' equivalent to
3698 'double'. */
3700 #ifndef RS6000_DEFAULT_LONG_DOUBLE_SIZE
3701 #define RS6000_DEFAULT_LONG_DOUBLE_SIZE 64
3702 #endif
3704 /* Return the builtin mask of the various options used that could affect which
3705 builtins are available. In the past we used target_flags, but we've run out of
3706 bits, and some options like SPE and PAIRED are no longer in
3707 target_flags. */
3709 HOST_WIDE_INT
3710 rs6000_builtin_mask_calculate (void)
3712 return (((TARGET_ALTIVEC) ? RS6000_BTM_ALTIVEC : 0)
3713 | ((TARGET_VSX) ? RS6000_BTM_VSX : 0)
3714 | ((TARGET_SPE) ? RS6000_BTM_SPE : 0)
3715 | ((TARGET_PAIRED_FLOAT) ? RS6000_BTM_PAIRED : 0)
3716 | ((TARGET_FRE) ? RS6000_BTM_FRE : 0)
3717 | ((TARGET_FRES) ? RS6000_BTM_FRES : 0)
3718 | ((TARGET_FRSQRTE) ? RS6000_BTM_FRSQRTE : 0)
3719 | ((TARGET_FRSQRTES) ? RS6000_BTM_FRSQRTES : 0)
3720 | ((TARGET_POPCNTD) ? RS6000_BTM_POPCNTD : 0)
3721 | ((rs6000_cpu == PROCESSOR_CELL) ? RS6000_BTM_CELL : 0)
3722 | ((TARGET_P8_VECTOR) ? RS6000_BTM_P8_VECTOR : 0)
3723 | ((TARGET_P9_VECTOR) ? RS6000_BTM_P9_VECTOR : 0)
3724 | ((TARGET_P9_MISC) ? RS6000_BTM_P9_MISC : 0)
3725 | ((TARGET_MODULO) ? RS6000_BTM_MODULO : 0)
3726 | ((TARGET_64BIT) ? RS6000_BTM_64BIT : 0)
3727 | ((TARGET_CRYPTO) ? RS6000_BTM_CRYPTO : 0)
3728 | ((TARGET_HTM) ? RS6000_BTM_HTM : 0)
3729 | ((TARGET_DFP) ? RS6000_BTM_DFP : 0)
3730 | ((TARGET_HARD_FLOAT) ? RS6000_BTM_HARD_FLOAT : 0)
3731 | ((TARGET_LONG_DOUBLE_128) ? RS6000_BTM_LDBL128 : 0)
3732 | ((TARGET_FLOAT128_TYPE) ? RS6000_BTM_FLOAT128 : 0));
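/* For example (an illustrative, non-exhaustive sketch): a 64-bit
   -mcpu=power8 compilation would typically return a mask containing at
   least RS6000_BTM_ALTIVEC | RS6000_BTM_VSX | RS6000_BTM_P8_VECTOR
   | RS6000_BTM_CRYPTO | RS6000_BTM_HTM | RS6000_BTM_64BIT, which is then
   checked against the mask each builtin requires.  */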
3735 /* Implement TARGET_MD_ASM_ADJUST. All asm statements are considered
3736 to clobber the XER[CA] bit because clobbering that bit without telling
3737 the compiler worked just fine with versions of GCC before GCC 5, and
3738 breaking a lot of older code in ways that are hard to track down is
3739 not such a great idea. */
3741 static rtx_insn *
3742 rs6000_md_asm_adjust (vec<rtx> &/*outputs*/, vec<rtx> &/*inputs*/,
3743 vec<const char *> &/*constraints*/,
3744 vec<rtx> &clobbers, HARD_REG_SET &clobbered_regs)
3746 clobbers.safe_push (gen_rtx_REG (SImode, CA_REGNO));
3747 SET_HARD_REG_BIT (clobbered_regs, CA_REGNO);
3748 return NULL;
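/* An illustrative example (not from the original source) of asm that
   relied on the old behavior: the sequence below sets XER[CA] with addic
   and consumes it with addze, without declaring the bit anywhere; the
   unconditional clobber added above keeps such code correct.

     long x, y;
     asm ("addic %0,%1,1\n\taddze %0,%0" : "=r" (x) : "r" (y));
*/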
3751 /* Override command line options. Mostly we process the processor type and
3752 sometimes adjust other TARGET_ options. */
3754 static bool
3755 rs6000_option_override_internal (bool global_init_p)
3757 bool ret = true;
3758 bool have_cpu = false;
3760 /* The default cpu requested at configure time, if any. */
3761 const char *implicit_cpu = OPTION_TARGET_CPU_DEFAULT;
3763 HOST_WIDE_INT set_masks;
3764 int cpu_index;
3765 int tune_index;
3766 struct cl_target_option *main_target_opt
3767 = ((global_init_p || target_option_default_node == NULL)
3768 ? NULL : TREE_TARGET_OPTION (target_option_default_node));
3770 /* Print defaults. */
3771 if ((TARGET_DEBUG_REG || TARGET_DEBUG_TARGET) && global_init_p)
3772 rs6000_print_isa_options (stderr, 0, "TARGET_DEFAULT", TARGET_DEFAULT);
3774 /* Remember the explicit arguments. */
3775 if (global_init_p)
3776 rs6000_isa_flags_explicit = global_options_set.x_rs6000_isa_flags;
3778 /* On 64-bit Darwin, power alignment is ABI-incompatible with some C
3779 library functions, so warn about it. The flag may be useful for
3780 performance studies from time to time though, so don't disable it
3781 entirely. */
3782 if (global_options_set.x_rs6000_alignment_flags
3783 && rs6000_alignment_flags == MASK_ALIGN_POWER
3784 && DEFAULT_ABI == ABI_DARWIN
3785 && TARGET_64BIT)
3786 warning (0, "-malign-power is not supported for 64-bit Darwin;"
3787 " it is incompatible with the installed C and C++ libraries");
3789 /* Numerous experiments show that IRA based loop pressure
3790 calculation works better for RTL loop invariant motion on targets
3791 with enough (>= 32) registers. It is an expensive optimization.
3792 So it is on only for peak performance. */
3793 if (optimize >= 3 && global_init_p
3794 && !global_options_set.x_flag_ira_loop_pressure)
3795 flag_ira_loop_pressure = 1;
3797 /* Set the pointer size. */
3798 if (TARGET_64BIT)
3800 rs6000_pmode = (int)DImode;
3801 rs6000_pointer_size = 64;
3803 else
3805 rs6000_pmode = (int)SImode;
3806 rs6000_pointer_size = 32;
3809 /* Some OSs don't support saving the high part of 64-bit registers on context
3810 switch. Other OSs don't support saving Altivec registers. On those OSs,
3811 we don't touch the OPTION_MASK_POWERPC64 or OPTION_MASK_ALTIVEC settings;
3812 if the user wants either, the user must explicitly specify them and we
3813 won't interfere with the user's specification. */
3815 set_masks = POWERPC_MASKS;
3816 #ifdef OS_MISSING_POWERPC64
3817 if (OS_MISSING_POWERPC64)
3818 set_masks &= ~OPTION_MASK_POWERPC64;
3819 #endif
3820 #ifdef OS_MISSING_ALTIVEC
3821 if (OS_MISSING_ALTIVEC)
3822 set_masks &= ~(OPTION_MASK_ALTIVEC | OPTION_MASK_VSX);
3823 #endif
3825 /* Don't let the processor default override flags given explicitly. */
3826 set_masks &= ~rs6000_isa_flags_explicit;
3828 /* Process the -mcpu=<xxx> and -mtune=<xxx> arguments. If the user changed
3829 the cpu in a target attribute or pragma, but did not specify a tuning
3830 option, use the cpu for the tuning option rather than the option specified
3831 with -mtune on the command line. Process a '--with-cpu' configuration
3832 request as an implicit --cpu. */
3833 if (rs6000_cpu_index >= 0)
3835 cpu_index = rs6000_cpu_index;
3836 have_cpu = true;
3838 else if (main_target_opt != NULL && main_target_opt->x_rs6000_cpu_index >= 0)
3840 rs6000_cpu_index = cpu_index = main_target_opt->x_rs6000_cpu_index;
3841 have_cpu = true;
3843 else if (implicit_cpu)
3845 rs6000_cpu_index = cpu_index = rs6000_cpu_name_lookup (implicit_cpu);
3846 have_cpu = true;
3848 else
3850 /* PowerPC 64-bit LE requires at least ISA 2.07. */
3851 const char *default_cpu = ((!TARGET_POWERPC64)
3852 ? "powerpc"
3853 : ((BYTES_BIG_ENDIAN)
3854 ? "powerpc64"
3855 : "powerpc64le"));
3857 rs6000_cpu_index = cpu_index = rs6000_cpu_name_lookup (default_cpu);
3858 have_cpu = false;
3861 gcc_assert (cpu_index >= 0);
3863 /* If we have a cpu, either through an explicit -mcpu=<xxx> or if the
3864 compiler was configured with --with-cpu=<xxx>, replace all of the ISA bits
3865 with those from the cpu, except for options that were explicitly set. If
3866 we don't have a cpu, do not override the target bits set in
3867 TARGET_DEFAULT. */
3868 if (have_cpu)
3870 rs6000_isa_flags &= ~set_masks;
3871 rs6000_isa_flags |= (processor_target_table[cpu_index].target_enable
3872 & set_masks);
3874 else
3876 /* If no -mcpu=<xxx>, inherit any default options that were cleared via
3877 POWERPC_MASKS. Originally, TARGET_DEFAULT was used to initialize
3878 target_flags via the TARGET_DEFAULT_TARGET_FLAGS hook. Now that we have
3879 switched to using rs6000_isa_flags, we need to do the initialization here.
3881 If there is a TARGET_DEFAULT, use that. Otherwise fall back to using
3882 -mcpu=powerpc, -mcpu=powerpc64, or -mcpu=powerpc64le defaults. */
3883 HOST_WIDE_INT flags = ((TARGET_DEFAULT) ? TARGET_DEFAULT
3884 : processor_target_table[cpu_index].target_enable);
3885 rs6000_isa_flags |= (flags & ~rs6000_isa_flags_explicit);
3888 if (rs6000_tune_index >= 0)
3889 tune_index = rs6000_tune_index;
3890 else if (have_cpu)
3891 rs6000_tune_index = tune_index = cpu_index;
3892 else
3894 size_t i;
3895 enum processor_type tune_proc
3896 = (TARGET_POWERPC64 ? PROCESSOR_DEFAULT64 : PROCESSOR_DEFAULT);
3898 tune_index = -1;
3899 for (i = 0; i < ARRAY_SIZE (processor_target_table); i++)
3900 if (processor_target_table[i].processor == tune_proc)
3902 rs6000_tune_index = tune_index = i;
3903 break;
3907 gcc_assert (tune_index >= 0);
3908 rs6000_cpu = processor_target_table[tune_index].processor;
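/* Illustrative summary of the logic above: plain -mcpu=power8 sets both
   cpu_index and tune_index to power8, while -mcpu=power8 -mtune=power9
   keeps the power8 ISA flags but makes rs6000_cpu, and therefore the
   scheduling and cost tables chosen below, follow power9.  */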
3910 /* Pick defaults for SPE-related control flags. Do this early to make sure
3911 that the TARGET_ macros are representative ASAP. */
3913 int spe_capable_cpu =
3914 (rs6000_cpu == PROCESSOR_PPC8540
3915 || rs6000_cpu == PROCESSOR_PPC8548);
3917 if (!global_options_set.x_rs6000_spe_abi)
3918 rs6000_spe_abi = spe_capable_cpu;
3920 if (!global_options_set.x_rs6000_spe)
3921 rs6000_spe = spe_capable_cpu;
3923 if (!global_options_set.x_rs6000_float_gprs)
3924 rs6000_float_gprs =
3925 (rs6000_cpu == PROCESSOR_PPC8540 ? 1
3926 : rs6000_cpu == PROCESSOR_PPC8548 ? 2
3927 : 0);
3930 if (global_options_set.x_rs6000_spe_abi
3931 && rs6000_spe_abi
3932 && !TARGET_SPE_ABI)
3933 error ("not configured for SPE ABI");
3935 if (global_options_set.x_rs6000_spe
3936 && rs6000_spe
3937 && !TARGET_SPE)
3938 error ("not configured for SPE instruction set");
3940 if (main_target_opt != NULL
3941 && ((main_target_opt->x_rs6000_spe_abi != rs6000_spe_abi)
3942 || (main_target_opt->x_rs6000_spe != rs6000_spe)
3943 || (main_target_opt->x_rs6000_float_gprs != rs6000_float_gprs)))
3944 error ("target attribute or pragma changes SPE ABI");
3946 if (rs6000_cpu == PROCESSOR_PPCE300C2 || rs6000_cpu == PROCESSOR_PPCE300C3
3947 || rs6000_cpu == PROCESSOR_PPCE500MC || rs6000_cpu == PROCESSOR_PPCE500MC64
3948 || rs6000_cpu == PROCESSOR_PPCE5500)
3950 if (TARGET_ALTIVEC)
3951 error ("AltiVec not supported in this target");
3952 if (TARGET_SPE)
3953 error ("SPE not supported in this target");
3955 if (rs6000_cpu == PROCESSOR_PPCE6500)
3957 if (TARGET_SPE)
3958 error ("SPE not supported in this target");
3961 /* Disable Cell microcode if we are optimizing for the Cell
3962 and not optimizing for size. */
3963 if (rs6000_gen_cell_microcode == -1)
3964 rs6000_gen_cell_microcode = !(rs6000_cpu == PROCESSOR_CELL
3965 && !optimize_size);
3967 /* If we are optimizing big endian systems for space and it's OK to
3968 use instructions that would be microcoded on the Cell, use the
3969 load/store multiple and string instructions. */
3970 if (BYTES_BIG_ENDIAN && optimize_size && rs6000_gen_cell_microcode)
3971 rs6000_isa_flags |= ~rs6000_isa_flags_explicit & (OPTION_MASK_MULTIPLE
3972 | OPTION_MASK_STRING);
3974 /* Don't allow -mmultiple or -mstring on little endian systems
3975 unless the cpu is a 750, because the hardware doesn't support the
3976 instructions used in little endian mode, and they cause an alignment
3977 trap. The 750 does not cause an alignment trap (except when the
3978 target is unaligned). */
3980 if (!BYTES_BIG_ENDIAN && rs6000_cpu != PROCESSOR_PPC750)
3982 if (TARGET_MULTIPLE)
3984 rs6000_isa_flags &= ~OPTION_MASK_MULTIPLE;
3985 if ((rs6000_isa_flags_explicit & OPTION_MASK_MULTIPLE) != 0)
3986 warning (0, "-mmultiple is not supported on little endian systems");
3989 if (TARGET_STRING)
3991 rs6000_isa_flags &= ~OPTION_MASK_STRING;
3992 if ((rs6000_isa_flags_explicit & OPTION_MASK_STRING) != 0)
3993 warning (0, "-mstring is not supported on little endian systems");
3997 /* If little-endian, default to -mstrict-align on older processors.
3998 Testing for htm matches power8 and later. */
3999 if (!BYTES_BIG_ENDIAN
4000 && !(processor_target_table[tune_index].target_enable & OPTION_MASK_HTM))
4001 rs6000_isa_flags |= ~rs6000_isa_flags_explicit & OPTION_MASK_STRICT_ALIGN;
4003 /* -maltivec={le,be} implies -maltivec. */
4004 if (rs6000_altivec_element_order != 0)
4005 rs6000_isa_flags |= OPTION_MASK_ALTIVEC;
4007 /* Disallow -maltivec=le in big endian mode for now. This is not
4008 known to be useful for anyone. */
4009 if (BYTES_BIG_ENDIAN && rs6000_altivec_element_order == 1)
4011 warning (0, N_("-maltivec=le not allowed for big-endian targets"));
4012 rs6000_altivec_element_order = 0;
4015 /* Add some warnings for VSX. */
4016 if (TARGET_VSX)
4018 const char *msg = NULL;
4019 if (!TARGET_HARD_FLOAT || !TARGET_FPRS
4020 || !TARGET_SINGLE_FLOAT || !TARGET_DOUBLE_FLOAT)
4022 if (rs6000_isa_flags_explicit & OPTION_MASK_VSX)
4023 msg = N_("-mvsx requires hardware floating point");
4024 else
4026 rs6000_isa_flags &= ~ OPTION_MASK_VSX;
4027 rs6000_isa_flags_explicit |= OPTION_MASK_VSX;
4030 else if (TARGET_PAIRED_FLOAT)
4031 msg = N_("-mvsx and -mpaired are incompatible");
4032 else if (TARGET_AVOID_XFORM > 0)
4033 msg = N_("-mvsx needs indexed addressing");
4034 else if (!TARGET_ALTIVEC && (rs6000_isa_flags_explicit
4035 & OPTION_MASK_ALTIVEC))
4037 if (rs6000_isa_flags_explicit & OPTION_MASK_VSX)
4038 msg = N_("-mvsx and -mno-altivec are incompatible");
4039 else
4040 msg = N_("-mno-altivec disables vsx");
4043 if (msg)
4045 warning (0, msg);
4046 rs6000_isa_flags &= ~ OPTION_MASK_VSX;
4047 rs6000_isa_flags_explicit |= OPTION_MASK_VSX;
4051 /* If hard-float/altivec/vsx were explicitly turned off then don't allow
4052 the -mcpu setting to enable options that conflict. */
4053 if ((!TARGET_HARD_FLOAT || !TARGET_ALTIVEC || !TARGET_VSX)
4054 && (rs6000_isa_flags_explicit & (OPTION_MASK_SOFT_FLOAT
4055 | OPTION_MASK_ALTIVEC
4056 | OPTION_MASK_VSX)) != 0)
4057 rs6000_isa_flags &= ~((OPTION_MASK_P8_VECTOR | OPTION_MASK_CRYPTO
4058 | OPTION_MASK_DIRECT_MOVE)
4059 & ~rs6000_isa_flags_explicit);
4061 if (TARGET_DEBUG_REG || TARGET_DEBUG_TARGET)
4062 rs6000_print_isa_options (stderr, 0, "before defaults", rs6000_isa_flags);
4064 /* For the newer switches (vsx, dfp, etc.) set some of the older options,
4065 unless the user explicitly used the -mno-<option> to disable the code. */
4066 if (TARGET_P9_VECTOR || TARGET_MODULO || TARGET_P9_DFORM_SCALAR
4067 || TARGET_P9_DFORM_VECTOR || TARGET_P9_DFORM_BOTH > 0 || TARGET_P9_MINMAX)
4068 rs6000_isa_flags |= (ISA_3_0_MASKS_SERVER & ~rs6000_isa_flags_explicit);
4069 else if (TARGET_P8_VECTOR || TARGET_DIRECT_MOVE || TARGET_CRYPTO)
4070 rs6000_isa_flags |= (ISA_2_7_MASKS_SERVER & ~rs6000_isa_flags_explicit);
4071 else if (TARGET_VSX)
4072 rs6000_isa_flags |= (ISA_2_6_MASKS_SERVER & ~rs6000_isa_flags_explicit);
4073 else if (TARGET_POPCNTD)
4074 rs6000_isa_flags |= (ISA_2_6_MASKS_EMBEDDED & ~rs6000_isa_flags_explicit);
4075 else if (TARGET_DFP)
4076 rs6000_isa_flags |= (ISA_2_5_MASKS_SERVER & ~rs6000_isa_flags_explicit);
4077 else if (TARGET_CMPB)
4078 rs6000_isa_flags |= (ISA_2_5_MASKS_EMBEDDED & ~rs6000_isa_flags_explicit);
4079 else if (TARGET_FPRND)
4080 rs6000_isa_flags |= (ISA_2_4_MASKS & ~rs6000_isa_flags_explicit);
4081 else if (TARGET_POPCNTB)
4082 rs6000_isa_flags |= (ISA_2_2_MASKS & ~rs6000_isa_flags_explicit);
4083 else if (TARGET_ALTIVEC)
4084 rs6000_isa_flags |= (OPTION_MASK_PPC_GFXOPT & ~rs6000_isa_flags_explicit);
4086 if (TARGET_CRYPTO && !TARGET_ALTIVEC)
4088 if (rs6000_isa_flags_explicit & OPTION_MASK_CRYPTO)
4089 error ("-mcrypto requires -maltivec");
4090 rs6000_isa_flags &= ~OPTION_MASK_CRYPTO;
4093 if (TARGET_DIRECT_MOVE && !TARGET_VSX)
4095 if (rs6000_isa_flags_explicit & OPTION_MASK_DIRECT_MOVE)
4096 error ("-mdirect-move requires -mvsx");
4097 rs6000_isa_flags &= ~OPTION_MASK_DIRECT_MOVE;
4100 if (TARGET_P8_VECTOR && !TARGET_ALTIVEC)
4102 if (rs6000_isa_flags_explicit & OPTION_MASK_P8_VECTOR)
4103 error ("-mpower8-vector requires -maltivec");
4104 rs6000_isa_flags &= ~OPTION_MASK_P8_VECTOR;
4107 if (TARGET_P8_VECTOR && !TARGET_VSX)
4109 if (rs6000_isa_flags_explicit & OPTION_MASK_P8_VECTOR)
4110 error ("-mpower8-vector requires -mvsx");
4111 rs6000_isa_flags &= ~OPTION_MASK_P8_VECTOR;
4114 if (TARGET_VSX_TIMODE && !TARGET_VSX)
4116 if (rs6000_isa_flags_explicit & OPTION_MASK_VSX_TIMODE)
4117 error ("-mvsx-timode requires -mvsx");
4118 rs6000_isa_flags &= ~OPTION_MASK_VSX_TIMODE;
4121 if (TARGET_DFP && !TARGET_HARD_FLOAT)
4123 if (rs6000_isa_flags_explicit & OPTION_MASK_DFP)
4124 error ("-mhard-dfp requires -mhard-float");
4125 rs6000_isa_flags &= ~OPTION_MASK_DFP;
4128 /* Allow an explicit -mupper-regs to set -mupper-regs-df, -mupper-regs-di,
4129 and -mupper-regs-sf, depending on the cpu, unless the user explicitly also
4130 set the individual option. */
4131 if (TARGET_UPPER_REGS > 0)
4133 if (TARGET_VSX
4134 && !(rs6000_isa_flags_explicit & OPTION_MASK_UPPER_REGS_DF))
4136 rs6000_isa_flags |= OPTION_MASK_UPPER_REGS_DF;
4137 rs6000_isa_flags_explicit |= OPTION_MASK_UPPER_REGS_DF;
4139 if (TARGET_VSX
4140 && !(rs6000_isa_flags_explicit & OPTION_MASK_UPPER_REGS_DI))
4142 rs6000_isa_flags |= OPTION_MASK_UPPER_REGS_DI;
4143 rs6000_isa_flags_explicit |= OPTION_MASK_UPPER_REGS_DI;
4145 if (TARGET_P8_VECTOR
4146 && !(rs6000_isa_flags_explicit & OPTION_MASK_UPPER_REGS_SF))
4148 rs6000_isa_flags |= OPTION_MASK_UPPER_REGS_SF;
4149 rs6000_isa_flags_explicit |= OPTION_MASK_UPPER_REGS_SF;
4152 else if (TARGET_UPPER_REGS == 0)
4154 if (TARGET_VSX
4155 && !(rs6000_isa_flags_explicit & OPTION_MASK_UPPER_REGS_DF))
4157 rs6000_isa_flags &= ~OPTION_MASK_UPPER_REGS_DF;
4158 rs6000_isa_flags_explicit |= OPTION_MASK_UPPER_REGS_DF;
4160 if (TARGET_VSX
4161 && !(rs6000_isa_flags_explicit & OPTION_MASK_UPPER_REGS_DI))
4163 rs6000_isa_flags &= ~OPTION_MASK_UPPER_REGS_DI;
4164 rs6000_isa_flags_explicit |= OPTION_MASK_UPPER_REGS_DI;
4166 if (TARGET_P8_VECTOR
4167 && !(rs6000_isa_flags_explicit & OPTION_MASK_UPPER_REGS_SF))
4169 rs6000_isa_flags &= ~OPTION_MASK_UPPER_REGS_SF;
4170 rs6000_isa_flags_explicit |= OPTION_MASK_UPPER_REGS_SF;
4174 if (TARGET_UPPER_REGS_DF && !TARGET_VSX)
4176 if (rs6000_isa_flags_explicit & OPTION_MASK_UPPER_REGS_DF)
4177 error ("-mupper-regs-df requires -mvsx");
4178 rs6000_isa_flags &= ~OPTION_MASK_UPPER_REGS_DF;
4181 if (TARGET_UPPER_REGS_DI && !TARGET_VSX)
4183 if (rs6000_isa_flags_explicit & OPTION_MASK_UPPER_REGS_DI)
4184 error ("-mupper-regs-di requires -mvsx");
4185 rs6000_isa_flags &= ~OPTION_MASK_UPPER_REGS_DI;
4188 if (TARGET_UPPER_REGS_SF && !TARGET_P8_VECTOR)
4190 if (rs6000_isa_flags_explicit & OPTION_MASK_UPPER_REGS_SF)
4191 error ("-mupper-regs-sf requires -mpower8-vector");
4192 rs6000_isa_flags &= ~OPTION_MASK_UPPER_REGS_SF;
4195 /* The quad memory instructions only work in 64-bit mode. In 32-bit mode,
4196 silently turn off quad memory mode. */
4197 if ((TARGET_QUAD_MEMORY || TARGET_QUAD_MEMORY_ATOMIC) && !TARGET_POWERPC64)
4199 if ((rs6000_isa_flags_explicit & OPTION_MASK_QUAD_MEMORY) != 0)
4200 warning (0, N_("-mquad-memory requires 64-bit mode"));
4202 if ((rs6000_isa_flags_explicit & OPTION_MASK_QUAD_MEMORY_ATOMIC) != 0)
4203 warning (0, N_("-mquad-memory-atomic requires 64-bit mode"));
4205 rs6000_isa_flags &= ~(OPTION_MASK_QUAD_MEMORY
4206 | OPTION_MASK_QUAD_MEMORY_ATOMIC);
4209 /* Non-atomic quad memory loads/stores are disabled for little endian, since
4210 the words are reversed, but atomic operations can still be done by
4211 swapping the words. */
4212 if (TARGET_QUAD_MEMORY && !WORDS_BIG_ENDIAN)
4214 if ((rs6000_isa_flags_explicit & OPTION_MASK_QUAD_MEMORY) != 0)
4215 warning (0, N_("-mquad-memory is not available in little endian mode"));
4217 rs6000_isa_flags &= ~OPTION_MASK_QUAD_MEMORY;
4220 /* Assume if the user asked for normal quad memory instructions, they want
4221 the atomic versions as well, unless they explicitly told us not to use quad
4222 word atomic instructions. */
4223 if (TARGET_QUAD_MEMORY
4224 && !TARGET_QUAD_MEMORY_ATOMIC
4225 && ((rs6000_isa_flags_explicit & OPTION_MASK_QUAD_MEMORY_ATOMIC) == 0))
4226 rs6000_isa_flags |= OPTION_MASK_QUAD_MEMORY_ATOMIC;
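/* Illustrative effect (assuming a 64-bit big-endian power8 target):
   -mquad-memory enables the lq/stq quad-word instructions, and by the
   default above the atomic lqarx/stqcx. forms as well, unless
   -mno-quad-memory-atomic was given explicitly.  */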
4228 /* Enable power8 fusion if we are tuning for power8, even if we aren't
4229 generating power8 instructions. */
4230 if (!(rs6000_isa_flags_explicit & OPTION_MASK_P8_FUSION))
4231 rs6000_isa_flags |= (processor_target_table[tune_index].target_enable
4232 & OPTION_MASK_P8_FUSION);
4234 /* Setting additional fusion flags turns on base fusion. */
4235 if (!TARGET_P8_FUSION && (TARGET_P8_FUSION_SIGN || TARGET_TOC_FUSION))
4237 if (rs6000_isa_flags_explicit & OPTION_MASK_P8_FUSION)
4239 if (TARGET_P8_FUSION_SIGN)
4240 error ("-mpower8-fusion-sign requires -mpower8-fusion");
4242 if (TARGET_TOC_FUSION)
4243 error ("-mtoc-fusion requires -mpower8-fusion");
4245 rs6000_isa_flags &= ~OPTION_MASK_P8_FUSION;
4247 else
4248 rs6000_isa_flags |= OPTION_MASK_P8_FUSION;
4251 /* Power9 fusion is a superset of power8 fusion. */
4252 if (TARGET_P9_FUSION && !TARGET_P8_FUSION)
4254 if (rs6000_isa_flags_explicit & OPTION_MASK_P8_FUSION)
4256 /* We prefer to not mention undocumented options in
4257 error messages. However, if users have managed to select
4258 power9-fusion without selecting power8-fusion, they
4259 already know about undocumented flags. */
4260 error ("-mpower9-fusion requires -mpower8-fusion");
4261 rs6000_isa_flags &= ~OPTION_MASK_P9_FUSION;
4263 else
4264 rs6000_isa_flags |= OPTION_MASK_P8_FUSION;
4267 /* Enable power9 fusion if we are tuning for power9, even if we aren't
4268 generating power9 instructions. */
4269 if (!(rs6000_isa_flags_explicit & OPTION_MASK_P9_FUSION))
4270 rs6000_isa_flags |= (processor_target_table[tune_index].target_enable
4271 & OPTION_MASK_P9_FUSION);
4273 /* Power8 does not fuse sign-extended loads with the addis. If we are
4274 optimizing at high levels for speed, convert a sign-extended load into a
4275 zero-extending load and an explicit sign extension. */
4276 if (TARGET_P8_FUSION
4277 && !(rs6000_isa_flags_explicit & OPTION_MASK_P8_FUSION_SIGN)
4278 && optimize_function_for_speed_p (cfun)
4279 && optimize >= 3)
4280 rs6000_isa_flags |= OPTION_MASK_P8_FUSION_SIGN;
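/* As an illustrative example: at -O3 a sign-extending word load that would
   normally be "lwa rT,disp(rB)" can instead be emitted as the fusable
   zero-extending "lwz rT,disp(rB)" followed by an explicit
   "extsw rT,rT", since power8 fuses addis only with the zero-extending
   load forms.  */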
4282 /* TOC fusion requires 64-bit and medium/large code model. */
4283 if (TARGET_TOC_FUSION && !TARGET_POWERPC64)
4285 rs6000_isa_flags &= ~OPTION_MASK_TOC_FUSION;
4286 if ((rs6000_isa_flags_explicit & OPTION_MASK_TOC_FUSION) != 0)
4287 warning (0, N_("-mtoc-fusion requires 64-bit"));
4290 if (TARGET_TOC_FUSION && (TARGET_CMODEL == CMODEL_SMALL))
4292 rs6000_isa_flags &= ~OPTION_MASK_TOC_FUSION;
4293 if ((rs6000_isa_flags_explicit & OPTION_MASK_TOC_FUSION) != 0)
4294 warning (0, N_("-mtoc-fusion requires medium/large code model"));
4297 /* Turn on -mtoc-fusion by default if p8-fusion is on and we have a 64-bit
4298 medium/large code model. */
4299 if (TARGET_P8_FUSION && !TARGET_TOC_FUSION && TARGET_POWERPC64
4300 && (TARGET_CMODEL != CMODEL_SMALL)
4301 && !(rs6000_isa_flags_explicit & OPTION_MASK_TOC_FUSION))
4302 rs6000_isa_flags |= OPTION_MASK_TOC_FUSION;
4304 /* ISA 3.0 vector instructions include ISA 2.07. */
4305 if (TARGET_P9_VECTOR && !TARGET_P8_VECTOR)
4307 /* We prefer to not mention undocumented options in
4308 error messages. However, if users have managed to select
4309 power9-vector without selecting power8-vector, they
4310 already know about undocumented flags. */
4311 if (rs6000_isa_flags_explicit & OPTION_MASK_P8_VECTOR)
4312 error ("-mpower9-vector requires -mpower8-vector");
4313 rs6000_isa_flags &= ~OPTION_MASK_P9_VECTOR;
4316 /* -mpower9-dform turns on both -mpower9-dform-scalar and
4317 -mpower9-dform-vector. */
4318 if (TARGET_P9_DFORM_BOTH > 0)
4320 if (!(rs6000_isa_flags_explicit & OPTION_MASK_P9_DFORM_VECTOR))
4321 rs6000_isa_flags |= OPTION_MASK_P9_DFORM_VECTOR;
4323 if (!(rs6000_isa_flags_explicit & OPTION_MASK_P9_DFORM_SCALAR))
4324 rs6000_isa_flags |= OPTION_MASK_P9_DFORM_SCALAR;
4326 else if (TARGET_P9_DFORM_BOTH == 0)
4328 if (!(rs6000_isa_flags_explicit & OPTION_MASK_P9_DFORM_VECTOR))
4329 rs6000_isa_flags &= ~OPTION_MASK_P9_DFORM_VECTOR;
4331 if (!(rs6000_isa_flags_explicit & OPTION_MASK_P9_DFORM_SCALAR))
4332 rs6000_isa_flags &= ~OPTION_MASK_P9_DFORM_SCALAR;
4335 /* ISA 3.0 D-form instructions require p9-vector and upper-regs. */
4336 if ((TARGET_P9_DFORM_SCALAR || TARGET_P9_DFORM_VECTOR) && !TARGET_P9_VECTOR)
4338 /* We prefer to not mention undocumented options in
4339 error messages. However, if users have managed to select
4340 power9-dform without selecting power9-vector, they
4341 already know about undocumented flags. */
4342 if (rs6000_isa_flags_explicit & OPTION_MASK_P9_VECTOR)
4343 error ("-mpower9-dform requires -mpower9-vector");
4344 rs6000_isa_flags &= ~(OPTION_MASK_P9_DFORM_SCALAR
4345 | OPTION_MASK_P9_DFORM_VECTOR);
4348 if (TARGET_P9_DFORM_SCALAR && !TARGET_UPPER_REGS_DF)
4350 /* We prefer to not mention undocumented options in
4351 error messages. However, if users have managed to select
4352 power9-dform without selecting upper-regs-df, they
4353 already know about undocumented flags. */
4354 if (rs6000_isa_flags_explicit & OPTION_MASK_UPPER_REGS_DF)
4355 error ("-mpower9-dform requires -mupper-regs-df");
4356 rs6000_isa_flags &= ~OPTION_MASK_P9_DFORM_SCALAR;
4359 if (TARGET_P9_DFORM_SCALAR && !TARGET_UPPER_REGS_SF)
4361 if (rs6000_isa_flags_explicit & OPTION_MASK_UPPER_REGS_SF)
4362 error ("-mpower9-dform requires -mupper-regs-sf");
4363 rs6000_isa_flags &= ~OPTION_MASK_P9_DFORM_SCALAR;
4366 /* Enable LRA by default. */
4367 if ((rs6000_isa_flags_explicit & OPTION_MASK_LRA) == 0)
4368 rs6000_isa_flags |= OPTION_MASK_LRA;
4370 /* There have been bugs with -mvsx-timode that don't show up with -mlra,
4371 but do show up with -mno-lra. Given that -mlra will become the default once
4372 PR 69847 is fixed, turn off the options with problems by default if
4373 -mno-lra was used, and warn if the user explicitly asked for the option.
4375 Enable -mpower9-dform-vector by default with LRA and the other power9
4376 options. Enable -mvsx-timode by default with LRA and VSX. */
4377 if (!TARGET_LRA)
4379 if (TARGET_VSX_TIMODE)
4381 if ((rs6000_isa_flags_explicit & OPTION_MASK_VSX_TIMODE) != 0)
4382 warning (0, "-mvsx-timode might need -mlra");
4384 else
4385 rs6000_isa_flags &= ~OPTION_MASK_VSX_TIMODE;
4389 else
4391 if (TARGET_VSX && !TARGET_VSX_TIMODE
4392 && (rs6000_isa_flags_explicit & OPTION_MASK_VSX_TIMODE) == 0)
4393 rs6000_isa_flags |= OPTION_MASK_VSX_TIMODE;
4396 /* Set -mallow-movmisalign explicitly on if we have full ISA 2.07
4397 support. If we only have ISA 2.06 support, and the user did not specify
4398 the switch, leave it set to -1 so the movmisalign patterns are enabled,
4399 but we don't enable the full vectorization support. */
4400 if (TARGET_ALLOW_MOVMISALIGN == -1 && TARGET_P8_VECTOR && TARGET_DIRECT_MOVE)
4401 TARGET_ALLOW_MOVMISALIGN = 1;
4403 else if (TARGET_ALLOW_MOVMISALIGN && !TARGET_VSX)
4405 if (TARGET_ALLOW_MOVMISALIGN > 0
4406 && global_options_set.x_TARGET_ALLOW_MOVMISALIGN)
4407 error ("-mallow-movmisalign requires -mvsx");
4409 TARGET_ALLOW_MOVMISALIGN = 0;
4412 /* Determine when unaligned vector accesses are permitted, and when
4413 they are preferred over masked Altivec loads. Note that if
4414 TARGET_ALLOW_MOVMISALIGN has been disabled by the user, then
4415 TARGET_EFFICIENT_UNALIGNED_VSX must be as well. The converse is
4416 not true. */
4417 if (TARGET_EFFICIENT_UNALIGNED_VSX)
4419 if (!TARGET_VSX)
4421 if (rs6000_isa_flags_explicit & OPTION_MASK_EFFICIENT_UNALIGNED_VSX)
4422 error ("-mefficient-unaligned-vsx requires -mvsx");
4424 rs6000_isa_flags &= ~OPTION_MASK_EFFICIENT_UNALIGNED_VSX;
4427 else if (!TARGET_ALLOW_MOVMISALIGN)
4429 if (rs6000_isa_flags_explicit & OPTION_MASK_EFFICIENT_UNALIGNED_VSX)
4430 error ("-mefficient-unaligned-vsx requires -mallow-movmisalign");
4432 rs6000_isa_flags &= ~OPTION_MASK_EFFICIENT_UNALIGNED_VSX;
4436 /* Set long double size before the IEEE 128-bit tests. */
4437 if (!global_options_set.x_rs6000_long_double_type_size)
4439 if (main_target_opt != NULL
4440 && (main_target_opt->x_rs6000_long_double_type_size
4441 != RS6000_DEFAULT_LONG_DOUBLE_SIZE))
4442 error ("target attribute or pragma changes long double size");
4443 else
4444 rs6000_long_double_type_size = RS6000_DEFAULT_LONG_DOUBLE_SIZE;
4447 /* Set -mabi=ieeelongdouble on some old targets. Note, AIX and Darwin
4448 explicitly redefine TARGET_IEEEQUAD to 0, so those systems will not
4449 pick up this default. */
4450 #if !defined (POWERPC_LINUX) && !defined (POWERPC_FREEBSD)
4451 if (!global_options_set.x_rs6000_ieeequad)
4452 rs6000_ieeequad = 1;
4453 #endif
4455 /* Enable the default support for IEEE 128-bit floating point on Linux VSX
4456 systems, but don't enable the __float128 keyword. */
4457 if (TARGET_VSX && TARGET_LONG_DOUBLE_128
4458 && (TARGET_FLOAT128_ENABLE_TYPE || TARGET_IEEEQUAD)
4459 && ((rs6000_isa_flags_explicit & OPTION_MASK_FLOAT128_TYPE) == 0))
4460 rs6000_isa_flags |= OPTION_MASK_FLOAT128_TYPE;
4462 /* IEEE 128-bit floating point requires VSX support. */
4463 if (!TARGET_VSX)
4465 if (TARGET_FLOAT128_KEYWORD)
4467 if ((rs6000_isa_flags_explicit & OPTION_MASK_FLOAT128_KEYWORD) != 0)
4468 error ("-mfloat128 requires VSX support");
4470 rs6000_isa_flags &= ~(OPTION_MASK_FLOAT128_TYPE
4471 | OPTION_MASK_FLOAT128_KEYWORD
4472 | OPTION_MASK_FLOAT128_HW);
4475 else if (TARGET_FLOAT128_TYPE)
4477 if ((rs6000_isa_flags_explicit & OPTION_MASK_FLOAT128_TYPE) != 0)
4478 error ("-mfloat128-type requires VSX support");
4480 rs6000_isa_flags &= ~(OPTION_MASK_FLOAT128_TYPE
4481 | OPTION_MASK_FLOAT128_KEYWORD
4482 | OPTION_MASK_FLOAT128_HW);
4486 /* -mfloat128 and -mfloat128-hardware internally require the underlying IEEE
4487 128-bit floating point support to be enabled. */
4488 if (!TARGET_FLOAT128_TYPE)
4490 if (TARGET_FLOAT128_KEYWORD)
4492 if ((rs6000_isa_flags_explicit & OPTION_MASK_FLOAT128_KEYWORD) != 0)
4494 error ("-mfloat128 requires -mfloat128-type");
4495 rs6000_isa_flags &= ~(OPTION_MASK_FLOAT128_TYPE
4496 | OPTION_MASK_FLOAT128_KEYWORD
4497 | OPTION_MASK_FLOAT128_HW);
4499 else
4500 rs6000_isa_flags |= OPTION_MASK_FLOAT128_TYPE;
4503 if (TARGET_FLOAT128_HW)
4505 if ((rs6000_isa_flags_explicit & OPTION_MASK_FLOAT128_HW) != 0)
4507 error ("-mfloat128-hardware requires -mfloat128-type");
4508 rs6000_isa_flags &= ~OPTION_MASK_FLOAT128_HW;
4510 else
4511 rs6000_isa_flags &= ~(OPTION_MASK_FLOAT128_TYPE
4512 | OPTION_MASK_FLOAT128_KEYWORD
4513 | OPTION_MASK_FLOAT128_HW);
4517 /* If we have -mfloat128-type and full ISA 3.0 support, enable
4518 -mfloat128-hardware by default. However, don't enable the __float128
4519 keyword. If the user explicitly turned on -mfloat128-hardware, enable the
4520 -mfloat128 option as well if it was not already set. */
4521 if (TARGET_FLOAT128_TYPE && !TARGET_FLOAT128_HW
4522 && (rs6000_isa_flags & ISA_3_0_MASKS_IEEE) == ISA_3_0_MASKS_IEEE
4523 && !(rs6000_isa_flags_explicit & OPTION_MASK_FLOAT128_HW))
4524 rs6000_isa_flags |= OPTION_MASK_FLOAT128_HW;
4526 if (TARGET_FLOAT128_HW
4527 && (rs6000_isa_flags & ISA_3_0_MASKS_IEEE) != ISA_3_0_MASKS_IEEE)
4529 if ((rs6000_isa_flags_explicit & OPTION_MASK_FLOAT128_HW) != 0)
4530 error ("-mfloat128-hardware requires full ISA 3.0 support");
4532 rs6000_isa_flags &= ~OPTION_MASK_FLOAT128_HW;
4535 if (TARGET_FLOAT128_HW && !TARGET_FLOAT128_KEYWORD
4536 && (rs6000_isa_flags_explicit & OPTION_MASK_FLOAT128_HW) != 0
4537 && (rs6000_isa_flags_explicit & OPTION_MASK_FLOAT128_KEYWORD) == 0)
4538 rs6000_isa_flags |= OPTION_MASK_FLOAT128_KEYWORD;
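/* Illustrative usage (assuming a power9 target): compiling with
   -mcpu=power9 -mfloat128-hardware therefore makes the __float128 keyword
   available too, so code such as

     __float128 f (__float128 a, __float128 b) { return a + b; }

   can compile to ISA 3.0 quad-precision instructions (e.g. xsaddqp)
   rather than software emulation.  */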
4540 /* Print the options after updating the defaults. */
4541 if (TARGET_DEBUG_REG || TARGET_DEBUG_TARGET)
4542 rs6000_print_isa_options (stderr, 0, "after defaults", rs6000_isa_flags);
4544 /* E500mc does "better" if we inline more aggressively. Respect the
4545 user's opinion, though. */
4546 if (rs6000_block_move_inline_limit == 0
4547 && (rs6000_cpu == PROCESSOR_PPCE500MC
4548 || rs6000_cpu == PROCESSOR_PPCE500MC64
4549 || rs6000_cpu == PROCESSOR_PPCE5500
4550 || rs6000_cpu == PROCESSOR_PPCE6500))
4551 rs6000_block_move_inline_limit = 128;
4553 /* store_one_arg depends on expand_block_move to handle at least the
4554 size of reg_parm_stack_space. */
4555 if (rs6000_block_move_inline_limit < (TARGET_POWERPC64 ? 64 : 32))
4556 rs6000_block_move_inline_limit = (TARGET_POWERPC64 ? 64 : 32);
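/* For example, on a 64-bit target the limit is therefore at least 64
   bytes, so a copy like memcpy (dst, src, 48) can be expanded inline by
   expand_block_move rather than becoming a library call.  (Illustrative;
   the actual expansion also depends on alignment and the active ISA.)  */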
4558 if (global_init_p)
4560 /* If the appropriate debug option is enabled, replace the target hooks
4561 with debug versions that call the real version and then print
4562 debugging information. */
4563 if (TARGET_DEBUG_COST)
4565 targetm.rtx_costs = rs6000_debug_rtx_costs;
4566 targetm.address_cost = rs6000_debug_address_cost;
4567 targetm.sched.adjust_cost = rs6000_debug_adjust_cost;
4570 if (TARGET_DEBUG_ADDR)
4572 targetm.legitimate_address_p = rs6000_debug_legitimate_address_p;
4573 targetm.legitimize_address = rs6000_debug_legitimize_address;
4574 rs6000_secondary_reload_class_ptr
4575 = rs6000_debug_secondary_reload_class;
4576 rs6000_secondary_memory_needed_ptr
4577 = rs6000_debug_secondary_memory_needed;
4578 rs6000_cannot_change_mode_class_ptr
4579 = rs6000_debug_cannot_change_mode_class;
4580 rs6000_preferred_reload_class_ptr
4581 = rs6000_debug_preferred_reload_class;
4582 rs6000_legitimize_reload_address_ptr
4583 = rs6000_debug_legitimize_reload_address;
4584 rs6000_mode_dependent_address_ptr
4585 = rs6000_debug_mode_dependent_address;
4588 if (rs6000_veclibabi_name)
4590 if (strcmp (rs6000_veclibabi_name, "mass") == 0)
4591 rs6000_veclib_handler = rs6000_builtin_vectorized_libmass;
4592 else
4594 error ("unknown vectorization library ABI type (%s) for "
4595 "-mveclibabi= switch", rs6000_veclibabi_name);
4596 ret = false;
4601 /* Disable VSX and Altivec silently if the user switched cpus to power7 in a
4602 target attribute or pragma which automatically enables both options,
4603 unless the altivec ABI was set. This is set by default for 64-bit, but
4604 not for 32-bit. */
4605 if (main_target_opt != NULL && !main_target_opt->x_rs6000_altivec_abi)
4606 rs6000_isa_flags &= ~((OPTION_MASK_VSX | OPTION_MASK_ALTIVEC
4607 | OPTION_MASK_FLOAT128_TYPE
4608 | OPTION_MASK_FLOAT128_KEYWORD)
4609 & ~rs6000_isa_flags_explicit);
4611 /* Enable Altivec ABI for AIX -maltivec. */
4612 if (TARGET_XCOFF && (TARGET_ALTIVEC || TARGET_VSX))
4614 if (main_target_opt != NULL && !main_target_opt->x_rs6000_altivec_abi)
4615 error ("target attribute or pragma changes AltiVec ABI");
4616 else
4617 rs6000_altivec_abi = 1;
4620 /* The AltiVec ABI is the default for PowerPC-64 GNU/Linux. For
4621 PowerPC-32 GNU/Linux, -maltivec implies the AltiVec ABI. It can
4622 be explicitly overridden in either case. */
4623 if (TARGET_ELF)
4625 if (!global_options_set.x_rs6000_altivec_abi
4626 && (TARGET_64BIT || TARGET_ALTIVEC || TARGET_VSX))
4628 if (main_target_opt != NULL &&
4629 !main_target_opt->x_rs6000_altivec_abi)
4630 error ("target attribute or pragma changes AltiVec ABI");
4631 else
4632 rs6000_altivec_abi = 1;
4636 /* Set the Darwin64 ABI as default for 64-bit Darwin.
4637 So far, the only darwin64 targets are also MACH-O. */
4638 if (TARGET_MACHO
4639 && DEFAULT_ABI == ABI_DARWIN
4640 && TARGET_64BIT)
4642 if (main_target_opt != NULL && !main_target_opt->x_rs6000_darwin64_abi)
4643 error ("target attribute or pragma changes darwin64 ABI");
4644 else
4646 rs6000_darwin64_abi = 1;
4647 /* Default to natural alignment, for better performance. */
4648 rs6000_alignment_flags = MASK_ALIGN_NATURAL;
4652 /* Place FP constants in the constant pool instead of TOC
4653 if section anchors are enabled. */
4654 if (flag_section_anchors
4655 && !global_options_set.x_TARGET_NO_FP_IN_TOC)
4656 TARGET_NO_FP_IN_TOC = 1;
4658 if (TARGET_DEBUG_REG || TARGET_DEBUG_TARGET)
4659 rs6000_print_isa_options (stderr, 0, "before subtarget", rs6000_isa_flags);
4661 #ifdef SUBTARGET_OVERRIDE_OPTIONS
4662 SUBTARGET_OVERRIDE_OPTIONS;
4663 #endif
4664 #ifdef SUBSUBTARGET_OVERRIDE_OPTIONS
4665 SUBSUBTARGET_OVERRIDE_OPTIONS;
4666 #endif
4667 #ifdef SUB3TARGET_OVERRIDE_OPTIONS
4668 SUB3TARGET_OVERRIDE_OPTIONS;
4669 #endif
4671 if (TARGET_DEBUG_REG || TARGET_DEBUG_TARGET)
4672 rs6000_print_isa_options (stderr, 0, "after subtarget", rs6000_isa_flags);
4674 /* For the E500 family of cores, reset the single/double FP flags to let us
4675 check that they remain constant across attributes or pragmas. Also,
4676 clear a possible request for string instructions, which are not supported
4677 and which we might have silently enabled above for -Os.
4679 For other families, clear ISEL in case it was set implicitly.
4682 switch (rs6000_cpu)
4684 case PROCESSOR_PPC8540:
4685 case PROCESSOR_PPC8548:
4686 case PROCESSOR_PPCE500MC:
4687 case PROCESSOR_PPCE500MC64:
4688 case PROCESSOR_PPCE5500:
4689 case PROCESSOR_PPCE6500:
4691 rs6000_single_float = TARGET_E500_SINGLE || TARGET_E500_DOUBLE;
4692 rs6000_double_float = TARGET_E500_DOUBLE;
4694 rs6000_isa_flags &= ~OPTION_MASK_STRING;
4696 break;
4698 default:
4700 if (have_cpu && !(rs6000_isa_flags_explicit & OPTION_MASK_ISEL))
4701 rs6000_isa_flags &= ~OPTION_MASK_ISEL;
4703 break;
4706 if (main_target_opt)
4708 if (main_target_opt->x_rs6000_single_float != rs6000_single_float)
4709 error ("target attribute or pragma changes single precision floating "
4710 "point");
4711 if (main_target_opt->x_rs6000_double_float != rs6000_double_float)
4712 error ("target attribute or pragma changes double precision floating "
4713 "point");
4716 /* Detect invalid option combinations with E500. */
4717 CHECK_E500_OPTIONS;
4719 rs6000_always_hint = (rs6000_cpu != PROCESSOR_POWER4
4720 && rs6000_cpu != PROCESSOR_POWER5
4721 && rs6000_cpu != PROCESSOR_POWER6
4722 && rs6000_cpu != PROCESSOR_POWER7
4723 && rs6000_cpu != PROCESSOR_POWER8
4724 && rs6000_cpu != PROCESSOR_POWER9
4725 && rs6000_cpu != PROCESSOR_PPCA2
4726 && rs6000_cpu != PROCESSOR_CELL
4727 && rs6000_cpu != PROCESSOR_PPC476);
4728 rs6000_sched_groups = (rs6000_cpu == PROCESSOR_POWER4
4729 || rs6000_cpu == PROCESSOR_POWER5
4730 || rs6000_cpu == PROCESSOR_POWER7
4731 || rs6000_cpu == PROCESSOR_POWER8);
4732 rs6000_align_branch_targets = (rs6000_cpu == PROCESSOR_POWER4
4733 || rs6000_cpu == PROCESSOR_POWER5
4734 || rs6000_cpu == PROCESSOR_POWER6
4735 || rs6000_cpu == PROCESSOR_POWER7
4736 || rs6000_cpu == PROCESSOR_POWER8
4737 || rs6000_cpu == PROCESSOR_POWER9
4738 || rs6000_cpu == PROCESSOR_PPCE500MC
4739 || rs6000_cpu == PROCESSOR_PPCE500MC64
4740 || rs6000_cpu == PROCESSOR_PPCE5500
4741 || rs6000_cpu == PROCESSOR_PPCE6500);
4743 /* Allow debug switches to override the above settings. These are set to -1
4744 in rs6000.opt to indicate the user hasn't directly set the switch. */
4745 if (TARGET_ALWAYS_HINT >= 0)
4746 rs6000_always_hint = TARGET_ALWAYS_HINT;
4748 if (TARGET_SCHED_GROUPS >= 0)
4749 rs6000_sched_groups = TARGET_SCHED_GROUPS;
4751 if (TARGET_ALIGN_BRANCH_TARGETS >= 0)
4752 rs6000_align_branch_targets = TARGET_ALIGN_BRANCH_TARGETS;
4754 rs6000_sched_restricted_insns_priority
4755 = (rs6000_sched_groups ? 1 : 0);
4757 /* Handle -msched-costly-dep option. */
4758 rs6000_sched_costly_dep
4759 = (rs6000_sched_groups ? true_store_to_load_dep_costly : no_dep_costly);
4761 if (rs6000_sched_costly_dep_str)
4763 if (! strcmp (rs6000_sched_costly_dep_str, "no"))
4764 rs6000_sched_costly_dep = no_dep_costly;
4765 else if (! strcmp (rs6000_sched_costly_dep_str, "all"))
4766 rs6000_sched_costly_dep = all_deps_costly;
4767 else if (! strcmp (rs6000_sched_costly_dep_str, "true_store_to_load"))
4768 rs6000_sched_costly_dep = true_store_to_load_dep_costly;
4769 else if (! strcmp (rs6000_sched_costly_dep_str, "store_to_load"))
4770 rs6000_sched_costly_dep = store_to_load_dep_costly;
4771 else
4772 rs6000_sched_costly_dep = ((enum rs6000_dependence_cost)
4773 atoi (rs6000_sched_costly_dep_str));
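/* Usage examples (illustrative): -msched-costly-dep=store_to_load treats
   every store-to-load dependence as costly, while a numeric argument such
   as -msched-costly-dep=2 goes through the atoi conversion above and is
   assumed to mark dependences whose cost reaches that value as costly.  */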
4776 /* Handle -minsert-sched-nops option. */
4777 rs6000_sched_insert_nops
4778 = (rs6000_sched_groups ? sched_finish_regroup_exact : sched_finish_none);
4780 if (rs6000_sched_insert_nops_str)
4782 if (! strcmp (rs6000_sched_insert_nops_str, "no"))
4783 rs6000_sched_insert_nops = sched_finish_none;
4784 else if (! strcmp (rs6000_sched_insert_nops_str, "pad"))
4785 rs6000_sched_insert_nops = sched_finish_pad_groups;
4786 else if (! strcmp (rs6000_sched_insert_nops_str, "regroup_exact"))
4787 rs6000_sched_insert_nops = sched_finish_regroup_exact;
4788 else
4789 rs6000_sched_insert_nops = ((enum rs6000_nop_insertion)
4790 atoi (rs6000_sched_insert_nops_str));
4793 if (global_init_p)
4795 #ifdef TARGET_REGNAMES
4796 /* If the user desires alternate register names, copy in the
4797 alternate names now. */
4798 if (TARGET_REGNAMES)
4799 memcpy (rs6000_reg_names, alt_reg_names, sizeof (rs6000_reg_names));
4800 #endif
4802 /* Set aix_struct_return last, after the ABI is determined.
4803 If -maix-struct-return or -msvr4-struct-return was explicitly
4804 used, don't override with the ABI default. */
4805 if (!global_options_set.x_aix_struct_return)
4806 aix_struct_return = (DEFAULT_ABI != ABI_V4 || DRAFT_V4_STRUCT_RET);
4808 #if 0
4809 /* IBM XL compiler defaults to unsigned bitfields. */
4810 if (TARGET_XL_COMPAT)
4811 flag_signed_bitfields = 0;
4812 #endif
4814 if (TARGET_LONG_DOUBLE_128 && !TARGET_IEEEQUAD)
4815 REAL_MODE_FORMAT (TFmode) = &ibm_extended_format;
4817 ASM_GENERATE_INTERNAL_LABEL (toc_label_name, "LCTOC", 1);
4819 /* We can only guarantee the availability of DI pseudo-ops when
4820 assembling for 64-bit targets. */
4821 if (!TARGET_64BIT)
4823 targetm.asm_out.aligned_op.di = NULL;
4824 targetm.asm_out.unaligned_op.di = NULL;
4828 /* Set branch target alignment, if not optimizing for size. */
4829 if (!optimize_size)
4831 /* Cell wants to be 8-byte aligned for dual issue. Titan wants to be
4832 8-byte aligned to avoid misprediction by the branch predictor. */
4833 if (rs6000_cpu == PROCESSOR_TITAN
4834 || rs6000_cpu == PROCESSOR_CELL)
4836 if (align_functions <= 0)
4837 align_functions = 8;
4838 if (align_jumps <= 0)
4839 align_jumps = 8;
4840 if (align_loops <= 0)
4841 align_loops = 8;
4843 if (rs6000_align_branch_targets)
4845 if (align_functions <= 0)
4846 align_functions = 16;
4847 if (align_jumps <= 0)
4848 align_jumps = 16;
4849 if (align_loops <= 0)
4851 can_override_loop_align = 1;
4852 align_loops = 16;
4855 if (align_jumps_max_skip <= 0)
4856 align_jumps_max_skip = 15;
4857 if (align_loops_max_skip <= 0)
4858 align_loops_max_skip = 15;
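/* For instance, when rs6000_align_branch_targets is set the values above
   request 16-byte alignment, which typically appears in the assembly as
   ".p2align 4,,15": align to 2**4 bytes, padding with at most 15 bytes.
   (Illustrative; the exact directive depends on the assembler.)  */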
4861 /* Arrange to save and restore machine status around nested functions. */
4862 init_machine_status = rs6000_init_machine_status;
4864 /* We should always be splitting complex arguments, but we can't break
4865 Linux and Darwin ABIs at the moment. For now, only AIX is fixed. */
4866 if (DEFAULT_ABI == ABI_V4 || DEFAULT_ABI == ABI_DARWIN)
4867 targetm.calls.split_complex_arg = NULL;
4869 /* The AIX and ELFv1 ABIs define standard function descriptors. */
4870 if (DEFAULT_ABI == ABI_AIX)
4871 targetm.calls.custom_function_descriptors = 0;
4874 /* Initialize rs6000_cost with the appropriate target costs. */
4875 if (optimize_size)
4876 rs6000_cost = TARGET_POWERPC64 ? &size64_cost : &size32_cost;
4877 else
4878 switch (rs6000_cpu)
4880 case PROCESSOR_RS64A:
4881 rs6000_cost = &rs64a_cost;
4882 break;
4884 case PROCESSOR_MPCCORE:
4885 rs6000_cost = &mpccore_cost;
4886 break;
4888 case PROCESSOR_PPC403:
4889 rs6000_cost = &ppc403_cost;
4890 break;
4892 case PROCESSOR_PPC405:
4893 rs6000_cost = &ppc405_cost;
4894 break;
4896 case PROCESSOR_PPC440:
4897 rs6000_cost = &ppc440_cost;
4898 break;
4900 case PROCESSOR_PPC476:
4901 rs6000_cost = &ppc476_cost;
4902 break;
4904 case PROCESSOR_PPC601:
4905 rs6000_cost = &ppc601_cost;
4906 break;
4908 case PROCESSOR_PPC603:
4909 rs6000_cost = &ppc603_cost;
4910 break;
4912 case PROCESSOR_PPC604:
4913 rs6000_cost = &ppc604_cost;
4914 break;
4916 case PROCESSOR_PPC604e:
4917 rs6000_cost = &ppc604e_cost;
4918 break;
4920 case PROCESSOR_PPC620:
4921 rs6000_cost = &ppc620_cost;
4922 break;
4924 case PROCESSOR_PPC630:
4925 rs6000_cost = &ppc630_cost;
4926 break;
4928 case PROCESSOR_CELL:
4929 rs6000_cost = &ppccell_cost;
4930 break;
4932 case PROCESSOR_PPC750:
4933 case PROCESSOR_PPC7400:
4934 rs6000_cost = &ppc750_cost;
4935 break;
4937 case PROCESSOR_PPC7450:
4938 rs6000_cost = &ppc7450_cost;
4939 break;
4941 case PROCESSOR_PPC8540:
4942 case PROCESSOR_PPC8548:
4943 rs6000_cost = &ppc8540_cost;
4944 break;
4946 case PROCESSOR_PPCE300C2:
4947 case PROCESSOR_PPCE300C3:
4948 rs6000_cost = &ppce300c2c3_cost;
4949 break;
4951 case PROCESSOR_PPCE500MC:
4952 rs6000_cost = &ppce500mc_cost;
4953 break;
4955 case PROCESSOR_PPCE500MC64:
4956 rs6000_cost = &ppce500mc64_cost;
4957 break;
4959 case PROCESSOR_PPCE5500:
4960 rs6000_cost = &ppce5500_cost;
4961 break;
4963 case PROCESSOR_PPCE6500:
4964 rs6000_cost = &ppce6500_cost;
4965 break;
4967 case PROCESSOR_TITAN:
4968 rs6000_cost = &titan_cost;
4969 break;
4971 case PROCESSOR_POWER4:
4972 case PROCESSOR_POWER5:
4973 rs6000_cost = &power4_cost;
4974 break;
4976 case PROCESSOR_POWER6:
4977 rs6000_cost = &power6_cost;
4978 break;
4980 case PROCESSOR_POWER7:
4981 rs6000_cost = &power7_cost;
4982 break;
4984 case PROCESSOR_POWER8:
4985 rs6000_cost = &power8_cost;
4986 break;
4988 case PROCESSOR_POWER9:
4989 rs6000_cost = &power9_cost;
4990 break;
4992 case PROCESSOR_PPCA2:
4993 rs6000_cost = &ppca2_cost;
4994 break;
4996 default:
4997 gcc_unreachable ();
5000 if (global_init_p)
5002 maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES,
5003 rs6000_cost->simultaneous_prefetches,
5004 global_options.x_param_values,
5005 global_options_set.x_param_values);
5006 maybe_set_param_value (PARAM_L1_CACHE_SIZE, rs6000_cost->l1_cache_size,
5007 global_options.x_param_values,
5008 global_options_set.x_param_values);
5009 maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE,
5010 rs6000_cost->cache_line_size,
5011 global_options.x_param_values,
5012 global_options_set.x_param_values);
5013 maybe_set_param_value (PARAM_L2_CACHE_SIZE, rs6000_cost->l2_cache_size,
5014 global_options.x_param_values,
5015 global_options_set.x_param_values);
5017 /* Increase loop peeling limits based on performance analysis. */
5018 maybe_set_param_value (PARAM_MAX_PEELED_INSNS, 400,
5019 global_options.x_param_values,
5020 global_options_set.x_param_values);
5021 maybe_set_param_value (PARAM_MAX_COMPLETELY_PEELED_INSNS, 400,
5022 global_options.x_param_values,
5023 global_options_set.x_param_values);
5025 /* If using typedef char *va_list, signal that
5026 __builtin_va_start (&ap, 0) can be optimized to
5027 ap = __builtin_next_arg (0). */
5028 if (DEFAULT_ABI != ABI_V4)
5029 targetm.expand_builtin_va_start = NULL;
/* Set up single/double float flags.
   If TARGET_HARD_FLOAT is set, but neither single nor double is set,
   then set both flags.  */
5035 if (TARGET_HARD_FLOAT && TARGET_FPRS
5036 && rs6000_single_float == 0 && rs6000_double_float == 0)
5037 rs6000_single_float = rs6000_double_float = 1;
5039 /* If not explicitly specified via option, decide whether to generate indexed
5040 load/store instructions. */
5041 if (TARGET_AVOID_XFORM == -1)
5042 /* Avoid indexed addressing when targeting Power6 in order to avoid the
5043 DERAT mispredict penalty. However the LVE and STVE altivec instructions
5044 need indexed accesses and the type used is the scalar type of the element
5045 being loaded or stored. */
5046 TARGET_AVOID_XFORM = (rs6000_cpu == PROCESSOR_POWER6 && TARGET_CMPB
5047 && !TARGET_ALTIVEC);
5049 /* Set the -mrecip options. */
5050 if (rs6000_recip_name)
5052 char *p = ASTRDUP (rs6000_recip_name);
5053 char *q;
5054 unsigned int mask, i;
5055 bool invert;
5057 while ((q = strtok (p, ",")) != NULL)
5059 p = NULL;
5060 if (*q == '!')
5062 invert = true;
5063 q++;
5065 else
5066 invert = false;
5068 if (!strcmp (q, "default"))
5069 mask = ((TARGET_RECIP_PRECISION)
5070 ? RECIP_HIGH_PRECISION : RECIP_LOW_PRECISION);
5071 else
5073 for (i = 0; i < ARRAY_SIZE (recip_options); i++)
5074 if (!strcmp (q, recip_options[i].string))
5076 mask = recip_options[i].mask;
5077 break;
5080 if (i == ARRAY_SIZE (recip_options))
5082 error ("unknown option for -mrecip=%s", q);
5083 invert = false;
5084 mask = 0;
5085 ret = false;
5089 if (invert)
5090 rs6000_recip_control &= ~mask;
5091 else
5092 rs6000_recip_control |= mask;
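
	      /* Illustrative example (not from the source): with
		 -mrecip=rsqrtd,!divf the loop above tokenizes the string on
		 commas and processes:

		   "rsqrtd" -> invert = false, mask = the recip_options
			       entry for "rsqrtd"; rs6000_recip_control |= mask
		   "!divf"  -> invert = true,  mask = the entry for "divf";
			       rs6000_recip_control &= ~mask

		 The option names assume corresponding entries exist in
		 recip_options; an unrecognized token reaches the error path
		 above, which clears MASK and sets RET to false.  */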
5096 /* Set the builtin mask of the various options used that could affect which
5097 builtins were used. In the past we used target_flags, but we've run out
5098 of bits, and some options like SPE and PAIRED are no longer in
5099 target_flags. */
5100 rs6000_builtin_mask = rs6000_builtin_mask_calculate ();
5101 if (TARGET_DEBUG_BUILTIN || TARGET_DEBUG_TARGET)
5102 rs6000_print_builtin_options (stderr, 0, "builtin mask",
5103 rs6000_builtin_mask);
5105 /* Initialize all of the registers. */
5106 rs6000_init_hard_regno_mode_ok (global_init_p);
  /* Save the initial options in case the user uses function-specific
     options.  */
5109 if (global_init_p)
5110 target_option_default_node = target_option_current_node
5111 = build_target_option_node (&global_options);
5113 /* If not explicitly specified via option, decide whether to generate the
5114 extra blr's required to preserve the link stack on some cpus (eg, 476). */
5115 if (TARGET_LINK_STACK == -1)
5116 SET_TARGET_LINK_STACK (rs6000_cpu == PROCESSOR_PPC476 && flag_pic);
5118 return ret;
5121 /* Implement TARGET_OPTION_OVERRIDE. On the RS/6000 this is used to
5122 define the target cpu type. */
5124 static void
5125 rs6000_option_override (void)
5127 (void) rs6000_option_override_internal (true);
5129 /* Register machine-specific passes. This needs to be done at start-up.
5130 It's convenient to do it here (like i386 does). */
5131 opt_pass *pass_analyze_swaps = make_pass_analyze_swaps (g);
5133 struct register_pass_info analyze_swaps_info
5134 = { pass_analyze_swaps, "cse1", 1, PASS_POS_INSERT_BEFORE };
5136 register_pass (&analyze_swaps_info);
5140 /* Implement targetm.vectorize.builtin_mask_for_load. */
5141 static tree
5142 rs6000_builtin_mask_for_load (void)
5144 /* Don't use lvsl/vperm for P8 and similarly efficient machines. */
5145 if ((TARGET_ALTIVEC && !TARGET_VSX)
5146 || (TARGET_VSX && !TARGET_EFFICIENT_UNALIGNED_VSX))
5147 return altivec_builtin_mask_for_load;
5148 else
5149 return 0;
5152 /* Implement LOOP_ALIGN. */
int
rs6000_loop_align (rtx label)
5156 basic_block bb;
5157 int ninsns;
5159 /* Don't override loop alignment if -falign-loops was specified. */
5160 if (!can_override_loop_align)
5161 return align_loops_log;
5163 bb = BLOCK_FOR_INSN (label);
5164 ninsns = num_loop_insns(bb->loop_father);
  /* Align small loops to 32 bytes to fit in an icache sector, otherwise
     return the default.  */
5167 if (ninsns > 4 && ninsns <= 8
5168 && (rs6000_cpu == PROCESSOR_POWER4
5169 || rs6000_cpu == PROCESSOR_POWER5
5170 || rs6000_cpu == PROCESSOR_POWER6
5171 || rs6000_cpu == PROCESSOR_POWER7
5172 || rs6000_cpu == PROCESSOR_POWER8
5173 || rs6000_cpu == PROCESSOR_POWER9))
5174 return 5;
5175 else
5176 return align_loops_log;
5179 /* Implement TARGET_LOOP_ALIGN_MAX_SKIP. */
5180 static int
5181 rs6000_loop_align_max_skip (rtx_insn *label)
5183 return (1 << rs6000_loop_align (label)) - 1;
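
/* Worked example (illustrative): on a POWER8 target, a loop of six
   instructions satisfies 4 < ninsns <= 8, so rs6000_loop_align returns 5
   and the loop is aligned to 1 << 5 == 32 bytes (one icache sector).
   rs6000_loop_align_max_skip then allows up to (1 << 5) - 1 == 31 bytes
   of padding to reach that boundary.  */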
/* Return true iff a data reference of TYPE can reach vector alignment (16)
   after applying N iterations.  This routine does not determine how many
   iterations are required to reach the desired alignment.  */
5190 static bool
5191 rs6000_vector_alignment_reachable (const_tree type ATTRIBUTE_UNUSED, bool is_packed)
5193 if (is_packed)
5194 return false;
5196 if (TARGET_32BIT)
5198 if (rs6000_alignment_flags == MASK_ALIGN_NATURAL)
5199 return true;
5201 if (rs6000_alignment_flags == MASK_ALIGN_POWER)
5202 return true;
5204 return false;
5206 else
5208 if (TARGET_MACHO)
5209 return false;
      /* Assume all other types are naturally aligned.  CHECKME!  */
5212 return true;
5216 /* Return true if the vector misalignment factor is supported by the
5217 target. */
5218 static bool
5219 rs6000_builtin_support_vector_misalignment (machine_mode mode,
5220 const_tree type,
5221 int misalignment,
5222 bool is_packed)
5224 if (TARGET_VSX)
5226 if (TARGET_EFFICIENT_UNALIGNED_VSX)
5227 return true;
      /* Return false if the movmisalign pattern is not supported for this
	 mode.  */
5230 if (optab_handler (movmisalign_optab, mode) == CODE_FOR_nothing)
5231 return false;
5233 if (misalignment == -1)
5235 /* Misalignment factor is unknown at compile time but we know
5236 it's word aligned. */
5237 if (rs6000_vector_alignment_reachable (type, is_packed))
5239 int element_size = TREE_INT_CST_LOW (TYPE_SIZE (type));
5241 if (element_size == 64 || element_size == 32)
5242 return true;
5245 return false;
5248 /* VSX supports word-aligned vector. */
5249 if (misalignment % 4 == 0)
5250 return true;
5252 return false;
5255 /* Implement targetm.vectorize.builtin_vectorization_cost. */
5256 static int
5257 rs6000_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
5258 tree vectype, int misalign)
5260 unsigned elements;
5261 tree elem_type;
5263 switch (type_of_cost)
5265 case scalar_stmt:
5266 case scalar_load:
5267 case scalar_store:
5268 case vector_stmt:
5269 case vector_load:
5270 case vector_store:
5271 case vec_to_scalar:
5272 case scalar_to_vec:
5273 case cond_branch_not_taken:
5274 return 1;
5276 case vec_perm:
5277 if (TARGET_VSX)
5278 return 3;
5279 else
5280 return 1;
5282 case vec_promote_demote:
5283 if (TARGET_VSX)
5284 return 4;
5285 else
5286 return 1;
5288 case cond_branch_taken:
5289 return 3;
5291 case unaligned_load:
5292 if (TARGET_EFFICIENT_UNALIGNED_VSX)
5293 return 1;
5295 if (TARGET_VSX && TARGET_ALLOW_MOVMISALIGN)
5297 elements = TYPE_VECTOR_SUBPARTS (vectype);
5298 if (elements == 2)
5299 /* Double word aligned. */
5300 return 2;
5302 if (elements == 4)
5304 switch (misalign)
5306 case 8:
5307 /* Double word aligned. */
5308 return 2;
5310 case -1:
5311 /* Unknown misalignment. */
5312 case 4:
5313 case 12:
5314 /* Word aligned. */
5315 return 22;
5317 default:
5318 gcc_unreachable ();
5323 if (TARGET_ALTIVEC)
5324 /* Misaligned loads are not supported. */
5325 gcc_unreachable ();
5327 return 2;
5329 case unaligned_store:
5330 if (TARGET_EFFICIENT_UNALIGNED_VSX)
5331 return 1;
5333 if (TARGET_VSX && TARGET_ALLOW_MOVMISALIGN)
5335 elements = TYPE_VECTOR_SUBPARTS (vectype);
5336 if (elements == 2)
5337 /* Double word aligned. */
5338 return 2;
5340 if (elements == 4)
5342 switch (misalign)
5344 case 8:
5345 /* Double word aligned. */
5346 return 2;
5348 case -1:
5349 /* Unknown misalignment. */
5350 case 4:
5351 case 12:
5352 /* Word aligned. */
5353 return 23;
5355 default:
5356 gcc_unreachable ();
5361 if (TARGET_ALTIVEC)
5362 /* Misaligned stores are not supported. */
5363 gcc_unreachable ();
5365 return 2;
5367 case vec_construct:
5368 /* This is a rough approximation assuming non-constant elements
5369 constructed into a vector via element insertion. FIXME:
5370 vec_construct is not granular enough for uniformly good
5371 decisions. If the initialization is a splat, this is
5372 cheaper than we estimate. Improve this someday. */
5373 elem_type = TREE_TYPE (vectype);
5374 /* 32-bit vectors loaded into registers are stored as double
5375 precision, so we need 2 permutes, 2 converts, and 1 merge
5376 to construct a vector of short floats from them. */
5377 if (SCALAR_FLOAT_TYPE_P (elem_type)
5378 && TYPE_PRECISION (elem_type) == 32)
5379 return 5;
5380 else
5381 return max (2, TYPE_VECTOR_SUBPARTS (vectype) - 1);
5383 default:
5384 gcc_unreachable ();
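
/* Worked example (illustrative): an unaligned_load of a V4SF vector with
   misalign == 4 (word aligned) on a VSX target that allows movmisalign but
   lacks TARGET_EFFICIENT_UNALIGNED_VSX costs 22, reflecting the load plus a
   permute-based realignment sequence, whereas the same load with
   misalign == 8 (double-word aligned) costs only 2.  */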
5388 /* Implement targetm.vectorize.preferred_simd_mode. */
5390 static machine_mode
5391 rs6000_preferred_simd_mode (machine_mode mode)
5393 if (TARGET_VSX)
5394 switch (mode)
5396 case DFmode:
5397 return V2DFmode;
5398 default:;
5400 if (TARGET_ALTIVEC || TARGET_VSX)
5401 switch (mode)
5403 case SFmode:
5404 return V4SFmode;
5405 case TImode:
5406 return V1TImode;
5407 case DImode:
5408 return V2DImode;
5409 case SImode:
5410 return V4SImode;
5411 case HImode:
5412 return V8HImode;
5413 case QImode:
5414 return V16QImode;
5415 default:;
5417 if (TARGET_SPE)
5418 switch (mode)
5420 case SFmode:
5421 return V2SFmode;
5422 case SImode:
5423 return V2SImode;
5424 default:;
5426 if (TARGET_PAIRED_FLOAT
5427 && mode == SFmode)
5428 return V2SFmode;
5429 return word_mode;
5432 typedef struct _rs6000_cost_data
5434 struct loop *loop_info;
5435 unsigned cost[3];
5436 } rs6000_cost_data;
5438 /* Test for likely overcommitment of vector hardware resources. If a
5439 loop iteration is relatively large, and too large a percentage of
5440 instructions in the loop are vectorized, the cost model may not
5441 adequately reflect delays from unavailable vector resources.
5442 Penalize the loop body cost for this case. */
5444 static void
5445 rs6000_density_test (rs6000_cost_data *data)
5447 const int DENSITY_PCT_THRESHOLD = 85;
5448 const int DENSITY_SIZE_THRESHOLD = 70;
5449 const int DENSITY_PENALTY = 10;
5450 struct loop *loop = data->loop_info;
5451 basic_block *bbs = get_loop_body (loop);
5452 int nbbs = loop->num_nodes;
5453 int vec_cost = data->cost[vect_body], not_vec_cost = 0;
5454 int i, density_pct;
5456 for (i = 0; i < nbbs; i++)
5458 basic_block bb = bbs[i];
5459 gimple_stmt_iterator gsi;
5461 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
5463 gimple *stmt = gsi_stmt (gsi);
5464 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
5466 if (!STMT_VINFO_RELEVANT_P (stmt_info)
5467 && !STMT_VINFO_IN_PATTERN_P (stmt_info))
5468 not_vec_cost++;
5472 free (bbs);
5473 density_pct = (vec_cost * 100) / (vec_cost + not_vec_cost);
5475 if (density_pct > DENSITY_PCT_THRESHOLD
5476 && vec_cost + not_vec_cost > DENSITY_SIZE_THRESHOLD)
5478 data->cost[vect_body] = vec_cost * (100 + DENSITY_PENALTY) / 100;
5479 if (dump_enabled_p ())
5480 dump_printf_loc (MSG_NOTE, vect_location,
5481 "density %d%%, cost %d exceeds threshold, penalizing "
5482 "loop body cost by %d%%", density_pct,
5483 vec_cost + not_vec_cost, DENSITY_PENALTY);
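
  /* Illustrative numbers: if a loop body has vec_cost == 90 and
     not_vec_cost == 10, then density_pct == 90 > 85 and the body size
     100 > 70, so the vectorized body cost is inflated by 10% to
     90 * 110 / 100 == 99.  */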
5487 /* Implement targetm.vectorize.init_cost. */
5489 static void *
5490 rs6000_init_cost (struct loop *loop_info)
5492 rs6000_cost_data *data = XNEW (struct _rs6000_cost_data);
5493 data->loop_info = loop_info;
5494 data->cost[vect_prologue] = 0;
5495 data->cost[vect_body] = 0;
5496 data->cost[vect_epilogue] = 0;
5497 return data;
5500 /* Implement targetm.vectorize.add_stmt_cost. */
5502 static unsigned
5503 rs6000_add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind,
5504 struct _stmt_vec_info *stmt_info, int misalign,
5505 enum vect_cost_model_location where)
5507 rs6000_cost_data *cost_data = (rs6000_cost_data*) data;
5508 unsigned retval = 0;
5510 if (flag_vect_cost_model)
5512 tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
5513 int stmt_cost = rs6000_builtin_vectorization_cost (kind, vectype,
5514 misalign);
5515 /* Statements in an inner loop relative to the loop being
5516 vectorized are weighted more heavily. The value here is
5517 arbitrary and could potentially be improved with analysis. */
5518 if (where == vect_body && stmt_info && stmt_in_inner_loop_p (stmt_info))
5519 count *= 50; /* FIXME. */
5521 retval = (unsigned) (count * stmt_cost);
5522 cost_data->cost[where] += retval;
5525 return retval;
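
/* Illustrative example: a single vector_stmt (base cost 1) that sits in an
   inner loop relative to the loop being vectorized is accounted as
   1 * 50 == 50 units in the vect_body bucket, reflecting the arbitrary
   FIXME multiplier above.  */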
5528 /* Implement targetm.vectorize.finish_cost. */
5530 static void
5531 rs6000_finish_cost (void *data, unsigned *prologue_cost,
5532 unsigned *body_cost, unsigned *epilogue_cost)
5534 rs6000_cost_data *cost_data = (rs6000_cost_data*) data;
5536 if (cost_data->loop_info)
5537 rs6000_density_test (cost_data);
5539 *prologue_cost = cost_data->cost[vect_prologue];
5540 *body_cost = cost_data->cost[vect_body];
5541 *epilogue_cost = cost_data->cost[vect_epilogue];
5544 /* Implement targetm.vectorize.destroy_cost_data. */
5546 static void
5547 rs6000_destroy_cost_data (void *data)
5549 free (data);
5552 /* Handler for the Mathematical Acceleration Subsystem (mass) interface to a
5553 library with vectorized intrinsics. */
5555 static tree
5556 rs6000_builtin_vectorized_libmass (combined_fn fn, tree type_out,
5557 tree type_in)
5559 char name[32];
5560 const char *suffix = NULL;
5561 tree fntype, new_fndecl, bdecl = NULL_TREE;
5562 int n_args = 1;
5563 const char *bname;
5564 machine_mode el_mode, in_mode;
5565 int n, in_n;
5567 /* Libmass is suitable for unsafe math only as it does not correctly support
5568 parts of IEEE with the required precision such as denormals. Only support
5569 it if we have VSX to use the simd d2 or f4 functions.
5570 XXX: Add variable length support. */
5571 if (!flag_unsafe_math_optimizations || !TARGET_VSX)
5572 return NULL_TREE;
5574 el_mode = TYPE_MODE (TREE_TYPE (type_out));
5575 n = TYPE_VECTOR_SUBPARTS (type_out);
5576 in_mode = TYPE_MODE (TREE_TYPE (type_in));
5577 in_n = TYPE_VECTOR_SUBPARTS (type_in);
5578 if (el_mode != in_mode
5579 || n != in_n)
5580 return NULL_TREE;
5582 switch (fn)
5584 CASE_CFN_ATAN2:
5585 CASE_CFN_HYPOT:
5586 CASE_CFN_POW:
5587 n_args = 2;
5588 gcc_fallthrough ();
5590 CASE_CFN_ACOS:
5591 CASE_CFN_ACOSH:
5592 CASE_CFN_ASIN:
5593 CASE_CFN_ASINH:
5594 CASE_CFN_ATAN:
5595 CASE_CFN_ATANH:
5596 CASE_CFN_CBRT:
5597 CASE_CFN_COS:
5598 CASE_CFN_COSH:
5599 CASE_CFN_ERF:
5600 CASE_CFN_ERFC:
5601 CASE_CFN_EXP2:
5602 CASE_CFN_EXP:
5603 CASE_CFN_EXPM1:
5604 CASE_CFN_LGAMMA:
5605 CASE_CFN_LOG10:
5606 CASE_CFN_LOG1P:
5607 CASE_CFN_LOG2:
5608 CASE_CFN_LOG:
5609 CASE_CFN_SIN:
5610 CASE_CFN_SINH:
5611 CASE_CFN_SQRT:
5612 CASE_CFN_TAN:
5613 CASE_CFN_TANH:
5614 if (el_mode == DFmode && n == 2)
5616 bdecl = mathfn_built_in (double_type_node, fn);
5617 suffix = "d2"; /* pow -> powd2 */
5619 else if (el_mode == SFmode && n == 4)
5621 bdecl = mathfn_built_in (float_type_node, fn);
5622 suffix = "4"; /* powf -> powf4 */
5624 else
5625 return NULL_TREE;
5626 if (!bdecl)
5627 return NULL_TREE;
5628 break;
5630 default:
5631 return NULL_TREE;
5634 gcc_assert (suffix != NULL);
5635 bname = IDENTIFIER_POINTER (DECL_NAME (bdecl));
5636 if (!bname)
5637 return NULL_TREE;
5639 strcpy (name, bname + sizeof ("__builtin_") - 1);
5640 strcat (name, suffix);
5642 if (n_args == 1)
5643 fntype = build_function_type_list (type_out, type_in, NULL);
5644 else if (n_args == 2)
5645 fntype = build_function_type_list (type_out, type_in, type_in, NULL);
5646 else
5647 gcc_unreachable ();
5649 /* Build a function declaration for the vectorized function. */
5650 new_fndecl = build_decl (BUILTINS_LOCATION,
5651 FUNCTION_DECL, get_identifier (name), fntype);
5652 TREE_PUBLIC (new_fndecl) = 1;
5653 DECL_EXTERNAL (new_fndecl) = 1;
5654 DECL_IS_NOVOPS (new_fndecl) = 1;
5655 TREE_READONLY (new_fndecl) = 1;
5657 return new_fndecl;
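
/* Illustrative example: vectorizing pow over V2DFmode with
   -funsafe-math-optimizations and VSX enabled builds a decl named "powd2"
   of type V2DF (V2DF, V2DF), per the "pow -> powd2" rule above; for
   V4SFmode the decl would be "powf4".  Whether such a routine actually
   exists is up to the MASS library, not this file.  */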
5660 /* Returns a function decl for a vectorized version of the builtin function
5661 with builtin function code FN and the result vector type TYPE, or NULL_TREE
5662 if it is not available. */
5664 static tree
5665 rs6000_builtin_vectorized_function (unsigned int fn, tree type_out,
5666 tree type_in)
5668 machine_mode in_mode, out_mode;
5669 int in_n, out_n;
5671 if (TARGET_DEBUG_BUILTIN)
5672 fprintf (stderr, "rs6000_builtin_vectorized_function (%s, %s, %s)\n",
5673 combined_fn_name (combined_fn (fn)),
5674 GET_MODE_NAME (TYPE_MODE (type_out)),
5675 GET_MODE_NAME (TYPE_MODE (type_in)));
5677 if (TREE_CODE (type_out) != VECTOR_TYPE
5678 || TREE_CODE (type_in) != VECTOR_TYPE
5679 || !TARGET_VECTORIZE_BUILTINS)
5680 return NULL_TREE;
5682 out_mode = TYPE_MODE (TREE_TYPE (type_out));
5683 out_n = TYPE_VECTOR_SUBPARTS (type_out);
5684 in_mode = TYPE_MODE (TREE_TYPE (type_in));
5685 in_n = TYPE_VECTOR_SUBPARTS (type_in);
5687 switch (fn)
5689 CASE_CFN_COPYSIGN:
5690 if (VECTOR_UNIT_VSX_P (V2DFmode)
5691 && out_mode == DFmode && out_n == 2
5692 && in_mode == DFmode && in_n == 2)
5693 return rs6000_builtin_decls[VSX_BUILTIN_CPSGNDP];
5694 if (VECTOR_UNIT_VSX_P (V4SFmode)
5695 && out_mode == SFmode && out_n == 4
5696 && in_mode == SFmode && in_n == 4)
5697 return rs6000_builtin_decls[VSX_BUILTIN_CPSGNSP];
5698 if (VECTOR_UNIT_ALTIVEC_P (V4SFmode)
5699 && out_mode == SFmode && out_n == 4
5700 && in_mode == SFmode && in_n == 4)
5701 return rs6000_builtin_decls[ALTIVEC_BUILTIN_COPYSIGN_V4SF];
5702 break;
5703 CASE_CFN_CEIL:
5704 if (VECTOR_UNIT_VSX_P (V2DFmode)
5705 && out_mode == DFmode && out_n == 2
5706 && in_mode == DFmode && in_n == 2)
5707 return rs6000_builtin_decls[VSX_BUILTIN_XVRDPIP];
5708 if (VECTOR_UNIT_VSX_P (V4SFmode)
5709 && out_mode == SFmode && out_n == 4
5710 && in_mode == SFmode && in_n == 4)
5711 return rs6000_builtin_decls[VSX_BUILTIN_XVRSPIP];
5712 if (VECTOR_UNIT_ALTIVEC_P (V4SFmode)
5713 && out_mode == SFmode && out_n == 4
5714 && in_mode == SFmode && in_n == 4)
5715 return rs6000_builtin_decls[ALTIVEC_BUILTIN_VRFIP];
5716 break;
5717 CASE_CFN_FLOOR:
5718 if (VECTOR_UNIT_VSX_P (V2DFmode)
5719 && out_mode == DFmode && out_n == 2
5720 && in_mode == DFmode && in_n == 2)
5721 return rs6000_builtin_decls[VSX_BUILTIN_XVRDPIM];
5722 if (VECTOR_UNIT_VSX_P (V4SFmode)
5723 && out_mode == SFmode && out_n == 4
5724 && in_mode == SFmode && in_n == 4)
5725 return rs6000_builtin_decls[VSX_BUILTIN_XVRSPIM];
5726 if (VECTOR_UNIT_ALTIVEC_P (V4SFmode)
5727 && out_mode == SFmode && out_n == 4
5728 && in_mode == SFmode && in_n == 4)
5729 return rs6000_builtin_decls[ALTIVEC_BUILTIN_VRFIM];
5730 break;
5731 CASE_CFN_FMA:
5732 if (VECTOR_UNIT_VSX_P (V2DFmode)
5733 && out_mode == DFmode && out_n == 2
5734 && in_mode == DFmode && in_n == 2)
5735 return rs6000_builtin_decls[VSX_BUILTIN_XVMADDDP];
5736 if (VECTOR_UNIT_VSX_P (V4SFmode)
5737 && out_mode == SFmode && out_n == 4
5738 && in_mode == SFmode && in_n == 4)
5739 return rs6000_builtin_decls[VSX_BUILTIN_XVMADDSP];
5740 if (VECTOR_UNIT_ALTIVEC_P (V4SFmode)
5741 && out_mode == SFmode && out_n == 4
5742 && in_mode == SFmode && in_n == 4)
5743 return rs6000_builtin_decls[ALTIVEC_BUILTIN_VMADDFP];
5744 break;
5745 CASE_CFN_TRUNC:
5746 if (VECTOR_UNIT_VSX_P (V2DFmode)
5747 && out_mode == DFmode && out_n == 2
5748 && in_mode == DFmode && in_n == 2)
5749 return rs6000_builtin_decls[VSX_BUILTIN_XVRDPIZ];
5750 if (VECTOR_UNIT_VSX_P (V4SFmode)
5751 && out_mode == SFmode && out_n == 4
5752 && in_mode == SFmode && in_n == 4)
5753 return rs6000_builtin_decls[VSX_BUILTIN_XVRSPIZ];
5754 if (VECTOR_UNIT_ALTIVEC_P (V4SFmode)
5755 && out_mode == SFmode && out_n == 4
5756 && in_mode == SFmode && in_n == 4)
5757 return rs6000_builtin_decls[ALTIVEC_BUILTIN_VRFIZ];
5758 break;
5759 CASE_CFN_NEARBYINT:
5760 if (VECTOR_UNIT_VSX_P (V2DFmode)
5761 && flag_unsafe_math_optimizations
5762 && out_mode == DFmode && out_n == 2
5763 && in_mode == DFmode && in_n == 2)
5764 return rs6000_builtin_decls[VSX_BUILTIN_XVRDPI];
5765 if (VECTOR_UNIT_VSX_P (V4SFmode)
5766 && flag_unsafe_math_optimizations
5767 && out_mode == SFmode && out_n == 4
5768 && in_mode == SFmode && in_n == 4)
5769 return rs6000_builtin_decls[VSX_BUILTIN_XVRSPI];
5770 break;
5771 CASE_CFN_RINT:
5772 if (VECTOR_UNIT_VSX_P (V2DFmode)
5773 && !flag_trapping_math
5774 && out_mode == DFmode && out_n == 2
5775 && in_mode == DFmode && in_n == 2)
5776 return rs6000_builtin_decls[VSX_BUILTIN_XVRDPIC];
5777 if (VECTOR_UNIT_VSX_P (V4SFmode)
5778 && !flag_trapping_math
5779 && out_mode == SFmode && out_n == 4
5780 && in_mode == SFmode && in_n == 4)
5781 return rs6000_builtin_decls[VSX_BUILTIN_XVRSPIC];
5782 break;
5783 default:
5784 break;
5787 /* Generate calls to libmass if appropriate. */
5788 if (rs6000_veclib_handler)
5789 return rs6000_veclib_handler (combined_fn (fn), type_out, type_in);
5791 return NULL_TREE;
5794 /* Implement TARGET_VECTORIZE_BUILTIN_MD_VECTORIZED_FUNCTION. */
5796 static tree
5797 rs6000_builtin_md_vectorized_function (tree fndecl, tree type_out,
5798 tree type_in)
5800 machine_mode in_mode, out_mode;
5801 int in_n, out_n;
5803 if (TARGET_DEBUG_BUILTIN)
5804 fprintf (stderr, "rs6000_builtin_md_vectorized_function (%s, %s, %s)\n",
5805 IDENTIFIER_POINTER (DECL_NAME (fndecl)),
5806 GET_MODE_NAME (TYPE_MODE (type_out)),
5807 GET_MODE_NAME (TYPE_MODE (type_in)));
5809 if (TREE_CODE (type_out) != VECTOR_TYPE
5810 || TREE_CODE (type_in) != VECTOR_TYPE
5811 || !TARGET_VECTORIZE_BUILTINS)
5812 return NULL_TREE;
5814 out_mode = TYPE_MODE (TREE_TYPE (type_out));
5815 out_n = TYPE_VECTOR_SUBPARTS (type_out);
5816 in_mode = TYPE_MODE (TREE_TYPE (type_in));
5817 in_n = TYPE_VECTOR_SUBPARTS (type_in);
5819 enum rs6000_builtins fn
5820 = (enum rs6000_builtins) DECL_FUNCTION_CODE (fndecl);
5821 switch (fn)
5823 case RS6000_BUILTIN_RSQRTF:
5824 if (VECTOR_UNIT_ALTIVEC_OR_VSX_P (V4SFmode)
5825 && out_mode == SFmode && out_n == 4
5826 && in_mode == SFmode && in_n == 4)
5827 return rs6000_builtin_decls[ALTIVEC_BUILTIN_VRSQRTFP];
5828 break;
5829 case RS6000_BUILTIN_RSQRT:
5830 if (VECTOR_UNIT_VSX_P (V2DFmode)
5831 && out_mode == DFmode && out_n == 2
5832 && in_mode == DFmode && in_n == 2)
5833 return rs6000_builtin_decls[VSX_BUILTIN_RSQRT_2DF];
5834 break;
5835 case RS6000_BUILTIN_RECIPF:
5836 if (VECTOR_UNIT_ALTIVEC_OR_VSX_P (V4SFmode)
5837 && out_mode == SFmode && out_n == 4
5838 && in_mode == SFmode && in_n == 4)
5839 return rs6000_builtin_decls[ALTIVEC_BUILTIN_VRECIPFP];
5840 break;
5841 case RS6000_BUILTIN_RECIP:
5842 if (VECTOR_UNIT_VSX_P (V2DFmode)
5843 && out_mode == DFmode && out_n == 2
5844 && in_mode == DFmode && in_n == 2)
5845 return rs6000_builtin_decls[VSX_BUILTIN_RECIP_V2DF];
5846 break;
5847 default:
5848 break;
5850 return NULL_TREE;
5853 /* Default CPU string for rs6000*_file_start functions. */
5854 static const char *rs6000_default_cpu;
5856 /* Do anything needed at the start of the asm file. */
5858 static void
5859 rs6000_file_start (void)
5861 char buffer[80];
5862 const char *start = buffer;
5863 FILE *file = asm_out_file;
5865 rs6000_default_cpu = TARGET_CPU_DEFAULT;
5867 default_file_start ();
5869 if (flag_verbose_asm)
5871 sprintf (buffer, "\n%s rs6000/powerpc options:", ASM_COMMENT_START);
5873 if (rs6000_default_cpu != 0 && rs6000_default_cpu[0] != '\0')
5875 fprintf (file, "%s --with-cpu=%s", start, rs6000_default_cpu);
5876 start = "";
5879 if (global_options_set.x_rs6000_cpu_index)
5881 fprintf (file, "%s -mcpu=%s", start,
5882 processor_target_table[rs6000_cpu_index].name);
5883 start = "";
5886 if (global_options_set.x_rs6000_tune_index)
5888 fprintf (file, "%s -mtune=%s", start,
5889 processor_target_table[rs6000_tune_index].name);
5890 start = "";
5893 if (PPC405_ERRATUM77)
5895 fprintf (file, "%s PPC405CR_ERRATUM77", start);
5896 start = "";
5899 #ifdef USING_ELFOS_H
5900 switch (rs6000_sdata)
5902 case SDATA_NONE: fprintf (file, "%s -msdata=none", start); start = ""; break;
5903 case SDATA_DATA: fprintf (file, "%s -msdata=data", start); start = ""; break;
5904 case SDATA_SYSV: fprintf (file, "%s -msdata=sysv", start); start = ""; break;
5905 case SDATA_EABI: fprintf (file, "%s -msdata=eabi", start); start = ""; break;
5908 if (rs6000_sdata && g_switch_value)
5910 fprintf (file, "%s -G %d", start,
5911 g_switch_value);
5912 start = "";
5914 #endif
5916 if (*start == '\0')
5917 putc ('\n', file);
5920 #ifdef USING_ELFOS_H
5921 if (!(rs6000_default_cpu && rs6000_default_cpu[0])
5922 && !global_options_set.x_rs6000_cpu_index)
5924 fputs ("\t.machine ", asm_out_file);
5925 if ((rs6000_isa_flags & OPTION_MASK_MODULO) != 0)
5926 fputs ("power9\n", asm_out_file);
5927 else if ((rs6000_isa_flags & OPTION_MASK_DIRECT_MOVE) != 0)
5928 fputs ("power8\n", asm_out_file);
5929 else if ((rs6000_isa_flags & OPTION_MASK_POPCNTD) != 0)
5930 fputs ("power7\n", asm_out_file);
5931 else if ((rs6000_isa_flags & OPTION_MASK_CMPB) != 0)
5932 fputs ("power6\n", asm_out_file);
5933 else if ((rs6000_isa_flags & OPTION_MASK_POPCNTB) != 0)
5934 fputs ("power5\n", asm_out_file);
5935 else if ((rs6000_isa_flags & OPTION_MASK_MFCRF) != 0)
5936 fputs ("power4\n", asm_out_file);
5937 else if ((rs6000_isa_flags & OPTION_MASK_POWERPC64) != 0)
5938 fputs ("ppc64\n", asm_out_file);
5939 else
5940 fputs ("ppc\n", asm_out_file);
5942 #endif
5944 if (DEFAULT_ABI == ABI_ELFv2)
5945 fprintf (file, "\t.abiversion 2\n");
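
/* Illustrative output (assumed invocation): with -fverbose-asm and
   -mcpu=power8 the file starts with a comment such as

     # rs6000/powerpc options: -mcpu=power8

   The ".machine" directive above is emitted only when neither a configured
   default CPU nor an explicit -mcpu= is in effect; in that case the highest
   enabled ISA flag selects it, e.g. a flag set containing
   OPTION_MASK_DIRECT_MOVE but not OPTION_MASK_MODULO yields
   ".machine power8".  */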
5949 /* Return nonzero if this function is known to have a null epilogue. */
int
direct_return (void)
5954 if (reload_completed)
5956 rs6000_stack_t *info = rs6000_stack_info ();
5958 if (info->first_gp_reg_save == 32
5959 && info->first_fp_reg_save == 64
5960 && info->first_altivec_reg_save == LAST_ALTIVEC_REGNO + 1
5961 && ! info->lr_save_p
5962 && ! info->cr_save_p
5963 && info->vrsave_size == 0
5964 && ! info->push_p)
5965 return 1;
5968 return 0;
5971 /* Return the number of instructions it takes to form a constant in an
5972 integer register. */
int
num_insns_constant_wide (HOST_WIDE_INT value)
5977 /* signed constant loadable with addi */
5978 if (((unsigned HOST_WIDE_INT) value + 0x8000) < 0x10000)
5979 return 1;
5981 /* constant loadable with addis */
5982 else if ((value & 0xffff) == 0
5983 && (value >> 31 == -1 || value >> 31 == 0))
5984 return 1;
5986 else if (TARGET_POWERPC64)
5988 HOST_WIDE_INT low = ((value & 0xffffffff) ^ 0x80000000) - 0x80000000;
5989 HOST_WIDE_INT high = value >> 31;
5991 if (high == 0 || high == -1)
5992 return 2;
5994 high >>= 1;
5996 if (low == 0)
5997 return num_insns_constant_wide (high) + 1;
5998 else if (high == 0)
5999 return num_insns_constant_wide (low) + 1;
6000 else
6001 return (num_insns_constant_wide (high)
6002 + num_insns_constant_wide (low) + 1);
6005 else
6006 return 2;
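
/* Worked example (illustrative): for value == 0x100000000 (2^32) on a
   64-bit target, low == 0 and high == 2 before the final shift, so the
   result is num_insns_constant_wide (1) + 1 == 2 -- e.g. an li to form the
   high part followed by one shift, versus a single addi for any value that
   fits in 16 signed bits.  */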
int
num_insns_constant (rtx op, machine_mode mode)
6012 HOST_WIDE_INT low, high;
6014 switch (GET_CODE (op))
6016 case CONST_INT:
6017 if ((INTVAL (op) >> 31) != 0 && (INTVAL (op) >> 31) != -1
6018 && rs6000_is_valid_and_mask (op, mode))
6019 return 2;
6020 else
6021 return num_insns_constant_wide (INTVAL (op));
6023 case CONST_WIDE_INT:
6025 int i;
6026 int ins = CONST_WIDE_INT_NUNITS (op) - 1;
6027 for (i = 0; i < CONST_WIDE_INT_NUNITS (op); i++)
6028 ins += num_insns_constant_wide (CONST_WIDE_INT_ELT (op, i));
6029 return ins;
6032 case CONST_DOUBLE:
6033 if (mode == SFmode || mode == SDmode)
6035 long l;
6037 if (DECIMAL_FLOAT_MODE_P (mode))
6038 REAL_VALUE_TO_TARGET_DECIMAL32
6039 (*CONST_DOUBLE_REAL_VALUE (op), l);
6040 else
6041 REAL_VALUE_TO_TARGET_SINGLE (*CONST_DOUBLE_REAL_VALUE (op), l);
6042 return num_insns_constant_wide ((HOST_WIDE_INT) l);
6045 long l[2];
6046 if (DECIMAL_FLOAT_MODE_P (mode))
6047 REAL_VALUE_TO_TARGET_DECIMAL64 (*CONST_DOUBLE_REAL_VALUE (op), l);
6048 else
6049 REAL_VALUE_TO_TARGET_DOUBLE (*CONST_DOUBLE_REAL_VALUE (op), l);
6050 high = l[WORDS_BIG_ENDIAN == 0];
6051 low = l[WORDS_BIG_ENDIAN != 0];
6053 if (TARGET_32BIT)
6054 return (num_insns_constant_wide (low)
6055 + num_insns_constant_wide (high));
6056 else
6058 if ((high == 0 && low >= 0)
6059 || (high == -1 && low < 0))
6060 return num_insns_constant_wide (low);
6062 else if (rs6000_is_valid_and_mask (op, mode))
6063 return 2;
6065 else if (low == 0)
6066 return num_insns_constant_wide (high) + 1;
6068 else
6069 return (num_insns_constant_wide (high)
6070 + num_insns_constant_wide (low) + 1);
6073 default:
6074 gcc_unreachable ();
6078 /* Interpret element ELT of the CONST_VECTOR OP as an integer value.
6079 If the mode of OP is MODE_VECTOR_INT, this simply returns the
6080 corresponding element of the vector, but for V4SFmode and V2SFmode,
6081 the corresponding "float" is interpreted as an SImode integer. */
6083 HOST_WIDE_INT
6084 const_vector_elt_as_int (rtx op, unsigned int elt)
6086 rtx tmp;
6088 /* We can't handle V2DImode and V2DFmode vector constants here yet. */
6089 gcc_assert (GET_MODE (op) != V2DImode
6090 && GET_MODE (op) != V2DFmode);
6092 tmp = CONST_VECTOR_ELT (op, elt);
6093 if (GET_MODE (op) == V4SFmode
6094 || GET_MODE (op) == V2SFmode)
6095 tmp = gen_lowpart (SImode, tmp);
6096 return INTVAL (tmp);
/* Return true if OP can be synthesized with a particular vspltisb, vspltish
   or vspltisw instruction.  OP is a CONST_VECTOR.  Which instruction is used
   depends on STEP and COPIES, one of which will be 1.  If COPIES > 1,
   all items are set to the same value and contain COPIES replicas of the
   vsplt's operand; if STEP > 1, one in every STEP elements is set to the
   vsplt's operand and the others are set to the value of the operand's
   msb.  */
6106 static bool
6107 vspltis_constant (rtx op, unsigned step, unsigned copies)
6109 machine_mode mode = GET_MODE (op);
6110 machine_mode inner = GET_MODE_INNER (mode);
6112 unsigned i;
6113 unsigned nunits;
6114 unsigned bitsize;
6115 unsigned mask;
6117 HOST_WIDE_INT val;
6118 HOST_WIDE_INT splat_val;
6119 HOST_WIDE_INT msb_val;
6121 if (mode == V2DImode || mode == V2DFmode || mode == V1TImode)
6122 return false;
6124 nunits = GET_MODE_NUNITS (mode);
6125 bitsize = GET_MODE_BITSIZE (inner);
6126 mask = GET_MODE_MASK (inner);
6128 val = const_vector_elt_as_int (op, BYTES_BIG_ENDIAN ? nunits - 1 : 0);
6129 splat_val = val;
6130 msb_val = val >= 0 ? 0 : -1;
6132 /* Construct the value to be splatted, if possible. If not, return 0. */
6133 for (i = 2; i <= copies; i *= 2)
6135 HOST_WIDE_INT small_val;
6136 bitsize /= 2;
6137 small_val = splat_val >> bitsize;
6138 mask >>= bitsize;
6139 if (splat_val != ((small_val << bitsize) | (small_val & mask)))
6140 return false;
6141 splat_val = small_val;
6144 /* Check if SPLAT_VAL can really be the operand of a vspltis[bhw]. */
6145 if (EASY_VECTOR_15 (splat_val))
  /* Also check if we can splat, and then add the result to itself.  Do so if
     the value is positive, or if the splat instruction is using OP's mode;
     for splat_val < 0, the splat and the add should use the same mode.  */
6151 else if (EASY_VECTOR_15_ADD_SELF (splat_val)
6152 && (splat_val >= 0 || (step == 1 && copies == 1)))
  /* Also check if we are loading up the most significant bit, which can be
     done by loading up -1 and shifting the value left by -1.  */
6157 else if (EASY_VECTOR_MSB (splat_val, inner))
6160 else
6161 return false;
6163 /* Check if VAL is present in every STEP-th element, and the
6164 other elements are filled with its most significant bit. */
6165 for (i = 1; i < nunits; ++i)
6167 HOST_WIDE_INT desired_val;
6168 unsigned elt = BYTES_BIG_ENDIAN ? nunits - 1 - i : i;
6169 if ((i & (step - 1)) == 0)
6170 desired_val = val;
6171 else
6172 desired_val = msb_val;
6174 if (desired_val != const_vector_elt_as_int (op, elt))
6175 return false;
6178 return true;
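
/* Worked example (illustrative): the V4SImode constant
   { 0x00030003, 0x00030003, 0x00030003, 0x00030003 } is matched with
   step == 1, copies == 2: halving each 32-bit element gives
   splat_val == 3, EASY_VECTOR_15 (3) holds, and every element agrees, so
   the constant can be generated as "vspltish %0,3".  */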
/* Like vspltis_constant, but allow the value to be shifted left with a VSLDOI
6182 instruction, filling in the bottom elements with 0 or -1.
6184 Return 0 if the constant cannot be generated with VSLDOI. Return positive
6185 for the number of zeroes to shift in, or negative for the number of 0xff
6186 bytes to shift in.
6188 OP is a CONST_VECTOR. */
int
vspltis_shifted (rtx op)
6193 machine_mode mode = GET_MODE (op);
6194 machine_mode inner = GET_MODE_INNER (mode);
6196 unsigned i, j;
6197 unsigned nunits;
6198 unsigned mask;
6200 HOST_WIDE_INT val;
6202 if (mode != V16QImode && mode != V8HImode && mode != V4SImode)
6203 return false;
6205 /* We need to create pseudo registers to do the shift, so don't recognize
6206 shift vector constants after reload. */
6207 if (!can_create_pseudo_p ())
6208 return false;
6210 nunits = GET_MODE_NUNITS (mode);
6211 mask = GET_MODE_MASK (inner);
6213 val = const_vector_elt_as_int (op, BYTES_BIG_ENDIAN ? 0 : nunits - 1);
6215 /* Check if the value can really be the operand of a vspltis[bhw]. */
6216 if (EASY_VECTOR_15 (val))
6219 /* Also check if we are loading up the most significant bit which can be done
6220 by loading up -1 and shifting the value left by -1. */
6221 else if (EASY_VECTOR_MSB (val, inner))
6224 else
6225 return 0;
6227 /* Check if VAL is present in every STEP-th element until we find elements
6228 that are 0 or all 1 bits. */
6229 for (i = 1; i < nunits; ++i)
6231 unsigned elt = BYTES_BIG_ENDIAN ? i : nunits - 1 - i;
6232 HOST_WIDE_INT elt_val = const_vector_elt_as_int (op, elt);
6234 /* If the value isn't the splat value, check for the remaining elements
6235 being 0/-1. */
6236 if (val != elt_val)
6238 if (elt_val == 0)
6240 for (j = i+1; j < nunits; ++j)
6242 unsigned elt2 = BYTES_BIG_ENDIAN ? j : nunits - 1 - j;
6243 if (const_vector_elt_as_int (op, elt2) != 0)
6244 return 0;
6247 return (nunits - i) * GET_MODE_SIZE (inner);
6250 else if ((elt_val & mask) == mask)
6252 for (j = i+1; j < nunits; ++j)
6254 unsigned elt2 = BYTES_BIG_ENDIAN ? j : nunits - 1 - j;
6255 if ((const_vector_elt_as_int (op, elt2) & mask) != mask)
6256 return 0;
6259 return -((nunits - i) * GET_MODE_SIZE (inner));
6262 else
6263 return 0;
  /* If all elements are equal, we don't need to do VSLDOI.  */
6268 return 0;
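
/* Worked example (illustrative): on a big-endian target the V4SImode
   constant { 5, 0, 0, 0 } has val == 5 in element 0 and only zeros after
   it, so the loop returns (4 - 1) * 4 == 12: splat 5 with vspltisw, then
   shift twelve zero bytes in from the right with VSLDOI.  */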
6272 /* Return true if OP is of the given MODE and can be synthesized
6273 with a vspltisb, vspltish or vspltisw. */
6275 bool
6276 easy_altivec_constant (rtx op, machine_mode mode)
6278 unsigned step, copies;
6280 if (mode == VOIDmode)
6281 mode = GET_MODE (op);
6282 else if (mode != GET_MODE (op))
6283 return false;
6285 /* V2DI/V2DF was added with VSX. Only allow 0 and all 1's as easy
6286 constants. */
6287 if (mode == V2DFmode)
6288 return zero_constant (op, mode);
6290 else if (mode == V2DImode)
6292 if (GET_CODE (CONST_VECTOR_ELT (op, 0)) != CONST_INT
6293 || GET_CODE (CONST_VECTOR_ELT (op, 1)) != CONST_INT)
6294 return false;
6296 if (zero_constant (op, mode))
6297 return true;
6299 if (INTVAL (CONST_VECTOR_ELT (op, 0)) == -1
6300 && INTVAL (CONST_VECTOR_ELT (op, 1)) == -1)
6301 return true;
6303 return false;
6306 /* V1TImode is a special container for TImode. Ignore for now. */
6307 else if (mode == V1TImode)
6308 return false;
6310 /* Start with a vspltisw. */
6311 step = GET_MODE_NUNITS (mode) / 4;
6312 copies = 1;
6314 if (vspltis_constant (op, step, copies))
6315 return true;
6317 /* Then try with a vspltish. */
6318 if (step == 1)
6319 copies <<= 1;
6320 else
6321 step >>= 1;
6323 if (vspltis_constant (op, step, copies))
6324 return true;
6326 /* And finally a vspltisb. */
6327 if (step == 1)
6328 copies <<= 1;
6329 else
6330 step >>= 1;
6332 if (vspltis_constant (op, step, copies))
6333 return true;
6335 if (vspltis_shifted (op) != 0)
6336 return true;
6338 return false;
6341 /* Generate a VEC_DUPLICATE representing a vspltis[bhw] instruction whose
6342 result is OP. Abort if it is not possible. */
rtx
gen_easy_altivec_constant (rtx op)
6347 machine_mode mode = GET_MODE (op);
6348 int nunits = GET_MODE_NUNITS (mode);
6349 rtx val = CONST_VECTOR_ELT (op, BYTES_BIG_ENDIAN ? nunits - 1 : 0);
6350 unsigned step = nunits / 4;
6351 unsigned copies = 1;
6353 /* Start with a vspltisw. */
6354 if (vspltis_constant (op, step, copies))
6355 return gen_rtx_VEC_DUPLICATE (V4SImode, gen_lowpart (SImode, val));
6357 /* Then try with a vspltish. */
6358 if (step == 1)
6359 copies <<= 1;
6360 else
6361 step >>= 1;
6363 if (vspltis_constant (op, step, copies))
6364 return gen_rtx_VEC_DUPLICATE (V8HImode, gen_lowpart (HImode, val));
6366 /* And finally a vspltisb. */
6367 if (step == 1)
6368 copies <<= 1;
6369 else
6370 step >>= 1;
6372 if (vspltis_constant (op, step, copies))
6373 return gen_rtx_VEC_DUPLICATE (V16QImode, gen_lowpart (QImode, val));
6375 gcc_unreachable ();
/* Return true if OP is of the given MODE and can be synthesized with ISA 3.0
   instructions (xxspltib, vupkhsb/vextsb2w/vextsb2d).

   Return the number of instructions needed (1 or 2) through NUM_INSNS_PTR.

   Return the constant that is being split through CONSTANT_PTR.  */
6386 bool
6387 xxspltib_constant_p (rtx op,
6388 machine_mode mode,
6389 int *num_insns_ptr,
6390 int *constant_ptr)
6392 size_t nunits = GET_MODE_NUNITS (mode);
6393 size_t i;
6394 HOST_WIDE_INT value;
6395 rtx element;
  /* Set the returned values to out-of-bounds values.  */
6398 *num_insns_ptr = -1;
6399 *constant_ptr = 256;
6401 if (!TARGET_P9_VECTOR)
6402 return false;
6404 if (mode == VOIDmode)
6405 mode = GET_MODE (op);
6407 else if (mode != GET_MODE (op) && GET_MODE (op) != VOIDmode)
6408 return false;
6410 /* Handle (vec_duplicate <constant>). */
6411 if (GET_CODE (op) == VEC_DUPLICATE)
6413 if (mode != V16QImode && mode != V8HImode && mode != V4SImode
6414 && mode != V2DImode)
6415 return false;
6417 element = XEXP (op, 0);
6418 if (!CONST_INT_P (element))
6419 return false;
6421 value = INTVAL (element);
6422 if (!IN_RANGE (value, -128, 127))
6423 return false;
6426 /* Handle (const_vector [...]). */
6427 else if (GET_CODE (op) == CONST_VECTOR)
6429 if (mode != V16QImode && mode != V8HImode && mode != V4SImode
6430 && mode != V2DImode)
6431 return false;
6433 element = CONST_VECTOR_ELT (op, 0);
6434 if (!CONST_INT_P (element))
6435 return false;
6437 value = INTVAL (element);
6438 if (!IN_RANGE (value, -128, 127))
6439 return false;
6441 for (i = 1; i < nunits; i++)
6443 element = CONST_VECTOR_ELT (op, i);
6444 if (!CONST_INT_P (element))
6445 return false;
6447 if (value != INTVAL (element))
6448 return false;
  /* Handle integer constants being loaded into the upper part of the VSX
     register as a scalar.  If the value isn't 0/-1, only allow it if the mode
     can go in Altivec registers.  Prefer VSPLTISW/VUPKHSW over XXSPLTIB.  */
6455 else if (CONST_INT_P (op))
6457 if (!SCALAR_INT_MODE_P (mode))
6458 return false;
6460 value = INTVAL (op);
6461 if (!IN_RANGE (value, -128, 127))
6462 return false;
6464 if (!IN_RANGE (value, -1, 0))
6466 if (!(reg_addr[mode].addr_mask[RELOAD_REG_VMX] & RELOAD_REG_VALID))
6467 return false;
6469 if (EASY_VECTOR_15 (value))
6470 return false;
6474 else
6475 return false;
6477 /* See if we could generate vspltisw/vspltish directly instead of xxspltib +
6478 sign extend. Special case 0/-1 to allow getting any VSX register instead
6479 of an Altivec register. */
6480 if ((mode == V4SImode || mode == V8HImode) && !IN_RANGE (value, -1, 0)
6481 && EASY_VECTOR_15 (value))
6482 return false;
6484 /* Return # of instructions and the constant byte for XXSPLTIB. */
6485 if (mode == V16QImode)
6486 *num_insns_ptr = 1;
6488 else if (IN_RANGE (value, -1, 0))
6489 *num_insns_ptr = 1;
6491 else
6492 *num_insns_ptr = 2;
6494 *constant_ptr = (int) value;
6495 return true;
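
/* Illustrative examples: a V16QImode vector with every byte == 100 needs
   one instruction ("xxspltib %x0,100"); a V8HImode splat of 100 needs two
   (xxspltib of the byte, then a sign-extending unpack such as the vupkhsb
   mentioned above); and a V8HImode splat of 12 returns false, since
   EASY_VECTOR_15 (12) holds and a single vspltish is preferred.  */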
6498 const char *
6499 output_vec_const_move (rtx *operands)
6501 int cst, cst2, shift;
6502 machine_mode mode;
6503 rtx dest, vec;
6505 dest = operands[0];
6506 vec = operands[1];
6507 mode = GET_MODE (dest);
6509 if (TARGET_VSX)
6511 bool dest_vmx_p = ALTIVEC_REGNO_P (REGNO (dest));
6512 int xxspltib_value = 256;
6513 int num_insns = -1;
6515 if (zero_constant (vec, mode))
6517 if (TARGET_P9_VECTOR)
6518 return "xxspltib %x0,0";
6520 else if (dest_vmx_p)
6521 return "vspltisw %0,0";
6523 else
6524 return "xxlxor %x0,%x0,%x0";
6527 if (all_ones_constant (vec, mode))
6529 if (TARGET_P9_VECTOR)
6530 return "xxspltib %x0,255";
6532 else if (dest_vmx_p)
6533 return "vspltisw %0,-1";
6535 else if (TARGET_P8_VECTOR)
6536 return "xxlorc %x0,%x0,%x0";
6538 else
6539 gcc_unreachable ();
6542 if (TARGET_P9_VECTOR
6543 && xxspltib_constant_p (vec, mode, &num_insns, &xxspltib_value))
6545 if (num_insns == 1)
6547 operands[2] = GEN_INT (xxspltib_value & 0xff);
6548 return "xxspltib %x0,%2";
6551 return "#";
6555 if (TARGET_ALTIVEC)
6557 rtx splat_vec;
6559 gcc_assert (ALTIVEC_REGNO_P (REGNO (dest)));
6560 if (zero_constant (vec, mode))
6561 return "vspltisw %0,0";
6563 if (all_ones_constant (vec, mode))
6564 return "vspltisw %0,-1";
6566 /* Do we need to construct a value using VSLDOI? */
6567 shift = vspltis_shifted (vec);
6568 if (shift != 0)
6569 return "#";
6571 splat_vec = gen_easy_altivec_constant (vec);
6572 gcc_assert (GET_CODE (splat_vec) == VEC_DUPLICATE);
6573 operands[1] = XEXP (splat_vec, 0);
6574 if (!EASY_VECTOR_15 (INTVAL (operands[1])))
6575 return "#";
6577 switch (GET_MODE (splat_vec))
6579 case V4SImode:
6580 return "vspltisw %0,%1";
6582 case V8HImode:
6583 return "vspltish %0,%1";
6585 case V16QImode:
6586 return "vspltisb %0,%1";
6588 default:
6589 gcc_unreachable ();
6593 gcc_assert (TARGET_SPE);
6595 /* Vector constant 0 is handled as a splitter of V2SI, and in the
6596 pattern of V1DI, V4HI, and V2SF.
6598 FIXME: We should probably return # and add post reload
6599 splitters for these, but this way is so easy ;-). */
6600 cst = INTVAL (CONST_VECTOR_ELT (vec, 0));
6601 cst2 = INTVAL (CONST_VECTOR_ELT (vec, 1));
6602 operands[1] = CONST_VECTOR_ELT (vec, 0);
6603 operands[2] = CONST_VECTOR_ELT (vec, 1);
6604 if (cst == cst2)
6605 return "li %0,%1\n\tevmergelo %0,%0,%0";
6606 else if (WORDS_BIG_ENDIAN)
6607 return "li %0,%1\n\tevmergelo %0,%0,%0\n\tli %0,%2";
6608 else
6609 return "li %0,%2\n\tevmergelo %0,%0,%0\n\tli %0,%1";
6612 /* Initialize TARGET of vector PAIRED to VALS. */
6614 void
6615 paired_expand_vector_init (rtx target, rtx vals)
6617 machine_mode mode = GET_MODE (target);
6618 int n_elts = GET_MODE_NUNITS (mode);
6619 int n_var = 0;
6620 rtx x, new_rtx, tmp, constant_op, op1, op2;
6621 int i;
6623 for (i = 0; i < n_elts; ++i)
6625 x = XVECEXP (vals, 0, i);
6626 if (!(CONST_SCALAR_INT_P (x) || CONST_DOUBLE_P (x) || CONST_FIXED_P (x)))
6627 ++n_var;
6629 if (n_var == 0)
6631 /* Load from constant pool. */
6632 emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
6633 return;
6636 if (n_var == 2)
6638 /* The vector is initialized only with non-constants. */
6639 new_rtx = gen_rtx_VEC_CONCAT (V2SFmode, XVECEXP (vals, 0, 0),
6640 XVECEXP (vals, 0, 1));
6642 emit_move_insn (target, new_rtx);
6643 return;
  /* One field is non-constant and the other one is a constant.  Load the
     constant from the constant pool and use the ps_merge instruction to
     construct the whole vector.  */
6649 op1 = XVECEXP (vals, 0, 0);
6650 op2 = XVECEXP (vals, 0, 1);
6652 constant_op = (CONSTANT_P (op1)) ? op1 : op2;
6654 tmp = gen_reg_rtx (GET_MODE (constant_op));
6655 emit_move_insn (tmp, constant_op);
6657 if (CONSTANT_P (op1))
6658 new_rtx = gen_rtx_VEC_CONCAT (V2SFmode, tmp, op2);
6659 else
6660 new_rtx = gen_rtx_VEC_CONCAT (V2SFmode, op1, tmp);
6662 emit_move_insn (target, new_rtx);
6665 void
6666 paired_expand_vector_move (rtx operands[])
6668 rtx op0 = operands[0], op1 = operands[1];
6670 emit_move_insn (op0, op1);
6673 /* Emit vector compare for code RCODE. DEST is destination, OP1 and
6674 OP2 are two VEC_COND_EXPR operands, CC_OP0 and CC_OP1 are the two
6675 operands for the relation operation COND. This is a recursive
6676 function. */
6678 static void
6679 paired_emit_vector_compare (enum rtx_code rcode,
6680 rtx dest, rtx op0, rtx op1,
6681 rtx cc_op0, rtx cc_op1)
6683 rtx tmp = gen_reg_rtx (V2SFmode);
6684 rtx tmp1, max, min;
6686 gcc_assert (TARGET_PAIRED_FLOAT);
6687 gcc_assert (GET_MODE (op0) == GET_MODE (op1));
6689 switch (rcode)
6691 case LT:
6692 case LTU:
6693 paired_emit_vector_compare (GE, dest, op1, op0, cc_op0, cc_op1);
6694 return;
6695 case GE:
6696 case GEU:
6697 emit_insn (gen_subv2sf3 (tmp, cc_op0, cc_op1));
6698 emit_insn (gen_selv2sf4 (dest, tmp, op0, op1, CONST0_RTX (SFmode)));
6699 return;
6700 case LE:
6701 case LEU:
6702 paired_emit_vector_compare (GE, dest, op0, op1, cc_op1, cc_op0);
6703 return;
6704 case GT:
6705 paired_emit_vector_compare (LE, dest, op1, op0, cc_op0, cc_op1);
6706 return;
6707 case EQ:
6708 tmp1 = gen_reg_rtx (V2SFmode);
6709 max = gen_reg_rtx (V2SFmode);
6710 min = gen_reg_rtx (V2SFmode);
6711 gen_reg_rtx (V2SFmode);
6713 emit_insn (gen_subv2sf3 (tmp, cc_op0, cc_op1));
6714 emit_insn (gen_selv2sf4
6715 (max, tmp, cc_op0, cc_op1, CONST0_RTX (SFmode)));
6716 emit_insn (gen_subv2sf3 (tmp, cc_op1, cc_op0));
6717 emit_insn (gen_selv2sf4
6718 (min, tmp, cc_op0, cc_op1, CONST0_RTX (SFmode)));
6719 emit_insn (gen_subv2sf3 (tmp1, min, max));
6720 emit_insn (gen_selv2sf4 (dest, tmp1, op0, op1, CONST0_RTX (SFmode)));
6721 return;
6722 case NE:
6723 paired_emit_vector_compare (EQ, dest, op1, op0, cc_op0, cc_op1);
6724 return;
6725 case UNLE:
6726 paired_emit_vector_compare (LE, dest, op1, op0, cc_op0, cc_op1);
6727 return;
6728 case UNLT:
6729 paired_emit_vector_compare (LT, dest, op1, op0, cc_op0, cc_op1);
6730 return;
6731 case UNGE:
6732 paired_emit_vector_compare (GE, dest, op1, op0, cc_op0, cc_op1);
6733 return;
6734 case UNGT:
6735 paired_emit_vector_compare (GT, dest, op1, op0, cc_op0, cc_op1);
6736 return;
6737 default:
6738 gcc_unreachable ();
6741 return;
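
/* Illustrative note: the EQ case above synthesizes equality from the
   select primitive alone: MAX = (a - b >= 0 ? a : b) and
   MIN = (b - a >= 0 ? a : b), so MIN - MAX is always <= 0 and reaches 0
   exactly when a == b; the final selv2sf4 therefore picks OP0 on equality
   and OP1 otherwise.  */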
6744 /* Emit vector conditional expression.
6745 DEST is destination. OP1 and OP2 are two VEC_COND_EXPR operands.
6746 CC_OP0 and CC_OP1 are the two operands for the relation operation COND. */
int
paired_emit_vector_cond_expr (rtx dest, rtx op1, rtx op2,
6750 rtx cond, rtx cc_op0, rtx cc_op1)
6752 enum rtx_code rcode = GET_CODE (cond);
6754 if (!TARGET_PAIRED_FLOAT)
6755 return 0;
6757 paired_emit_vector_compare (rcode, dest, op1, op2, cc_op0, cc_op1);
6759 return 1;
6762 /* Initialize vector TARGET to VALS. */
6764 void
6765 rs6000_expand_vector_init (rtx target, rtx vals)
6767 machine_mode mode = GET_MODE (target);
6768 machine_mode inner_mode = GET_MODE_INNER (mode);
6769 int n_elts = GET_MODE_NUNITS (mode);
6770 int n_var = 0, one_var = -1;
6771 bool all_same = true, all_const_zero = true;
6772 rtx x, mem;
6773 int i;
6775 for (i = 0; i < n_elts; ++i)
6777 x = XVECEXP (vals, 0, i);
6778 if (!(CONST_SCALAR_INT_P (x) || CONST_DOUBLE_P (x) || CONST_FIXED_P (x)))
6779 ++n_var, one_var = i;
6780 else if (x != CONST0_RTX (inner_mode))
6781 all_const_zero = false;
6783 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
6784 all_same = false;
6787 if (n_var == 0)
6789 rtx const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0));
6790 bool int_vector_p = (GET_MODE_CLASS (mode) == MODE_VECTOR_INT);
6791 if ((int_vector_p || TARGET_VSX) && all_const_zero)
6793 /* Zero register. */
6794 emit_move_insn (target, CONST0_RTX (mode));
6795 return;
6797 else if (int_vector_p && easy_vector_constant (const_vec, mode))
6799 /* Splat immediate. */
6800 emit_insn (gen_rtx_SET (target, const_vec));
6801 return;
6803 else
6805 /* Load from constant pool. */
6806 emit_move_insn (target, const_vec);
6807 return;
6811 /* Double word values on VSX can use xxpermdi or lxvdsx. */
6812 if (VECTOR_MEM_VSX_P (mode) && (mode == V2DFmode || mode == V2DImode))
6814 rtx op0 = XVECEXP (vals, 0, 0);
6815 rtx op1 = XVECEXP (vals, 0, 1);
6816 if (all_same)
6818 if (!MEM_P (op0) && !REG_P (op0))
6819 op0 = force_reg (inner_mode, op0);
6820 if (mode == V2DFmode)
6821 emit_insn (gen_vsx_splat_v2df (target, op0));
6822 else
6823 emit_insn (gen_vsx_splat_v2di (target, op0));
6825 else
6827 op0 = force_reg (inner_mode, op0);
6828 op1 = force_reg (inner_mode, op1);
6829 if (mode == V2DFmode)
6830 emit_insn (gen_vsx_concat_v2df (target, op0, op1));
6831 else
6832 emit_insn (gen_vsx_concat_v2di (target, op0, op1));
6834 return;
6837 /* Special case initializing vector int if we are on 64-bit systems with
6838 direct move or we have the ISA 3.0 instructions. */
6839 if (mode == V4SImode && VECTOR_MEM_VSX_P (V4SImode)
6840 && TARGET_DIRECT_MOVE_64BIT)
6842 if (all_same)
6844 rtx element0 = XVECEXP (vals, 0, 0);
6845 if (MEM_P (element0))
6846 element0 = rs6000_address_for_fpconvert (element0);
6847 else
6848 element0 = force_reg (SImode, element0);
6850 if (TARGET_P9_VECTOR)
6851 emit_insn (gen_vsx_splat_v4si (target, element0));
6852 else
6854 rtx tmp = gen_reg_rtx (DImode);
6855 emit_insn (gen_zero_extendsidi2 (tmp, element0));
6856 emit_insn (gen_vsx_splat_v4si_di (target, tmp));
6858 return;
6860 else
6862 rtx elements[4];
6863 size_t i;
6865 for (i = 0; i < 4; i++)
6867 elements[i] = XVECEXP (vals, 0, i);
6868 if (!CONST_INT_P (elements[i]) && !REG_P (elements[i]))
6869 elements[i] = copy_to_mode_reg (SImode, elements[i]);
6872 emit_insn (gen_vsx_init_v4si (target, elements[0], elements[1],
6873 elements[2], elements[3]));
6874 return;
  /* With single-precision floating point on VSX, note that internally single
     precision is actually represented as a double, so either make 2 V2DF
     vectors and convert those vectors to single precision, or do one
     conversion and splat the result to the other elements.  */
6882 if (mode == V4SFmode && VECTOR_MEM_VSX_P (V4SFmode))
6884 if (all_same)
6886 rtx element0 = XVECEXP (vals, 0, 0);
6888 if (TARGET_P9_VECTOR)
6890 if (MEM_P (element0))
6891 element0 = rs6000_address_for_fpconvert (element0);
6893 emit_insn (gen_vsx_splat_v4sf (target, element0));
6896 else
6898 rtx freg = gen_reg_rtx (V4SFmode);
6899 rtx sreg = force_reg (SFmode, element0);
6900 rtx cvt = (TARGET_XSCVDPSPN
6901 ? gen_vsx_xscvdpspn_scalar (freg, sreg)
6902 : gen_vsx_xscvdpsp_scalar (freg, sreg));
6904 emit_insn (cvt);
6905 emit_insn (gen_vsx_xxspltw_v4sf_direct (target, freg,
6906 const0_rtx));
6909 else
6911 rtx dbl_even = gen_reg_rtx (V2DFmode);
6912 rtx dbl_odd = gen_reg_rtx (V2DFmode);
6913 rtx flt_even = gen_reg_rtx (V4SFmode);
6914 rtx flt_odd = gen_reg_rtx (V4SFmode);
6915 rtx op0 = force_reg (SFmode, XVECEXP (vals, 0, 0));
6916 rtx op1 = force_reg (SFmode, XVECEXP (vals, 0, 1));
6917 rtx op2 = force_reg (SFmode, XVECEXP (vals, 0, 2));
6918 rtx op3 = force_reg (SFmode, XVECEXP (vals, 0, 3));
6920 /* Use VMRGEW if we can instead of doing a permute. */
6921 if (TARGET_P8_VECTOR)
6923 emit_insn (gen_vsx_concat_v2sf (dbl_even, op0, op2));
6924 emit_insn (gen_vsx_concat_v2sf (dbl_odd, op1, op3));
6925 emit_insn (gen_vsx_xvcvdpsp (flt_even, dbl_even));
6926 emit_insn (gen_vsx_xvcvdpsp (flt_odd, dbl_odd));
6927 if (BYTES_BIG_ENDIAN)
6928 emit_insn (gen_p8_vmrgew_v4sf_direct (target, flt_even, flt_odd));
6929 else
6930 emit_insn (gen_p8_vmrgew_v4sf_direct (target, flt_odd, flt_even));
6932 else
6934 emit_insn (gen_vsx_concat_v2sf (dbl_even, op0, op1));
6935 emit_insn (gen_vsx_concat_v2sf (dbl_odd, op2, op3));
6936 emit_insn (gen_vsx_xvcvdpsp (flt_even, dbl_even));
6937 emit_insn (gen_vsx_xvcvdpsp (flt_odd, dbl_odd));
6938 rs6000_expand_extract_even (target, flt_even, flt_odd);
6941 return;
6944 /* Special case initializing vector short/char that are splats if we are on
6945 64-bit systems with direct move. */
6946 if (all_same && TARGET_DIRECT_MOVE_64BIT
6947 && (mode == V16QImode || mode == V8HImode))
6949 rtx op0 = XVECEXP (vals, 0, 0);
6950 rtx di_tmp = gen_reg_rtx (DImode);
6952 if (!REG_P (op0))
6953 op0 = force_reg (GET_MODE_INNER (mode), op0);
6955 if (mode == V16QImode)
6957 emit_insn (gen_zero_extendqidi2 (di_tmp, op0));
6958 emit_insn (gen_vsx_vspltb_di (target, di_tmp));
6959 return;
6962 if (mode == V8HImode)
6964 emit_insn (gen_zero_extendhidi2 (di_tmp, op0));
6965 emit_insn (gen_vsx_vsplth_di (target, di_tmp));
6966 return;
6970 /* Store value to stack temp. Load vector element. Splat. However, splat
6971 of 64-bit items is not supported on Altivec. */
6972 if (all_same && GET_MODE_SIZE (inner_mode) <= 4)
6974 mem = assign_stack_temp (mode, GET_MODE_SIZE (inner_mode));
6975 emit_move_insn (adjust_address_nv (mem, inner_mode, 0),
6976 XVECEXP (vals, 0, 0));
6977 x = gen_rtx_UNSPEC (VOIDmode,
6978 gen_rtvec (1, const0_rtx), UNSPEC_LVE);
6979 emit_insn (gen_rtx_PARALLEL (VOIDmode,
6980 gen_rtvec (2,
6981 gen_rtx_SET (target, mem),
6982 x)));
6983 x = gen_rtx_VEC_SELECT (inner_mode, target,
6984 gen_rtx_PARALLEL (VOIDmode,
6985 gen_rtvec (1, const0_rtx)));
6986 emit_insn (gen_rtx_SET (target, gen_rtx_VEC_DUPLICATE (mode, x)));
6987 return;
6990 /* One field is non-constant. Load constant then overwrite
6991 varying field. */
6992 if (n_var == 1)
6994 rtx copy = copy_rtx (vals);
6996 /* Load constant part of vector, substitute neighboring value for
6997 varying element. */
6998 XVECEXP (copy, 0, one_var) = XVECEXP (vals, 0, (one_var + 1) % n_elts);
6999 rs6000_expand_vector_init (target, copy);
7001 /* Insert variable. */
7002 rs6000_expand_vector_set (target, XVECEXP (vals, 0, one_var), one_var);
7003 return;
7006 /* Construct the vector in memory one field at a time
7007 and load the whole vector. */
7008 mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
7009 for (i = 0; i < n_elts; i++)
7010 emit_move_insn (adjust_address_nv (mem, inner_mode,
7011 i * GET_MODE_SIZE (inner_mode)),
7012 XVECEXP (vals, 0, i));
7013 emit_move_insn (target, mem);
7016 /* Set field ELT of TARGET to VAL. */
7018 void
7019 rs6000_expand_vector_set (rtx target, rtx val, int elt)
7021 machine_mode mode = GET_MODE (target);
7022 machine_mode inner_mode = GET_MODE_INNER (mode);
7023 rtx reg = gen_reg_rtx (mode);
7024 rtx mask, mem, x;
7025 int width = GET_MODE_SIZE (inner_mode);
7026 int i;
7028 if (VECTOR_MEM_VSX_P (mode) && (mode == V2DFmode || mode == V2DImode))
7030 rtx (*set_func) (rtx, rtx, rtx, rtx)
7031 = ((mode == V2DFmode) ? gen_vsx_set_v2df : gen_vsx_set_v2di);
7032 emit_insn (set_func (target, target, val, GEN_INT (elt)));
7033 return;
7036 /* Simplify setting single element vectors like V1TImode. */
7037 if (GET_MODE_SIZE (mode) == GET_MODE_SIZE (inner_mode) && elt == 0)
7039 emit_move_insn (target, gen_lowpart (mode, val));
7040 return;
7043 /* Load single variable value. */
7044 mem = assign_stack_temp (mode, GET_MODE_SIZE (inner_mode));
7045 emit_move_insn (adjust_address_nv (mem, inner_mode, 0), val);
7046 x = gen_rtx_UNSPEC (VOIDmode,
7047 gen_rtvec (1, const0_rtx), UNSPEC_LVE);
7048 emit_insn (gen_rtx_PARALLEL (VOIDmode,
7049 gen_rtvec (2,
7050 gen_rtx_SET (reg, mem),
7051 x)));
7053 /* Linear sequence. */
7054 mask = gen_rtx_PARALLEL (V16QImode, rtvec_alloc (16));
7055 for (i = 0; i < 16; ++i)
7056 XVECEXP (mask, 0, i) = GEN_INT (i);
7058 /* Set permute mask to insert element into target. */
7059 for (i = 0; i < width; ++i)
7060 XVECEXP (mask, 0, elt*width + i)
7061 = GEN_INT (i + 0x10);
7062 x = gen_rtx_CONST_VECTOR (V16QImode, XVEC (mask, 0));
7064 if (BYTES_BIG_ENDIAN)
7065 x = gen_rtx_UNSPEC (mode,
7066 gen_rtvec (3, target, reg,
7067 force_reg (V16QImode, x)),
7068 UNSPEC_VPERM);
7069 else
7071 if (TARGET_P9_VECTOR)
7072 x = gen_rtx_UNSPEC (mode,
7073 gen_rtvec (3, target, reg,
7074 force_reg (V16QImode, x)),
7075 UNSPEC_VPERMR);
7076 else
7078 /* Invert selector. We prefer to generate VNAND on P8 so
7079 that future fusion opportunities can kick in, but must
7080 generate VNOR elsewhere. */
7081 rtx notx = gen_rtx_NOT (V16QImode, force_reg (V16QImode, x));
7082 rtx iorx = (TARGET_P8_VECTOR
7083 ? gen_rtx_IOR (V16QImode, notx, notx)
7084 : gen_rtx_AND (V16QImode, notx, notx));
7085 rtx tmp = gen_reg_rtx (V16QImode);
7086 emit_insn (gen_rtx_SET (tmp, iorx));
7088 /* Permute with operands reversed and adjusted selector. */
7089 x = gen_rtx_UNSPEC (mode, gen_rtvec (3, reg, target, tmp),
7090 UNSPEC_VPERM);
7094 emit_insn (gen_rtx_SET (target, x));
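
/* Illustrative example: setting element 1 of a V4SImode vector builds the
   byte selector { 0, 1, 2, 3, 0x10, 0x11, 0x12, 0x13, 8, ..., 15 }, i.e.
   the identity mask with bytes 4-7 redirected to the new value (selector
   bytes 0x10 and up pick from the second permute input, REG).  */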
7097 /* Extract field ELT from VEC into TARGET. */
7099 void
7100 rs6000_expand_vector_extract (rtx target, rtx vec, rtx elt)
7102 machine_mode mode = GET_MODE (vec);
7103 machine_mode inner_mode = GET_MODE_INNER (mode);
7104 rtx mem;
7106 if (VECTOR_MEM_VSX_P (mode) && CONST_INT_P (elt))
7108 switch (mode)
7110 default:
7111 break;
7112 case V1TImode:
7113 gcc_assert (INTVAL (elt) == 0 && inner_mode == TImode);
7114 emit_move_insn (target, gen_lowpart (TImode, vec));
7115 break;
7116 case V2DFmode:
7117 emit_insn (gen_vsx_extract_v2df (target, vec, elt));
7118 return;
7119 case V2DImode:
7120 emit_insn (gen_vsx_extract_v2di (target, vec, elt));
7121 return;
7122 case V4SFmode:
7123 emit_insn (gen_vsx_extract_v4sf (target, vec, elt));
7124 return;
7125 case V16QImode:
7126 if (TARGET_DIRECT_MOVE_64BIT)
7128 emit_insn (gen_vsx_extract_v16qi (target, vec, elt));
7129 return;
7131 else
7132 break;
7133 case V8HImode:
7134 if (TARGET_DIRECT_MOVE_64BIT)
7136 emit_insn (gen_vsx_extract_v8hi (target, vec, elt));
7137 return;
7139 else
7140 break;
7141 case V4SImode:
7142 if (TARGET_DIRECT_MOVE_64BIT)
7144 emit_insn (gen_vsx_extract_v4si (target, vec, elt));
7145 return;
7147 break;
7150 else if (VECTOR_MEM_VSX_P (mode) && !CONST_INT_P (elt)
7151 && TARGET_DIRECT_MOVE_64BIT)
7153 if (GET_MODE (elt) != DImode)
7155 rtx tmp = gen_reg_rtx (DImode);
7156 convert_move (tmp, elt, 0);
7157 elt = tmp;
7160 switch (mode)
7162 case V2DFmode:
7163 emit_insn (gen_vsx_extract_v2df_var (target, vec, elt));
7164 return;
7166 case V2DImode:
7167 emit_insn (gen_vsx_extract_v2di_var (target, vec, elt));
7168 return;
7170 case V4SFmode:
7171 if (TARGET_UPPER_REGS_SF)
7173 emit_insn (gen_vsx_extract_v4sf_var (target, vec, elt));
7174 return;
7176 break;
7178 case V4SImode:
7179 emit_insn (gen_vsx_extract_v4si_var (target, vec, elt));
7180 return;
7182 case V8HImode:
7183 emit_insn (gen_vsx_extract_v8hi_var (target, vec, elt));
7184 return;
7186 case V16QImode:
7187 emit_insn (gen_vsx_extract_v16qi_var (target, vec, elt));
7188 return;
7190 default:
7191 gcc_unreachable ();
7195 gcc_assert (CONST_INT_P (elt));
7197 /* Allocate mode-sized buffer. */
7198 mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
7200 emit_move_insn (mem, vec);
7202 /* Add offset to field within buffer matching vector element. */
7203 mem = adjust_address_nv (mem, inner_mode,
7204 INTVAL (elt) * GET_MODE_SIZE (inner_mode));
7206 emit_move_insn (target, adjust_address_nv (mem, inner_mode, 0));
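/* A minimal sketch of the two extract paths (GNU C vector extensions;
   assumes -mvsx with TARGET_DIRECT_MOVE_64BIT, so the exact insns are
   illustrative only):

       typedef int v4si __attribute__ ((vector_size (16)));

       int
       get2 (v4si v)
       {
         return v[2];
       }

       int
       getn (v4si v, unsigned long n)
       {
         return v[n];
       }

   get2 takes the CONST_INT_P branch (gen_vsx_extract_v4si), while
   getn converts N to DImode if necessary and uses the *_var patterns;
   the stack-temporary fall-back at the end handles the remaining
   constant-element cases.  */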
7209 /* Helper function to return the register number of an RTX. */
7210 static inline int
7211 regno_or_subregno (rtx op)
7213 if (REG_P (op))
7214 return REGNO (op);
7215 else if (SUBREG_P (op))
7216 return subreg_regno (op);
7217 else
7218 gcc_unreachable ();
7221 /* Adjust a memory address (MEM) of a vector type to point to a scalar field
7222 within the vector (ELEMENT) with a mode (SCALAR_MODE). Use a base register
7223 temporary (BASE_TMP) to fix up the address. Return the new memory address
7224 that is valid for reads or writes to a given register (SCALAR_REG). */
7227 rs6000_adjust_vec_address (rtx scalar_reg,
7228 rtx mem,
7229 rtx element,
7230 rtx base_tmp,
7231 machine_mode scalar_mode)
7233 unsigned scalar_size = GET_MODE_SIZE (scalar_mode);
7234 rtx addr = XEXP (mem, 0);
7235 rtx element_offset;
7236 rtx new_addr;
7237 bool valid_addr_p;
7239 /* Vector addresses should not have PRE_INC, PRE_DEC, or PRE_MODIFY. */
7240 gcc_assert (GET_RTX_CLASS (GET_CODE (addr)) != RTX_AUTOINC);
7242 /* Calculate what we need to add to the address to get the element
7243 address. */
7244 if (CONST_INT_P (element))
7245 element_offset = GEN_INT (INTVAL (element) * scalar_size);
7246 else
7248 int byte_shift = exact_log2 (scalar_size);
7249 gcc_assert (byte_shift >= 0);
7251 if (byte_shift == 0)
7252 element_offset = element;
7254 else
7256 if (TARGET_POWERPC64)
7257 emit_insn (gen_ashldi3 (base_tmp, element, GEN_INT (byte_shift)));
7258 else
7259 emit_insn (gen_ashlsi3 (base_tmp, element, GEN_INT (byte_shift)));
7261 element_offset = base_tmp;
7265 /* Create the new address pointing to the element within the vector. If we
7266 are adding 0, we don't have to change the address. */
7267 if (element_offset == const0_rtx)
7268 new_addr = addr;
7270 /* A simple indirect address can be converted into a reg + offset
7271 address. */
7272 else if (REG_P (addr) || SUBREG_P (addr))
7273 new_addr = gen_rtx_PLUS (Pmode, addr, element_offset);
7275 /* Optimize D-FORM addresses with a constant offset and a constant element
7276 number, folding the element offset directly into the address. */
7277 else if (GET_CODE (addr) == PLUS)
7279 rtx op0 = XEXP (addr, 0);
7280 rtx op1 = XEXP (addr, 1);
7281 rtx insn;
7283 gcc_assert (REG_P (op0) || SUBREG_P (op0));
7284 if (CONST_INT_P (op1) && CONST_INT_P (element_offset))
7286 HOST_WIDE_INT offset = INTVAL (op1) + INTVAL (element_offset);
7287 rtx offset_rtx = GEN_INT (offset);
7289 if (IN_RANGE (offset, -32768, 32767)
7290 && (scalar_size < 8 || (offset & 0x3) == 0))
7291 new_addr = gen_rtx_PLUS (Pmode, op0, offset_rtx);
7292 else
7294 emit_move_insn (base_tmp, offset_rtx);
7295 new_addr = gen_rtx_PLUS (Pmode, op0, base_tmp);
7298 else
7300 bool op1_reg_p = (REG_P (op1) || SUBREG_P (op1));
7301 bool ele_reg_p = (REG_P (element_offset) || SUBREG_P (element_offset));
7303 /* Note, ADDI requires the register being added to be a base
7304 register. If the register was R0, load it up into the temporary
7305 and do the add. */
7306 if (op1_reg_p
7307 && (ele_reg_p || reg_or_subregno (op1) != FIRST_GPR_REGNO))
7309 insn = gen_add3_insn (base_tmp, op1, element_offset);
7310 gcc_assert (insn != NULL_RTX);
7311 emit_insn (insn);
7314 else if (ele_reg_p
7315 && reg_or_subregno (element_offset) != FIRST_GPR_REGNO)
7317 insn = gen_add3_insn (base_tmp, element_offset, op1);
7318 gcc_assert (insn != NULL_RTX);
7319 emit_insn (insn);
7322 else
7324 emit_move_insn (base_tmp, op1);
7325 emit_insn (gen_add2_insn (base_tmp, element_offset));
7328 new_addr = gen_rtx_PLUS (Pmode, op0, base_tmp);
7332 else
7334 emit_move_insn (base_tmp, addr);
7335 new_addr = gen_rtx_PLUS (Pmode, base_tmp, element_offset);
7338 /* If we have a PLUS, we need to see whether the particular register class
7339 allows for D-FORM or X-FORM addressing. */
7340 if (GET_CODE (new_addr) == PLUS)
7342 rtx op1 = XEXP (new_addr, 1);
7343 addr_mask_type addr_mask;
7344 int scalar_regno = regno_or_subregno (scalar_reg);
7346 gcc_assert (scalar_regno < FIRST_PSEUDO_REGISTER);
7347 if (INT_REGNO_P (scalar_regno))
7348 addr_mask = reg_addr[scalar_mode].addr_mask[RELOAD_REG_GPR];
7350 else if (FP_REGNO_P (scalar_regno))
7351 addr_mask = reg_addr[scalar_mode].addr_mask[RELOAD_REG_FPR];
7353 else if (ALTIVEC_REGNO_P (scalar_regno))
7354 addr_mask = reg_addr[scalar_mode].addr_mask[RELOAD_REG_VMX];
7356 else
7357 gcc_unreachable ();
7359 if (REG_P (op1) || SUBREG_P (op1))
7360 valid_addr_p = (addr_mask & RELOAD_REG_INDEXED) != 0;
7361 else
7362 valid_addr_p = (addr_mask & RELOAD_REG_OFFSET) != 0;
7365 else if (REG_P (new_addr) || SUBREG_P (new_addr))
7366 valid_addr_p = true;
7368 else
7369 valid_addr_p = false;
7371 if (!valid_addr_p)
7373 emit_move_insn (base_tmp, new_addr);
7374 new_addr = base_tmp;
7377 return change_address (mem, scalar_mode, new_addr);
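/* Worked example for the constant offset + constant element case
   above: extracting element 2 of a V4SImode MEM whose address is
   (plus (reg) (const_int 12)) gives element_offset = 2 * 4 = 8, and
   since 12 + 8 = 20 fits the signed 16-bit D-form range (no 4-byte
   alignment check is needed for a scalar smaller than 8 bytes), the
   result is simply (plus (reg) (const_int 20)) and BASE_TMP is never
   used, provided the register class allows offset addressing.  */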
7380 /* Split a variable vec_extract operation into the component instructions. */
7382 void
7383 rs6000_split_vec_extract_var (rtx dest, rtx src, rtx element, rtx tmp_gpr,
7384 rtx tmp_altivec)
7386 machine_mode mode = GET_MODE (src);
7387 machine_mode scalar_mode = GET_MODE (dest);
7388 unsigned scalar_size = GET_MODE_SIZE (scalar_mode);
7389 int byte_shift = exact_log2 (scalar_size);
7391 gcc_assert (byte_shift >= 0);
7393 /* If we are given a memory address, optimize to load just the element. We
7394 don't have to adjust the vector element number on little endian
7395 systems. */
7396 if (MEM_P (src))
7398 gcc_assert (REG_P (tmp_gpr));
7399 emit_move_insn (dest, rs6000_adjust_vec_address (dest, src, element,
7400 tmp_gpr, scalar_mode));
7401 return;
7404 else if (REG_P (src) || SUBREG_P (src))
7406 int bit_shift = byte_shift + 3;
7407 rtx element2;
7409 gcc_assert (REG_P (tmp_gpr) && REG_P (tmp_altivec));
7411 /* For little endian, adjust element ordering. For V2DI/V2DF, we can use
7412 an XOR, otherwise we need to subtract. The shift amount is chosen so
7413 that VSLO will shift the element into the upper position (adding 3
7414 converts a byte shift into a bit shift). */
7415 if (scalar_size == 8)
7417 if (!VECTOR_ELT_ORDER_BIG)
7419 emit_insn (gen_xordi3 (tmp_gpr, element, const1_rtx));
7420 element2 = tmp_gpr;
7422 else
7423 element2 = element;
7425 /* Generate RLDIC directly to shift left 6 bits and retrieve 1
7426 bit. */
7427 emit_insn (gen_rtx_SET (tmp_gpr,
7428 gen_rtx_AND (DImode,
7429 gen_rtx_ASHIFT (DImode,
7430 element2,
7431 GEN_INT (6)),
7432 GEN_INT (64))));
7434 else
7436 if (!VECTOR_ELT_ORDER_BIG)
7438 rtx num_ele_m1 = GEN_INT (GET_MODE_NUNITS (mode) - 1);
7440 emit_insn (gen_anddi3 (tmp_gpr, element, num_ele_m1));
7441 emit_insn (gen_subdi3 (tmp_gpr, num_ele_m1, tmp_gpr));
7442 element2 = tmp_gpr;
7444 else
7445 element2 = element;
7447 emit_insn (gen_ashldi3 (tmp_gpr, element2, GEN_INT (bit_shift)));
7450 /* Get the value into the lower byte of the Altivec register where VSLO
7451 expects it. */
7452 if (TARGET_P9_VECTOR)
7453 emit_insn (gen_vsx_splat_v2di (tmp_altivec, tmp_gpr));
7454 else if (can_create_pseudo_p ())
7455 emit_insn (gen_vsx_concat_v2di (tmp_altivec, tmp_gpr, tmp_gpr));
7456 else
7458 rtx tmp_di = gen_rtx_REG (DImode, REGNO (tmp_altivec));
7459 emit_move_insn (tmp_di, tmp_gpr);
7460 emit_insn (gen_vsx_concat_v2di (tmp_altivec, tmp_di, tmp_di));
7463 /* Do the VSLO to get the value into the final location. */
7464 switch (mode)
7466 case V2DFmode:
7467 emit_insn (gen_vsx_vslo_v2df (dest, src, tmp_altivec));
7468 return;
7470 case V2DImode:
7471 emit_insn (gen_vsx_vslo_v2di (dest, src, tmp_altivec));
7472 return;
7474 case V4SFmode:
7476 rtx tmp_altivec_di = gen_rtx_REG (DImode, REGNO (tmp_altivec));
7477 rtx tmp_altivec_v4sf = gen_rtx_REG (V4SFmode, REGNO (tmp_altivec));
7478 rtx src_v2di = gen_rtx_REG (V2DImode, REGNO (src));
7479 emit_insn (gen_vsx_vslo_v2di (tmp_altivec_di, src_v2di,
7480 tmp_altivec));
7482 emit_insn (gen_vsx_xscvspdp_scalar2 (dest, tmp_altivec_v4sf));
7483 return;
7486 case V4SImode:
7487 case V8HImode:
7488 case V16QImode:
7490 rtx tmp_altivec_di = gen_rtx_REG (DImode, REGNO (tmp_altivec));
7491 rtx src_v2di = gen_rtx_REG (V2DImode, REGNO (src));
7492 rtx tmp_gpr_di = gen_rtx_REG (DImode, REGNO (dest));
7493 emit_insn (gen_vsx_vslo_v2di (tmp_altivec_di, src_v2di,
7494 tmp_altivec));
7495 emit_move_insn (tmp_gpr_di, tmp_altivec_di);
7496 emit_insn (gen_ashrdi3 (tmp_gpr_di, tmp_gpr_di,
7497 GEN_INT (64 - (8 * scalar_size))));
7498 return;
7501 default:
7502 gcc_unreachable ();
7505 return;
7507 else
7508 gcc_unreachable ();
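/* Worked example of the shift computation above: a V8HImode extract
   (scalar_size = 2, byte_shift = 1, bit_shift = 4) of big-endian
   element 3 loads the GPR with 3 << 4 = 48, VSLO then shifts that
   element into the high doubleword of the VSX register, and the final
   ashrdi3 by 64 - 8 * 2 = 48 bits brings the 16-bit value down to the
   bottom of the destination GPR, sign-extended.  */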
7511 /* Helper function for rs6000_split_v4si_init to build up a DImode value from
7512 two SImode values. */
7514 static void
7515 rs6000_split_v4si_init_di_reg (rtx dest, rtx si1, rtx si2, rtx tmp)
7517 const unsigned HOST_WIDE_INT mask_32bit = HOST_WIDE_INT_C (0xffffffff);
7519 if (CONST_INT_P (si1) && CONST_INT_P (si2))
7521 unsigned HOST_WIDE_INT const1 = (UINTVAL (si1) & mask_32bit) << 32;
7522 unsigned HOST_WIDE_INT const2 = UINTVAL (si2) & mask_32bit;
7524 emit_move_insn (dest, GEN_INT (const1 | const2));
7525 return;
7528 /* Put si1 into upper 32-bits of dest. */
7529 if (CONST_INT_P (si1))
7530 emit_move_insn (dest, GEN_INT ((UINTVAL (si1) & mask_32bit) << 32));
7531 else
7533 /* Generate RLDIC. */
7534 rtx si1_di = gen_rtx_REG (DImode, regno_or_subregno (si1));
7535 rtx shift_rtx = gen_rtx_ASHIFT (DImode, si1_di, GEN_INT (32));
7536 rtx mask_rtx = GEN_INT (mask_32bit << 32);
7537 rtx and_rtx = gen_rtx_AND (DImode, shift_rtx, mask_rtx);
7538 gcc_assert (!reg_overlap_mentioned_p (dest, si1));
7539 emit_insn (gen_rtx_SET (dest, and_rtx));
7542 /* Put si2 into the temporary. */
7543 gcc_assert (!reg_overlap_mentioned_p (dest, tmp));
7544 if (CONST_INT_P (si2))
7545 emit_move_insn (tmp, GEN_INT (UINTVAL (si2) & mask_32bit));
7546 else
7547 emit_insn (gen_zero_extendsidi2 (tmp, si2));
7549 /* Combine the two parts. */
7550 emit_insn (gen_iordi3 (dest, dest, tmp));
7551 return;
7554 /* Split a V4SI initialization. */
7556 void
7557 rs6000_split_v4si_init (rtx operands[])
7559 rtx dest = operands[0];
7561 /* Destination is a GPR, build up the two DImode parts in place. */
7562 if (REG_P (dest) || SUBREG_P (dest))
7564 int d_regno = regno_or_subregno (dest);
7565 rtx scalar1 = operands[1];
7566 rtx scalar2 = operands[2];
7567 rtx scalar3 = operands[3];
7568 rtx scalar4 = operands[4];
7569 rtx tmp1 = operands[5];
7570 rtx tmp2 = operands[6];
7572 /* Even though we only need one temporary (plus the destination, which
7573 has an early clobber constraint), try to use two temporaries, one for
7574 each double word created. That way the second insn scheduling pass can
7575 rearrange things so the two parts are done in parallel. */
7576 if (BYTES_BIG_ENDIAN)
7578 rtx di_lo = gen_rtx_REG (DImode, d_regno);
7579 rtx di_hi = gen_rtx_REG (DImode, d_regno + 1);
7580 rs6000_split_v4si_init_di_reg (di_lo, scalar1, scalar2, tmp1);
7581 rs6000_split_v4si_init_di_reg (di_hi, scalar3, scalar4, tmp2);
7583 else
7585 rtx di_lo = gen_rtx_REG (DImode, d_regno + 1);
7586 rtx di_hi = gen_rtx_REG (DImode, d_regno);
7587 gcc_assert (!VECTOR_ELT_ORDER_BIG);
7588 rs6000_split_v4si_init_di_reg (di_lo, scalar4, scalar3, tmp1);
7589 rs6000_split_v4si_init_di_reg (di_hi, scalar2, scalar1, tmp2);
7591 return;
7594 else
7595 gcc_unreachable ();
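/* Worked example of the constant/constant path in the helper above:
   si1 = 0x11111111 and si2 = 0x22222222 give
   const1 = 0x1111111100000000 and const2 = 0x0000000022222222, so
   the doubleword 0x1111111122222222 is moved in with a single
   emit_move_insn.  On big endian the lower-numbered register of the
   destination pair receives scalars 1 and 2 and the higher-numbered
   one scalars 3 and 4; on little endian the pairing is reversed.  */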
7598 /* Return TRUE if OP is an invalid SUBREG operation on the e500. */
7600 bool
7601 invalid_e500_subreg (rtx op, machine_mode mode)
7603 if (TARGET_E500_DOUBLE)
7605 /* Reject (subreg:SI (reg:DF)); likewise with subreg:DI or
7606 subreg:TI and reg:TF. Decimal float modes are like integer
7607 modes (only low part of each register used) for this
7608 purpose. */
7609 if (GET_CODE (op) == SUBREG
7610 && (mode == SImode || mode == DImode || mode == TImode
7611 || mode == DDmode || mode == TDmode || mode == PTImode)
7612 && REG_P (SUBREG_REG (op))
7613 && (GET_MODE (SUBREG_REG (op)) == DFmode
7614 || GET_MODE (SUBREG_REG (op)) == TFmode
7615 || GET_MODE (SUBREG_REG (op)) == IFmode
7616 || GET_MODE (SUBREG_REG (op)) == KFmode))
7617 return true;
7619 /* Reject (subreg:DF (reg:DI)); likewise with subreg:TF and
7620 reg:TI. */
7621 if (GET_CODE (op) == SUBREG
7622 && (mode == DFmode || mode == TFmode || mode == IFmode
7623 || mode == KFmode)
7624 && REG_P (SUBREG_REG (op))
7625 && (GET_MODE (SUBREG_REG (op)) == DImode
7626 || GET_MODE (SUBREG_REG (op)) == TImode
7627 || GET_MODE (SUBREG_REG (op)) == PTImode
7628 || GET_MODE (SUBREG_REG (op)) == DDmode
7629 || GET_MODE (SUBREG_REG (op)) == TDmode))
7630 return true;
7633 if (TARGET_SPE
7634 && GET_CODE (op) == SUBREG
7635 && mode == SImode
7636 && REG_P (SUBREG_REG (op))
7637 && SPE_VECTOR_MODE (GET_MODE (SUBREG_REG (op))))
7638 return true;
7640 return false;
7643 /* Return the alignment of TYPE. Existing alignment is ALIGN. HOW
7644 selects whether the alignment is ABI-mandated, optional, or
7645 both ABI-mandated and optional. */
7647 unsigned int
7648 rs6000_data_alignment (tree type, unsigned int align, enum data_align how)
7650 if (how != align_opt)
7652 if (TREE_CODE (type) == VECTOR_TYPE)
7654 if ((TARGET_SPE && SPE_VECTOR_MODE (TYPE_MODE (type)))
7655 || (TARGET_PAIRED_FLOAT && PAIRED_VECTOR_MODE (TYPE_MODE (type))))
7657 if (align < 64)
7658 align = 64;
7660 else if (align < 128)
7661 align = 128;
7663 else if (TARGET_E500_DOUBLE
7664 && TREE_CODE (type) == REAL_TYPE
7665 && TYPE_MODE (type) == DFmode)
7667 if (align < 64)
7668 align = 64;
7672 if (how != align_abi)
7674 if (TREE_CODE (type) == ARRAY_TYPE
7675 && TYPE_MODE (TREE_TYPE (type)) == QImode)
7677 if (align < BITS_PER_WORD)
7678 align = BITS_PER_WORD;
7682 return align;
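/* Examples of the two kinds of adjustment, assuming AltiVec is
   enabled:

       typedef int v4si __attribute__ ((vector_size (16)));
       static v4si v;        raised to 128 bits (ABI-mandated)
       static char buf[64];  raised to BITS_PER_WORD (optional)

   The vector case is applied unless HOW is align_opt; the QImode
   array case is applied unless HOW is align_abi.  */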
7685 /* Previous GCC releases forced all vector types to have 16-byte alignment. */
7687 bool
7688 rs6000_special_adjust_field_align_p (tree field, unsigned int computed)
7690 if (TARGET_ALTIVEC && TREE_CODE (TREE_TYPE (field)) == VECTOR_TYPE)
7692 if (computed != 128)
7694 static bool warned;
7695 if (!warned && warn_psabi)
7697 warned = true;
7698 inform (input_location,
7699 "the layout of aggregates containing vectors with"
7700 " %d-byte alignment has changed in GCC 5",
7701 computed / BITS_PER_UNIT);
7704 /* In current GCC there is no special case. */
7705 return false;
7708 return false;
7711 /* AIX increases natural record alignment to doubleword if the first
7712 field is an FP double while the FP fields remain word aligned. */
7714 unsigned int
7715 rs6000_special_round_type_align (tree type, unsigned int computed,
7716 unsigned int specified)
7718 unsigned int align = MAX (computed, specified);
7719 tree field = TYPE_FIELDS (type);
7721 /* Skip all non-field decls. */
7722 while (field != NULL && TREE_CODE (field) != FIELD_DECL)
7723 field = DECL_CHAIN (field);
7725 if (field != NULL && field != type)
7727 type = TREE_TYPE (field);
7728 while (TREE_CODE (type) == ARRAY_TYPE)
7729 type = TREE_TYPE (type);
7731 if (type != error_mark_node && TYPE_MODE (type) == DFmode)
7732 align = MAX (align, 64);
7735 return align;
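/* Example of the AIX rule: in

       struct s { double d; int i; };

   the first field has mode DFmode, so the record alignment is raised
   to 64 bits, even though the double itself stays word aligned within
   the record.  */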
7738 /* Darwin increases record alignment to the natural alignment of
7739 the first field. */
7741 unsigned int
7742 darwin_rs6000_special_round_type_align (tree type, unsigned int computed,
7743 unsigned int specified)
7745 unsigned int align = MAX (computed, specified);
7747 if (TYPE_PACKED (type))
7748 return align;
7750 /* Find the first field, looking down into aggregates. */
7751 do {
7752 tree field = TYPE_FIELDS (type);
7753 /* Skip all non-field decls. */
7754 while (field != NULL && TREE_CODE (field) != FIELD_DECL)
7755 field = DECL_CHAIN (field);
7756 if (! field)
7757 break;
7758 /* A packed field does not contribute any extra alignment. */
7759 if (DECL_PACKED (field))
7760 return align;
7761 type = TREE_TYPE (field);
7762 while (TREE_CODE (type) == ARRAY_TYPE)
7763 type = TREE_TYPE (type);
7764 } while (AGGREGATE_TYPE_P (type));
7766 if (! AGGREGATE_TYPE_P (type) && type != error_mark_node)
7767 align = MAX (align, TYPE_ALIGN (type));
7769 return align;
7772 /* Return 1 for an operand in small memory on V.4/eabi. */
7775 small_data_operand (rtx op ATTRIBUTE_UNUSED,
7776 machine_mode mode ATTRIBUTE_UNUSED)
7778 #if TARGET_ELF
7779 rtx sym_ref;
7781 if (rs6000_sdata == SDATA_NONE || rs6000_sdata == SDATA_DATA)
7782 return 0;
7784 if (DEFAULT_ABI != ABI_V4)
7785 return 0;
7787 /* Vector and float memory instructions have a limited offset on the
7788 SPE, so using a vector or float variable directly as an operand is
7789 not useful. */
7790 if (TARGET_SPE
7791 && (SPE_VECTOR_MODE (mode) || FLOAT_MODE_P (mode)))
7792 return 0;
7794 if (GET_CODE (op) == SYMBOL_REF)
7795 sym_ref = op;
7797 else if (GET_CODE (op) != CONST
7798 || GET_CODE (XEXP (op, 0)) != PLUS
7799 || GET_CODE (XEXP (XEXP (op, 0), 0)) != SYMBOL_REF
7800 || GET_CODE (XEXP (XEXP (op, 0), 1)) != CONST_INT)
7801 return 0;
7803 else
7805 rtx sum = XEXP (op, 0);
7806 HOST_WIDE_INT summand;
7808 /* We have to be careful here, because it is the referenced address
7809 that must be within 32k of _SDA_BASE_, not just the symbol. */
7810 summand = INTVAL (XEXP (sum, 1));
7811 if (summand < 0 || summand > g_switch_value)
7812 return 0;
7814 sym_ref = XEXP (sum, 0);
7817 return SYMBOL_REF_SMALL_P (sym_ref);
7818 #else
7819 return 0;
7820 #endif
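/* Usage sketch for the V.4/eabi small data area (using the standard
   option spellings, e.g. -msdata=eabi -G 8): a definition such as

       int counter;

   whose size is within the -G limit is placed in the small data
   section and marked SYMBOL_REF_SMALL_P, so it can be reached with a
   16-bit offset from _SDA_BASE_.  The summand check above rejects
   "&counter + ofs" when the offset could push the referenced address
   out of that guaranteed range.  */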
7823 /* Return true if either operand is a general purpose register. */
7825 bool
7826 gpr_or_gpr_p (rtx op0, rtx op1)
7828 return ((REG_P (op0) && INT_REGNO_P (REGNO (op0)))
7829 || (REG_P (op1) && INT_REGNO_P (REGNO (op1))));
7832 /* Return true if this is a move direct operation between GPR registers and
7833 floating point/VSX registers. */
7835 bool
7836 direct_move_p (rtx op0, rtx op1)
7838 int regno0, regno1;
7840 if (!REG_P (op0) || !REG_P (op1))
7841 return false;
7843 if (!TARGET_DIRECT_MOVE && !TARGET_MFPGPR)
7844 return false;
7846 regno0 = REGNO (op0);
7847 regno1 = REGNO (op1);
7848 if (regno0 >= FIRST_PSEUDO_REGISTER || regno1 >= FIRST_PSEUDO_REGISTER)
7849 return false;
7851 if (INT_REGNO_P (regno0))
7852 return (TARGET_DIRECT_MOVE) ? VSX_REGNO_P (regno1) : FP_REGNO_P (regno1);
7854 else if (INT_REGNO_P (regno1))
7856 if (TARGET_MFPGPR && FP_REGNO_P (regno0))
7857 return true;
7859 else if (TARGET_DIRECT_MOVE && VSX_REGNO_P (regno0))
7860 return true;
7863 return false;
7866 /* Return true if the OFFSET is valid for the quad address instructions that
7867 use d-form (register + offset) addressing. */
7869 static inline bool
7870 quad_address_offset_p (HOST_WIDE_INT offset)
7872 return (IN_RANGE (offset, -32768, 32767) && ((offset) & 0xf) == 0);
7875 /* Return true if ADDR is an acceptable address for a quad memory
7876 operation of mode MODE (either LQ/STQ for general purpose registers,
7877 or LXV/STXV for vector registers under ISA 3.0). If STRICT, apply
7878 the strict forms of the base register checks when validating the
7879 address. */
7881 bool
7882 quad_address_p (rtx addr, machine_mode mode, bool strict)
7884 rtx op0, op1;
7886 if (GET_MODE_SIZE (mode) != 16)
7887 return false;
7889 if (legitimate_indirect_address_p (addr, strict))
7890 return true;
7892 if (VECTOR_MODE_P (mode) && !mode_supports_vsx_dform_quad (mode))
7893 return false;
7895 if (GET_CODE (addr) != PLUS)
7896 return false;
7898 op0 = XEXP (addr, 0);
7899 if (!REG_P (op0) || !INT_REG_OK_FOR_BASE_P (op0, strict))
7900 return false;
7902 op1 = XEXP (addr, 1);
7903 if (!CONST_INT_P (op1))
7904 return false;
7906 return quad_address_offset_p (INTVAL (op1));
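/* Examples for quad_address_offset_p: offsets 0, 16 and -32768 are
   acceptable (16-byte aligned and within the signed 16-bit range),
   while 24 fails the alignment test and 32768 fails the range test,
   forcing an X-form (reg+reg) address to be used instead.  */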
7909 /* Return true if this is a load or store quad operation. This function does
7910 not handle the atomic quad memory instructions. */
7912 bool
7913 quad_load_store_p (rtx op0, rtx op1)
7915 bool ret;
7917 if (!TARGET_QUAD_MEMORY)
7918 ret = false;
7920 else if (REG_P (op0) && MEM_P (op1))
7921 ret = (quad_int_reg_operand (op0, GET_MODE (op0))
7922 && quad_memory_operand (op1, GET_MODE (op1))
7923 && !reg_overlap_mentioned_p (op0, op1));
7925 else if (MEM_P (op0) && REG_P (op1))
7926 ret = (quad_memory_operand (op0, GET_MODE (op0))
7927 && quad_int_reg_operand (op1, GET_MODE (op1)));
7929 else
7930 ret = false;
7932 if (TARGET_DEBUG_ADDR)
7934 fprintf (stderr, "\n========== quad_load_store, return %s\n",
7935 ret ? "true" : "false");
7936 debug_rtx (gen_rtx_SET (op0, op1));
7939 return ret;
7942 /* Given an address, return a constant offset term if one exists. */
7944 static rtx
7945 address_offset (rtx op)
7947 if (GET_CODE (op) == PRE_INC
7948 || GET_CODE (op) == PRE_DEC)
7949 op = XEXP (op, 0);
7950 else if (GET_CODE (op) == PRE_MODIFY
7951 || GET_CODE (op) == LO_SUM)
7952 op = XEXP (op, 1);
7954 if (GET_CODE (op) == CONST)
7955 op = XEXP (op, 0);
7957 if (GET_CODE (op) == PLUS)
7958 op = XEXP (op, 1);
7960 if (CONST_INT_P (op))
7961 return op;
7963 return NULL_RTX;
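/* Examples: address_offset returns (const_int 16) for
   (plus (reg) (const_int 16)), returns (const_int 8) for
   (lo_sum (reg) (const (plus (symbol_ref) (const_int 8)))), and
   returns NULL_RTX for a plain (reg) address.  */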
7966 /* Return true if the MEM operand is a memory operand suitable for use
7967 with a (full width, possibly multiple) gpr load/store. On
7968 powerpc64 this means the offset must be divisible by 4.
7969 Implements 'Y' constraint.
7971 Accept direct, indexed, offset, lo_sum and tocref. Since this is
7972 a constraint function we know the operand has satisfied a suitable
7973 memory predicate. Also accept some odd rtl generated by reload
7974 (see rs6000_legitimize_reload_address for various forms). It is
7975 important that reload rtl be accepted by appropriate constraints
7976 but not by the operand predicate.
7978 Offsetting a lo_sum should not be allowed, except where we know by
7979 alignment that a 32k boundary is not crossed, but see the ???
7980 comment in rs6000_legitimize_reload_address. Note that by
7981 "offsetting" here we mean a further offset to access parts of the
7982 MEM. It's fine to have a lo_sum where the inner address is offset
7983 from a sym, since the same sym+offset will appear in the high part
7984 of the address calculation. */
7986 bool
7987 mem_operand_gpr (rtx op, machine_mode mode)
7989 unsigned HOST_WIDE_INT offset;
7990 int extra;
7991 rtx addr = XEXP (op, 0);
7993 op = address_offset (addr);
7994 if (op == NULL_RTX)
7995 return true;
7997 offset = INTVAL (op);
7998 if (TARGET_POWERPC64 && (offset & 3) != 0)
7999 return false;
8001 extra = GET_MODE_SIZE (mode) - UNITS_PER_WORD;
8002 if (extra < 0)
8003 extra = 0;
8005 if (GET_CODE (addr) == LO_SUM)
8006 /* For lo_sum addresses, we must allow any offset except one that
8007 causes a wrap, so test only the low 16 bits. */
8008 offset = ((offset & 0xffff) ^ 0x8000) - 0x8000;
8010 return offset + 0x8000 < 0x10000u - extra;
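/* Worked example of the range test: a DImode access with 32-bit GPRs
   has GET_MODE_SIZE 8 and UNITS_PER_WORD 4, so extra = 4 and an
   offset passes only if offset + 0x8000 < 0x10000 - 4.  That leaves
   room for the second word access at offset + 4 to still use a valid
   16-bit displacement.  */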
8013 /* As above, but for DS-FORM VSX insns. Unlike mem_operand_gpr,
8014 enforce an offset divisible by 4 even for 32-bit. */
8016 bool
8017 mem_operand_ds_form (rtx op, machine_mode mode)
8019 unsigned HOST_WIDE_INT offset;
8020 int extra;
8021 rtx addr = XEXP (op, 0);
8023 if (!offsettable_address_p (false, mode, addr))
8024 return false;
8026 op = address_offset (addr);
8027 if (op == NULL_RTX)
8028 return true;
8030 offset = INTVAL (op);
8031 if ((offset & 3) != 0)
8032 return false;
8034 extra = GET_MODE_SIZE (mode) - UNITS_PER_WORD;
8035 if (extra < 0)
8036 extra = 0;
8038 if (GET_CODE (addr) == LO_SUM)
8039 /* For lo_sum addresses, we must allow any offset except one that
8040 causes a wrap, so test only the low 16 bits. */
8041 offset = ((offset & 0xffff) ^ 0x8000) - 0x8000;
8043 return offset + 0x8000 < 0x10000u - extra;
8046 /* Subroutines of rs6000_legitimize_address and rs6000_legitimate_address_p. */
8048 static bool
8049 reg_offset_addressing_ok_p (machine_mode mode)
8051 switch (mode)
8053 case V16QImode:
8054 case V8HImode:
8055 case V4SFmode:
8056 case V4SImode:
8057 case V2DFmode:
8058 case V2DImode:
8059 case V1TImode:
8060 case TImode:
8061 case TFmode:
8062 case KFmode:
8063 /* AltiVec/VSX vector modes. Only reg+reg addressing was valid until the
8064 ISA 3.0 vector d-form addressing mode was added. While TImode is not
8065 a vector mode, if we want to use the VSX registers to move it around,
8066 we need to restrict ourselves to reg+reg addressing. Similarly for
8067 IEEE 128-bit floating point that is passed in a single vector
8068 register. */
8069 if (VECTOR_MEM_ALTIVEC_OR_VSX_P (mode))
8070 return mode_supports_vsx_dform_quad (mode);
8071 break;
8073 case V4HImode:
8074 case V2SImode:
8075 case V1DImode:
8076 case V2SFmode:
8077 /* Paired vector modes. Only reg+reg addressing is valid. */
8078 if (TARGET_PAIRED_FLOAT)
8079 return false;
8080 break;
8082 case SDmode:
8083 /* If we can do direct load/stores of SDmode, restrict it to reg+reg
8084 addressing for the LFIWZX and STFIWX instructions. */
8085 if (TARGET_NO_SDMODE_STACK)
8086 return false;
8087 break;
8089 default:
8090 break;
8093 return true;
8096 static bool
8097 virtual_stack_registers_memory_p (rtx op)
8099 int regnum;
8101 if (GET_CODE (op) == REG)
8102 regnum = REGNO (op);
8104 else if (GET_CODE (op) == PLUS
8105 && GET_CODE (XEXP (op, 0)) == REG
8106 && GET_CODE (XEXP (op, 1)) == CONST_INT)
8107 regnum = REGNO (XEXP (op, 0));
8109 else
8110 return false;
8112 return (regnum >= FIRST_VIRTUAL_REGISTER
8113 && regnum <= LAST_VIRTUAL_POINTER_REGISTER);
8116 /* Return true if a MODE-sized memory access to OP plus OFFSET
8117 is known not to straddle a 32k boundary. This function is used
8118 to determine whether -mcmodel=medium code can use TOC pointer
8119 relative addressing for OP. This means the alignment of the TOC
8120 pointer must also be taken into account, and unfortunately that is
8121 only 8 bytes. */
8123 #ifndef POWERPC64_TOC_POINTER_ALIGNMENT
8124 #define POWERPC64_TOC_POINTER_ALIGNMENT 8
8125 #endif
8127 static bool
8128 offsettable_ok_by_alignment (rtx op, HOST_WIDE_INT offset,
8129 machine_mode mode)
8131 tree decl;
8132 unsigned HOST_WIDE_INT dsize, dalign, lsb, mask;
8134 if (GET_CODE (op) != SYMBOL_REF)
8135 return false;
8137 /* ISA 3.0 vector d-form addressing is restricted, don't allow
8138 SYMBOL_REF. */
8139 if (mode_supports_vsx_dform_quad (mode))
8140 return false;
8142 dsize = GET_MODE_SIZE (mode);
8143 decl = SYMBOL_REF_DECL (op);
8144 if (!decl)
8146 if (dsize == 0)
8147 return false;
8149 /* -fsection-anchors loses the original SYMBOL_REF_DECL when
8150 replacing memory addresses with an anchor plus offset. We
8151 could find the decl by rummaging around in the block->objects
8152 VEC for the given offset but that seems like too much work. */
8153 dalign = BITS_PER_UNIT;
8154 if (SYMBOL_REF_HAS_BLOCK_INFO_P (op)
8155 && SYMBOL_REF_ANCHOR_P (op)
8156 && SYMBOL_REF_BLOCK (op) != NULL)
8158 struct object_block *block = SYMBOL_REF_BLOCK (op);
8160 dalign = block->alignment;
8161 offset += SYMBOL_REF_BLOCK_OFFSET (op);
8163 else if (CONSTANT_POOL_ADDRESS_P (op))
8165 /* It would be nice to have get_pool_align ()... */
8166 machine_mode cmode = get_pool_mode (op);
8168 dalign = GET_MODE_ALIGNMENT (cmode);
8171 else if (DECL_P (decl))
8173 dalign = DECL_ALIGN (decl);
8175 if (dsize == 0)
8177 /* Allow BLKmode when the entire object is known to not
8178 cross a 32k boundary. */
8179 if (!DECL_SIZE_UNIT (decl))
8180 return false;
8182 if (!tree_fits_uhwi_p (DECL_SIZE_UNIT (decl)))
8183 return false;
8185 dsize = tree_to_uhwi (DECL_SIZE_UNIT (decl));
8186 if (dsize > 32768)
8187 return false;
8189 dalign /= BITS_PER_UNIT;
8190 if (dalign > POWERPC64_TOC_POINTER_ALIGNMENT)
8191 dalign = POWERPC64_TOC_POINTER_ALIGNMENT;
8192 return dalign >= dsize;
8195 else
8196 gcc_unreachable ();
8198 /* Find how many bits of the alignment we know for this access. */
8199 dalign /= BITS_PER_UNIT;
8200 if (dalign > POWERPC64_TOC_POINTER_ALIGNMENT)
8201 dalign = POWERPC64_TOC_POINTER_ALIGNMENT;
8202 mask = dalign - 1;
8203 lsb = offset & -offset;
8204 mask &= lsb - 1;
8205 dalign = mask + 1;
8207 return dalign >= dsize;
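/* Worked example of the mask arithmetic above: with a decl aligned
   to 16 bytes (capped at POWERPC64_TOC_POINTER_ALIGNMENT = 8) and
   offset = 4, lsb = 4, mask = 7 & 3 = 3, and the known alignment of
   the access drops to 4 bytes, so only accesses with dsize <= 4 are
   accepted.  At offset = 24 the lowest set bit is 8 and the full
   8-byte alignment survives.  */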
8210 static bool
8211 constant_pool_expr_p (rtx op)
8213 rtx base, offset;
8215 split_const (op, &base, &offset);
8216 return (GET_CODE (base) == SYMBOL_REF
8217 && CONSTANT_POOL_ADDRESS_P (base)
8218 && ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (get_pool_constant (base), Pmode));
8221 static const_rtx tocrel_base, tocrel_offset;
8223 /* Return true if OP is a toc pointer relative address (the output
8224 of create_TOC_reference). If STRICT, do not match non-split
8225 -mcmodel=large/medium toc pointer relative addresses. */
8227 bool
8228 toc_relative_expr_p (const_rtx op, bool strict)
8230 if (!TARGET_TOC)
8231 return false;
8233 if (TARGET_CMODEL != CMODEL_SMALL)
8235 /* When strict, ensure we have everything tidy. */
8236 if (strict
8237 && !(GET_CODE (op) == LO_SUM
8238 && REG_P (XEXP (op, 0))
8239 && INT_REG_OK_FOR_BASE_P (XEXP (op, 0), strict)))
8240 return false;
8242 /* When not strict, allow non-split TOC addresses and also allow
8243 (lo_sum (high ..)) TOC addresses created during reload. */
8244 if (GET_CODE (op) == LO_SUM)
8245 op = XEXP (op, 1);
8248 tocrel_base = op;
8249 tocrel_offset = const0_rtx;
8250 if (GET_CODE (op) == PLUS && add_cint_operand (XEXP (op, 1), GET_MODE (op)))
8252 tocrel_base = XEXP (op, 0);
8253 tocrel_offset = XEXP (op, 1);
8256 return (GET_CODE (tocrel_base) == UNSPEC
8257 && XINT (tocrel_base, 1) == UNSPEC_TOCREL);
8260 /* Return true if X is a constant pool address, and also for cmodel=medium
8261 if X is a toc-relative address known to be offsettable within MODE. */
8263 bool
8264 legitimate_constant_pool_address_p (const_rtx x, machine_mode mode,
8265 bool strict)
8267 return (toc_relative_expr_p (x, strict)
8268 && (TARGET_CMODEL != CMODEL_MEDIUM
8269 || constant_pool_expr_p (XVECEXP (tocrel_base, 0, 0))
8270 || mode == QImode
8271 || offsettable_ok_by_alignment (XVECEXP (tocrel_base, 0, 0),
8272 INTVAL (tocrel_offset), mode)));
8275 static bool
8276 legitimate_small_data_p (machine_mode mode, rtx x)
8278 return (DEFAULT_ABI == ABI_V4
8279 && !flag_pic && !TARGET_TOC
8280 && (GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == CONST)
8281 && small_data_operand (x, mode));
8284 /* SPE offset addressing is limited to 5-bits worth of double words. */
8285 #define SPE_CONST_OFFSET_OK(x) (((x) & ~0xf8) == 0)
8287 bool
8288 rs6000_legitimate_offset_address_p (machine_mode mode, rtx x,
8289 bool strict, bool worst_case)
8291 unsigned HOST_WIDE_INT offset;
8292 unsigned int extra;
8294 if (GET_CODE (x) != PLUS)
8295 return false;
8296 if (!REG_P (XEXP (x, 0)))
8297 return false;
8298 if (!INT_REG_OK_FOR_BASE_P (XEXP (x, 0), strict))
8299 return false;
8300 if (mode_supports_vsx_dform_quad (mode))
8301 return quad_address_p (x, mode, strict);
8302 if (!reg_offset_addressing_ok_p (mode))
8303 return virtual_stack_registers_memory_p (x);
8304 if (legitimate_constant_pool_address_p (x, mode, strict || lra_in_progress))
8305 return true;
8306 if (GET_CODE (XEXP (x, 1)) != CONST_INT)
8307 return false;
8309 offset = INTVAL (XEXP (x, 1));
8310 extra = 0;
8311 switch (mode)
8313 case V4HImode:
8314 case V2SImode:
8315 case V1DImode:
8316 case V2SFmode:
8317 /* SPE vector modes. */
8318 return SPE_CONST_OFFSET_OK (offset);
8320 case DFmode:
8321 case DDmode:
8322 case DImode:
8323 /* On e500v2, we may have:
8325 (subreg:DF (mem:DI (plus (reg) (const_int))) 0),
8327 which gets addressed with evldd instructions. */
8328 if (TARGET_E500_DOUBLE)
8329 return SPE_CONST_OFFSET_OK (offset);
8331 /* If we are using VSX scalar loads, restrict ourselves to reg+reg
8332 addressing. */
8333 if (VECTOR_MEM_VSX_P (mode))
8334 return false;
8336 if (!worst_case)
8337 break;
8338 if (!TARGET_POWERPC64)
8339 extra = 4;
8340 else if (offset & 3)
8341 return false;
8342 break;
8344 case TFmode:
8345 case IFmode:
8346 case KFmode:
8347 if (TARGET_E500_DOUBLE)
8348 return (SPE_CONST_OFFSET_OK (offset)
8349 && SPE_CONST_OFFSET_OK (offset + 8));
8350 /* fall through */
8352 case TDmode:
8353 case TImode:
8354 case PTImode:
8355 extra = 8;
8356 if (!worst_case)
8357 break;
8358 if (!TARGET_POWERPC64)
8359 extra = 12;
8360 else if (offset & 3)
8361 return false;
8362 break;
8364 default:
8365 break;
8368 offset += 0x8000;
8369 return offset < 0x10000 - extra;
8372 bool
8373 legitimate_indexed_address_p (rtx x, int strict)
8375 rtx op0, op1;
8377 if (GET_CODE (x) != PLUS)
8378 return false;
8380 op0 = XEXP (x, 0);
8381 op1 = XEXP (x, 1);
8383 /* Recognize the rtl generated by reload which we know will later be
8384 replaced with proper base and index regs. */
8385 if (!strict
8386 && reload_in_progress
8387 && (REG_P (op0) || GET_CODE (op0) == PLUS)
8388 && REG_P (op1))
8389 return true;
8391 return (REG_P (op0) && REG_P (op1)
8392 && ((INT_REG_OK_FOR_BASE_P (op0, strict)
8393 && INT_REG_OK_FOR_INDEX_P (op1, strict))
8394 || (INT_REG_OK_FOR_BASE_P (op1, strict)
8395 && INT_REG_OK_FOR_INDEX_P (op0, strict))));
8398 bool
8399 avoiding_indexed_address_p (machine_mode mode)
8401 /* Avoid indexed addressing for modes that have non-indexed
8402 load/store instruction forms. */
8403 return (TARGET_AVOID_XFORM && VECTOR_MEM_NONE_P (mode));
8406 bool
8407 legitimate_indirect_address_p (rtx x, int strict)
8409 return GET_CODE (x) == REG && INT_REG_OK_FOR_BASE_P (x, strict);
8412 bool
8413 macho_lo_sum_memory_operand (rtx x, machine_mode mode)
8415 if (!TARGET_MACHO || !flag_pic
8416 || mode != SImode || GET_CODE (x) != MEM)
8417 return false;
8418 x = XEXP (x, 0);
8420 if (GET_CODE (x) != LO_SUM)
8421 return false;
8422 if (GET_CODE (XEXP (x, 0)) != REG)
8423 return false;
8424 if (!INT_REG_OK_FOR_BASE_P (XEXP (x, 0), 0))
8425 return false;
8426 x = XEXP (x, 1);
8428 return CONSTANT_P (x);
8431 static bool
8432 legitimate_lo_sum_address_p (machine_mode mode, rtx x, int strict)
8434 if (GET_CODE (x) != LO_SUM)
8435 return false;
8436 if (GET_CODE (XEXP (x, 0)) != REG)
8437 return false;
8438 if (!INT_REG_OK_FOR_BASE_P (XEXP (x, 0), strict))
8439 return false;
8440 /* Quad word addresses are restricted; we can't use LO_SUM. */
8441 if (mode_supports_vsx_dform_quad (mode))
8442 return false;
8443 /* Restrict addressing for DI because of our SUBREG hackery. */
8444 if (TARGET_E500_DOUBLE && GET_MODE_SIZE (mode) > UNITS_PER_WORD)
8445 return false;
8446 x = XEXP (x, 1);
8448 if (TARGET_ELF || TARGET_MACHO)
8450 bool large_toc_ok;
8452 if (DEFAULT_ABI == ABI_V4 && flag_pic)
8453 return false;
8454 /* LRA doesn't use LEGITIMIZE_RELOAD_ADDRESS as it usually calls
8455 push_reload from reload pass code. LEGITIMIZE_RELOAD_ADDRESS
8456 recognizes some LO_SUM addresses as valid although this
8457 function says the opposite. In most cases, LRA can generate
8458 correct code for address reloads through its own transformations;
8459 only some LO_SUM cases cannot be managed that way. So we need to
8460 add code analogous to that in rs6000_legitimize_reload_address
8461 for LO_SUM here, saying that some addresses are still valid. */
8462 large_toc_ok = (lra_in_progress && TARGET_CMODEL != CMODEL_SMALL
8463 && small_toc_ref (x, VOIDmode));
8464 if (TARGET_TOC && ! large_toc_ok)
8465 return false;
8466 if (GET_MODE_NUNITS (mode) != 1)
8467 return false;
8468 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD
8469 && !(/* ??? Assume floating point reg based on mode? */
8470 TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT
8471 && (mode == DFmode || mode == DDmode)))
8472 return false;
8474 return CONSTANT_P (x) || large_toc_ok;
8477 return false;
8481 /* Try machine-dependent ways of modifying an illegitimate address
8482 to be legitimate. If we find one, return the new, valid address.
8483 This is used from only one place: `memory_address' in explow.c.
8485 OLDX is the address as it was before break_out_memory_refs was
8486 called. In some cases it is useful to look at this to decide what
8487 needs to be done.
8489 It is always safe for this function to do nothing. It exists to
8490 recognize opportunities to optimize the output.
8492 On RS/6000, first check for the sum of a register with a constant
8493 integer that is out of range. If so, generate code to add the
8494 constant with the low-order 16 bits masked to the register and force
8495 this result into another register (this can be done with `cau').
8496 Then generate an address of REG+(CONST&0xffff), allowing for the
8497 possibility of bit 16 being a one.
8499 Then check for the sum of a register and something not constant, try to
8500 load the other things into a register and return the sum. */
8502 static rtx
8503 rs6000_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
8504 machine_mode mode)
8506 unsigned int extra;
8508 if (!reg_offset_addressing_ok_p (mode)
8509 || mode_supports_vsx_dform_quad (mode))
8511 if (virtual_stack_registers_memory_p (x))
8512 return x;
8514 /* In theory we should not be seeing addresses of the form reg+0,
8515 but just in case it is generated, optimize it away. */
8516 if (GET_CODE (x) == PLUS && XEXP (x, 1) == const0_rtx)
8517 return force_reg (Pmode, XEXP (x, 0));
8519 /* For TImode with load/store quad, restrict addresses to just a single
8520 pointer, so it works with both GPRs and VSX registers. */
8521 /* Make sure both operands are registers. */
8522 else if (GET_CODE (x) == PLUS
8523 && (mode != TImode || !TARGET_VSX_TIMODE))
8524 return gen_rtx_PLUS (Pmode,
8525 force_reg (Pmode, XEXP (x, 0)),
8526 force_reg (Pmode, XEXP (x, 1)));
8527 else
8528 return force_reg (Pmode, x);
8530 if (GET_CODE (x) == SYMBOL_REF)
8532 enum tls_model model = SYMBOL_REF_TLS_MODEL (x);
8533 if (model != 0)
8534 return rs6000_legitimize_tls_address (x, model);
8537 extra = 0;
8538 switch (mode)
8540 case TFmode:
8541 case TDmode:
8542 case TImode:
8543 case PTImode:
8544 case IFmode:
8545 case KFmode:
8546 /* As in legitimate_offset_address_p we do not assume
8547 worst-case. The mode here is just a hint as to the registers
8548 used. A TImode is usually in gprs, but may actually be in
8549 fprs. Leave worst-case scenario for reload to handle via
8550 insn constraints. PTImode is only GPRs. */
8551 extra = 8;
8552 break;
8553 default:
8554 break;
8557 if (GET_CODE (x) == PLUS
8558 && GET_CODE (XEXP (x, 0)) == REG
8559 && GET_CODE (XEXP (x, 1)) == CONST_INT
8560 && ((unsigned HOST_WIDE_INT) (INTVAL (XEXP (x, 1)) + 0x8000)
8561 >= 0x10000 - extra)
8562 && !(SPE_VECTOR_MODE (mode)
8563 || (TARGET_E500_DOUBLE && GET_MODE_SIZE (mode) > UNITS_PER_WORD)))
8565 HOST_WIDE_INT high_int, low_int;
8566 rtx sum;
8567 low_int = ((INTVAL (XEXP (x, 1)) & 0xffff) ^ 0x8000) - 0x8000;
8568 if (low_int >= 0x8000 - extra)
8569 low_int = 0;
8570 high_int = INTVAL (XEXP (x, 1)) - low_int;
8571 sum = force_operand (gen_rtx_PLUS (Pmode, XEXP (x, 0),
8572 GEN_INT (high_int)), 0);
8573 return plus_constant (Pmode, sum, low_int);
8575 else if (GET_CODE (x) == PLUS
8576 && GET_CODE (XEXP (x, 0)) == REG
8577 && GET_CODE (XEXP (x, 1)) != CONST_INT
8578 && GET_MODE_NUNITS (mode) == 1
8579 && (GET_MODE_SIZE (mode) <= UNITS_PER_WORD
8580 || (/* ??? Assume floating point reg based on mode? */
8581 (TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT)
8582 && (mode == DFmode || mode == DDmode)))
8583 && !avoiding_indexed_address_p (mode))
8585 return gen_rtx_PLUS (Pmode, XEXP (x, 0),
8586 force_reg (Pmode, force_operand (XEXP (x, 1), 0)));
8588 else if (SPE_VECTOR_MODE (mode)
8589 || (TARGET_E500_DOUBLE && GET_MODE_SIZE (mode) > UNITS_PER_WORD))
8591 if (mode == DImode)
8592 return x;
8593 /* We accept [reg + reg] and [reg + OFFSET]. */
8595 if (GET_CODE (x) == PLUS)
8597 rtx op1 = XEXP (x, 0);
8598 rtx op2 = XEXP (x, 1);
8599 rtx y;
8601 op1 = force_reg (Pmode, op1);
8603 if (GET_CODE (op2) != REG
8604 && (GET_CODE (op2) != CONST_INT
8605 || !SPE_CONST_OFFSET_OK (INTVAL (op2))
8606 || (GET_MODE_SIZE (mode) > 8
8607 && !SPE_CONST_OFFSET_OK (INTVAL (op2) + 8))))
8608 op2 = force_reg (Pmode, op2);
8610 /* We can't always do [reg + reg] for these, because [reg +
8611 reg + offset] is not a legitimate addressing mode. */
8612 y = gen_rtx_PLUS (Pmode, op1, op2);
8614 if ((GET_MODE_SIZE (mode) > 8 || mode == DDmode) && REG_P (op2))
8615 return force_reg (Pmode, y);
8616 else
8617 return y;
8620 return force_reg (Pmode, x);
8622 else if ((TARGET_ELF
8623 #if TARGET_MACHO
8624 || !MACHO_DYNAMIC_NO_PIC_P
8625 #endif
8627 && TARGET_32BIT
8628 && TARGET_NO_TOC
8629 && ! flag_pic
8630 && GET_CODE (x) != CONST_INT
8631 && GET_CODE (x) != CONST_WIDE_INT
8632 && GET_CODE (x) != CONST_DOUBLE
8633 && CONSTANT_P (x)
8634 && GET_MODE_NUNITS (mode) == 1
8635 && (GET_MODE_SIZE (mode) <= UNITS_PER_WORD
8636 || (/* ??? Assume floating point reg based on mode? */
8637 (TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT)
8638 && (mode == DFmode || mode == DDmode))))
8640 rtx reg = gen_reg_rtx (Pmode);
8641 if (TARGET_ELF)
8642 emit_insn (gen_elf_high (reg, x));
8643 else
8644 emit_insn (gen_macho_high (reg, x));
8645 return gen_rtx_LO_SUM (Pmode, reg, x);
8647 else if (TARGET_TOC
8648 && GET_CODE (x) == SYMBOL_REF
8649 && constant_pool_expr_p (x)
8650 && ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (get_pool_constant (x), Pmode))
8651 return create_TOC_reference (x, NULL_RTX);
8652 else
8653 return x;
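/* Worked example of the high/low split above: legitimizing
   (plus (reg) (const_int 0x12344)) computes
   low_int = ((0x2344 ^ 0x8000) - 0x8000) = 0x2344 and
   high_int = 0x10000, so one addis (the `cau' mentioned above) adds
   0x10000 into a new register and the memory access keeps the
   in-range displacement 0x2344.  */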
8656 /* Debug version of rs6000_legitimize_address. */
8657 static rtx
8658 rs6000_debug_legitimize_address (rtx x, rtx oldx, machine_mode mode)
8660 rtx ret;
8661 rtx_insn *insns;
8663 start_sequence ();
8664 ret = rs6000_legitimize_address (x, oldx, mode);
8665 insns = get_insns ();
8666 end_sequence ();
8668 if (ret != x)
8670 fprintf (stderr,
8671 "\nrs6000_legitimize_address: mode %s, old code %s, "
8672 "new code %s, modified\n",
8673 GET_MODE_NAME (mode), GET_RTX_NAME (GET_CODE (x)),
8674 GET_RTX_NAME (GET_CODE (ret)));
8676 fprintf (stderr, "Original address:\n");
8677 debug_rtx (x);
8679 fprintf (stderr, "oldx:\n");
8680 debug_rtx (oldx);
8682 fprintf (stderr, "New address:\n");
8683 debug_rtx (ret);
8685 if (insns)
8687 fprintf (stderr, "Insns added:\n");
8688 debug_rtx_list (insns, 20);
8691 else
8693 fprintf (stderr,
8694 "\nrs6000_legitimize_address: mode %s, code %s, no change:\n",
8695 GET_MODE_NAME (mode), GET_RTX_NAME (GET_CODE (x)));
8697 debug_rtx (x);
8700 if (insns)
8701 emit_insn (insns);
8703 return ret;
8706 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
8707 We need to emit DTP-relative relocations. */
8709 static void rs6000_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
8710 static void
8711 rs6000_output_dwarf_dtprel (FILE *file, int size, rtx x)
8713 switch (size)
8715 case 4:
8716 fputs ("\t.long\t", file);
8717 break;
8718 case 8:
8719 fputs (DOUBLE_INT_ASM_OP, file);
8720 break;
8721 default:
8722 gcc_unreachable ();
8724 output_addr_const (file, x);
8725 if (TARGET_ELF)
8726 fputs ("@dtprel+0x8000", file);
8727 else if (TARGET_XCOFF && GET_CODE (x) == SYMBOL_REF)
8729 switch (SYMBOL_REF_TLS_MODEL (x))
8731 case 0:
8732 break;
8733 case TLS_MODEL_LOCAL_EXEC:
8734 fputs ("@le", file);
8735 break;
8736 case TLS_MODEL_INITIAL_EXEC:
8737 fputs ("@ie", file);
8738 break;
8739 case TLS_MODEL_GLOBAL_DYNAMIC:
8740 case TLS_MODEL_LOCAL_DYNAMIC:
8741 fputs ("@m", file);
8742 break;
8743 default:
8744 gcc_unreachable ();
8749 /* Return true if X is a symbol that refers to real (rather than emulated)
8750 TLS. */
8752 static bool
8753 rs6000_real_tls_symbol_ref_p (rtx x)
8755 return (GET_CODE (x) == SYMBOL_REF
8756 && SYMBOL_REF_TLS_MODEL (x) >= TLS_MODEL_REAL);
8759 /* In the name of slightly smaller debug output, and to cater to
8760 general assembler lossage, recognize various UNSPEC sequences
8761 and turn them back into a direct symbol reference. */
8763 static rtx
8764 rs6000_delegitimize_address (rtx orig_x)
8766 rtx x, y, offset;
8768 orig_x = delegitimize_mem_from_attrs (orig_x);
8769 x = orig_x;
8770 if (MEM_P (x))
8771 x = XEXP (x, 0);
8773 y = x;
8774 if (TARGET_CMODEL != CMODEL_SMALL
8775 && GET_CODE (y) == LO_SUM)
8776 y = XEXP (y, 1);
8778 offset = NULL_RTX;
8779 if (GET_CODE (y) == PLUS
8780 && GET_MODE (y) == Pmode
8781 && CONST_INT_P (XEXP (y, 1)))
8783 offset = XEXP (y, 1);
8784 y = XEXP (y, 0);
8787 if (GET_CODE (y) == UNSPEC
8788 && XINT (y, 1) == UNSPEC_TOCREL)
8790 y = XVECEXP (y, 0, 0);
8792 #ifdef HAVE_AS_TLS
8793 /* Do not associate thread-local symbols with the original
8794 constant pool symbol. */
8795 if (TARGET_XCOFF
8796 && GET_CODE (y) == SYMBOL_REF
8797 && CONSTANT_POOL_ADDRESS_P (y)
8798 && rs6000_real_tls_symbol_ref_p (get_pool_constant (y)))
8799 return orig_x;
8800 #endif
8802 if (offset != NULL_RTX)
8803 y = gen_rtx_PLUS (Pmode, y, offset);
8804 if (!MEM_P (orig_x))
8805 return y;
8806 else
8807 return replace_equiv_address_nv (orig_x, y);
8810 if (TARGET_MACHO
8811 && GET_CODE (orig_x) == LO_SUM
8812 && GET_CODE (XEXP (orig_x, 1)) == CONST)
8814 y = XEXP (XEXP (orig_x, 1), 0);
8815 if (GET_CODE (y) == UNSPEC
8816 && XINT (y, 1) == UNSPEC_MACHOPIC_OFFSET)
8817 return XVECEXP (y, 0, 0);
8820 return orig_x;
8823 /* Return true if X shouldn't be emitted into the debug info.
8824 The linker doesn't like .toc section references from
8825 .debug_* sections, so reject .toc section symbols. */
8827 static bool
8828 rs6000_const_not_ok_for_debug_p (rtx x)
8830 if (GET_CODE (x) == SYMBOL_REF
8831 && CONSTANT_POOL_ADDRESS_P (x))
8833 rtx c = get_pool_constant (x);
8834 machine_mode cmode = get_pool_mode (x);
8835 if (ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (c, cmode))
8836 return true;
8839 return false;
8842 /* Construct the SYMBOL_REF for the tls_get_addr function. */
8844 static GTY(()) rtx rs6000_tls_symbol;
8845 static rtx
8846 rs6000_tls_get_addr (void)
8848 if (!rs6000_tls_symbol)
8849 rs6000_tls_symbol = init_one_libfunc ("__tls_get_addr");
8851 return rs6000_tls_symbol;
8854 /* Construct the SYMBOL_REF for TLS GOT references. */
8856 static GTY(()) rtx rs6000_got_symbol;
8857 static rtx
8858 rs6000_got_sym (void)
8860 if (!rs6000_got_symbol)
8862 rs6000_got_symbol = gen_rtx_SYMBOL_REF (Pmode, "_GLOBAL_OFFSET_TABLE_");
8863 SYMBOL_REF_FLAGS (rs6000_got_symbol) |= SYMBOL_FLAG_LOCAL;
8864 SYMBOL_REF_FLAGS (rs6000_got_symbol) |= SYMBOL_FLAG_EXTERNAL;
8867 return rs6000_got_symbol;
8870 /* AIX Thread-Local Address support. */
8872 static rtx
8873 rs6000_legitimize_tls_address_aix (rtx addr, enum tls_model model)
8875 rtx sym, mem, tocref, tlsreg, tmpreg, dest, tlsaddr;
8876 const char *name;
8877 char *tlsname;
8879 name = XSTR (addr, 0);
8880 /* Append the TLS CSECT qualifier, unless the symbol is already qualified
8881 or will be placed in the TLS private data section. */
8882 if (name[strlen (name) - 1] != ']'
8883 && (TREE_PUBLIC (SYMBOL_REF_DECL (addr))
8884 || bss_initializer_p (SYMBOL_REF_DECL (addr))))
8886 tlsname = XALLOCAVEC (char, strlen (name) + 4);
8887 strcpy (tlsname, name);
8888 strcat (tlsname,
8889 bss_initializer_p (SYMBOL_REF_DECL (addr)) ? "[UL]" : "[TL]");
8890 tlsaddr = copy_rtx (addr);
8891 XSTR (tlsaddr, 0) = ggc_strdup (tlsname);
8893 else
8894 tlsaddr = addr;
8896 /* Place addr into TOC constant pool. */
8897 sym = force_const_mem (GET_MODE (tlsaddr), tlsaddr);
8899 /* Output the TOC entry and create the MEM referencing the value. */
8900 if (constant_pool_expr_p (XEXP (sym, 0))
8901 && ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (get_pool_constant (XEXP (sym, 0)), Pmode))
8903 tocref = create_TOC_reference (XEXP (sym, 0), NULL_RTX);
8904 mem = gen_const_mem (Pmode, tocref);
8905 set_mem_alias_set (mem, get_TOC_alias_set ());
8907 else
8908 return sym;
8910 /* Use global-dynamic for local-dynamic. */
8911 if (model == TLS_MODEL_GLOBAL_DYNAMIC
8912 || model == TLS_MODEL_LOCAL_DYNAMIC)
8914 /* Create new TOC reference for @m symbol. */
8915 name = XSTR (XVECEXP (XEXP (mem, 0), 0, 0), 0);
8916 tlsname = XALLOCAVEC (char, strlen (name) + 1);
8917 strcpy (tlsname, "*LCM");
8918 strcat (tlsname, name + 3);
8919 rtx modaddr = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (tlsname));
8920 SYMBOL_REF_FLAGS (modaddr) |= SYMBOL_FLAG_LOCAL;
8921 tocref = create_TOC_reference (modaddr, NULL_RTX);
8922 rtx modmem = gen_const_mem (Pmode, tocref);
8923 set_mem_alias_set (modmem, get_TOC_alias_set ());
8925 rtx modreg = gen_reg_rtx (Pmode);
8926 emit_insn (gen_rtx_SET (modreg, modmem));
8928 tmpreg = gen_reg_rtx (Pmode);
8929 emit_insn (gen_rtx_SET (tmpreg, mem));
8931 dest = gen_reg_rtx (Pmode);
8932 if (TARGET_32BIT)
8933 emit_insn (gen_tls_get_addrsi (dest, modreg, tmpreg));
8934 else
8935 emit_insn (gen_tls_get_addrdi (dest, modreg, tmpreg));
8936 return dest;
8938 /* Obtain TLS pointer: 32 bit call or 64 bit GPR 13. */
8939 else if (TARGET_32BIT)
8941 tlsreg = gen_reg_rtx (SImode);
8942 emit_insn (gen_tls_get_tpointer (tlsreg));
8944 else
8945 tlsreg = gen_rtx_REG (DImode, 13);
8947 /* Load the TOC value into temporary register. */
8948 tmpreg = gen_reg_rtx (Pmode);
8949 emit_insn (gen_rtx_SET (tmpreg, mem));
8950 set_unique_reg_note (get_last_insn (), REG_EQUAL,
8951 gen_rtx_MINUS (Pmode, addr, tlsreg));
8953 /* Add TOC symbol value to TLS pointer. */
8954 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, tmpreg, tlsreg));
8956 return dest;
8959 /* ADDR contains a thread-local SYMBOL_REF. Generate code to compute
8960 this (thread-local) address. */
8962 static rtx
8963 rs6000_legitimize_tls_address (rtx addr, enum tls_model model)
8965 rtx dest, insn;
8967 if (TARGET_XCOFF)
8968 return rs6000_legitimize_tls_address_aix (addr, model);
8970 dest = gen_reg_rtx (Pmode);
8971 if (model == TLS_MODEL_LOCAL_EXEC && rs6000_tls_size == 16)
8973 rtx tlsreg;
8975 if (TARGET_64BIT)
8977 tlsreg = gen_rtx_REG (Pmode, 13);
8978 insn = gen_tls_tprel_64 (dest, tlsreg, addr);
8980 else
8982 tlsreg = gen_rtx_REG (Pmode, 2);
8983 insn = gen_tls_tprel_32 (dest, tlsreg, addr);
8985 emit_insn (insn);
8987 else if (model == TLS_MODEL_LOCAL_EXEC && rs6000_tls_size == 32)
8989 rtx tlsreg, tmp;
8991 tmp = gen_reg_rtx (Pmode);
8992 if (TARGET_64BIT)
8994 tlsreg = gen_rtx_REG (Pmode, 13);
8995 insn = gen_tls_tprel_ha_64 (tmp, tlsreg, addr);
8997 else
8999 tlsreg = gen_rtx_REG (Pmode, 2);
9000 insn = gen_tls_tprel_ha_32 (tmp, tlsreg, addr);
9002 emit_insn (insn);
9003 if (TARGET_64BIT)
9004 insn = gen_tls_tprel_lo_64 (dest, tmp, addr);
9005 else
9006 insn = gen_tls_tprel_lo_32 (dest, tmp, addr);
9007 emit_insn (insn);
9009 else
9011 rtx r3, got, tga, tmp1, tmp2, call_insn;
9013 /* We currently use relocations like @got@tlsgd for tls, which
9014 means the linker will handle allocation of tls entries, placing
9015 them in the .got section. So use a pointer to the .got section,
9016 not one to secondary TOC sections used by 64-bit -mminimal-toc,
9017 or to secondary GOT sections used by 32-bit -fPIC. */
9018 if (TARGET_64BIT)
9019 got = gen_rtx_REG (Pmode, 2);
9020 else
9022 if (flag_pic == 1)
9023 got = gen_rtx_REG (Pmode, RS6000_PIC_OFFSET_TABLE_REGNUM);
9024 else
9026 rtx gsym = rs6000_got_sym ();
9027 got = gen_reg_rtx (Pmode);
9028 if (flag_pic == 0)
9029 rs6000_emit_move (got, gsym, Pmode);
9030 else
9032 rtx mem, lab, last;
9034 tmp1 = gen_reg_rtx (Pmode);
9035 tmp2 = gen_reg_rtx (Pmode);
9036 mem = gen_const_mem (Pmode, tmp1);
9037 lab = gen_label_rtx ();
9038 emit_insn (gen_load_toc_v4_PIC_1b (gsym, lab));
9039 emit_move_insn (tmp1, gen_rtx_REG (Pmode, LR_REGNO));
9040 if (TARGET_LINK_STACK)
9041 emit_insn (gen_addsi3 (tmp1, tmp1, GEN_INT (4)));
9042 emit_move_insn (tmp2, mem);
9043 last = emit_insn (gen_addsi3 (got, tmp1, tmp2));
9044 set_unique_reg_note (last, REG_EQUAL, gsym);
9049 if (model == TLS_MODEL_GLOBAL_DYNAMIC)
9051 tga = rs6000_tls_get_addr ();
9052 emit_library_call_value (tga, dest, LCT_CONST, Pmode,
9053 1, const0_rtx, Pmode);
9055 r3 = gen_rtx_REG (Pmode, 3);
9056 if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
9058 if (TARGET_64BIT)
9059 insn = gen_tls_gd_aix64 (r3, got, addr, tga, const0_rtx);
9060 else
9061 insn = gen_tls_gd_aix32 (r3, got, addr, tga, const0_rtx);
9063 else if (DEFAULT_ABI == ABI_V4)
9064 insn = gen_tls_gd_sysvsi (r3, got, addr, tga, const0_rtx);
9065 else
9066 gcc_unreachable ();
9067 call_insn = last_call_insn ();
9068 PATTERN (call_insn) = insn;
9069 if (DEFAULT_ABI == ABI_V4 && TARGET_SECURE_PLT && flag_pic)
9070 use_reg (&CALL_INSN_FUNCTION_USAGE (call_insn),
9071 pic_offset_table_rtx);
9073 else if (model == TLS_MODEL_LOCAL_DYNAMIC)
9075 tga = rs6000_tls_get_addr ();
9076 tmp1 = gen_reg_rtx (Pmode);
9077 emit_library_call_value (tga, tmp1, LCT_CONST, Pmode,
9078 1, const0_rtx, Pmode);
9080 r3 = gen_rtx_REG (Pmode, 3);
9081 if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
9083 if (TARGET_64BIT)
9084 insn = gen_tls_ld_aix64 (r3, got, tga, const0_rtx);
9085 else
9086 insn = gen_tls_ld_aix32 (r3, got, tga, const0_rtx);
9088 else if (DEFAULT_ABI == ABI_V4)
9089 insn = gen_tls_ld_sysvsi (r3, got, tga, const0_rtx);
9090 else
9091 gcc_unreachable ();
9092 call_insn = last_call_insn ();
9093 PATTERN (call_insn) = insn;
9094 if (DEFAULT_ABI == ABI_V4 && TARGET_SECURE_PLT && flag_pic)
9095 use_reg (&CALL_INSN_FUNCTION_USAGE (call_insn),
9096 pic_offset_table_rtx);
9098 if (rs6000_tls_size == 16)
9100 if (TARGET_64BIT)
9101 insn = gen_tls_dtprel_64 (dest, tmp1, addr);
9102 else
9103 insn = gen_tls_dtprel_32 (dest, tmp1, addr);
9105 else if (rs6000_tls_size == 32)
9107 tmp2 = gen_reg_rtx (Pmode);
9108 if (TARGET_64BIT)
9109 insn = gen_tls_dtprel_ha_64 (tmp2, tmp1, addr);
9110 else
9111 insn = gen_tls_dtprel_ha_32 (tmp2, tmp1, addr);
9112 emit_insn (insn);
9113 if (TARGET_64BIT)
9114 insn = gen_tls_dtprel_lo_64 (dest, tmp2, addr);
9115 else
9116 insn = gen_tls_dtprel_lo_32 (dest, tmp2, addr);
9118 else
9120 tmp2 = gen_reg_rtx (Pmode);
9121 if (TARGET_64BIT)
9122 insn = gen_tls_got_dtprel_64 (tmp2, got, addr);
9123 else
9124 insn = gen_tls_got_dtprel_32 (tmp2, got, addr);
9125 emit_insn (insn);
9126 insn = gen_rtx_SET (dest, gen_rtx_PLUS (Pmode, tmp2, tmp1));
9128 emit_insn (insn);
9130 else
9132 /* IE, or 64-bit offset LE. */
9133 tmp2 = gen_reg_rtx (Pmode);
9134 if (TARGET_64BIT)
9135 insn = gen_tls_got_tprel_64 (tmp2, got, addr);
9136 else
9137 insn = gen_tls_got_tprel_32 (tmp2, got, addr);
9138 emit_insn (insn);
9139 if (TARGET_64BIT)
9140 insn = gen_tls_tls_64 (dest, tmp2, addr);
9141 else
9142 insn = gen_tls_tls_32 (dest, tmp2, addr);
9143 emit_insn (insn);
9147 return dest;
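/* Usage sketch, assuming 64-bit local-exec TLS compiled with
   -mtls-size=16:

       __thread int t;

       int
       get_t (void)
       {
         return t;
       }

   The access reaches this function with model == TLS_MODEL_LOCAL_EXEC
   and takes the gen_tls_tprel_64 path: a single addition of the tprel
   offset of T to the thread pointer in GPR 13.  The larger models
   instead go through the GOT and, for global/local dynamic, a
   __tls_get_addr call.  */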
9150 /* Implement TARGET_CANNOT_FORCE_CONST_MEM. */
9152 static bool
9153 rs6000_cannot_force_const_mem (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
9155 if (GET_CODE (x) == HIGH
9156 && GET_CODE (XEXP (x, 0)) == UNSPEC)
9157 return true;
9159 /* A TLS symbol in the TOC cannot contain a sum. */
9160 if (GET_CODE (x) == CONST
9161 && GET_CODE (XEXP (x, 0)) == PLUS
9162 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
9163 && SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x, 0), 0)) != 0)
9164 return true;
9166 /* Do not place an ELF TLS symbol in the constant pool. */
9167 return TARGET_ELF && tls_referenced_p (x);
9170 /* Return true iff the given SYMBOL_REF refers to a constant pool entry
9171 that we have put in the TOC, or for cmodel=medium, if the SYMBOL_REF
9172 can be addressed relative to the toc pointer. */
9174 static bool
9175 use_toc_relative_ref (rtx sym, machine_mode mode)
9177 return ((constant_pool_expr_p (sym)
9178 && ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (get_pool_constant (sym),
9179 get_pool_mode (sym)))
9180 || (TARGET_CMODEL == CMODEL_MEDIUM
9181 && SYMBOL_REF_LOCAL_P (sym)
9182 && GET_MODE_SIZE (mode) <= POWERPC64_TOC_POINTER_ALIGNMENT));
9185 /* Our implementation of LEGITIMIZE_RELOAD_ADDRESS. Returns a value to
9186 replace the input X, or the original X if no replacement is called for.
9187 The output parameter *WIN is 1 if the calling macro should goto WIN,
9188 0 if it should not.
9190 For RS/6000, we wish to handle large displacements off a base
9191 register by splitting the addend across an addi/addis and the mem insn.
9192 This cuts the number of extra insns needed from 3 to 1.
9194 On Darwin, we use this to generate code for floating point constants.
9195 A movsf_low is generated so we wind up with 2 instructions rather than 3.
9196 The Darwin code is inside #if TARGET_MACHO because only then are the
9197 machopic_* functions defined. */
9198 static rtx
9199 rs6000_legitimize_reload_address (rtx x, machine_mode mode,
9200 int opnum, int type,
9201 int ind_levels ATTRIBUTE_UNUSED, int *win)
9203 bool reg_offset_p = reg_offset_addressing_ok_p (mode);
9204 bool quad_offset_p = mode_supports_vsx_dform_quad (mode);
9206 /* Nasty hack for vsx_splat_v2df/v2di load from mem, which takes a
9207 DFmode/DImode MEM. Ditto for ISA 3.0 vsx_splat_v4sf/v4si. */
9208 if (reg_offset_p
9209 && opnum == 1
9210 && ((mode == DFmode && recog_data.operand_mode[0] == V2DFmode)
9211 || (mode == DImode && recog_data.operand_mode[0] == V2DImode)
9212 || (mode == SFmode && recog_data.operand_mode[0] == V4SFmode
9213 && TARGET_P9_VECTOR)
9214 || (mode == SImode && recog_data.operand_mode[0] == V4SImode
9215 && TARGET_P9_VECTOR)))
9216 reg_offset_p = false;
9218 /* We must recognize output that we have already generated ourselves. */
9219 if (GET_CODE (x) == PLUS
9220 && GET_CODE (XEXP (x, 0)) == PLUS
9221 && GET_CODE (XEXP (XEXP (x, 0), 0)) == REG
9222 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
9223 && GET_CODE (XEXP (x, 1)) == CONST_INT)
9225 if (TARGET_DEBUG_ADDR)
9227 fprintf (stderr, "\nlegitimize_reload_address push_reload #1:\n");
9228 debug_rtx (x);
9230 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
9231 BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
9232 opnum, (enum reload_type) type);
9233 *win = 1;
9234 return x;
9237 /* Likewise for (lo_sum (high ...) ...) output we have generated. */
9238 if (GET_CODE (x) == LO_SUM
9239 && GET_CODE (XEXP (x, 0)) == HIGH)
9241 if (TARGET_DEBUG_ADDR)
9243 fprintf (stderr, "\nlegitimize_reload_address push_reload #2:\n");
9244 debug_rtx (x);
9246 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
9247 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0,
9248 opnum, (enum reload_type) type);
9249 *win = 1;
9250 return x;
9253 #if TARGET_MACHO
9254 if (DEFAULT_ABI == ABI_DARWIN && flag_pic
9255 && GET_CODE (x) == LO_SUM
9256 && GET_CODE (XEXP (x, 0)) == PLUS
9257 && XEXP (XEXP (x, 0), 0) == pic_offset_table_rtx
9258 && GET_CODE (XEXP (XEXP (x, 0), 1)) == HIGH
9259 && XEXP (XEXP (XEXP (x, 0), 1), 0) == XEXP (x, 1)
9260 && machopic_operand_p (XEXP (x, 1)))
9262 /* Result of previous invocation of this function on Darwin
9263 floating point constant. */
9264 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
9265 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0,
9266 opnum, (enum reload_type) type);
9267 *win = 1;
9268 return x;
9270 #endif
9272 if (TARGET_CMODEL != CMODEL_SMALL
9273 && reg_offset_p
9274 && !quad_offset_p
9275 && small_toc_ref (x, VOIDmode))
9277 rtx hi = gen_rtx_HIGH (Pmode, copy_rtx (x));
9278 x = gen_rtx_LO_SUM (Pmode, hi, x);
9279 if (TARGET_DEBUG_ADDR)
9281 fprintf (stderr, "\nlegitimize_reload_address push_reload #3:\n");
9282 debug_rtx (x);
9284 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
9285 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0,
9286 opnum, (enum reload_type) type);
9287 *win = 1;
9288 return x;
9291 if (GET_CODE (x) == PLUS
9292 && REG_P (XEXP (x, 0))
9293 && REGNO (XEXP (x, 0)) < FIRST_PSEUDO_REGISTER
9294 && INT_REG_OK_FOR_BASE_P (XEXP (x, 0), 1)
9295 && CONST_INT_P (XEXP (x, 1))
9296 && reg_offset_p
9297 && !SPE_VECTOR_MODE (mode)
9298 && !(TARGET_E500_DOUBLE && GET_MODE_SIZE (mode) > UNITS_PER_WORD)
9299 && (quad_offset_p || !VECTOR_MODE_P (mode) || VECTOR_MEM_NONE_P (mode)))
9301 HOST_WIDE_INT val = INTVAL (XEXP (x, 1));
9302 HOST_WIDE_INT low = ((val & 0xffff) ^ 0x8000) - 0x8000;
9303 HOST_WIDE_INT high
9304 = (((val - low) & 0xffffffff) ^ 0x80000000) - 0x80000000;
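/* Worked example: val = 0x1234abcd gives low = 0xabcd - 0x10000
   = -0x5433 (the low 16 bits sign-extended) and high = 0x12350000,
   so high + low == val, with high fitting the shifted addis
   immediate and low fitting a d-form displacement.  */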
9306 /* Check for 32-bit overflow or quad addresses with one of the
9307 four least significant bits set. */
9308 if (high + low != val
9309 || (quad_offset_p && (low & 0xf)))
9311 *win = 0;
9312 return x;
9315 /* Reload the high part into a base reg; leave the low part
9316 in the mem directly. */
9318 x = gen_rtx_PLUS (GET_MODE (x),
9319 gen_rtx_PLUS (GET_MODE (x), XEXP (x, 0),
9320 GEN_INT (high)),
9321 GEN_INT (low));
9323 if (TARGET_DEBUG_ADDR)
9325 fprintf (stderr, "\nlegitimize_reload_address push_reload #4:\n");
9326 debug_rtx (x);
9328 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
9329 BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
9330 opnum, (enum reload_type) type);
9331 *win = 1;
9332 return x;
9335 if (GET_CODE (x) == SYMBOL_REF
9336 && reg_offset_p
9337 && !quad_offset_p
9338 && (!VECTOR_MODE_P (mode) || VECTOR_MEM_NONE_P (mode))
9339 && !SPE_VECTOR_MODE (mode)
9340 #if TARGET_MACHO
9341 && DEFAULT_ABI == ABI_DARWIN
9342 && (flag_pic || MACHO_DYNAMIC_NO_PIC_P)
9343 && machopic_symbol_defined_p (x)
9344 #else
9345 && DEFAULT_ABI == ABI_V4
9346 && !flag_pic
9347 #endif
9348 /* Don't do this for TFmode or TDmode, since the result isn't offsettable.
9349 The same goes for DImode without 64-bit gprs and DFmode and DDmode
9350 without fprs.
9351 ??? Assume floating point reg based on mode? This assumption is
9352 violated by eg. powerpc-linux -m32 compile of gcc.dg/pr28796-2.c
9353 where reload ends up doing a DFmode load of a constant from
9354 mem using two gprs. Unfortunately, at this point reload
9355 hasn't yet selected regs so poking around in reload data
9356 won't help and even if we could figure out the regs reliably,
9357 we'd still want to allow this transformation when the mem is
9358 naturally aligned. Since we say the address is good here, we
9359 can't disable offsets from LO_SUMs in mem_operand_gpr.
9360 FIXME: Allow offset from lo_sum for other modes too, when
9361 mem is sufficiently aligned.
9363 Also disallow this if the type can go in VMX/Altivec registers, since
9364 those registers do not have d-form (reg+offset) address modes. */
9365 && !reg_addr[mode].scalar_in_vmx_p
9366 && mode != TFmode
9367 && mode != TDmode
9368 && mode != IFmode
9369 && mode != KFmode
9370 && (mode != TImode || !TARGET_VSX_TIMODE)
9371 && mode != PTImode
9372 && (mode != DImode || TARGET_POWERPC64)
9373 && ((mode != DFmode && mode != DDmode) || TARGET_POWERPC64
9374 || (TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT)))
9376 #if TARGET_MACHO
9377 if (flag_pic)
9379 rtx offset = machopic_gen_offset (x);
9380 x = gen_rtx_LO_SUM (GET_MODE (x),
9381 gen_rtx_PLUS (Pmode, pic_offset_table_rtx,
9382 gen_rtx_HIGH (Pmode, offset)), offset);
9384 else
9385 #endif
9386 x = gen_rtx_LO_SUM (GET_MODE (x),
9387 gen_rtx_HIGH (Pmode, x), x);
9389 if (TARGET_DEBUG_ADDR)
9391 fprintf (stderr, "\nlegitimize_reload_address push_reload #5:\n");
9392 debug_rtx (x);
9394 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
9395 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0,
9396 opnum, (enum reload_type) type);
9397 *win = 1;
9398 return x;
9401 /* Reload an offset address wrapped by an AND that represents the
9402 masking of the lower bits. Strip the outer AND and let reload
9403 convert the offset address into an indirect address. For VSX,
9404 force reload to create the address with an AND in a separate
9405 register, because we can't guarantee an altivec register will
9406 be used. */
9407 if (VECTOR_MEM_ALTIVEC_P (mode)
9408 && GET_CODE (x) == AND
9409 && GET_CODE (XEXP (x, 0)) == PLUS
9410 && GET_CODE (XEXP (XEXP (x, 0), 0)) == REG
9411 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
9412 && GET_CODE (XEXP (x, 1)) == CONST_INT
9413 && INTVAL (XEXP (x, 1)) == -16)
9415 x = XEXP (x, 0);
9416 *win = 1;
9417 return x;
9420 if (TARGET_TOC
9421 && reg_offset_p
9422 && !quad_offset_p
9423 && GET_CODE (x) == SYMBOL_REF
9424 && use_toc_relative_ref (x, mode))
9426 x = create_TOC_reference (x, NULL_RTX);
9427 if (TARGET_CMODEL != CMODEL_SMALL)
9429 if (TARGET_DEBUG_ADDR)
9431 fprintf (stderr, "\nlegitimize_reload_address push_reload #6:\n");
9432 debug_rtx (x);
9434 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
9435 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0,
9436 opnum, (enum reload_type) type);
9438 *win = 1;
9439 return x;
9441 *win = 0;
9442 return x;
9445 /* Debug version of rs6000_legitimize_reload_address. */
9446 static rtx
9447 rs6000_debug_legitimize_reload_address (rtx x, machine_mode mode,
9448 int opnum, int type,
9449 int ind_levels, int *win)
9451 rtx ret = rs6000_legitimize_reload_address (x, mode, opnum, type,
9452 ind_levels, win);
9453 fprintf (stderr,
9454 "\nrs6000_legitimize_reload_address: mode = %s, opnum = %d, "
9455 "type = %d, ind_levels = %d, win = %d, original addr:\n",
9456 GET_MODE_NAME (mode), opnum, type, ind_levels, *win);
9457 debug_rtx (x);
9459 if (x == ret)
9460 fprintf (stderr, "Same address returned\n");
9461 else if (!ret)
9462 fprintf (stderr, "NULL returned\n");
9463 else
9465 fprintf (stderr, "New address:\n");
9466 debug_rtx (ret);
9469 return ret;
9472 /* TARGET_LEGITIMATE_ADDRESS_P recognizes an RTL expression
9473 that is a valid memory address for an instruction.
9474 The MODE argument is the machine mode for the MEM expression
9475 that wants to use this address.
9477 On the RS/6000, there are four valid address forms: a SYMBOL_REF that
9478 refers to a constant pool entry of an address (or the sum of it
9479 plus a constant), a short (16-bit signed) constant plus a register,
9480 the sum of two registers, or a register indirect, possibly with an
9481 auto-increment. For DFmode, DDmode and DImode with a constant plus
9482 register, we must ensure that both words are addressable, or on
9483 PowerPC64 that the offset is word aligned.
9485 For modes spanning multiple registers (DFmode and DDmode in 32-bit GPRs,
9486 32-bit DImode, TImode, TFmode, TDmode), indexed addressing cannot be used
9487 because adjacent memory cells are accessed by adding word-sized offsets
9488 during assembly output. */
9489 static bool
9490 rs6000_legitimate_address_p (machine_mode mode, rtx x, bool reg_ok_strict)
9492 bool reg_offset_p = reg_offset_addressing_ok_p (mode);
9493 bool quad_offset_p = mode_supports_vsx_dform_quad (mode);
9495 /* If this is an unaligned stvx/ldvx type address, discard the outer AND. */
9496 if (VECTOR_MEM_ALTIVEC_P (mode)
9497 && GET_CODE (x) == AND
9498 && GET_CODE (XEXP (x, 1)) == CONST_INT
9499 && INTVAL (XEXP (x, 1)) == -16)
9500 x = XEXP (x, 0);
9502 if (TARGET_ELF && RS6000_SYMBOL_REF_TLS_P (x))
9503 return 0;
9504 if (legitimate_indirect_address_p (x, reg_ok_strict))
9505 return 1;
9506 if (TARGET_UPDATE
9507 && (GET_CODE (x) == PRE_INC || GET_CODE (x) == PRE_DEC)
9508 && mode_supports_pre_incdec_p (mode)
9509 && legitimate_indirect_address_p (XEXP (x, 0), reg_ok_strict))
9510 return 1;
9511 /* Handle restricted vector d-form offsets in ISA 3.0. */
9512 if (quad_offset_p)
9514 if (quad_address_p (x, mode, reg_ok_strict))
9515 return 1;
9517 else if (virtual_stack_registers_memory_p (x))
9518 return 1;
9520 else if (reg_offset_p)
9522 if (legitimate_small_data_p (mode, x))
9523 return 1;
9524 if (legitimate_constant_pool_address_p (x, mode,
9525 reg_ok_strict || lra_in_progress))
9526 return 1;
9527 if (reg_addr[mode].fused_toc && GET_CODE (x) == UNSPEC
9528 && XINT (x, 1) == UNSPEC_FUSION_ADDIS)
9529 return 1;
9532 /* For TImode, if we have TImode in VSX registers, only allow register
9533 indirect addresses. This will allow the values to go in either GPRs
9534 or VSX registers without reloading. The vector types would tend to
9535 go into VSX registers, so we allow REG+REG, while TImode seems
9536 somewhat split, in that some uses are GPR based, and some VSX based. */
9537 /* FIXME: We could loosen this by changing the following to
9538 if (mode == TImode && TARGET_QUAD_MEMORY && TARGET_VSX_TIMODE)
9539 but currently we cannot allow REG+REG addressing for TImode. See
9540 PR72827 for complete details on how this ends up hoodwinking DSE. */
9541 if (mode == TImode && TARGET_VSX_TIMODE)
9542 return 0;
9543 /* If not REG_OK_STRICT (before reload) let pass any stack offset. */
9544 if (! reg_ok_strict
9545 && reg_offset_p
9546 && GET_CODE (x) == PLUS
9547 && GET_CODE (XEXP (x, 0)) == REG
9548 && (XEXP (x, 0) == virtual_stack_vars_rtx
9549 || XEXP (x, 0) == arg_pointer_rtx)
9550 && GET_CODE (XEXP (x, 1)) == CONST_INT)
9551 return 1;
9552 if (rs6000_legitimate_offset_address_p (mode, x, reg_ok_strict, false))
9553 return 1;
9554 if (!FLOAT128_2REG_P (mode)
9555 && ((TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT)
9556 || TARGET_POWERPC64
9557 || (mode != DFmode && mode != DDmode)
9558 || (TARGET_E500_DOUBLE && mode != DDmode))
9559 && (TARGET_POWERPC64 || mode != DImode)
9560 && (mode != TImode || VECTOR_MEM_VSX_P (TImode))
9561 && mode != PTImode
9562 && !avoiding_indexed_address_p (mode)
9563 && legitimate_indexed_address_p (x, reg_ok_strict))
9564 return 1;
9565 if (TARGET_UPDATE && GET_CODE (x) == PRE_MODIFY
9566 && mode_supports_pre_modify_p (mode)
9567 && legitimate_indirect_address_p (XEXP (x, 0), reg_ok_strict)
9568 && (rs6000_legitimate_offset_address_p (mode, XEXP (x, 1),
9569 reg_ok_strict, false)
9570 || (!avoiding_indexed_address_p (mode)
9571 && legitimate_indexed_address_p (XEXP (x, 1), reg_ok_strict)))
9572 && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
9573 return 1;
9574 if (reg_offset_p && !quad_offset_p
9575 && legitimate_lo_sum_address_p (mode, x, reg_ok_strict))
9576 return 1;
9577 return 0;
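/* For illustration, the main accepted shapes are e.g.

	(reg:DI 9)					register indirect
	(plus:DI (reg:DI 9) (const_int 16))		reg + 16-bit offset
	(plus:DI (reg:DI 9) (reg:DI 10))		indexed (reg + reg)
	(lo_sum:DI (reg:DI 9) (symbol_ref:DI ("x")))	lo_sum reference

   subject to the mode-specific restrictions checked above.  */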
9580 /* Debug version of rs6000_legitimate_address_p. */
9581 static bool
9582 rs6000_debug_legitimate_address_p (machine_mode mode, rtx x,
9583 bool reg_ok_strict)
9585 bool ret = rs6000_legitimate_address_p (mode, x, reg_ok_strict);
9586 fprintf (stderr,
9587 "\nrs6000_legitimate_address_p: return = %s, mode = %s, "
9588 "strict = %d, reload = %s, code = %s\n",
9589 ret ? "true" : "false",
9590 GET_MODE_NAME (mode),
9591 reg_ok_strict,
9592 (reload_completed
9593 ? "after"
9594 : (reload_in_progress ? "progress" : "before")),
9595 GET_RTX_NAME (GET_CODE (x)));
9596 debug_rtx (x);
9598 return ret;
9601 /* Implement TARGET_MODE_DEPENDENT_ADDRESS_P. */
9603 static bool
9604 rs6000_mode_dependent_address_p (const_rtx addr,
9605 addr_space_t as ATTRIBUTE_UNUSED)
9607 return rs6000_mode_dependent_address_ptr (addr);
9610 /* Go to LABEL if ADDR (a legitimate address expression)
9611 has an effect that depends on the machine mode it is used for.
9613 On the RS/6000 this is true of all integral offsets (since AltiVec
9614 and VSX modes don't allow them) and of pre-increment and pre-decrement.
9616 ??? Except that due to conceptual problems in offsettable_address_p
9617 we can't really report the problems of integral offsets. So leave
9618 this assuming that the adjustable offset must be valid for the
9619 sub-words of a TFmode operand, which is what we had before. */
9621 static bool
9622 rs6000_mode_dependent_address (const_rtx addr)
9624 switch (GET_CODE (addr))
9626 case PLUS:
9627 /* Any offset from virtual_stack_vars_rtx and arg_pointer_rtx
9628 is considered a legitimate address before reload, so there
9629 are no offset restrictions in that case. Note that this
9630 condition is safe in strict mode because any address involving
9631 virtual_stack_vars_rtx or arg_pointer_rtx would already have
9632 been rejected as illegitimate. */
9633 if (XEXP (addr, 0) != virtual_stack_vars_rtx
9634 && XEXP (addr, 0) != arg_pointer_rtx
9635 && GET_CODE (XEXP (addr, 1)) == CONST_INT)
9637 unsigned HOST_WIDE_INT val = INTVAL (XEXP (addr, 1));
9638 return val + 0x8000 >= 0x10000 - (TARGET_POWERPC64 ? 8 : 12);
9640 break;
9642 case LO_SUM:
9643 /* Anything in the constant pool is sufficiently aligned that
9644 all bytes have the same high part address. */
9645 return !legitimate_constant_pool_address_p (addr, QImode, false);
9647 /* Auto-increment cases are now treated generically in recog.c. */
9648 case PRE_MODIFY:
9649 return TARGET_UPDATE;
9651 /* AND is only allowed in Altivec loads. */
9652 case AND:
9653 return true;
9655 default:
9656 break;
9659 return false;
9662 /* Debug version of rs6000_mode_dependent_address. */
9663 static bool
9664 rs6000_debug_mode_dependent_address (const_rtx addr)
9666 bool ret = rs6000_mode_dependent_address (addr);
9668 fprintf (stderr, "\nrs6000_mode_dependent_address: ret = %s\n",
9669 ret ? "true" : "false");
9670 debug_rtx (addr);
9672 return ret;
9675 /* Implement FIND_BASE_TERM. */
9678 rs6000_find_base_term (rtx op)
9680 rtx base;
9682 base = op;
9683 if (GET_CODE (base) == CONST)
9684 base = XEXP (base, 0);
9685 if (GET_CODE (base) == PLUS)
9686 base = XEXP (base, 0);
9687 if (GET_CODE (base) == UNSPEC)
9688 switch (XINT (base, 1))
9690 case UNSPEC_TOCREL:
9691 case UNSPEC_MACHOPIC_OFFSET:
9692 /* OP represents SYM [+ OFFSET] - ANCHOR. SYM is the base term
9693 for aliasing purposes. */
9694 return XVECEXP (base, 0, 0);
9697 return op;
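/* E.g. a cmodel=medium TOC reference of roughly the form

	(const:DI (plus:DI (unspec:DI [(symbol_ref:DI ("x"))
				       (reg:DI 2)] UNSPEC_TOCREL)
			   (const_int 8)))

   yields the symbol_ref for "x" as its base term (a sketch; the
   exact unspec operands vary).  */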
9700 /* More elaborate version of recog's offsettable_memref_p predicate
9701 that works around the ??? note of rs6000_mode_dependent_address.
9702 In particular it accepts
9704 (mem:DI (plus:SI (reg/f:SI 31 31) (const_int 32760 [0x7ff8])))
9706 in 32-bit mode, which the recog predicate rejects.
9708 static bool
9709 rs6000_offsettable_memref_p (rtx op, machine_mode reg_mode)
9711 bool worst_case;
9713 if (!MEM_P (op))
9714 return false;
9716 /* First mimic offsettable_memref_p. */
9717 if (offsettable_address_p (true, GET_MODE (op), XEXP (op, 0)))
9718 return true;
9720 /* offsettable_address_p invokes rs6000_mode_dependent_address, but
9721 the latter predicate knows nothing about the mode of the memory
9722 reference and, therefore, assumes that it is the largest supported
9723 mode (TFmode). As a consequence, legitimate offsettable memory
9724 references are rejected. rs6000_legitimate_offset_address_p contains
9725 the correct logic for the PLUS case of rs6000_mode_dependent_address,
9726 at least with a little bit of help here given that we know the
9727 actual registers used. */
9728 worst_case = ((TARGET_POWERPC64 && GET_MODE_CLASS (reg_mode) == MODE_INT)
9729 || GET_MODE_SIZE (reg_mode) == 4);
9730 return rs6000_legitimate_offset_address_p (GET_MODE (op), XEXP (op, 0),
9731 true, worst_case);
9734 /* Determine the reassociation width to be used in reassociate_bb.
9735 This takes into account how many parallel operations we
9736 can actually do of a given type, and also the latency.
9738 int add/sub 6/cycle
9739 mul 2/cycle
9740 vect add/sub/mul 2/cycle
9741 fp add/sub/mul 2/cycle
9742 dfp 1/cycle
9745 static int
9746 rs6000_reassociation_width (unsigned int opc ATTRIBUTE_UNUSED,
9747 enum machine_mode mode)
9749 switch (rs6000_cpu)
9751 case PROCESSOR_POWER8:
9752 case PROCESSOR_POWER9:
9753 if (DECIMAL_FLOAT_MODE_P (mode))
9754 return 1;
9755 if (VECTOR_MODE_P (mode))
9756 return 4;
9757 if (INTEGRAL_MODE_P (mode))
9758 return opc == MULT_EXPR ? 4 : 6;
9759 if (FLOAT_MODE_P (mode))
9760 return 4;
9761 break;
9762 default:
9763 break;
9765 return 1;
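/* E.g. for a long chain of scalar integer additions on POWER8, the
   width of 6 returned above lets the reassociation pass split
   ((((a + b) + c) + d) + e) + f into independent partial sums that
   can issue in parallel; a sketch of the intent, not a guaranteed
   schedule.  */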
9768 /* Change register usage conditional on target flags. */
9769 static void
9770 rs6000_conditional_register_usage (void)
9772 int i;
9774 if (TARGET_DEBUG_TARGET)
9775 fprintf (stderr, "rs6000_conditional_register_usage called\n");
9777 /* Set MQ register fixed (already call_used) so that it will not be
9778 allocated. */
9779 fixed_regs[64] = 1;
9781 /* 64-bit AIX and Linux reserve GPR13 for thread-private data. */
9782 if (TARGET_64BIT)
9783 fixed_regs[13] = call_used_regs[13]
9784 = call_really_used_regs[13] = 1;
9786 /* Conditionally disable FPRs. */
9787 if (TARGET_SOFT_FLOAT || !TARGET_FPRS)
9788 for (i = 32; i < 64; i++)
9789 fixed_regs[i] = call_used_regs[i]
9790 = call_really_used_regs[i] = 1;
9792 /* The TOC register is not killed across calls in a way that is
9793 visible to the compiler. */
9794 if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
9795 call_really_used_regs[2] = 0;
9797 if (DEFAULT_ABI == ABI_V4 && flag_pic == 2)
9798 fixed_regs[RS6000_PIC_OFFSET_TABLE_REGNUM] = 1;
9800 if (DEFAULT_ABI == ABI_V4 && flag_pic == 1)
9801 fixed_regs[RS6000_PIC_OFFSET_TABLE_REGNUM]
9802 = call_used_regs[RS6000_PIC_OFFSET_TABLE_REGNUM]
9803 = call_really_used_regs[RS6000_PIC_OFFSET_TABLE_REGNUM] = 1;
9805 if (DEFAULT_ABI == ABI_DARWIN && flag_pic)
9806 fixed_regs[RS6000_PIC_OFFSET_TABLE_REGNUM]
9807 = call_used_regs[RS6000_PIC_OFFSET_TABLE_REGNUM]
9808 = call_really_used_regs[RS6000_PIC_OFFSET_TABLE_REGNUM] = 1;
9810 if (TARGET_TOC && TARGET_MINIMAL_TOC)
9811 fixed_regs[RS6000_PIC_OFFSET_TABLE_REGNUM]
9812 = call_used_regs[RS6000_PIC_OFFSET_TABLE_REGNUM] = 1;
9814 if (TARGET_SPE)
9816 global_regs[SPEFSCR_REGNO] = 1;
9817 /* We used to use r14 as FIXED_SCRATCH to address SPE 64-bit
9818 registers in prologues and epilogues. We no longer use r14
9819 for FIXED_SCRATCH, but we're keeping r14 out of the allocation
9820 pool for link-compatibility with older versions of GCC. Once
9821 "old" code has died out, we can return r14 to the allocation
9822 pool. */
9823 fixed_regs[14]
9824 = call_used_regs[14]
9825 = call_really_used_regs[14] = 1;
9828 if (!TARGET_ALTIVEC && !TARGET_VSX)
9830 for (i = FIRST_ALTIVEC_REGNO; i <= LAST_ALTIVEC_REGNO; ++i)
9831 fixed_regs[i] = call_used_regs[i] = call_really_used_regs[i] = 1;
9832 call_really_used_regs[VRSAVE_REGNO] = 1;
9835 if (TARGET_ALTIVEC || TARGET_VSX)
9836 global_regs[VSCR_REGNO] = 1;
9838 if (TARGET_ALTIVEC_ABI)
9840 for (i = FIRST_ALTIVEC_REGNO; i < FIRST_ALTIVEC_REGNO + 20; ++i)
9841 call_used_regs[i] = call_really_used_regs[i] = 1;
9843 /* AIX reserves VR20:31 in non-extended ABI mode. */
9844 if (TARGET_XCOFF)
9845 for (i = FIRST_ALTIVEC_REGNO + 20; i < FIRST_ALTIVEC_REGNO + 32; ++i)
9846 fixed_regs[i] = call_used_regs[i] = call_really_used_regs[i] = 1;
9851 /* Output insns to set DEST equal to the constant SOURCE as a series of
9852 lis, ori and shift instructions and return TRUE. */
9854 bool
9855 rs6000_emit_set_const (rtx dest, rtx source)
9857 machine_mode mode = GET_MODE (dest);
9858 rtx temp, set;
9859 rtx_insn *insn;
9860 HOST_WIDE_INT c;
9862 gcc_checking_assert (CONST_INT_P (source));
9863 c = INTVAL (source);
9864 switch (mode)
9866 case QImode:
9867 case HImode:
9868 emit_insn (gen_rtx_SET (dest, source));
9869 return true;
9871 case SImode:
9872 temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (SImode);
9874 emit_insn (gen_rtx_SET (copy_rtx (temp),
9875 GEN_INT (c & ~(HOST_WIDE_INT) 0xffff)));
9876 emit_insn (gen_rtx_SET (dest,
9877 gen_rtx_IOR (SImode, copy_rtx (temp),
9878 GEN_INT (c & 0xffff))));
9879 break;
9881 case DImode:
9882 if (!TARGET_POWERPC64)
9884 rtx hi, lo;
9886 hi = operand_subword_force (copy_rtx (dest), WORDS_BIG_ENDIAN == 0,
9887 DImode);
9888 lo = operand_subword_force (dest, WORDS_BIG_ENDIAN != 0,
9889 DImode);
9890 emit_move_insn (hi, GEN_INT (c >> 32));
9891 c = ((c & 0xffffffff) ^ 0x80000000) - 0x80000000;
9892 emit_move_insn (lo, GEN_INT (c));
9894 else
9895 rs6000_emit_set_long_const (dest, c);
9896 break;
9898 default:
9899 gcc_unreachable ();
9902 insn = get_last_insn ();
9903 set = single_set (insn);
9904 if (! CONSTANT_P (SET_SRC (set)))
9905 set_unique_reg_note (insn, REG_EQUAL, GEN_INT (c));
9907 return true;
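/* For example, SImode c = 0x12345678 yields a sketch like

	lis rT,0x1234	# rT = 0x12340000
	ori rD,rT,0x5678	# rD = 0x12345678

   (register names are placeholders), with a REG_EQUAL note
   recording the full constant on the final insn.  */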
9910 /* Subroutine of rs6000_emit_set_const, handling PowerPC64 DImode.
9911 Output insns to set DEST equal to the constant C as a series of
9912 lis, ori and shift instructions. */
9914 static void
9915 rs6000_emit_set_long_const (rtx dest, HOST_WIDE_INT c)
9917 rtx temp;
9918 HOST_WIDE_INT ud1, ud2, ud3, ud4;
9920 ud1 = c & 0xffff;
9921 c = c >> 16;
9922 ud2 = c & 0xffff;
9923 c = c >> 16;
9924 ud3 = c & 0xffff;
9925 c = c >> 16;
9926 ud4 = c & 0xffff;
9928 if ((ud4 == 0xffff && ud3 == 0xffff && ud2 == 0xffff && (ud1 & 0x8000))
9929 || (ud4 == 0 && ud3 == 0 && ud2 == 0 && ! (ud1 & 0x8000)))
9930 emit_move_insn (dest, GEN_INT ((ud1 ^ 0x8000) - 0x8000));
9932 else if ((ud4 == 0xffff && ud3 == 0xffff && (ud2 & 0x8000))
9933 || (ud4 == 0 && ud3 == 0 && ! (ud2 & 0x8000)))
9935 temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode);
9937 emit_move_insn (ud1 != 0 ? copy_rtx (temp) : dest,
9938 GEN_INT (((ud2 << 16) ^ 0x80000000) - 0x80000000));
9939 if (ud1 != 0)
9940 emit_move_insn (dest,
9941 gen_rtx_IOR (DImode, copy_rtx (temp),
9942 GEN_INT (ud1)));
9944 else if (ud3 == 0 && ud4 == 0)
9946 temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode);
9948 gcc_assert (ud2 & 0x8000);
9949 emit_move_insn (copy_rtx (temp),
9950 GEN_INT (((ud2 << 16) ^ 0x80000000) - 0x80000000));
9951 if (ud1 != 0)
9952 emit_move_insn (copy_rtx (temp),
9953 gen_rtx_IOR (DImode, copy_rtx (temp),
9954 GEN_INT (ud1)));
9955 emit_move_insn (dest,
9956 gen_rtx_ZERO_EXTEND (DImode,
9957 gen_lowpart (SImode,
9958 copy_rtx (temp))));
9960 else if ((ud4 == 0xffff && (ud3 & 0x8000))
9961 || (ud4 == 0 && ! (ud3 & 0x8000)))
9963 temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode);
9965 emit_move_insn (copy_rtx (temp),
9966 GEN_INT (((ud3 << 16) ^ 0x80000000) - 0x80000000));
9967 if (ud2 != 0)
9968 emit_move_insn (copy_rtx (temp),
9969 gen_rtx_IOR (DImode, copy_rtx (temp),
9970 GEN_INT (ud2)));
9971 emit_move_insn (ud1 != 0 ? copy_rtx (temp) : dest,
9972 gen_rtx_ASHIFT (DImode, copy_rtx (temp),
9973 GEN_INT (16)));
9974 if (ud1 != 0)
9975 emit_move_insn (dest,
9976 gen_rtx_IOR (DImode, copy_rtx (temp),
9977 GEN_INT (ud1)));
9979 else
9981 temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode);
9983 emit_move_insn (copy_rtx (temp),
9984 GEN_INT (((ud4 << 16) ^ 0x80000000) - 0x80000000));
9985 if (ud3 != 0)
9986 emit_move_insn (copy_rtx (temp),
9987 gen_rtx_IOR (DImode, copy_rtx (temp),
9988 GEN_INT (ud3)));
9990 emit_move_insn (ud2 != 0 || ud1 != 0 ? copy_rtx (temp) : dest,
9991 gen_rtx_ASHIFT (DImode, copy_rtx (temp),
9992 GEN_INT (32)));
9993 if (ud2 != 0)
9994 emit_move_insn (ud1 != 0 ? copy_rtx (temp) : dest,
9995 gen_rtx_IOR (DImode, copy_rtx (temp),
9996 GEN_INT (ud2 << 16)));
9997 if (ud1 != 0)
9998 emit_move_insn (dest,
9999 gen_rtx_IOR (DImode, copy_rtx (temp),
10000 GEN_INT (ud1)));
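/* Worked example for the general path: c = 0x123456789abcdef0 is
   built as a sketch like

	lis rT,0x1234	# ud4
	ori rT,rT,0x5678	# ud3
	sldi rT,rT,32
	oris rT,rT,0x9abc	# ud2
	ori rD,rT,0xdef0	# ud1

   i.e. five insns; the special cases above peel off constants that
   need fewer.  */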
10004 /* Helper for rs6000_emit_move below. Get rid of [r+r] memory refs
10005 in cases where it won't work (TImode, TFmode, TDmode, PTImode). */
10007 static void
10008 rs6000_eliminate_indexed_memrefs (rtx operands[2])
10010 if (reload_in_progress)
10011 return;
10013 if (GET_CODE (operands[0]) == MEM
10014 && GET_CODE (XEXP (operands[0], 0)) != REG
10015 && ! legitimate_constant_pool_address_p (XEXP (operands[0], 0),
10016 GET_MODE (operands[0]), false))
10017 operands[0]
10018 = replace_equiv_address (operands[0],
10019 copy_addr_to_reg (XEXP (operands[0], 0)));
10021 if (GET_CODE (operands[1]) == MEM
10022 && GET_CODE (XEXP (operands[1], 0)) != REG
10023 && ! legitimate_constant_pool_address_p (XEXP (operands[1], 0),
10024 GET_MODE (operands[1]), false))
10025 operands[1]
10026 = replace_equiv_address (operands[1],
10027 copy_addr_to_reg (XEXP (operands[1], 0)));
10030 /* Generate a vector of constants to permute MODE for a little-endian
10031 storage operation by swapping the two halves of a vector. */
10032 static rtvec
10033 rs6000_const_vec (machine_mode mode)
10035 int i, subparts;
10036 rtvec v;
10038 switch (mode)
10040 case V1TImode:
10041 subparts = 1;
10042 break;
10043 case V2DFmode:
10044 case V2DImode:
10045 subparts = 2;
10046 break;
10047 case V4SFmode:
10048 case V4SImode:
10049 subparts = 4;
10050 break;
10051 case V8HImode:
10052 subparts = 8;
10053 break;
10054 case V16QImode:
10055 subparts = 16;
10056 break;
10057 default:
10058 gcc_unreachable();
10061 v = rtvec_alloc (subparts);
10063 for (i = 0; i < subparts / 2; ++i)
10064 RTVEC_ELT (v, i) = gen_rtx_CONST_INT (DImode, i + subparts / 2);
10065 for (i = subparts / 2; i < subparts; ++i)
10066 RTVEC_ELT (v, i) = gen_rtx_CONST_INT (DImode, i - subparts / 2);
10068 return v;
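/* E.g. for V4SImode this produces the selector {2, 3, 0, 1}, which
   exchanges the two 64-bit halves of the vector.  */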
10071 /* Generate a permute rtx that represents an lxvd2x, stxvd2x, or xxpermdi
10072 for a VSX load or store operation. */
10074 rs6000_gen_le_vsx_permute (rtx source, machine_mode mode)
10076 /* Use ROTATE instead of VEC_SELECT on IEEE 128-bit floating point, and
10077 128-bit integers if they are allowed in VSX registers. */
10078 if (FLOAT128_VECTOR_P (mode) || mode == TImode)
10079 return gen_rtx_ROTATE (mode, source, GEN_INT (64));
10080 else
10082 rtx par = gen_rtx_PARALLEL (VOIDmode, rs6000_const_vec (mode));
10083 return gen_rtx_VEC_SELECT (mode, source, par);
10087 /* Emit a little-endian load from vector memory location SOURCE to VSX
10088 register DEST in mode MODE. The load is done with two permuting
10089 insns that represent an lxvd2x and an xxpermdi.
10090 void
10091 rs6000_emit_le_vsx_load (rtx dest, rtx source, machine_mode mode)
10093 rtx tmp, permute_mem, permute_reg;
10095 /* Use V2DImode to do swaps of types with 128-bit scalar parts (TImode,
10096 V1TImode). */
10097 if (mode == TImode || mode == V1TImode)
10099 mode = V2DImode;
10100 dest = gen_lowpart (V2DImode, dest);
10101 source = adjust_address (source, V2DImode, 0);
10104 tmp = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (dest) : dest;
10105 permute_mem = rs6000_gen_le_vsx_permute (source, mode);
10106 permute_reg = rs6000_gen_le_vsx_permute (tmp, mode);
10107 emit_insn (gen_rtx_SET (tmp, permute_mem));
10108 emit_insn (gen_rtx_SET (dest, permute_reg));
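/* The two permutes cancel out: lxvd2x loads the doublewords swapped
   and the xxpermdi swap restores vector element order, e.g. a
   sketch like

	lxvd2x vsT,0,rA
	xxpermdi vsD,vsT,vsT,2

   together implementing a plain little-endian vector load.  */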
10111 /* Emit a little-endian store to vector memory location DEST from VSX
10112 register SOURCE in mode MODE. The store is done with two permuting
10113 insns that represent an xxpermdi and an stxvd2x. */
10114 void
10115 rs6000_emit_le_vsx_store (rtx dest, rtx source, machine_mode mode)
10117 rtx tmp, permute_src, permute_tmp;
10119 /* This should never be called during or after reload, because it does
10120 not re-permute the source register. It is intended only for use
10121 during expand. */
10122 gcc_assert (!reload_in_progress && !lra_in_progress && !reload_completed);
10124 /* Use V2DImode to do swaps of types with 128-bit scalar parts (TImode,
10125 V1TImode). */
10126 if (mode == TImode || mode == V1TImode)
10128 mode = V2DImode;
10129 dest = adjust_address (dest, V2DImode, 0);
10130 source = gen_lowpart (V2DImode, source);
10133 tmp = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (source) : source;
10134 permute_src = rs6000_gen_le_vsx_permute (source, mode);
10135 permute_tmp = rs6000_gen_le_vsx_permute (tmp, mode);
10136 emit_insn (gen_rtx_SET (tmp, permute_src));
10137 emit_insn (gen_rtx_SET (dest, permute_tmp));
10140 /* Emit a sequence representing a little-endian VSX load or store,
10141 moving data from SOURCE to DEST in mode MODE. This is done
10142 separately from rs6000_emit_move to ensure it is called only
10143 during expand. LE VSX loads and stores introduced later are
10144 handled with a split. The expand-time RTL generation allows
10145 us to optimize away redundant pairs of register-permutes. */
10146 void
10147 rs6000_emit_le_vsx_move (rtx dest, rtx source, machine_mode mode)
10149 gcc_assert (!BYTES_BIG_ENDIAN
10150 && VECTOR_MEM_VSX_P (mode)
10151 && !TARGET_P9_VECTOR
10152 && !gpr_or_gpr_p (dest, source)
10153 && (MEM_P (source) ^ MEM_P (dest)));
10155 if (MEM_P (source))
10157 gcc_assert (REG_P (dest) || GET_CODE (dest) == SUBREG);
10158 rs6000_emit_le_vsx_load (dest, source, mode);
10160 else
10162 if (!REG_P (source))
10163 source = force_reg (mode, source);
10164 rs6000_emit_le_vsx_store (dest, source, mode);
10168 /* Emit a move from SOURCE to DEST in mode MODE. */
10169 void
10170 rs6000_emit_move (rtx dest, rtx source, machine_mode mode)
10172 rtx operands[2];
10173 operands[0] = dest;
10174 operands[1] = source;
10176 if (TARGET_DEBUG_ADDR)
10178 fprintf (stderr,
10179 "\nrs6000_emit_move: mode = %s, reload_in_progress = %d, "
10180 "reload_completed = %d, can_create_pseudos = %d.\ndest:\n",
10181 GET_MODE_NAME (mode),
10182 reload_in_progress,
10183 reload_completed,
10184 can_create_pseudo_p ());
10185 debug_rtx (dest);
10186 fprintf (stderr, "source:\n");
10187 debug_rtx (source);
10190 /* Sanity checks. Check that we get CONST_WIDE_INT only when we should. */
10191 if (CONST_WIDE_INT_P (operands[1])
10192 && GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT)
10194 /* This should be fixed with the introduction of CONST_WIDE_INT. */
10195 gcc_unreachable ();
10198 /* Check if GCC is setting up a block move that will end up using FP
10199 registers as temporaries. We must make sure this is acceptable. */
10200 if (GET_CODE (operands[0]) == MEM
10201 && GET_CODE (operands[1]) == MEM
10202 && mode == DImode
10203 && (SLOW_UNALIGNED_ACCESS (DImode, MEM_ALIGN (operands[0]))
10204 || SLOW_UNALIGNED_ACCESS (DImode, MEM_ALIGN (operands[1])))
10205 && ! (SLOW_UNALIGNED_ACCESS (SImode, (MEM_ALIGN (operands[0]) > 32
10206 ? 32 : MEM_ALIGN (operands[0])))
10207 || SLOW_UNALIGNED_ACCESS (SImode, (MEM_ALIGN (operands[1]) > 32
10208 ? 32
10209 : MEM_ALIGN (operands[1]))))
10210 && ! MEM_VOLATILE_P (operands [0])
10211 && ! MEM_VOLATILE_P (operands [1]))
10213 emit_move_insn (adjust_address (operands[0], SImode, 0),
10214 adjust_address (operands[1], SImode, 0));
10215 emit_move_insn (adjust_address (copy_rtx (operands[0]), SImode, 4),
10216 adjust_address (copy_rtx (operands[1]), SImode, 4));
10217 return;
10220 if (can_create_pseudo_p () && GET_CODE (operands[0]) == MEM
10221 && !gpc_reg_operand (operands[1], mode))
10222 operands[1] = force_reg (mode, operands[1]);
10224 /* Recognize the case where operand[1] is a reference to thread-local
10225 data and load its address to a register. */
10226 if (tls_referenced_p (operands[1]))
10228 enum tls_model model;
10229 rtx tmp = operands[1];
10230 rtx addend = NULL;
10232 if (GET_CODE (tmp) == CONST && GET_CODE (XEXP (tmp, 0)) == PLUS)
10234 addend = XEXP (XEXP (tmp, 0), 1);
10235 tmp = XEXP (XEXP (tmp, 0), 0);
10238 gcc_assert (GET_CODE (tmp) == SYMBOL_REF);
10239 model = SYMBOL_REF_TLS_MODEL (tmp);
10240 gcc_assert (model != 0);
10242 tmp = rs6000_legitimize_tls_address (tmp, model);
10243 if (addend)
10245 tmp = gen_rtx_PLUS (mode, tmp, addend);
10246 tmp = force_operand (tmp, operands[0]);
10248 operands[1] = tmp;
10251 /* Handle the case where reload calls us with an invalid address. */
10252 if (reload_in_progress && mode == Pmode
10253 && (! general_operand (operands[1], mode)
10254 || ! nonimmediate_operand (operands[0], mode)))
10255 goto emit_set;
10257 /* 128-bit constant floating-point values on Darwin should really be loaded
10258 as two parts. However, this premature splitting is a problem when DFmode
10259 values can go into Altivec registers. */
10260 if (FLOAT128_IBM_P (mode) && !reg_addr[DFmode].scalar_in_vmx_p
10261 && GET_CODE (operands[1]) == CONST_DOUBLE)
10263 rs6000_emit_move (simplify_gen_subreg (DFmode, operands[0], mode, 0),
10264 simplify_gen_subreg (DFmode, operands[1], mode, 0),
10265 DFmode);
10266 rs6000_emit_move (simplify_gen_subreg (DFmode, operands[0], mode,
10267 GET_MODE_SIZE (DFmode)),
10268 simplify_gen_subreg (DFmode, operands[1], mode,
10269 GET_MODE_SIZE (DFmode)),
10270 DFmode);
10271 return;
10274 if (reload_in_progress && cfun->machine->sdmode_stack_slot != NULL_RTX)
10275 cfun->machine->sdmode_stack_slot =
10276 eliminate_regs (cfun->machine->sdmode_stack_slot, VOIDmode, NULL_RTX);
10279 /* Transform (p0:DD, (SUBREG:DD p1:SD)) to ((SUBREG:SD p0:DD),
10280 p1:SD) if p1 is not of floating point class and p0 is spilled as
10281 we can have no analogous movsd_store for this. */
10282 if (lra_in_progress && mode == DDmode
10283 && REG_P (operands[0]) && REGNO (operands[0]) >= FIRST_PSEUDO_REGISTER
10284 && reg_preferred_class (REGNO (operands[0])) == NO_REGS
10285 && GET_CODE (operands[1]) == SUBREG && REG_P (SUBREG_REG (operands[1]))
10286 && GET_MODE (SUBREG_REG (operands[1])) == SDmode)
10288 enum reg_class cl;
10289 int regno = REGNO (SUBREG_REG (operands[1]));
10291 if (regno >= FIRST_PSEUDO_REGISTER)
10293 cl = reg_preferred_class (regno);
10294 regno = cl == NO_REGS ? -1 : ira_class_hard_regs[cl][1];
10296 if (regno >= 0 && ! FP_REGNO_P (regno))
10298 mode = SDmode;
10299 operands[0] = gen_lowpart_SUBREG (SDmode, operands[0]);
10300 operands[1] = SUBREG_REG (operands[1]);
10303 if (lra_in_progress
10304 && mode == SDmode
10305 && REG_P (operands[0]) && REGNO (operands[0]) >= FIRST_PSEUDO_REGISTER
10306 && reg_preferred_class (REGNO (operands[0])) == NO_REGS
10307 && (REG_P (operands[1])
10308 || (GET_CODE (operands[1]) == SUBREG
10309 && REG_P (SUBREG_REG (operands[1])))))
10311 int regno = REGNO (GET_CODE (operands[1]) == SUBREG
10312 ? SUBREG_REG (operands[1]) : operands[1]);
10313 enum reg_class cl;
10315 if (regno >= FIRST_PSEUDO_REGISTER)
10317 cl = reg_preferred_class (regno);
10318 gcc_assert (cl != NO_REGS);
10319 regno = ira_class_hard_regs[cl][0];
10321 if (FP_REGNO_P (regno))
10323 if (GET_MODE (operands[0]) != DDmode)
10324 operands[0] = gen_rtx_SUBREG (DDmode, operands[0], 0);
10325 emit_insn (gen_movsd_store (operands[0], operands[1]));
10327 else if (INT_REGNO_P (regno))
10328 emit_insn (gen_movsd_hardfloat (operands[0], operands[1]));
10329 else
10330 gcc_unreachable();
10331 return;
10333 /* Transform ((SUBREG:DD p0:SD), p1:DD) to (p0:SD, (SUBREG:SD
10334 p:DD)) if p0 is not of floating point class and p1 is spilled as
10335 we can have no analogous movsd_load for this. */
10336 if (lra_in_progress && mode == DDmode
10337 && GET_CODE (operands[0]) == SUBREG && REG_P (SUBREG_REG (operands[0]))
10338 && GET_MODE (SUBREG_REG (operands[0])) == SDmode
10339 && REG_P (operands[1]) && REGNO (operands[1]) >= FIRST_PSEUDO_REGISTER
10340 && reg_preferred_class (REGNO (operands[1])) == NO_REGS)
10342 enum reg_class cl;
10343 int regno = REGNO (SUBREG_REG (operands[0]));
10345 if (regno >= FIRST_PSEUDO_REGISTER)
10347 cl = reg_preferred_class (regno);
10348 regno = cl == NO_REGS ? -1 : ira_class_hard_regs[cl][0];
10350 if (regno >= 0 && ! FP_REGNO_P (regno))
10352 mode = SDmode;
10353 operands[0] = SUBREG_REG (operands[0]);
10354 operands[1] = gen_lowpart_SUBREG (SDmode, operands[1]);
10357 if (lra_in_progress
10358 && mode == SDmode
10359 && (REG_P (operands[0])
10360 || (GET_CODE (operands[0]) == SUBREG
10361 && REG_P (SUBREG_REG (operands[0]))))
10362 && REG_P (operands[1]) && REGNO (operands[1]) >= FIRST_PSEUDO_REGISTER
10363 && reg_preferred_class (REGNO (operands[1])) == NO_REGS)
10365 int regno = REGNO (GET_CODE (operands[0]) == SUBREG
10366 ? SUBREG_REG (operands[0]) : operands[0]);
10367 enum reg_class cl;
10369 if (regno >= FIRST_PSEUDO_REGISTER)
10371 cl = reg_preferred_class (regno);
10372 gcc_assert (cl != NO_REGS);
10373 regno = ira_class_hard_regs[cl][0];
10375 if (FP_REGNO_P (regno))
10377 if (GET_MODE (operands[1]) != DDmode)
10378 operands[1] = gen_rtx_SUBREG (DDmode, operands[1], 0);
10379 emit_insn (gen_movsd_load (operands[0], operands[1]));
10381 else if (INT_REGNO_P (regno))
10382 emit_insn (gen_movsd_hardfloat (operands[0], operands[1]));
10383 else
10384 gcc_unreachable();
10385 return;
10388 if (reload_in_progress
10389 && mode == SDmode
10390 && cfun->machine->sdmode_stack_slot != NULL_RTX
10391 && MEM_P (operands[0])
10392 && rtx_equal_p (operands[0], cfun->machine->sdmode_stack_slot)
10393 && REG_P (operands[1]))
10395 if (FP_REGNO_P (REGNO (operands[1])))
10397 rtx mem = adjust_address_nv (operands[0], DDmode, 0);
10398 mem = eliminate_regs (mem, VOIDmode, NULL_RTX);
10399 emit_insn (gen_movsd_store (mem, operands[1]));
10401 else if (INT_REGNO_P (REGNO (operands[1])))
10403 rtx mem = operands[0];
10404 if (BYTES_BIG_ENDIAN)
10405 mem = adjust_address_nv (mem, mode, 4);
10406 mem = eliminate_regs (mem, VOIDmode, NULL_RTX);
10407 emit_insn (gen_movsd_hardfloat (mem, operands[1]));
10409 else
10410 gcc_unreachable();
10411 return;
10413 if (reload_in_progress
10414 && mode == SDmode
10415 && REG_P (operands[0])
10416 && MEM_P (operands[1])
10417 && cfun->machine->sdmode_stack_slot != NULL_RTX
10418 && rtx_equal_p (operands[1], cfun->machine->sdmode_stack_slot))
10420 if (FP_REGNO_P (REGNO (operands[0])))
10422 rtx mem = adjust_address_nv (operands[1], DDmode, 0);
10423 mem = eliminate_regs (mem, VOIDmode, NULL_RTX);
10424 emit_insn (gen_movsd_load (operands[0], mem));
10426 else if (INT_REGNO_P (REGNO (operands[0])))
10428 rtx mem = operands[1];
10429 if (BYTES_BIG_ENDIAN)
10430 mem = adjust_address_nv (mem, mode, 4);
10431 mem = eliminate_regs (mem, VOIDmode, NULL_RTX);
10432 emit_insn (gen_movsd_hardfloat (operands[0], mem));
10434 else
10435 gcc_unreachable();
10436 return;
10439 /* FIXME: In the long term, this switch statement should go away
10440 and be replaced by a sequence of tests based on things like
10441 mode == Pmode. */
10442 switch (mode)
10444 case HImode:
10445 case QImode:
10446 if (CONSTANT_P (operands[1])
10447 && GET_CODE (operands[1]) != CONST_INT)
10448 operands[1] = force_const_mem (mode, operands[1]);
10449 break;
10451 case TFmode:
10452 case TDmode:
10453 case IFmode:
10454 case KFmode:
10455 if (FLOAT128_2REG_P (mode))
10456 rs6000_eliminate_indexed_memrefs (operands);
10457 /* fall through */
10459 case DFmode:
10460 case DDmode:
10461 case SFmode:
10462 case SDmode:
10463 if (CONSTANT_P (operands[1])
10464 && ! easy_fp_constant (operands[1], mode))
10465 operands[1] = force_const_mem (mode, operands[1]);
10466 break;
10468 case V16QImode:
10469 case V8HImode:
10470 case V4SFmode:
10471 case V4SImode:
10472 case V4HImode:
10473 case V2SFmode:
10474 case V2SImode:
10475 case V1DImode:
10476 case V2DFmode:
10477 case V2DImode:
10478 case V1TImode:
10479 if (CONSTANT_P (operands[1])
10480 && !easy_vector_constant (operands[1], mode))
10481 operands[1] = force_const_mem (mode, operands[1]);
10482 break;
10484 case SImode:
10485 case DImode:
10486 /* Use the default pattern for the address of ELF small data. */
10487 if (TARGET_ELF
10488 && mode == Pmode
10489 && DEFAULT_ABI == ABI_V4
10490 && (GET_CODE (operands[1]) == SYMBOL_REF
10491 || GET_CODE (operands[1]) == CONST)
10492 && small_data_operand (operands[1], mode))
10494 emit_insn (gen_rtx_SET (operands[0], operands[1]));
10495 return;
10498 if (DEFAULT_ABI == ABI_V4
10499 && mode == Pmode && mode == SImode
10500 && flag_pic == 1 && got_operand (operands[1], mode))
10502 emit_insn (gen_movsi_got (operands[0], operands[1]));
10503 return;
10506 if ((TARGET_ELF || DEFAULT_ABI == ABI_DARWIN)
10507 && TARGET_NO_TOC
10508 && ! flag_pic
10509 && mode == Pmode
10510 && CONSTANT_P (operands[1])
10511 && GET_CODE (operands[1]) != HIGH
10512 && GET_CODE (operands[1]) != CONST_INT)
10514 rtx target = (!can_create_pseudo_p ()
10515 ? operands[0]
10516 : gen_reg_rtx (mode));
10518 /* If this is a function address on -mcall-aixdesc,
10519 convert it to the address of the descriptor. */
10520 if (DEFAULT_ABI == ABI_AIX
10521 && GET_CODE (operands[1]) == SYMBOL_REF
10522 && XSTR (operands[1], 0)[0] == '.')
10524 const char *name = XSTR (operands[1], 0);
10525 rtx new_ref;
10526 while (*name == '.')
10527 name++;
10528 new_ref = gen_rtx_SYMBOL_REF (Pmode, name);
10529 CONSTANT_POOL_ADDRESS_P (new_ref)
10530 = CONSTANT_POOL_ADDRESS_P (operands[1]);
10531 SYMBOL_REF_FLAGS (new_ref) = SYMBOL_REF_FLAGS (operands[1]);
10532 SYMBOL_REF_USED (new_ref) = SYMBOL_REF_USED (operands[1]);
10533 SYMBOL_REF_DATA (new_ref) = SYMBOL_REF_DATA (operands[1]);
10534 operands[1] = new_ref;
10537 if (DEFAULT_ABI == ABI_DARWIN)
10539 #if TARGET_MACHO
10540 if (MACHO_DYNAMIC_NO_PIC_P)
10542 /* Take care of any required data indirection. */
10543 operands[1] = rs6000_machopic_legitimize_pic_address (
10544 operands[1], mode, operands[0]);
10545 if (operands[0] != operands[1])
10546 emit_insn (gen_rtx_SET (operands[0], operands[1]));
10547 return;
10549 #endif
10550 emit_insn (gen_macho_high (target, operands[1]));
10551 emit_insn (gen_macho_low (operands[0], target, operands[1]));
10552 return;
10555 emit_insn (gen_elf_high (target, operands[1]));
10556 emit_insn (gen_elf_low (operands[0], target, operands[1]));
10557 return;
10560 /* If this is a SYMBOL_REF that refers to a constant pool entry,
10561 and we have put it in the TOC, we just need to make a TOC-relative
10562 reference to it. */
10563 if (TARGET_TOC
10564 && GET_CODE (operands[1]) == SYMBOL_REF
10565 && use_toc_relative_ref (operands[1], mode))
10566 operands[1] = create_TOC_reference (operands[1], operands[0]);
10567 else if (mode == Pmode
10568 && CONSTANT_P (operands[1])
10569 && GET_CODE (operands[1]) != HIGH
10570 && ((GET_CODE (operands[1]) != CONST_INT
10571 && ! easy_fp_constant (operands[1], mode))
10572 || (GET_CODE (operands[1]) == CONST_INT
10573 && (num_insns_constant (operands[1], mode)
10574 > (TARGET_CMODEL != CMODEL_SMALL ? 3 : 2)))
10575 || (GET_CODE (operands[0]) == REG
10576 && FP_REGNO_P (REGNO (operands[0]))))
10577 && !toc_relative_expr_p (operands[1], false)
10578 && (TARGET_CMODEL == CMODEL_SMALL
10579 || can_create_pseudo_p ()
10580 || (REG_P (operands[0])
10581 && INT_REG_OK_FOR_BASE_P (operands[0], true))))
10584 #if TARGET_MACHO
10585 /* Darwin uses a special PIC legitimizer. */
10586 if (DEFAULT_ABI == ABI_DARWIN && MACHOPIC_INDIRECT)
10588 operands[1] =
10589 rs6000_machopic_legitimize_pic_address (operands[1], mode,
10590 operands[0]);
10591 if (operands[0] != operands[1])
10592 emit_insn (gen_rtx_SET (operands[0], operands[1]));
10593 return;
10595 #endif
10597 /* If we are to limit the number of things we put in the TOC and
10598 this is a symbol plus a constant we can add in one insn,
10599 just put the symbol in the TOC and add the constant. Don't do
10600 this if reload is in progress. */
10601 if (GET_CODE (operands[1]) == CONST
10602 && TARGET_NO_SUM_IN_TOC && ! reload_in_progress
10603 && GET_CODE (XEXP (operands[1], 0)) == PLUS
10604 && add_operand (XEXP (XEXP (operands[1], 0), 1), mode)
10605 && (GET_CODE (XEXP (XEXP (operands[1], 0), 0)) == LABEL_REF
10606 || GET_CODE (XEXP (XEXP (operands[1], 0), 0)) == SYMBOL_REF)
10607 && ! side_effects_p (operands[0]))
10609 rtx sym =
10610 force_const_mem (mode, XEXP (XEXP (operands[1], 0), 0));
10611 rtx other = XEXP (XEXP (operands[1], 0), 1);
10613 sym = force_reg (mode, sym);
10614 emit_insn (gen_add3_insn (operands[0], sym, other));
10615 return;
10618 operands[1] = force_const_mem (mode, operands[1]);
10620 if (TARGET_TOC
10621 && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
10622 && constant_pool_expr_p (XEXP (operands[1], 0))
10623 && ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (
10624 get_pool_constant (XEXP (operands[1], 0)),
10625 get_pool_mode (XEXP (operands[1], 0))))
10627 rtx tocref = create_TOC_reference (XEXP (operands[1], 0),
10628 operands[0]);
10629 operands[1] = gen_const_mem (mode, tocref);
10630 set_mem_alias_set (operands[1], get_TOC_alias_set ());
10633 break;
10635 case TImode:
10636 if (!VECTOR_MEM_VSX_P (TImode))
10637 rs6000_eliminate_indexed_memrefs (operands);
10638 break;
10640 case PTImode:
10641 rs6000_eliminate_indexed_memrefs (operands);
10642 break;
10644 default:
10645 fatal_insn ("bad move", gen_rtx_SET (dest, source));
10648 /* Above, we may have called force_const_mem which may have returned
10649 an invalid address. If we can, fix this up; otherwise, reload will
10650 have to deal with it. */
10651 if (GET_CODE (operands[1]) == MEM && ! reload_in_progress)
10652 operands[1] = validize_mem (operands[1]);
10654 emit_set:
10655 emit_insn (gen_rtx_SET (operands[0], operands[1]));
10658 /* Return true if a structure, union or array containing FIELD should be
10659 accessed using `BLKmode'.
10661 For the SPE, simd types are V2SI, and gcc can be tempted to put the
10662 entire thing in a DI and use subregs to access the internals.
10663 store_bit_field() will force (subreg:DI (reg:V2SI x))'s to the
10664 back-end. Because a single GPR can hold a V2SI, but not a DI, the
10665 best thing to do is set structs to BLKmode and avoid Severe Tire
10666 Damage.
10668 On e500 v2, DF and DI modes suffer from the same anomaly. DF can
10669 fit into one GPR, whereas DI still needs two. */
10671 static bool
10672 rs6000_member_type_forces_blk (const_tree field, machine_mode mode)
10674 return ((TARGET_SPE && TREE_CODE (TREE_TYPE (field)) == VECTOR_TYPE)
10675 || (TARGET_E500_DOUBLE && mode == DFmode));
10678 /* Nonzero if we can use a floating-point register to pass this arg. */
10679 #define USE_FP_FOR_ARG_P(CUM,MODE) \
10680 (SCALAR_FLOAT_MODE_NOT_VECTOR_P (MODE) \
10681 && (CUM)->fregno <= FP_ARG_MAX_REG \
10682 && TARGET_HARD_FLOAT && TARGET_FPRS)
10684 /* Nonzero if we can use an AltiVec register to pass this arg. */
10685 #define USE_ALTIVEC_FOR_ARG_P(CUM,MODE,NAMED) \
10686 (ALTIVEC_OR_VSX_VECTOR_MODE (MODE) \
10687 && (CUM)->vregno <= ALTIVEC_ARG_MAX_REG \
10688 && TARGET_ALTIVEC_ABI \
10689 && (NAMED))
10691 /* Walk down the type tree of TYPE counting consecutive base elements.
10692 If *MODEP is VOIDmode, then set it to the first valid floating point
10693 or vector type. If a non-floating point or vector type is found, or
10694 if a floating point or vector type that doesn't match a non-VOIDmode
10695 *MODEP is found, then return -1, otherwise return the count in the
10696 sub-tree. */
10698 static int
10699 rs6000_aggregate_candidate (const_tree type, machine_mode *modep)
10701 machine_mode mode;
10702 HOST_WIDE_INT size;
10704 switch (TREE_CODE (type))
10706 case REAL_TYPE:
10707 mode = TYPE_MODE (type);
10708 if (!SCALAR_FLOAT_MODE_P (mode))
10709 return -1;
10711 if (*modep == VOIDmode)
10712 *modep = mode;
10714 if (*modep == mode)
10715 return 1;
10717 break;
10719 case COMPLEX_TYPE:
10720 mode = TYPE_MODE (TREE_TYPE (type));
10721 if (!SCALAR_FLOAT_MODE_P (mode))
10722 return -1;
10724 if (*modep == VOIDmode)
10725 *modep = mode;
10727 if (*modep == mode)
10728 return 2;
10730 break;
10732 case VECTOR_TYPE:
10733 if (!TARGET_ALTIVEC_ABI || !TARGET_ALTIVEC)
10734 return -1;
10736 /* Use V4SImode as representative of all 128-bit vector types. */
10737 size = int_size_in_bytes (type);
10738 switch (size)
10740 case 16:
10741 mode = V4SImode;
10742 break;
10743 default:
10744 return -1;
10747 if (*modep == VOIDmode)
10748 *modep = mode;
10750 /* Vector modes are considered to be opaque: two vectors are
10751 equivalent for the purposes of being homogeneous aggregates
10752 if they are the same size. */
10753 if (*modep == mode)
10754 return 1;
10756 break;
10758 case ARRAY_TYPE:
10760 int count;
10761 tree index = TYPE_DOMAIN (type);
10763 /* Can't handle incomplete types nor sizes that are not
10764 fixed. */
10765 if (!COMPLETE_TYPE_P (type)
10766 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
10767 return -1;
10769 count = rs6000_aggregate_candidate (TREE_TYPE (type), modep);
10770 if (count == -1
10771 || !index
10772 || !TYPE_MAX_VALUE (index)
10773 || !tree_fits_uhwi_p (TYPE_MAX_VALUE (index))
10774 || !TYPE_MIN_VALUE (index)
10775 || !tree_fits_uhwi_p (TYPE_MIN_VALUE (index))
10776 || count < 0)
10777 return -1;
10779 count *= (1 + tree_to_uhwi (TYPE_MAX_VALUE (index))
10780 - tree_to_uhwi (TYPE_MIN_VALUE (index)));
10782 /* There must be no padding. */
10783 if (wi::ne_p (TYPE_SIZE (type), count * GET_MODE_BITSIZE (*modep)))
10784 return -1;
10786 return count;
10789 case RECORD_TYPE:
10791 int count = 0;
10792 int sub_count;
10793 tree field;
10795 /* Can't handle incomplete types nor sizes that are not
10796 fixed. */
10797 if (!COMPLETE_TYPE_P (type)
10798 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
10799 return -1;
10801 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
10803 if (TREE_CODE (field) != FIELD_DECL)
10804 continue;
10806 sub_count = rs6000_aggregate_candidate (TREE_TYPE (field), modep);
10807 if (sub_count < 0)
10808 return -1;
10809 count += sub_count;
10812 /* There must be no padding. */
10813 if (wi::ne_p (TYPE_SIZE (type), count * GET_MODE_BITSIZE (*modep)))
10814 return -1;
10816 return count;
10819 case UNION_TYPE:
10820 case QUAL_UNION_TYPE:
10822 /* These aren't very interesting except in a degenerate case. */
10823 int count = 0;
10824 int sub_count;
10825 tree field;
10827 /* Can't handle incomplete types nor sizes that are not
10828 fixed. */
10829 if (!COMPLETE_TYPE_P (type)
10830 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
10831 return -1;
10833 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
10835 if (TREE_CODE (field) != FIELD_DECL)
10836 continue;
10838 sub_count = rs6000_aggregate_candidate (TREE_TYPE (field), modep);
10839 if (sub_count < 0)
10840 return -1;
10841 count = count > sub_count ? count : sub_count;
10844 /* There must be no padding. */
10845 if (wi::ne_p (TYPE_SIZE (type), count * GET_MODE_BITSIZE (*modep)))
10846 return -1;
10848 return count;
10851 default:
10852 break;
10855 return -1;
10858 /* If an argument whose type is described by TYPE and MODE is a homogeneous
10859 float or vector aggregate that shall be passed in FP/vector registers
10860 according to the ELFv2 ABI, return the homogeneous element mode in
10861 *ELT_MODE and the number of elements in *N_ELTS, and return TRUE.
10863 Otherwise, set *ELT_MODE to MODE and *N_ELTS to 1, and return FALSE. */
10865 static bool
10866 rs6000_discover_homogeneous_aggregate (machine_mode mode, const_tree type,
10867 machine_mode *elt_mode,
10868 int *n_elts)
10870 /* Note that we do not accept complex types at the top level as
10871 homogeneous aggregates; these types are handled via the
10872 targetm.calls.split_complex_arg mechanism. Complex types
10873 can be elements of homogeneous aggregates, however. */
10874 if (DEFAULT_ABI == ABI_ELFv2 && type && AGGREGATE_TYPE_P (type))
10876 machine_mode field_mode = VOIDmode;
10877 int field_count = rs6000_aggregate_candidate (type, &field_mode);
10879 if (field_count > 0)
10881 int n_regs = (SCALAR_FLOAT_MODE_P (field_mode) ?
10882 (GET_MODE_SIZE (field_mode) + 7) >> 3 : 1);
10884 /* The ELFv2 ABI allows homogeneous aggregates to occupy
10885 up to AGGR_ARG_NUM_REG registers. */
10886 if (field_count * n_regs <= AGGR_ARG_NUM_REG)
10888 if (elt_mode)
10889 *elt_mode = field_mode;
10890 if (n_elts)
10891 *n_elts = field_count;
10892 return true;
10897 if (elt_mode)
10898 *elt_mode = mode;
10899 if (n_elts)
10900 *n_elts = 1;
10901 return false;
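/* For example, under the ELFv2 ABI

	struct pt { double x, y, z; };

   is discovered as a homogeneous aggregate of three DFmode elements
   (one FPR each, 3 <= AGGR_ARG_NUM_REG), so it is passed and
   returned in floating-point registers rather than in memory.  */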
10904 /* Return a nonzero value to say to return the function value in
10905 memory, just as large structures are always returned. TYPE will be
10906 the data type of the value, and FNTYPE will be the type of the
10907 function doing the returning, or @code{NULL} for libcalls.
10909 The AIX ABI for the RS/6000 specifies that all structures are
10910 returned in memory. The Darwin ABI does the same.
10912 For the Darwin 64 Bit ABI, a function result can be returned in
10913 registers or in memory, depending on the size of the return data
10914 type. If it is returned in registers, the value occupies the same
10915 registers as it would if it were the first and only function
10916 argument. Otherwise, the function places its result in memory at
10917 the location pointed to by GPR3.
10919 The SVR4 ABI specifies that structures <= 8 bytes are returned in r3/r4,
10920 but a draft put them in memory, and GCC used to implement the draft
10921 instead of the final standard. Therefore, aix_struct_return
10922 controls this instead of DEFAULT_ABI; V.4 targets needing backward
10923 compatibility can change DRAFT_V4_STRUCT_RET to override the
10924 default, and -m switches get the final word. See
10925 rs6000_option_override_internal for more details.
10927 The PPC32 SVR4 ABI uses IEEE double extended for long double, if 128-bit
10928 long double support is enabled. These values are returned in memory.
10930 int_size_in_bytes returns -1 for variable size objects, which go in
10931 memory always. The cast to unsigned makes -1 > 8. */
10933 static bool
10934 rs6000_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
10936 /* For the Darwin64 ABI, test if we can fit the return value in regs. */
10937 if (TARGET_MACHO
10938 && rs6000_darwin64_abi
10939 && TREE_CODE (type) == RECORD_TYPE
10940 && int_size_in_bytes (type) > 0)
10942 CUMULATIVE_ARGS valcum;
10943 rtx valret;
10945 valcum.words = 0;
10946 valcum.fregno = FP_ARG_MIN_REG;
10947 valcum.vregno = ALTIVEC_ARG_MIN_REG;
10948 /* Do a trial code generation as if this were going to be passed
10949 as an argument; if any part goes in memory, we return NULL. */
10950 valret = rs6000_darwin64_record_arg (&valcum, type, true, true);
10951 if (valret)
10952 return false;
10953 /* Otherwise fall through to more conventional ABI rules. */
10956 /* The ELFv2 ABI returns homogeneous FP/vector aggregates in registers. */
10957 if (rs6000_discover_homogeneous_aggregate (TYPE_MODE (type), type,
10958 NULL, NULL))
10959 return false;
10961 /* The ELFv2 ABI returns aggregates of up to 16 bytes in registers. */
10962 if (DEFAULT_ABI == ABI_ELFv2 && AGGREGATE_TYPE_P (type)
10963 && (unsigned HOST_WIDE_INT) int_size_in_bytes (type) <= 16)
10964 return false;
10966 if (AGGREGATE_TYPE_P (type)
10967 && (aix_struct_return
10968 || (unsigned HOST_WIDE_INT) int_size_in_bytes (type) > 8))
10969 return true;
10971 /* Allow -maltivec -mabi=no-altivec without warning. Altivec vector
10972 modes only exist for GCC vector types if -maltivec. */
10973 if (TARGET_32BIT && !TARGET_ALTIVEC_ABI
10974 && ALTIVEC_VECTOR_MODE (TYPE_MODE (type)))
10975 return false;
10977 /* Return synthetic vectors in memory. */
10978 if (TREE_CODE (type) == VECTOR_TYPE
10979 && int_size_in_bytes (type) > (TARGET_ALTIVEC_ABI ? 16 : 8))
10981 static bool warned_for_return_big_vectors = false;
10982 if (!warned_for_return_big_vectors)
10984 warning (OPT_Wpsabi, "GCC vector returned by reference: "
10985 "non-standard ABI extension with no compatibility guarantee");
10986 warned_for_return_big_vectors = true;
10988 return true;
10991 if (DEFAULT_ABI == ABI_V4 && TARGET_IEEEQUAD
10992 && FLOAT128_IEEE_P (TYPE_MODE (type)))
10993 return true;
10995 return false;
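/* Some illustrative cases for the rules above (a sketch, not exhaustive):

     struct s16 { long a, b; };    16 bytes: registers under ELFv2,
                                   memory under AIX/Darwin.
     struct s4 { int a; };         4 bytes: r3 under SVR4, unless
                                   aix_struct_return forces memory.
     typedef int v8si __attribute__ ((vector_size (32)));
                                   32-byte synthetic vector: memory,
                                   with a one-time -Wpsabi warning.  */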
10998 /* Specify whether values returned in registers should be at the most
10999 significant end of a register. We want aggregates returned by
11000 value to match the way aggregates are passed to functions. */
11002 static bool
11003 rs6000_return_in_msb (const_tree valtype)
11005 return (DEFAULT_ABI == ABI_ELFv2
11006 && BYTES_BIG_ENDIAN
11007 && AGGREGATE_TYPE_P (valtype)
11008 && FUNCTION_ARG_PADDING (TYPE_MODE (valtype), valtype) == upward);
11011 #ifdef HAVE_AS_GNU_ATTRIBUTE
11012 /* Return TRUE if a call to function FNDECL may be one that
11013 potentially affects the function calling ABI of the object file. */
11015 static bool
11016 call_ABI_of_interest (tree fndecl)
11018 if (rs6000_gnu_attr && symtab->state == EXPANSION)
11020 struct cgraph_node *c_node;
11022 /* Libcalls are always interesting. */
11023 if (fndecl == NULL_TREE)
11024 return true;
11026 /* Any call to an external function is interesting. */
11027 if (DECL_EXTERNAL (fndecl))
11028 return true;
11030 /* Interesting functions that we are emitting in this object file. */
11031 c_node = cgraph_node::get (fndecl);
11032 c_node = c_node->ultimate_alias_target ();
11033 return !c_node->only_called_directly_p ();
11035 return false;
11037 #endif
11039 /* Initialize a variable CUM of type CUMULATIVE_ARGS
11040 for a call to a function whose data type is FNTYPE.
11041 For a library call, FNTYPE is 0 and RETURN_MODE is the return value mode.
11043 For incoming args we set the number of arguments in the prototype large
11044 so we never return a PARALLEL. */
11046 void
11047 init_cumulative_args (CUMULATIVE_ARGS *cum, tree fntype,
11048 rtx libname ATTRIBUTE_UNUSED, int incoming,
11049 int libcall, int n_named_args,
11050 tree fndecl ATTRIBUTE_UNUSED,
11051 machine_mode return_mode ATTRIBUTE_UNUSED)
11053 static CUMULATIVE_ARGS zero_cumulative;
11055 *cum = zero_cumulative;
11056 cum->words = 0;
11057 cum->fregno = FP_ARG_MIN_REG;
11058 cum->vregno = ALTIVEC_ARG_MIN_REG;
11059 cum->prototype = (fntype && prototype_p (fntype));
11060 cum->call_cookie = ((DEFAULT_ABI == ABI_V4 && libcall)
11061 ? CALL_LIBCALL : CALL_NORMAL);
11062 cum->sysv_gregno = GP_ARG_MIN_REG;
11063 cum->stdarg = stdarg_p (fntype);
11064 cum->libcall = libcall;
11066 cum->nargs_prototype = 0;
11067 if (incoming || cum->prototype)
11068 cum->nargs_prototype = n_named_args;
11070 /* Check for a longcall attribute. */
11071 if ((!fntype && rs6000_default_long_calls)
11072 || (fntype
11073 && lookup_attribute ("longcall", TYPE_ATTRIBUTES (fntype))
11074 && !lookup_attribute ("shortcall", TYPE_ATTRIBUTES (fntype))))
11075 cum->call_cookie |= CALL_LONG;
11077 if (TARGET_DEBUG_ARG)
11079 fprintf (stderr, "\ninit_cumulative_args:");
11080 if (fntype)
11082 tree ret_type = TREE_TYPE (fntype);
11083 fprintf (stderr, " ret code = %s,",
11084 get_tree_code_name (TREE_CODE (ret_type)));
11087 if (cum->call_cookie & CALL_LONG)
11088 fprintf (stderr, " longcall,");
11090 fprintf (stderr, " proto = %d, nargs = %d\n",
11091 cum->prototype, cum->nargs_prototype);
11094 #ifdef HAVE_AS_GNU_ATTRIBUTE
11095 if (TARGET_ELF && (TARGET_64BIT || DEFAULT_ABI == ABI_V4))
11097 cum->escapes = call_ABI_of_interest (fndecl);
11098 if (cum->escapes)
11100 tree return_type;
11102 if (fntype)
11104 return_type = TREE_TYPE (fntype);
11105 return_mode = TYPE_MODE (return_type);
11107 else
11108 return_type = lang_hooks.types.type_for_mode (return_mode, 0);
11110 if (return_type != NULL)
11112 if (TREE_CODE (return_type) == RECORD_TYPE
11113 && TYPE_TRANSPARENT_AGGR (return_type))
11115 return_type = TREE_TYPE (first_field (return_type));
11116 return_mode = TYPE_MODE (return_type);
11118 if (AGGREGATE_TYPE_P (return_type)
11119 && ((unsigned HOST_WIDE_INT) int_size_in_bytes (return_type)
11120 <= 8))
11121 rs6000_returns_struct = true;
11123 if (SCALAR_FLOAT_MODE_P (return_mode))
11125 rs6000_passes_float = true;
11126 if ((HAVE_LD_PPC_GNU_ATTR_LONG_DOUBLE || TARGET_64BIT)
11127 && (FLOAT128_IBM_P (return_mode)
11128 || FLOAT128_IEEE_P (return_mode)
11129 || (return_type != NULL
11130 && (TYPE_MAIN_VARIANT (return_type)
11131 == long_double_type_node))))
11132 rs6000_passes_long_double = true;
11134 if (ALTIVEC_OR_VSX_VECTOR_MODE (return_mode)
11135 || SPE_VECTOR_MODE (return_mode))
11136 rs6000_passes_vector = true;
11139 #endif
11141 if (fntype
11142 && !TARGET_ALTIVEC
11143 && TARGET_ALTIVEC_ABI
11144 && ALTIVEC_VECTOR_MODE (TYPE_MODE (TREE_TYPE (fntype))))
11146 error ("cannot return value in vector register because"
11147 " altivec instructions are disabled, use -maltivec"
11148 " to enable them");
11152 /* The mode the ABI uses for a word. This is not the same as word_mode
11153 for -m32 -mpowerpc64. This is used to implement various target hooks. */
11155 static machine_mode
11156 rs6000_abi_word_mode (void)
11158 return TARGET_32BIT ? SImode : DImode;
11161 /* Implement the TARGET_OFFLOAD_OPTIONS hook. */
11162 static char *
11163 rs6000_offload_options (void)
11165 if (TARGET_64BIT)
11166 return xstrdup ("-foffload-abi=lp64");
11167 else
11168 return xstrdup ("-foffload-abi=ilp32");
11171 /* On rs6000, function arguments are promoted, as are function return
11172 values. */
11174 static machine_mode
11175 rs6000_promote_function_mode (const_tree type ATTRIBUTE_UNUSED,
11176 machine_mode mode,
11177 int *punsignedp ATTRIBUTE_UNUSED,
11178 const_tree, int)
11180 PROMOTE_MODE (mode, *punsignedp, type);
11182 return mode;
11185 /* Return true if TYPE must be passed on the stack and not in registers. */
11187 static bool
11188 rs6000_must_pass_in_stack (machine_mode mode, const_tree type)
11190 if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2 || TARGET_64BIT)
11191 return must_pass_in_stack_var_size (mode, type);
11192 else
11193 return must_pass_in_stack_var_size_or_pad (mode, type);
11196 static inline bool
11197 is_complex_IBM_long_double (machine_mode mode)
11199 return mode == ICmode || (!TARGET_IEEEQUAD && mode == TCmode);
11202 /* Whether ABI_V4 passes MODE args to a function in floating point
11203 registers. */
11205 static bool
11206 abi_v4_pass_in_fpr (machine_mode mode)
11208 if (!TARGET_FPRS || !TARGET_HARD_FLOAT)
11209 return false;
11210 if (TARGET_SINGLE_FLOAT && mode == SFmode)
11211 return true;
11212 if (TARGET_DOUBLE_FLOAT && mode == DFmode)
11213 return true;
11214 /* ABI_V4 passes complex IBM long double in 8 gprs.
11215 Stupid, but we can't change the ABI now. */
11216 if (is_complex_IBM_long_double (mode))
11217 return false;
11218 if (FLOAT128_2REG_P (mode))
11219 return true;
11220 if (DECIMAL_FLOAT_MODE_P (mode))
11221 return true;
11222 return false;
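/* A summary of the decision above, assuming -mhard-float:

     float (SFmode)                  FPR, if TARGET_SINGLE_FLOAT
     double (DFmode)                 FPR, if TARGET_DOUBLE_FLOAT
     IBM long double (2-reg TFmode)  FPR pair
     _Decimal32/64/128               FPR(s)
     complex IBM long double         eight GPRs, never FPRs.  */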
11225 /* If defined, a C expression which determines whether, and in which
11226 direction, to pad out an argument with extra space. The value
11227 should be of type `enum direction': either `upward' to pad above
11228 the argument, `downward' to pad below, or `none' to inhibit
11229 padding.
11231 For the AIX ABI structs are always stored left shifted in their
11232 argument slot. */
11234 enum direction
11235 function_arg_padding (machine_mode mode, const_tree type)
11237 #ifndef AGGREGATE_PADDING_FIXED
11238 #define AGGREGATE_PADDING_FIXED 0
11239 #endif
11240 #ifndef AGGREGATES_PAD_UPWARD_ALWAYS
11241 #define AGGREGATES_PAD_UPWARD_ALWAYS 0
11242 #endif
11244 if (!AGGREGATE_PADDING_FIXED)
11246 /* GCC used to pass structures of the same size as integer types as
11247 if they were in fact integers, ignoring FUNCTION_ARG_PADDING.
11248 i.e. structures of size 1 or 2 (or 4 when TARGET_64BIT) were
11249 passed padded downward, except that -mstrict-align further
11250 muddied the water in that multi-component structures of 2 and 4
11251 bytes in size were passed padded upward.
11253 The following arranges for best compatibility with previous
11254 versions of gcc, but removes the -mstrict-align dependency. */
11255 if (BYTES_BIG_ENDIAN)
11257 HOST_WIDE_INT size = 0;
11259 if (mode == BLKmode)
11261 if (type && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST)
11262 size = int_size_in_bytes (type);
11264 else
11265 size = GET_MODE_SIZE (mode);
11267 if (size == 1 || size == 2 || size == 4)
11268 return downward;
11270 return upward;
11273 if (AGGREGATES_PAD_UPWARD_ALWAYS)
11275 if (type != 0 && AGGREGATE_TYPE_P (type))
11276 return upward;
11279 /* Fall back to the default. */
11280 return DEFAULT_FUNCTION_ARG_PADDING (mode, type);
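/* Worked examples of the big-endian logic above:

     struct { char c; }       size 1: downward (right-justified)
     struct { char c[3]; }    size 3: upward (left-justified)
     struct { int i; }        size 4: downward
     struct { char c[5]; }    size 5: upward  */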
11283 /* If defined, a C expression that gives the alignment boundary, in bits,
11284 of an argument with the specified mode and type. If it is not defined,
11285 PARM_BOUNDARY is used for all arguments.
11287 V.4 wants long longs and doubles to be double word aligned. Just
11288 testing the mode size is a boneheaded way to do this as it means
11289 that other types such as complex int are also double word aligned.
11290 However, we're stuck with this because changing the ABI might break
11291 existing library interfaces.
11293 Doubleword align SPE vectors.
11294 Quadword align Altivec/VSX vectors.
11295 Quadword align large synthetic vector types. */
11297 static unsigned int
11298 rs6000_function_arg_boundary (machine_mode mode, const_tree type)
11300 machine_mode elt_mode;
11301 int n_elts;
11303 rs6000_discover_homogeneous_aggregate (mode, type, &elt_mode, &n_elts);
11305 if (DEFAULT_ABI == ABI_V4
11306 && (GET_MODE_SIZE (mode) == 8
11307 || (TARGET_HARD_FLOAT
11308 && TARGET_FPRS
11309 && !is_complex_IBM_long_double (mode)
11310 && FLOAT128_2REG_P (mode))))
11311 return 64;
11312 else if (FLOAT128_VECTOR_P (mode))
11313 return 128;
11314 else if (SPE_VECTOR_MODE (mode)
11315 || (type && TREE_CODE (type) == VECTOR_TYPE
11316 && int_size_in_bytes (type) >= 8
11317 && int_size_in_bytes (type) < 16))
11318 return 64;
11319 else if (ALTIVEC_OR_VSX_VECTOR_MODE (elt_mode)
11320 || (type && TREE_CODE (type) == VECTOR_TYPE
11321 && int_size_in_bytes (type) >= 16))
11322 return 128;
11324 /* Aggregate types that need > 8 byte alignment are quadword-aligned
11325 in the parameter area in the ELFv2 ABI, and in the AIX ABI unless
11326 -mcompat-align-parm is used. */
11327 if (((DEFAULT_ABI == ABI_AIX && !rs6000_compat_align_parm)
11328 || DEFAULT_ABI == ABI_ELFv2)
11329 && type && TYPE_ALIGN (type) > 64)
11331 /* "Aggregate" means any AGGREGATE_TYPE except for single-element
11332 or homogeneous float/vector aggregates here. We already handled
11333 vector aggregates above, but still need to check for float here. */
11334 bool aggregate_p = (AGGREGATE_TYPE_P (type)
11335 && !SCALAR_FLOAT_MODE_P (elt_mode));
11337 /* We used to check for BLKmode instead of the above aggregate type
11338 check. Warn when this results in any difference to the ABI. */
11339 if (aggregate_p != (mode == BLKmode))
11341 static bool warned;
11342 if (!warned && warn_psabi)
11344 warned = true;
11345 inform (input_location,
11346 "the ABI of passing aggregates with %d-byte alignment"
11347 " has changed in GCC 5",
11348 (int) TYPE_ALIGN (type) / BITS_PER_UNIT);
11352 if (aggregate_p)
11353 return 128;
11356 /* Similar for the Darwin64 ABI. Note that for historical reasons we
11357 implement the "aggregate type" check as a BLKmode check here; this
11358 means certain aggregate types are in fact not aligned. */
11359 if (TARGET_MACHO && rs6000_darwin64_abi
11360 && mode == BLKmode
11361 && type && TYPE_ALIGN (type) > 64)
11362 return 128;
11364 return PARM_BOUNDARY;
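/* Illustrative results of the checks above, in bits (a sketch):

     double under the V.4 ABI                           64
     Altivec vector int, or __float128                  128
     struct { long a[4]; } __attribute__ ((aligned (32)))
       under ELFv2, or AIX without -mcompat-align-parm  128
     most other scalars                                 PARM_BOUNDARY  */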
11367 /* The offset in words to the start of the parameter save area. */
11369 static unsigned int
11370 rs6000_parm_offset (void)
11372 return (DEFAULT_ABI == ABI_V4 ? 2
11373 : DEFAULT_ABI == ABI_ELFv2 ? 4
11374 : 6);
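/* These counts match the fixed stack header that precedes the parameter
   save area: back chain and LR save word for V.4; back chain, CR save,
   LR save and TOC pointer doublewords for ELFv2; AIX inserts two further
   reserved words ahead of the TOC pointer, hence 6.  (A sketch; the ABI
   documents give the authoritative layouts.)  */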
11377 /* For a function parm of MODE and TYPE, return the starting word in
11378 the parameter area. NWORDS of the parameter area are already used. */
11380 static unsigned int
11381 rs6000_parm_start (machine_mode mode, const_tree type,
11382 unsigned int nwords)
11384 unsigned int align;
11386 align = rs6000_function_arg_boundary (mode, type) / PARM_BOUNDARY - 1;
11387 return nwords + (-(rs6000_parm_offset () + nwords) & align);
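/* A worked example, assuming ELFv2 (parm offset 4, PARM_BOUNDARY 64) and
   an argument whose boundary is 128 bits, so align == 128/64 - 1 == 1:

     nwords == 1:  1 + (-(4 + 1) & 1) == 2    (skip one word to align)
     nwords == 2:  2 + (-(4 + 2) & 1) == 2    (already aligned)

   i.e. such arguments always start on an even doubleword of the area.  */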
11390 /* Compute the size (in words) of a function argument. */
11392 static unsigned long
11393 rs6000_arg_size (machine_mode mode, const_tree type)
11395 unsigned long size;
11397 if (mode != BLKmode)
11398 size = GET_MODE_SIZE (mode);
11399 else
11400 size = int_size_in_bytes (type);
11402 if (TARGET_32BIT)
11403 return (size + 3) >> 2;
11404 else
11405 return (size + 7) >> 3;
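/* For example, a 10-byte BLKmode struct occupies (10 + 3) >> 2 == 3 words
   when TARGET_32BIT and (10 + 7) >> 3 == 2 words otherwise; a DFmode
   double takes 2 words and 1 word respectively.  */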
11408 /* Use this to flush pending int fields. */
11410 static void
11411 rs6000_darwin64_record_arg_advance_flush (CUMULATIVE_ARGS *cum,
11412 HOST_WIDE_INT bitpos, int final)
11414 unsigned int startbit, endbit;
11415 int intregs, intoffset;
11416 machine_mode mode;
11418 /* Handle the situations where a float is taking up the first half
11419 of the GPR, and the other half is empty (typically due to
11420 alignment restrictions).  We can detect this by an 8-byte-aligned
11421 int field, or by seeing that this is the final flush for this
11422 argument. Count the word and continue on. */
11423 if (cum->floats_in_gpr == 1
11424 && (cum->intoffset % 64 == 0
11425 || (cum->intoffset == -1 && final)))
11427 cum->words++;
11428 cum->floats_in_gpr = 0;
11431 if (cum->intoffset == -1)
11432 return;
11434 intoffset = cum->intoffset;
11435 cum->intoffset = -1;
11436 cum->floats_in_gpr = 0;
11438 if (intoffset % BITS_PER_WORD != 0)
11440 mode = mode_for_size (BITS_PER_WORD - intoffset % BITS_PER_WORD,
11441 MODE_INT, 0);
11442 if (mode == BLKmode)
11444 /* We couldn't find an appropriate mode, which happens,
11445 e.g., in packed structs when there are 3 bytes to load.
11446 Move intoffset back to the beginning of the word in this
11447 case. */
11448 intoffset = ROUND_DOWN (intoffset, BITS_PER_WORD);
11452 startbit = ROUND_DOWN (intoffset, BITS_PER_WORD);
11453 endbit = ROUND_UP (bitpos, BITS_PER_WORD);
11454 intregs = (endbit - startbit) / BITS_PER_WORD;
11455 cum->words += intregs;
11456 /* words should be unsigned. */
11457 if ((unsigned)cum->words < (endbit/BITS_PER_WORD))
11459 int pad = (endbit/BITS_PER_WORD) - cum->words;
11460 cum->words += pad;
11464 /* The darwin64 ABI calls for us to recurse down through structs,
11465 looking for elements passed in registers. Unfortunately, we have
11466 to track int register count here also because of misalignments
11467 in powerpc alignment mode. */
11469 static void
11470 rs6000_darwin64_record_arg_advance_recurse (CUMULATIVE_ARGS *cum,
11471 const_tree type,
11472 HOST_WIDE_INT startbitpos)
11474 tree f;
11476 for (f = TYPE_FIELDS (type); f ; f = DECL_CHAIN (f))
11477 if (TREE_CODE (f) == FIELD_DECL)
11479 HOST_WIDE_INT bitpos = startbitpos;
11480 tree ftype = TREE_TYPE (f);
11481 machine_mode mode;
11482 if (ftype == error_mark_node)
11483 continue;
11484 mode = TYPE_MODE (ftype);
11486 if (DECL_SIZE (f) != 0
11487 && tree_fits_uhwi_p (bit_position (f)))
11488 bitpos += int_bit_position (f);
11490 /* ??? FIXME: else assume zero offset. */
11492 if (TREE_CODE (ftype) == RECORD_TYPE)
11493 rs6000_darwin64_record_arg_advance_recurse (cum, ftype, bitpos);
11494 else if (USE_FP_FOR_ARG_P (cum, mode))
11496 unsigned n_fpregs = (GET_MODE_SIZE (mode) + 7) >> 3;
11497 rs6000_darwin64_record_arg_advance_flush (cum, bitpos, 0);
11498 cum->fregno += n_fpregs;
11499 /* Single-precision floats present a special problem for
11500 us, because they are smaller than an 8-byte GPR, and so
11501 the structure-packing rules combined with the standard
11502 varargs behavior mean that we want to pack float/float
11503 and float/int combinations into a single register's
11504 space. This is complicated by the arg advance flushing,
11505 which works on arbitrarily large groups of int-type
11506 fields. */
11507 if (mode == SFmode)
11509 if (cum->floats_in_gpr == 1)
11511 /* Two floats in a word; count the word and reset
11512 the float count. */
11513 cum->words++;
11514 cum->floats_in_gpr = 0;
11516 else if (bitpos % 64 == 0)
11518 /* A float at the beginning of an 8-byte word;
11519 count it and put off adjusting cum->words until
11520 we see if an arg advance flush is going to do it
11521 for us. */
11522 cum->floats_in_gpr++;
11524 else
11526 /* The float is at the end of a word, preceded
11527 by integer fields, so the arg advance flush
11528 just above has already set cum->words and
11529 everything is taken care of. */
11532 else
11533 cum->words += n_fpregs;
11535 else if (USE_ALTIVEC_FOR_ARG_P (cum, mode, 1))
11537 rs6000_darwin64_record_arg_advance_flush (cum, bitpos, 0);
11538 cum->vregno++;
11539 cum->words += 2;
11541 else if (cum->intoffset == -1)
11542 cum->intoffset = bitpos;
11546 /* Check for an item that needs to be considered specially under the Darwin
11547 64-bit ABI.  These are record types where the mode is BLK or the structure is
11548 8 bytes in size. */
11549 static int
11550 rs6000_darwin64_struct_check_p (machine_mode mode, const_tree type)
11552 return rs6000_darwin64_abi
11553 && ((mode == BLKmode
11554 && TREE_CODE (type) == RECORD_TYPE
11555 && int_size_in_bytes (type) > 0)
11556 || (type && TREE_CODE (type) == RECORD_TYPE
11557 && int_size_in_bytes (type) == 8)) ? 1 : 0;
11560 /* Update the data in CUM to advance over an argument
11561 of mode MODE and data type TYPE.
11562 (TYPE is null for libcalls where that information may not be available.)
11564 Note that for args passed by reference, function_arg will be called
11565 with MODE and TYPE set to that of the pointer to the arg, not the arg
11566 itself. */
11568 static void
11569 rs6000_function_arg_advance_1 (CUMULATIVE_ARGS *cum, machine_mode mode,
11570 const_tree type, bool named, int depth)
11572 machine_mode elt_mode;
11573 int n_elts;
11575 rs6000_discover_homogeneous_aggregate (mode, type, &elt_mode, &n_elts);
11577 /* Only tick off an argument if we're not recursing. */
11578 if (depth == 0)
11579 cum->nargs_prototype--;
11581 #ifdef HAVE_AS_GNU_ATTRIBUTE
11582 if (TARGET_ELF && (TARGET_64BIT || DEFAULT_ABI == ABI_V4)
11583 && cum->escapes)
11585 if (SCALAR_FLOAT_MODE_P (mode))
11587 rs6000_passes_float = true;
11588 if ((HAVE_LD_PPC_GNU_ATTR_LONG_DOUBLE || TARGET_64BIT)
11589 && (FLOAT128_IBM_P (mode)
11590 || FLOAT128_IEEE_P (mode)
11591 || (type != NULL
11592 && TYPE_MAIN_VARIANT (type) == long_double_type_node)))
11593 rs6000_passes_long_double = true;
11595 if ((named && ALTIVEC_OR_VSX_VECTOR_MODE (mode))
11596 || (SPE_VECTOR_MODE (mode)
11597 && !cum->stdarg
11598 && cum->sysv_gregno <= GP_ARG_MAX_REG))
11599 rs6000_passes_vector = true;
11601 #endif
11603 if (TARGET_ALTIVEC_ABI
11604 && (ALTIVEC_OR_VSX_VECTOR_MODE (elt_mode)
11605 || (type && TREE_CODE (type) == VECTOR_TYPE
11606 && int_size_in_bytes (type) == 16)))
11608 bool stack = false;
11610 if (USE_ALTIVEC_FOR_ARG_P (cum, elt_mode, named))
11612 cum->vregno += n_elts;
11614 if (!TARGET_ALTIVEC)
11615 error ("cannot pass argument in vector register because"
11616 " altivec instructions are disabled, use -maltivec"
11617 " to enable them");
11619 /* PowerPC64 Linux and AIX allocate GPRs for a vector argument
11620 even if it is going to be passed in a vector register.
11621 Darwin does the same for variable-argument functions. */
11622 if (((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
11623 && TARGET_64BIT)
11624 || (cum->stdarg && DEFAULT_ABI != ABI_V4))
11625 stack = true;
11627 else
11628 stack = true;
11630 if (stack)
11632 int align;
11634 /* Vector parameters must be 16-byte aligned. In 32-bit
11635 mode this means we need to take into account the offset
11636 to the parameter save area. In 64-bit mode, they just
11637 have to start on an even word, since the parameter save
11638 area is 16-byte aligned. */
11639 if (TARGET_32BIT)
11640 align = -(rs6000_parm_offset () + cum->words) & 3;
11641 else
11642 align = cum->words & 1;
11643 cum->words += align + rs6000_arg_size (mode, type);
11645 if (TARGET_DEBUG_ARG)
11647 fprintf (stderr, "function_adv: words = %2d, align=%d, ",
11648 cum->words, align);
11649 fprintf (stderr, "nargs = %4d, proto = %d, mode = %4s\n",
11650 cum->nargs_prototype, cum->prototype,
11651 GET_MODE_NAME (mode));
11655 else if (TARGET_SPE_ABI && TARGET_SPE && SPE_VECTOR_MODE (mode)
11656 && !cum->stdarg
11657 && cum->sysv_gregno <= GP_ARG_MAX_REG)
11658 cum->sysv_gregno++;
11660 else if (TARGET_MACHO && rs6000_darwin64_struct_check_p (mode, type))
11662 int size = int_size_in_bytes (type);
11663 /* Variable sized types have size == -1 and are
11664 treated as if consisting entirely of ints.
11665 Pad to 16 byte boundary if needed. */
11666 if (TYPE_ALIGN (type) >= 2 * BITS_PER_WORD
11667 && (cum->words % 2) != 0)
11668 cum->words++;
11669 /* For varargs, we can just go up by the size of the struct. */
11670 if (!named)
11671 cum->words += (size + 7) / 8;
11672 else
11674 /* It is tempting to say int register count just goes up by
11675 sizeof(type)/8, but this is wrong in a case such as
11676 { int; double; int; } [powerpc alignment]. We have to
11677 grovel through the fields for these too. */
11678 cum->intoffset = 0;
11679 cum->floats_in_gpr = 0;
11680 rs6000_darwin64_record_arg_advance_recurse (cum, type, 0);
11681 rs6000_darwin64_record_arg_advance_flush (cum,
11682 size * BITS_PER_UNIT, 1);
11684 if (TARGET_DEBUG_ARG)
11686 fprintf (stderr, "function_adv: words = %2d, align=%d, size=%d",
11687 cum->words, TYPE_ALIGN (type), size);
11688 fprintf (stderr,
11689 "nargs = %4d, proto = %d, mode = %4s (darwin64 abi)\n",
11690 cum->nargs_prototype, cum->prototype,
11691 GET_MODE_NAME (mode));
11694 else if (DEFAULT_ABI == ABI_V4)
11696 if (abi_v4_pass_in_fpr (mode))
11698 /* _Decimal128 must use an even/odd register pair. This assumes
11699 that the register number is odd when fregno is odd. */
11700 if (mode == TDmode && (cum->fregno % 2) == 1)
11701 cum->fregno++;
11703 if (cum->fregno + (FLOAT128_2REG_P (mode) ? 1 : 0)
11704 <= FP_ARG_V4_MAX_REG)
11705 cum->fregno += (GET_MODE_SIZE (mode) + 7) >> 3;
11706 else
11708 cum->fregno = FP_ARG_V4_MAX_REG + 1;
11709 if (mode == DFmode || FLOAT128_IBM_P (mode)
11710 || mode == DDmode || mode == TDmode)
11711 cum->words += cum->words & 1;
11712 cum->words += rs6000_arg_size (mode, type);
11715 else
11717 int n_words = rs6000_arg_size (mode, type);
11718 int gregno = cum->sysv_gregno;
11720 /* Long long and SPE vectors are put in (r3,r4), (r5,r6),
11721 (r7,r8) or (r9,r10). As does any other 2 word item such
11722 as complex int due to a historical mistake. */
11723 if (n_words == 2)
11724 gregno += (1 - gregno) & 1;
11726 /* Multi-reg args are not split between registers and stack. */
11727 if (gregno + n_words - 1 > GP_ARG_MAX_REG)
11729 /* Long long and SPE vectors are aligned on the stack.
11730 So are other 2 word items such as complex int due to
11731 a historical mistake. */
11732 if (n_words == 2)
11733 cum->words += cum->words & 1;
11734 cum->words += n_words;
11737 /* Note: we keep accumulating gregno even after we have started
11738 spilling to the stack; this tells expand_builtin_saveregs that
11739 spilling has in fact begun. */
11740 cum->sysv_gregno = gregno + n_words;
11743 if (TARGET_DEBUG_ARG)
11745 fprintf (stderr, "function_adv: words = %2d, fregno = %2d, ",
11746 cum->words, cum->fregno);
11747 fprintf (stderr, "gregno = %2d, nargs = %4d, proto = %d, ",
11748 cum->sysv_gregno, cum->nargs_prototype, cum->prototype);
11749 fprintf (stderr, "mode = %4s, named = %d\n",
11750 GET_MODE_NAME (mode), named);
11753 else
11755 int n_words = rs6000_arg_size (mode, type);
11756 int start_words = cum->words;
11757 int align_words = rs6000_parm_start (mode, type, start_words);
11759 cum->words = align_words + n_words;
11761 if (SCALAR_FLOAT_MODE_P (elt_mode) && TARGET_HARD_FLOAT && TARGET_FPRS)
11763 /* _Decimal128 must be passed in an even/odd float register pair.
11764 This assumes that the register number is odd when fregno is
11765 odd. */
11766 if (elt_mode == TDmode && (cum->fregno % 2) == 1)
11767 cum->fregno++;
11768 cum->fregno += n_elts * ((GET_MODE_SIZE (elt_mode) + 7) >> 3);
11771 if (TARGET_DEBUG_ARG)
11773 fprintf (stderr, "function_adv: words = %2d, fregno = %2d, ",
11774 cum->words, cum->fregno);
11775 fprintf (stderr, "nargs = %4d, proto = %d, mode = %4s, ",
11776 cum->nargs_prototype, cum->prototype, GET_MODE_NAME (mode));
11777 fprintf (stderr, "named = %d, align = %d, depth = %d\n",
11778 named, align_words - start_words, depth);
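/* A note on the "gregno += (1 - gregno) & 1" idiom in the V.4 code above:
   it rounds GREGNO up to the next odd register number, giving the
   (r3,r4), (r5,r6), (r7,r8), (r9,r10) pairing.  With GP_ARG_MIN_REG == 3:

     gregno == 3 (r3):  (1 - 3) & 1 == 0, stays at r3
     gregno == 4 (r4):  (1 - 4) & 1 == 1, bumps to r5  */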
11783 static void
11784 rs6000_function_arg_advance (cumulative_args_t cum, machine_mode mode,
11785 const_tree type, bool named)
11787 rs6000_function_arg_advance_1 (get_cumulative_args (cum), mode, type, named,
11791 static rtx
11792 spe_build_register_parallel (machine_mode mode, int gregno)
11794 rtx r1, r3, r5, r7;
11796 switch (mode)
11798 case DFmode:
11799 r1 = gen_rtx_REG (DImode, gregno);
11800 r1 = gen_rtx_EXPR_LIST (VOIDmode, r1, const0_rtx);
11801 return gen_rtx_PARALLEL (mode, gen_rtvec (1, r1));
11803 case DCmode:
11804 case TFmode:
11805 r1 = gen_rtx_REG (DImode, gregno);
11806 r1 = gen_rtx_EXPR_LIST (VOIDmode, r1, const0_rtx);
11807 r3 = gen_rtx_REG (DImode, gregno + 2);
11808 r3 = gen_rtx_EXPR_LIST (VOIDmode, r3, GEN_INT (8));
11809 return gen_rtx_PARALLEL (mode, gen_rtvec (2, r1, r3));
11811 case TCmode:
11812 r1 = gen_rtx_REG (DImode, gregno);
11813 r1 = gen_rtx_EXPR_LIST (VOIDmode, r1, const0_rtx);
11814 r3 = gen_rtx_REG (DImode, gregno + 2);
11815 r3 = gen_rtx_EXPR_LIST (VOIDmode, r3, GEN_INT (8));
11816 r5 = gen_rtx_REG (DImode, gregno + 4);
11817 r5 = gen_rtx_EXPR_LIST (VOIDmode, r5, GEN_INT (16));
11818 r7 = gen_rtx_REG (DImode, gregno + 6);
11819 r7 = gen_rtx_EXPR_LIST (VOIDmode, r7, GEN_INT (24));
11820 return gen_rtx_PARALLEL (mode, gen_rtvec (4, r1, r3, r5, r7));
11822 default:
11823 gcc_unreachable ();
11827 /* Determine where to put a SIMD argument on the SPE. */
11828 static rtx
11829 rs6000_spe_function_arg (const CUMULATIVE_ARGS *cum, machine_mode mode,
11830 const_tree type)
11832 int gregno = cum->sysv_gregno;
11834 /* On E500 v2, double arithmetic is done on the full 64-bit GPR, but
11835 doubles are passed and returned in a pair of GPRs for ABI compatibility. */
11836 if (TARGET_E500_DOUBLE && (mode == DFmode || mode == TFmode
11837 || mode == DCmode || mode == TCmode))
11839 int n_words = rs6000_arg_size (mode, type);
11841 /* Doubles go in an odd/even register pair (r5/r6, etc). */
11842 if (mode == DFmode)
11843 gregno += (1 - gregno) & 1;
11845 /* Multi-reg args are not split between registers and stack. */
11846 if (gregno + n_words - 1 > GP_ARG_MAX_REG)
11847 return NULL_RTX;
11849 return spe_build_register_parallel (mode, gregno);
11851 if (cum->stdarg)
11853 int n_words = rs6000_arg_size (mode, type);
11855 /* SPE vectors are put in odd registers. */
11856 if (n_words == 2 && (gregno & 1) == 0)
11857 gregno += 1;
11859 if (gregno + n_words - 1 <= GP_ARG_MAX_REG)
11861 rtx r1, r2;
11862 machine_mode m = SImode;
11864 r1 = gen_rtx_REG (m, gregno);
11865 r1 = gen_rtx_EXPR_LIST (m, r1, const0_rtx);
11866 r2 = gen_rtx_REG (m, gregno + 1);
11867 r2 = gen_rtx_EXPR_LIST (m, r2, GEN_INT (4));
11868 return gen_rtx_PARALLEL (mode, gen_rtvec (2, r1, r2));
11870 else
11871 return NULL_RTX;
11873 else
11875 if (gregno <= GP_ARG_MAX_REG)
11876 return gen_rtx_REG (mode, gregno);
11877 else
11878 return NULL_RTX;
11882 /* A subroutine of rs6000_darwin64_record_arg. Assign the bits of the
11883 structure between cum->intoffset and bitpos to integer registers. */
11885 static void
11886 rs6000_darwin64_record_arg_flush (CUMULATIVE_ARGS *cum,
11887 HOST_WIDE_INT bitpos, rtx rvec[], int *k)
11889 machine_mode mode;
11890 unsigned int regno;
11891 unsigned int startbit, endbit;
11892 int this_regno, intregs, intoffset;
11893 rtx reg;
11895 if (cum->intoffset == -1)
11896 return;
11898 intoffset = cum->intoffset;
11899 cum->intoffset = -1;
11901 /* If this is the trailing part of a word, try to only load that
11902 much into the register. Otherwise load the whole register. Note
11903 that in the latter case we may pick up unwanted bits. It's not a
11904 problem at the moment, but we may wish to revisit this. */
11906 if (intoffset % BITS_PER_WORD != 0)
11908 mode = mode_for_size (BITS_PER_WORD - intoffset % BITS_PER_WORD,
11909 MODE_INT, 0);
11910 if (mode == BLKmode)
11912 /* We couldn't find an appropriate mode, which happens,
11913 e.g., in packed structs when there are 3 bytes to load.
11914 Move intoffset back to the beginning of the word in this
11915 case. */
11916 intoffset = ROUND_DOWN (intoffset, BITS_PER_WORD);
11917 mode = word_mode;
11920 else
11921 mode = word_mode;
11923 startbit = ROUND_DOWN (intoffset, BITS_PER_WORD);
11924 endbit = ROUND_UP (bitpos, BITS_PER_WORD);
11925 intregs = (endbit - startbit) / BITS_PER_WORD;
11926 this_regno = cum->words + intoffset / BITS_PER_WORD;
11928 if (intregs > 0 && intregs > GP_ARG_NUM_REG - this_regno)
11929 cum->use_stack = 1;
11931 intregs = MIN (intregs, GP_ARG_NUM_REG - this_regno);
11932 if (intregs <= 0)
11933 return;
11935 intoffset /= BITS_PER_UNIT;
11936 do
11937 {
11938 regno = GP_ARG_MIN_REG + this_regno;
11939 reg = gen_rtx_REG (mode, regno);
11940 rvec[(*k)++] =
11941 gen_rtx_EXPR_LIST (VOIDmode, reg, GEN_INT (intoffset));
11943 this_regno += 1;
11944 intoffset = (intoffset | (UNITS_PER_WORD-1)) + 1;
11945 mode = word_mode;
11946 intregs -= 1;
11947 }
11948 while (intregs > 0);
11951 /* Recursive workhorse for the following. */
11953 static void
11954 rs6000_darwin64_record_arg_recurse (CUMULATIVE_ARGS *cum, const_tree type,
11955 HOST_WIDE_INT startbitpos, rtx rvec[],
11956 int *k)
11958 tree f;
11960 for (f = TYPE_FIELDS (type); f ; f = DECL_CHAIN (f))
11961 if (TREE_CODE (f) == FIELD_DECL)
11963 HOST_WIDE_INT bitpos = startbitpos;
11964 tree ftype = TREE_TYPE (f);
11965 machine_mode mode;
11966 if (ftype == error_mark_node)
11967 continue;
11968 mode = TYPE_MODE (ftype);
11970 if (DECL_SIZE (f) != 0
11971 && tree_fits_uhwi_p (bit_position (f)))
11972 bitpos += int_bit_position (f);
11974 /* ??? FIXME: else assume zero offset. */
11976 if (TREE_CODE (ftype) == RECORD_TYPE)
11977 rs6000_darwin64_record_arg_recurse (cum, ftype, bitpos, rvec, k);
11978 else if (cum->named && USE_FP_FOR_ARG_P (cum, mode))
11980 unsigned n_fpreg = (GET_MODE_SIZE (mode) + 7) >> 3;
11981 #if 0
11982 switch (mode)
11984 case SCmode: mode = SFmode; break;
11985 case DCmode: mode = DFmode; break;
11986 case TCmode: mode = TFmode; break;
11987 default: break;
11989 #endif
11990 rs6000_darwin64_record_arg_flush (cum, bitpos, rvec, k);
11991 if (cum->fregno + n_fpreg > FP_ARG_MAX_REG + 1)
11993 gcc_assert (cum->fregno == FP_ARG_MAX_REG
11994 && (mode == TFmode || mode == TDmode));
11995 /* Long double or _Decimal128 split over regs and memory. */
11996 mode = DECIMAL_FLOAT_MODE_P (mode) ? DDmode : DFmode;
11997 cum->use_stack = 1;
11999 rvec[(*k)++]
12000 = gen_rtx_EXPR_LIST (VOIDmode,
12001 gen_rtx_REG (mode, cum->fregno++),
12002 GEN_INT (bitpos / BITS_PER_UNIT));
12003 if (FLOAT128_2REG_P (mode))
12004 cum->fregno++;
12006 else if (cum->named && USE_ALTIVEC_FOR_ARG_P (cum, mode, 1))
12008 rs6000_darwin64_record_arg_flush (cum, bitpos, rvec, k);
12009 rvec[(*k)++]
12010 = gen_rtx_EXPR_LIST (VOIDmode,
12011 gen_rtx_REG (mode, cum->vregno++),
12012 GEN_INT (bitpos / BITS_PER_UNIT));
12014 else if (cum->intoffset == -1)
12015 cum->intoffset = bitpos;
12019 /* For the darwin64 ABI, we want to construct a PARALLEL consisting of
12020 the register(s) to be used for each field and subfield of a struct
12021 being passed by value, along with the offset of where the
12022 register's value may be found in the block. FP fields go in FP
12023 register, vector fields go in vector registers, and everything
12024 else goes in int registers, packed as in memory.
12026 This code is also used for function return values. RETVAL indicates
12027 whether this is the case.
12029 Much of this is taken from the SPARC V9 port, which has a similar
12030 calling convention. */
12032 static rtx
12033 rs6000_darwin64_record_arg (CUMULATIVE_ARGS *orig_cum, const_tree type,
12034 bool named, bool retval)
12036 rtx rvec[FIRST_PSEUDO_REGISTER];
12037 int k = 1, kbase = 1;
12038 HOST_WIDE_INT typesize = int_size_in_bytes (type);
12039 /* This is a copy; modifications are not visible to our caller. */
12040 CUMULATIVE_ARGS copy_cum = *orig_cum;
12041 CUMULATIVE_ARGS *cum = &copy_cum;
12043 /* Pad to 16 byte boundary if needed. */
12044 if (!retval && TYPE_ALIGN (type) >= 2 * BITS_PER_WORD
12045 && (cum->words % 2) != 0)
12046 cum->words++;
12048 cum->intoffset = 0;
12049 cum->use_stack = 0;
12050 cum->named = named;
12052 /* Put entries into rvec[] for individual FP and vector fields, and
12053 for the chunks of memory that go in int regs. Note we start at
12054 element 1; 0 is reserved for an indication of using memory, and
12055 may or may not be filled in below. */
12056 rs6000_darwin64_record_arg_recurse (cum, type, /* startbit pos= */ 0, rvec, &k);
12057 rs6000_darwin64_record_arg_flush (cum, typesize * BITS_PER_UNIT, rvec, &k);
12059 /* If any part of the struct went on the stack put all of it there.
12060 This hack is because the generic code for
12061 FUNCTION_ARG_PARTIAL_NREGS cannot handle cases where the register
12062 parts of the struct are not at the beginning. */
12063 if (cum->use_stack)
12065 if (retval)
12066 return NULL_RTX; /* doesn't go in registers at all */
12067 kbase = 0;
12068 rvec[0] = gen_rtx_EXPR_LIST (VOIDmode, NULL_RTX, const0_rtx);
12070 if (k > 1 || cum->use_stack)
12071 return gen_rtx_PARALLEL (BLKmode, gen_rtvec_v (k - kbase, &rvec[kbase]));
12072 else
12073 return NULL_RTX;
12076 /* Determine where to place an argument in 64-bit mode with 32-bit ABI. */
12078 static rtx
12079 rs6000_mixed_function_arg (machine_mode mode, const_tree type,
12080 int align_words)
12082 int n_units;
12083 int i, k;
12084 rtx rvec[GP_ARG_NUM_REG + 1];
12086 if (align_words >= GP_ARG_NUM_REG)
12087 return NULL_RTX;
12089 n_units = rs6000_arg_size (mode, type);
12091 /* Optimize the simple case where the arg fits in one gpr, except in
12092 the case of BLKmode due to assign_parms assuming that registers are
12093 BITS_PER_WORD wide. */
12094 if (n_units == 0
12095 || (n_units == 1 && mode != BLKmode))
12096 return gen_rtx_REG (mode, GP_ARG_MIN_REG + align_words);
12098 k = 0;
12099 if (align_words + n_units > GP_ARG_NUM_REG)
12100 /* Not all of the arg fits in gprs. Say that it goes in memory too,
12101 using a magic NULL_RTX component.
12102 This is not strictly correct. Only some of the arg belongs in
12103 memory, not all of it. However, the normal scheme using
12104 function_arg_partial_nregs can result in unusual subregs, eg.
12105 (subreg:SI (reg:DF) 4), which are not handled well. The code to
12106 store the whole arg to memory is often more efficient than code
12107 to store pieces, and we know that space is available in the right
12108 place for the whole arg. */
12109 rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, NULL_RTX, const0_rtx);
12111 i = 0;
12112 do
12113 {
12114 rtx r = gen_rtx_REG (SImode, GP_ARG_MIN_REG + align_words);
12115 rtx off = GEN_INT (i++ * 4);
12116 rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, r, off);
12117 }
12118 while (++align_words < GP_ARG_NUM_REG && --n_units != 0);
12120 return gen_rtx_PARALLEL (mode, gen_rtvec_v (k, rvec));
12123 /* We have an argument of MODE and TYPE that goes into FPRs or VRs,
12124 but must also be copied into the parameter save area starting at
12125 offset ALIGN_WORDS. Fill in RVEC with the elements corresponding
12126 to the GPRs and/or memory. Return the number of elements used. */
12128 static int
12129 rs6000_psave_function_arg (machine_mode mode, const_tree type,
12130 int align_words, rtx *rvec)
12132 int k = 0;
12134 if (align_words < GP_ARG_NUM_REG)
12136 int n_words = rs6000_arg_size (mode, type);
12138 if (align_words + n_words > GP_ARG_NUM_REG
12139 || mode == BLKmode
12140 || (TARGET_32BIT && TARGET_POWERPC64))
12142 /* If this is partially on the stack, then we only
12143 include the portion actually in registers here. */
12144 machine_mode rmode = TARGET_32BIT ? SImode : DImode;
12145 int i = 0;
12147 if (align_words + n_words > GP_ARG_NUM_REG)
12149 /* Not all of the arg fits in gprs. Say that it goes in memory
12150 too, using a magic NULL_RTX component. Also see comment in
12151 rs6000_mixed_function_arg for why the normal
12152 function_arg_partial_nregs scheme doesn't work in this case. */
12153 rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, NULL_RTX, const0_rtx);
12156 do
12157 {
12158 rtx r = gen_rtx_REG (rmode, GP_ARG_MIN_REG + align_words);
12159 rtx off = GEN_INT (i++ * GET_MODE_SIZE (rmode));
12160 rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, r, off);
12161 }
12162 while (++align_words < GP_ARG_NUM_REG && --n_words != 0);
12164 else
12166 /* The whole arg fits in gprs. */
12167 rtx r = gen_rtx_REG (mode, GP_ARG_MIN_REG + align_words);
12168 rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, r, const0_rtx);
12171 else
12173 /* It's entirely in memory. */
12174 rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, NULL_RTX, const0_rtx);
12177 return k;
12180 /* RVEC is a vector of K components of an argument of mode MODE.
12181 Construct the final function_arg return value from it. */
12183 static rtx
12184 rs6000_finish_function_arg (machine_mode mode, rtx *rvec, int k)
12186 gcc_assert (k >= 1);
12188 /* Avoid returning a PARALLEL in the trivial cases. */
12189 if (k == 1)
12191 if (XEXP (rvec[0], 0) == NULL_RTX)
12192 return NULL_RTX;
12194 if (GET_MODE (XEXP (rvec[0], 0)) == mode)
12195 return XEXP (rvec[0], 0);
12198 return gen_rtx_PARALLEL (mode, gen_rtvec_v (k, rvec));
12201 /* Determine where to put an argument to a function.
12202 Value is zero to push the argument on the stack,
12203 or a hard register in which to store the argument.
12205 MODE is the argument's machine mode.
12206 TYPE is the data type of the argument (as a tree).
12207 This is null for libcalls where that information may
12208 not be available.
12209 CUM is a variable of type CUMULATIVE_ARGS which gives info about
12210 the preceding args and about the function being called. It is
12211 not modified in this routine.
12212 NAMED is nonzero if this argument is a named parameter
12213 (otherwise it is an extra parameter matching an ellipsis).
12215 On RS/6000 the first eight words of non-FP are normally in registers
12216 and the rest are pushed. Under AIX, the first 13 FP args are in registers.
12217 Under V.4, the first 8 FP args are in registers.
12219 If this is floating-point and no prototype is specified, we use
12220 both an FP and integer register (or possibly FP reg and stack). Library
12221 functions (when CALL_LIBCALL is set) always have the proper types for args,
12222 so we can pass the FP value just in one register.  emit_library_call
12223 doesn't support PARALLEL anyway.
12225 Note that for args passed by reference, function_arg will be called
12226 with MODE and TYPE set to that of the pointer to the arg, not the arg
12227 itself. */
12229 static rtx
12230 rs6000_function_arg (cumulative_args_t cum_v, machine_mode mode,
12231 const_tree type, bool named)
12233 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
12234 enum rs6000_abi abi = DEFAULT_ABI;
12235 machine_mode elt_mode;
12236 int n_elts;
12238 /* Return a marker indicating whether the CR1 bit that V.4 uses to say
12239 FP args were passed in registers needs to be set or cleared.
12240 Assume that we don't need the marker for software floating point,
12241 or compiler generated library calls. */
12242 if (mode == VOIDmode)
12244 if (abi == ABI_V4
12245 && (cum->call_cookie & CALL_LIBCALL) == 0
12246 && (cum->stdarg
12247 || (cum->nargs_prototype < 0
12248 && (cum->prototype || TARGET_NO_PROTOTYPE))))
12250 /* For the SPE, we need to crxor CR6 always. */
12251 if (TARGET_SPE_ABI)
12252 return GEN_INT (cum->call_cookie | CALL_V4_SET_FP_ARGS);
12253 else if (TARGET_HARD_FLOAT && TARGET_FPRS)
12254 return GEN_INT (cum->call_cookie
12255 | ((cum->fregno == FP_ARG_MIN_REG)
12256 ? CALL_V4_SET_FP_ARGS
12257 : CALL_V4_CLEAR_FP_ARGS));
12260 return GEN_INT (cum->call_cookie & ~CALL_LIBCALL);
12263 rs6000_discover_homogeneous_aggregate (mode, type, &elt_mode, &n_elts);
12265 if (TARGET_MACHO && rs6000_darwin64_struct_check_p (mode, type))
12267 rtx rslt = rs6000_darwin64_record_arg (cum, type, named, /*retval= */false);
12268 if (rslt != NULL_RTX)
12269 return rslt;
12270 /* Else fall through to usual handling. */
12273 if (USE_ALTIVEC_FOR_ARG_P (cum, elt_mode, named))
12275 rtx rvec[GP_ARG_NUM_REG + AGGR_ARG_NUM_REG + 1];
12276 rtx r, off;
12277 int i, k = 0;
12279 /* Do we also need to pass this argument in the parameter save area?
12280 Library support functions for IEEE 128-bit are assumed to not need the
12281 value passed both in GPRs and in vector registers. */
12282 if (TARGET_64BIT && !cum->prototype
12283 && (!cum->libcall || !FLOAT128_VECTOR_P (elt_mode)))
12285 int align_words = ROUND_UP (cum->words, 2);
12286 k = rs6000_psave_function_arg (mode, type, align_words, rvec);
12289 /* Describe where this argument goes in the vector registers. */
12290 for (i = 0; i < n_elts && cum->vregno + i <= ALTIVEC_ARG_MAX_REG; i++)
12292 r = gen_rtx_REG (elt_mode, cum->vregno + i);
12293 off = GEN_INT (i * GET_MODE_SIZE (elt_mode));
12294 rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, r, off);
12297 return rs6000_finish_function_arg (mode, rvec, k);
12299 else if (TARGET_ALTIVEC_ABI
12300 && (ALTIVEC_OR_VSX_VECTOR_MODE (mode)
12301 || (type && TREE_CODE (type) == VECTOR_TYPE
12302 && int_size_in_bytes (type) == 16)))
12304 if (named || abi == ABI_V4)
12305 return NULL_RTX;
12306 else
12308 /* Vector parameters to varargs functions under AIX or Darwin
12309 get passed in memory and possibly also in GPRs. */
12310 int align, align_words, n_words;
12311 machine_mode part_mode;
12313 /* Vector parameters must be 16-byte aligned. In 32-bit
12314 mode this means we need to take into account the offset
12315 to the parameter save area. In 64-bit mode, they just
12316 have to start on an even word, since the parameter save
12317 area is 16-byte aligned. */
12318 if (TARGET_32BIT)
12319 align = -(rs6000_parm_offset () + cum->words) & 3;
12320 else
12321 align = cum->words & 1;
12322 align_words = cum->words + align;
12324 /* Out of registers? Memory, then. */
12325 if (align_words >= GP_ARG_NUM_REG)
12326 return NULL_RTX;
12328 if (TARGET_32BIT && TARGET_POWERPC64)
12329 return rs6000_mixed_function_arg (mode, type, align_words);
12331 /* The vector value goes in GPRs. Only the part of the
12332 value in GPRs is reported here. */
12333 part_mode = mode;
12334 n_words = rs6000_arg_size (mode, type);
12335 if (align_words + n_words > GP_ARG_NUM_REG)
12336 /* Fortunately, there are only two possibilities, the value
12337 is either wholly in GPRs or half in GPRs and half not. */
12338 part_mode = DImode;
12340 return gen_rtx_REG (part_mode, GP_ARG_MIN_REG + align_words);
12343 else if (TARGET_SPE_ABI && TARGET_SPE
12344 && (SPE_VECTOR_MODE (mode)
12345 || (TARGET_E500_DOUBLE && (mode == DFmode
12346 || mode == DCmode
12347 || mode == TFmode
12348 || mode == TCmode))))
12349 return rs6000_spe_function_arg (cum, mode, type);
12351 else if (abi == ABI_V4)
12353 if (abi_v4_pass_in_fpr (mode))
12355 /* _Decimal128 must use an even/odd register pair. This assumes
12356 that the register number is odd when fregno is odd. */
12357 if (mode == TDmode && (cum->fregno % 2) == 1)
12358 cum->fregno++;
12360 if (cum->fregno + (FLOAT128_2REG_P (mode) ? 1 : 0)
12361 <= FP_ARG_V4_MAX_REG)
12362 return gen_rtx_REG (mode, cum->fregno);
12363 else
12364 return NULL_RTX;
12366 else
12368 int n_words = rs6000_arg_size (mode, type);
12369 int gregno = cum->sysv_gregno;
12371 /* Long long and SPE vectors are put in (r3,r4), (r5,r6),
12372 (r7,r8) or (r9,r10). As does any other 2 word item such
12373 as complex int due to a historical mistake. */
12374 if (n_words == 2)
12375 gregno += (1 - gregno) & 1;
12377 /* Multi-reg args are not split between registers and stack. */
12378 if (gregno + n_words - 1 > GP_ARG_MAX_REG)
12379 return NULL_RTX;
12381 if (TARGET_32BIT && TARGET_POWERPC64)
12382 return rs6000_mixed_function_arg (mode, type,
12383 gregno - GP_ARG_MIN_REG);
12384 return gen_rtx_REG (mode, gregno);
12387 else
12389 int align_words = rs6000_parm_start (mode, type, cum->words);
12391 /* _Decimal128 must be passed in an even/odd float register pair.
12392 This assumes that the register number is odd when fregno is odd. */
12393 if (elt_mode == TDmode && (cum->fregno % 2) == 1)
12394 cum->fregno++;
12396 if (USE_FP_FOR_ARG_P (cum, elt_mode))
12398 rtx rvec[GP_ARG_NUM_REG + AGGR_ARG_NUM_REG + 1];
12399 rtx r, off;
12400 int i, k = 0;
12401 unsigned long n_fpreg = (GET_MODE_SIZE (elt_mode) + 7) >> 3;
12402 int fpr_words;
12404 /* Do we also need to pass this argument in the parameter
12405 save area? */
12406 if (type && (cum->nargs_prototype <= 0
12407 || ((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
12408 && TARGET_XL_COMPAT
12409 && align_words >= GP_ARG_NUM_REG)))
12410 k = rs6000_psave_function_arg (mode, type, align_words, rvec);
12412 /* Describe where this argument goes in the fprs. */
12413 for (i = 0; i < n_elts
12414 && cum->fregno + i * n_fpreg <= FP_ARG_MAX_REG; i++)
12416 /* Check if the argument is split over registers and memory.
12417 This can only ever happen for long double or _Decimal128;
12418 complex types are handled via split_complex_arg. */
12419 machine_mode fmode = elt_mode;
12420 if (cum->fregno + (i + 1) * n_fpreg > FP_ARG_MAX_REG + 1)
12422 gcc_assert (FLOAT128_2REG_P (fmode));
12423 fmode = DECIMAL_FLOAT_MODE_P (fmode) ? DDmode : DFmode;
12426 r = gen_rtx_REG (fmode, cum->fregno + i * n_fpreg);
12427 off = GEN_INT (i * GET_MODE_SIZE (elt_mode));
12428 rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, r, off);
12431 /* If there were not enough FPRs to hold the argument, the rest
12432 usually goes into memory. However, if the current position
12433 is still within the register parameter area, a portion may
12434 actually have to go into GPRs.
12436 Note that it may happen that the portion of the argument
12437 passed in the first "half" of the first GPR was already
12438 passed in the last FPR as well.
12440 For unnamed arguments, we already set up GPRs to cover the
12441 whole argument in rs6000_psave_function_arg, so there is
12442 nothing further to do at this point. */
12443 fpr_words = (i * GET_MODE_SIZE (elt_mode)) / (TARGET_32BIT ? 4 : 8);
12444 if (i < n_elts && align_words + fpr_words < GP_ARG_NUM_REG
12445 && cum->nargs_prototype > 0)
12447 static bool warned;
12449 machine_mode rmode = TARGET_32BIT ? SImode : DImode;
12450 int n_words = rs6000_arg_size (mode, type);
12452 align_words += fpr_words;
12453 n_words -= fpr_words;
12455 do
12456 {
12457 r = gen_rtx_REG (rmode, GP_ARG_MIN_REG + align_words);
12458 off = GEN_INT (fpr_words++ * GET_MODE_SIZE (rmode));
12459 rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, r, off);
12460 }
12461 while (++align_words < GP_ARG_NUM_REG && --n_words != 0);
12463 if (!warned && warn_psabi)
12465 warned = true;
12466 inform (input_location,
12467 "the ABI of passing homogeneous float aggregates"
12468 " has changed in GCC 5");
12472 return rs6000_finish_function_arg (mode, rvec, k);
12474 else if (align_words < GP_ARG_NUM_REG)
12476 if (TARGET_32BIT && TARGET_POWERPC64)
12477 return rs6000_mixed_function_arg (mode, type, align_words);
12479 return gen_rtx_REG (mode, GP_ARG_MIN_REG + align_words);
12481 else
12482 return NULL_RTX;
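/* A note on the even/odd rule used twice above: _Decimal128 must occupy
   an aligned FPR pair, so if the next free FPR has an odd number (e.g.
   f1, the first argument register), it is skipped and a TDmode value is
   placed in (f2,f3) rather than straddling (f1,f2).  */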
12486 /* For an arg passed partly in registers and partly in memory, this is
12487 the number of bytes passed in registers. For args passed entirely in
12488 registers or entirely in memory, zero. When an arg is described by a
12489 PARALLEL, perhaps using more than one register type, this function
12490 returns the number of bytes used by the first element of the PARALLEL. */
12492 static int
12493 rs6000_arg_partial_bytes (cumulative_args_t cum_v, machine_mode mode,
12494 tree type, bool named)
12496 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
12497 bool passed_in_gprs = true;
12498 int ret = 0;
12499 int align_words;
12500 machine_mode elt_mode;
12501 int n_elts;
12503 rs6000_discover_homogeneous_aggregate (mode, type, &elt_mode, &n_elts);
12505 if (DEFAULT_ABI == ABI_V4)
12506 return 0;
12508 if (USE_ALTIVEC_FOR_ARG_P (cum, elt_mode, named))
12510 /* If we are passing this arg in the fixed parameter save area (gprs or
12511 memory) as well as VRs, we do not use the partial bytes mechanism;
12512 instead, rs6000_function_arg will return a PARALLEL including a memory
12513 element as necessary. Library support functions for IEEE 128-bit are
12514 assumed to not need the value passed both in GPRs and in vector
12515 registers. */
12516 if (TARGET_64BIT && !cum->prototype
12517 && (!cum->libcall || !FLOAT128_VECTOR_P (elt_mode)))
12518 return 0;
12520 /* Otherwise, we pass in VRs only. Check for partial copies. */
12521 passed_in_gprs = false;
12522 if (cum->vregno + n_elts > ALTIVEC_ARG_MAX_REG + 1)
12523 ret = (ALTIVEC_ARG_MAX_REG + 1 - cum->vregno) * 16;
12526 /* In this complicated case we just disable the partial_nregs code. */
12527 if (TARGET_MACHO && rs6000_darwin64_struct_check_p (mode, type))
12528 return 0;
12530 align_words = rs6000_parm_start (mode, type, cum->words);
12532 if (USE_FP_FOR_ARG_P (cum, elt_mode))
12534 unsigned long n_fpreg = (GET_MODE_SIZE (elt_mode) + 7) >> 3;
12536 /* If we are passing this arg in the fixed parameter save area
12537 (gprs or memory) as well as FPRs, we do not use the partial
12538 bytes mechanism; instead, rs6000_function_arg will return a
12539 PARALLEL including a memory element as necessary. */
12540 if (type
12541 && (cum->nargs_prototype <= 0
12542 || ((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
12543 && TARGET_XL_COMPAT
12544 && align_words >= GP_ARG_NUM_REG)))
12545 return 0;
12547 /* Otherwise, we pass in FPRs only. Check for partial copies. */
12548 passed_in_gprs = false;
12549 if (cum->fregno + n_elts * n_fpreg > FP_ARG_MAX_REG + 1)
12551 /* Compute number of bytes / words passed in FPRs. If there
12552 is still space available in the register parameter area
12553 *after* that amount, a part of the argument will be passed
12554 in GPRs. In that case, the total amount passed in any
12555 registers is equal to the amount that would have been passed
12556 in GPRs if everything were passed there, so we fall back to
12557 the GPR code below to compute the appropriate value. */
12558 int fpr = ((FP_ARG_MAX_REG + 1 - cum->fregno)
12559 * MIN (8, GET_MODE_SIZE (elt_mode)));
12560 int fpr_words = fpr / (TARGET_32BIT ? 4 : 8);
12562 if (align_words + fpr_words < GP_ARG_NUM_REG)
12563 passed_in_gprs = true;
12564 else
12565 ret = fpr;
12569 if (passed_in_gprs
12570 && align_words < GP_ARG_NUM_REG
12571 && GP_ARG_NUM_REG < align_words + rs6000_arg_size (mode, type))
12572 ret = (GP_ARG_NUM_REG - align_words) * (TARGET_32BIT ? 4 : 8);
12574 if (ret != 0 && TARGET_DEBUG_ARG)
12575 fprintf (stderr, "rs6000_arg_partial_bytes: %d\n", ret);
12577 return ret;
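/* A worked example of the GPR tail computation above, for 64-bit AIX
   (GP_ARG_NUM_REG == 8): a 24-byte BLKmode struct whose first word falls
   in r10 has align_words == 7, so 8 bytes travel in a register and 16 go
   on the stack, giving ret == (8 - 7) * 8 == 8.  */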
12580 /* A C expression that indicates when an argument must be passed by
12581 reference. If nonzero for an argument, a copy of that argument is
12582 made in memory and a pointer to the argument is passed instead of
12583 the argument itself. The pointer is passed in whatever way is
12584 appropriate for passing a pointer to that type.
12586 Under V.4, aggregates and long double are passed by reference.
12588 As an extension to all 32-bit ABIs, AltiVec vectors are passed by
12589 reference unless the AltiVec vector extension ABI is in force.
12591 As an extension to all ABIs, variable sized types are passed by
12592 reference. */
12594 static bool
12595 rs6000_pass_by_reference (cumulative_args_t cum ATTRIBUTE_UNUSED,
12596 machine_mode mode, const_tree type,
12597 bool named ATTRIBUTE_UNUSED)
12599 if (!type)
12600 return 0;
12602 if (DEFAULT_ABI == ABI_V4 && TARGET_IEEEQUAD
12603 && FLOAT128_IEEE_P (TYPE_MODE (type)))
12605 if (TARGET_DEBUG_ARG)
12606 fprintf (stderr, "function_arg_pass_by_reference: V4 IEEE 128-bit\n");
12607 return 1;
12610 if (DEFAULT_ABI == ABI_V4 && AGGREGATE_TYPE_P (type))
12612 if (TARGET_DEBUG_ARG)
12613 fprintf (stderr, "function_arg_pass_by_reference: V4 aggregate\n");
12614 return 1;
12617 if (int_size_in_bytes (type) < 0)
12619 if (TARGET_DEBUG_ARG)
12620 fprintf (stderr, "function_arg_pass_by_reference: variable size\n");
12621 return 1;
12624 /* Allow -maltivec -mabi=no-altivec without warning. Altivec vector
12625 modes only exist for GCC vector types if -maltivec. */
12626 if (TARGET_32BIT && !TARGET_ALTIVEC_ABI && ALTIVEC_VECTOR_MODE (mode))
12628 if (TARGET_DEBUG_ARG)
12629 fprintf (stderr, "function_arg_pass_by_reference: AltiVec\n");
12630 return 1;
12633 /* Pass synthetic vectors in memory. */
12634 if (TREE_CODE (type) == VECTOR_TYPE
12635 && int_size_in_bytes (type) > (TARGET_ALTIVEC_ABI ? 16 : 8))
12637 static bool warned_for_pass_big_vectors = false;
12638 if (TARGET_DEBUG_ARG)
12639 fprintf (stderr, "function_arg_pass_by_reference: synthetic vector\n");
12640 if (!warned_for_pass_big_vectors)
12642 warning (OPT_Wpsabi, "GCC vector passed by reference: "
12643 "non-standard ABI extension with no compatibility guarantee");
12644 warned_for_pass_big_vectors = true;
12646 return 1;
12649 return 0;
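/* Examples of the rules above (a sketch):

     any aggregate under V.4                        by reference
     IEEE 128-bit long double under V.4
       (-mabi=ieeelongdouble)                       by reference
     typedef int v8si __attribute__ ((vector_size (32)));
       v8si under any of these ABIs                 by reference,
                                                    with a -Wpsabi warning  */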
12652 /* Process parameter of type TYPE after ARGS_SO_FAR parameters were
12653 already processed.  Return true if the parameter must be passed
12654 (fully or partially) on the stack. */
12656 static bool
12657 rs6000_parm_needs_stack (cumulative_args_t args_so_far, tree type)
12659 machine_mode mode;
12660 int unsignedp;
12661 rtx entry_parm;
12663 /* Catch errors. */
12664 if (type == NULL || type == error_mark_node)
12665 return true;
12667 /* Handle types with no storage requirement. */
12668 if (TYPE_MODE (type) == VOIDmode)
12669 return false;
12671 /* Handle complex types: the two components are passed separately
(via split_complex_arg), so advance past the element type twice. */
12672 if (TREE_CODE (type) == COMPLEX_TYPE)
12673 return (rs6000_parm_needs_stack (args_so_far, TREE_TYPE (type))
12674 || rs6000_parm_needs_stack (args_so_far, TREE_TYPE (type)));
12676 /* Handle transparent aggregates. */
12677 if ((TREE_CODE (type) == UNION_TYPE || TREE_CODE (type) == RECORD_TYPE)
12678 && TYPE_TRANSPARENT_AGGR (type))
12679 type = TREE_TYPE (first_field (type));
12681 /* See if this arg was passed by invisible reference. */
12682 if (pass_by_reference (get_cumulative_args (args_so_far),
12683 TYPE_MODE (type), type, true))
12684 type = build_pointer_type (type);
12686 /* Find mode as it is passed by the ABI. */
12687 unsignedp = TYPE_UNSIGNED (type);
12688 mode = promote_mode (type, TYPE_MODE (type), &unsignedp);
12690 /* If we must pass in stack, we need a stack. */
12691 if (rs6000_must_pass_in_stack (mode, type))
12692 return true;
12694 /* If there is no incoming register, we need a stack. */
12695 entry_parm = rs6000_function_arg (args_so_far, mode, type, true);
12696 if (entry_parm == NULL)
12697 return true;
12699 /* Likewise if we need to pass both in registers and on the stack. */
12700 if (GET_CODE (entry_parm) == PARALLEL
12701 && XEXP (XVECEXP (entry_parm, 0, 0), 0) == NULL_RTX)
12702 return true;
12704 /* Also true if we're partially in registers and partially not. */
12705 if (rs6000_arg_partial_bytes (args_so_far, mode, type, true) != 0)
12706 return true;
12708 /* Update info on where next arg arrives in registers. */
12709 rs6000_function_arg_advance (args_so_far, mode, type, true);
12710 return false;
12713 /* Return true if FUN has no prototype, has a variable argument
12714 list, or passes any parameter in memory. */
12716 static bool
12717 rs6000_function_parms_need_stack (tree fun, bool incoming)
12719 tree fntype, result;
12720 CUMULATIVE_ARGS args_so_far_v;
12721 cumulative_args_t args_so_far;
12723 if (!fun)
12724 /* Must be a libcall; libcalls only use register parms. */
12725 return false;
12727 fntype = fun;
12728 if (!TYPE_P (fun))
12729 fntype = TREE_TYPE (fun);
12731 /* Varargs functions need the parameter save area. */
12732 if ((!incoming && !prototype_p (fntype)) || stdarg_p (fntype))
12733 return true;
12735 INIT_CUMULATIVE_INCOMING_ARGS (args_so_far_v, fntype, NULL_RTX);
12736 args_so_far = pack_cumulative_args (&args_so_far_v);
12738 /* When incoming, we will have been passed the function decl.
12739 It is necessary to use the decl to handle K&R style functions,
12740 where TYPE_ARG_TYPES may not be available. */
12741 if (incoming)
12743 gcc_assert (DECL_P (fun));
12744 result = DECL_RESULT (fun);
12746 else
12747 result = TREE_TYPE (fntype);
12749 if (result && aggregate_value_p (result, fntype))
12751 if (!TYPE_P (result))
12752 result = TREE_TYPE (result);
12753 result = build_pointer_type (result);
12754 rs6000_parm_needs_stack (args_so_far, result);
12757 if (incoming)
12759 tree parm;
12761 for (parm = DECL_ARGUMENTS (fun);
12762 parm && parm != void_list_node;
12763 parm = TREE_CHAIN (parm))
12764 if (rs6000_parm_needs_stack (args_so_far, TREE_TYPE (parm)))
12765 return true;
12767 else
12769 function_args_iterator args_iter;
12770 tree arg_type;
12772 FOREACH_FUNCTION_ARGS (fntype, arg_type, args_iter)
12773 if (rs6000_parm_needs_stack (args_so_far, arg_type))
12774 return true;
12777 return false;
12780 /* Return the size of the REG_PARM_STACK_SPACE area for FUN. This is
12781 usually a constant depending on the ABI. However, in the ELFv2 ABI
12782 the register parameter area is optional when calling a function that
12783 has a prototype in scope, has no variable argument list, and passes
12784 all parameters in registers. */
12786 static int
12787 rs6000_reg_parm_stack_space (tree fun, bool incoming)
12789 int reg_parm_stack_space;
12791 switch (DEFAULT_ABI)
12793 default:
12794 reg_parm_stack_space = 0;
12795 break;
12797 case ABI_AIX:
12798 case ABI_DARWIN:
12799 reg_parm_stack_space = TARGET_64BIT ? 64 : 32;
12800 break;
12802 case ABI_ELFv2:
12803 /* ??? Recomputing this every time is a bit expensive. Is there
12804 a place to cache this information? */
12805 if (rs6000_function_parms_need_stack (fun, incoming))
12806 reg_parm_stack_space = TARGET_64BIT ? 64 : 32;
12807 else
12808 reg_parm_stack_space = 0;
12809 break;
12812 return reg_parm_stack_space;
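/* A summary of the result, plus a stand-alone ELFv2 example.  Under
   AIX/Darwin the area is always reserved; under ELFv2 it is elided
   when the checks above succeed:

     ABI_V4                          ->  0 bytes
     ABI_AIX / ABI_DARWIN            -> 64 bytes (32 if 32-bit)
     ABI_ELFv2, parms need stack     -> 64 bytes
     ABI_ELFv2, all parms in regs    ->  0 bytes

   With a prototype in scope, no varargs, and both arguments in GPRs,
   callers of this function may omit the 64-byte area entirely:  */

extern long small_frame (long x, long y);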
12815 static void
12816 rs6000_move_block_from_reg (int regno, rtx x, int nregs)
12818 int i;
12819 machine_mode reg_mode = TARGET_32BIT ? SImode : DImode;
12821 if (nregs == 0)
12822 return;
12824 for (i = 0; i < nregs; i++)
12826 rtx tem = adjust_address_nv (x, reg_mode, i * GET_MODE_SIZE (reg_mode));
12827 if (reload_completed)
12829 if (! strict_memory_address_p (reg_mode, XEXP (tem, 0)))
12830 tem = NULL_RTX;
12831 else
12832 tem = simplify_gen_subreg (reg_mode, x, BLKmode,
12833 i * GET_MODE_SIZE (reg_mode));
12835 else
12836 tem = replace_equiv_address (tem, XEXP (tem, 0));
12838 gcc_assert (tem);
12840 emit_move_insn (tem, gen_rtx_REG (reg_mode, regno + i));
12844 /* Perform any actions needed for a function that is receiving a
12845 variable number of arguments.
12847 CUM is as above.
12849 MODE and TYPE are the mode and type of the current parameter.
12851 PRETEND_SIZE is a variable that should be set to the amount of stack
12852 that must be pushed by the prolog to pretend that our caller pushed
12853 it.
12855 Normally, this macro will push all remaining incoming registers on the
12856 stack and set PRETEND_SIZE to the length of the registers pushed. */
12858 static void
12859 setup_incoming_varargs (cumulative_args_t cum, machine_mode mode,
12860 tree type, int *pretend_size ATTRIBUTE_UNUSED,
12861 int no_rtl)
12863 CUMULATIVE_ARGS next_cum;
12864 int reg_size = TARGET_32BIT ? 4 : 8;
12865 rtx save_area = NULL_RTX, mem;
12866 int first_reg_offset;
12867 alias_set_type set;
12869 /* Skip the last named argument. */
12870 next_cum = *get_cumulative_args (cum);
12871 rs6000_function_arg_advance_1 (&next_cum, mode, type, true, 0);
12873 if (DEFAULT_ABI == ABI_V4)
12875 first_reg_offset = next_cum.sysv_gregno - GP_ARG_MIN_REG;
12877 if (! no_rtl)
12879 int gpr_reg_num = 0, gpr_size = 0, fpr_size = 0;
12880 HOST_WIDE_INT offset = 0;
12882 /* Try to optimize the size of the varargs save area.
12883 The ABI requires that ap.reg_save_area is doubleword
12884 aligned, but we don't need to allocate space for all
12885 the bytes, only those to which we actually will save
12886 anything. */
12887 if (cfun->va_list_gpr_size && first_reg_offset < GP_ARG_NUM_REG)
12888 gpr_reg_num = GP_ARG_NUM_REG - first_reg_offset;
12889 if (TARGET_HARD_FLOAT && TARGET_FPRS
12890 && next_cum.fregno <= FP_ARG_V4_MAX_REG
12891 && cfun->va_list_fpr_size)
12893 if (gpr_reg_num)
12894 fpr_size = (next_cum.fregno - FP_ARG_MIN_REG)
12895 * UNITS_PER_FP_WORD;
12896 if (cfun->va_list_fpr_size
12897 < FP_ARG_V4_MAX_REG + 1 - next_cum.fregno)
12898 fpr_size += cfun->va_list_fpr_size * UNITS_PER_FP_WORD;
12899 else
12900 fpr_size += (FP_ARG_V4_MAX_REG + 1 - next_cum.fregno)
12901 * UNITS_PER_FP_WORD;
12903 if (gpr_reg_num)
12905 offset = -((first_reg_offset * reg_size) & ~7);
12906 if (!fpr_size && gpr_reg_num > cfun->va_list_gpr_size)
12908 gpr_reg_num = cfun->va_list_gpr_size;
12909 if (reg_size == 4 && (first_reg_offset & 1))
12910 gpr_reg_num++;
12912 gpr_size = (gpr_reg_num * reg_size + 7) & ~7;
12914 else if (fpr_size)
12915 offset = - (int) (next_cum.fregno - FP_ARG_MIN_REG)
12916 * UNITS_PER_FP_WORD
12917 - (int) (GP_ARG_NUM_REG * reg_size);
12919 if (gpr_size + fpr_size)
12921 rtx reg_save_area
12922 = assign_stack_local (BLKmode, gpr_size + fpr_size, 64);
12923 gcc_assert (GET_CODE (reg_save_area) == MEM);
12924 reg_save_area = XEXP (reg_save_area, 0);
12925 if (GET_CODE (reg_save_area) == PLUS)
12927 gcc_assert (XEXP (reg_save_area, 0)
12928 == virtual_stack_vars_rtx);
12929 gcc_assert (GET_CODE (XEXP (reg_save_area, 1)) == CONST_INT);
12930 offset += INTVAL (XEXP (reg_save_area, 1));
12932 else
12933 gcc_assert (reg_save_area == virtual_stack_vars_rtx);
12936 cfun->machine->varargs_save_offset = offset;
12937 save_area = plus_constant (Pmode, virtual_stack_vars_rtx, offset);
12940 else
12942 first_reg_offset = next_cum.words;
12943 save_area = crtl->args.internal_arg_pointer;
12945 if (targetm.calls.must_pass_in_stack (mode, type))
12946 first_reg_offset += rs6000_arg_size (TYPE_MODE (type), type);
12949 set = get_varargs_alias_set ();
12950 if (! no_rtl && first_reg_offset < GP_ARG_NUM_REG
12951 && cfun->va_list_gpr_size)
12953 int n_gpr, nregs = GP_ARG_NUM_REG - first_reg_offset;
12955 if (va_list_gpr_counter_field)
12956 /* V4 va_list_gpr_size counts number of registers needed. */
12957 n_gpr = cfun->va_list_gpr_size;
12958 else
12959 /* char * va_list instead counts number of bytes needed. */
12960 n_gpr = (cfun->va_list_gpr_size + reg_size - 1) / reg_size;
12962 if (nregs > n_gpr)
12963 nregs = n_gpr;
12965 mem = gen_rtx_MEM (BLKmode,
12966 plus_constant (Pmode, save_area,
12967 first_reg_offset * reg_size));
12968 MEM_NOTRAP_P (mem) = 1;
12969 set_mem_alias_set (mem, set);
12970 set_mem_align (mem, BITS_PER_WORD);
12972 rs6000_move_block_from_reg (GP_ARG_MIN_REG + first_reg_offset, mem,
12973 nregs);
12976 /* Save FP registers if needed. */
12977 if (DEFAULT_ABI == ABI_V4
12978 && TARGET_HARD_FLOAT && TARGET_FPRS
12979 && ! no_rtl
12980 && next_cum.fregno <= FP_ARG_V4_MAX_REG
12981 && cfun->va_list_fpr_size)
12983 int fregno = next_cum.fregno, nregs;
12984 rtx cr1 = gen_rtx_REG (CCmode, CR1_REGNO);
12985 rtx lab = gen_label_rtx ();
12986 int off = (GP_ARG_NUM_REG * reg_size) + ((fregno - FP_ARG_MIN_REG)
12987 * UNITS_PER_FP_WORD);
12989 emit_jump_insn
12990 (gen_rtx_SET (pc_rtx,
12991 gen_rtx_IF_THEN_ELSE (VOIDmode,
12992 gen_rtx_NE (VOIDmode, cr1,
12993 const0_rtx),
12994 gen_rtx_LABEL_REF (VOIDmode, lab),
12995 pc_rtx)));
12997 for (nregs = 0;
12998 fregno <= FP_ARG_V4_MAX_REG && nregs < cfun->va_list_fpr_size;
12999 fregno++, off += UNITS_PER_FP_WORD, nregs++)
13001 mem = gen_rtx_MEM ((TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT)
13002 ? DFmode : SFmode,
13003 plus_constant (Pmode, save_area, off));
13004 MEM_NOTRAP_P (mem) = 1;
13005 set_mem_alias_set (mem, set);
13006 set_mem_align (mem, GET_MODE_ALIGNMENT (
13007 (TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT)
13008 ? DFmode : SFmode));
13009 emit_move_insn (mem, gen_rtx_REG (
13010 (TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT)
13011 ? DFmode : SFmode, fregno));
13014 emit_label (lab);
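/* A stand-alone C-level view of what the code above arranges, assuming
   the V4 ABI: for a varargs function, the prologue spills the unused
   argument GPRs (and, when CR1 indicates the caller passed FP values,
   the argument FPRs) into the register save area that va_arg walks.  */

#include <stdarg.h>

int
sum_ints (int n, ...)    /* n is in r3; r4..r10 and f1..f8 get dumped.  */
{
  va_list ap;
  int total = 0;

  va_start (ap, n);
  while (n-- > 0)
    total += va_arg (ap, int);
  va_end (ap);
  return total;
}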
13018 /* Create the va_list data type. */
13020 static tree
13021 rs6000_build_builtin_va_list (void)
13023 tree f_gpr, f_fpr, f_res, f_ovf, f_sav, record, type_decl;
13025 /* For AIX, prefer 'char *' because that's what the system
13026 header files like. */
13027 if (DEFAULT_ABI != ABI_V4)
13028 return build_pointer_type (char_type_node);
13030 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
13031 type_decl = build_decl (BUILTINS_LOCATION, TYPE_DECL,
13032 get_identifier ("__va_list_tag"), record);
13034 f_gpr = build_decl (BUILTINS_LOCATION, FIELD_DECL, get_identifier ("gpr"),
13035 unsigned_char_type_node);
13036 f_fpr = build_decl (BUILTINS_LOCATION, FIELD_DECL, get_identifier ("fpr"),
13037 unsigned_char_type_node);
13038 /* Give the two bytes of padding a name, so that -Wpadded won't warn on
13039 every user file. */
13040 f_res = build_decl (BUILTINS_LOCATION, FIELD_DECL,
13041 get_identifier ("reserved"), short_unsigned_type_node);
13042 f_ovf = build_decl (BUILTINS_LOCATION, FIELD_DECL,
13043 get_identifier ("overflow_arg_area"),
13044 ptr_type_node);
13045 f_sav = build_decl (BUILTINS_LOCATION, FIELD_DECL,
13046 get_identifier ("reg_save_area"),
13047 ptr_type_node);
13049 va_list_gpr_counter_field = f_gpr;
13050 va_list_fpr_counter_field = f_fpr;
13052 DECL_FIELD_CONTEXT (f_gpr) = record;
13053 DECL_FIELD_CONTEXT (f_fpr) = record;
13054 DECL_FIELD_CONTEXT (f_res) = record;
13055 DECL_FIELD_CONTEXT (f_ovf) = record;
13056 DECL_FIELD_CONTEXT (f_sav) = record;
13058 TYPE_STUB_DECL (record) = type_decl;
13059 TYPE_NAME (record) = type_decl;
13060 TYPE_FIELDS (record) = f_gpr;
13061 DECL_CHAIN (f_gpr) = f_fpr;
13062 DECL_CHAIN (f_fpr) = f_res;
13063 DECL_CHAIN (f_res) = f_ovf;
13064 DECL_CHAIN (f_ovf) = f_sav;
13066 layout_type (record);
13068 /* The correct type is an array type of one element. */
13069 return build_array_type (record, build_index_type (size_zero_node));
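/* For reference, the record built above corresponds to this C
   declaration; the field meanings follow the 32-bit SVR4 PowerPC ABI
   (the type names here are hypothetical):  */

typedef struct example_va_tag
{
  unsigned char gpr;          /* next GP argument register, 0..8.  */
  unsigned char fpr;          /* next FP argument register, 0..8.  */
  unsigned short reserved;    /* named padding; keeps -Wpadded quiet.  */
  void *overflow_arg_area;    /* next argument passed on the stack.  */
  void *reg_save_area;        /* block the prologue saved the regs to.  */
} example_va_list[1];         /* va_list is a one-element array.  */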
13072 /* Implement va_start. */
13074 static void
13075 rs6000_va_start (tree valist, rtx nextarg)
13077 HOST_WIDE_INT words, n_gpr, n_fpr;
13078 tree f_gpr, f_fpr, f_res, f_ovf, f_sav;
13079 tree gpr, fpr, ovf, sav, t;
13081 /* Only SVR4 needs something special. */
13082 if (DEFAULT_ABI != ABI_V4)
13084 std_expand_builtin_va_start (valist, nextarg);
13085 return;
13088 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
13089 f_fpr = DECL_CHAIN (f_gpr);
13090 f_res = DECL_CHAIN (f_fpr);
13091 f_ovf = DECL_CHAIN (f_res);
13092 f_sav = DECL_CHAIN (f_ovf);
13094 valist = build_simple_mem_ref (valist);
13095 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
13096 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), unshare_expr (valist),
13097 f_fpr, NULL_TREE);
13098 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), unshare_expr (valist),
13099 f_ovf, NULL_TREE);
13100 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), unshare_expr (valist),
13101 f_sav, NULL_TREE);
13103 /* Count number of gp and fp argument registers used. */
13104 words = crtl->args.info.words;
13105 n_gpr = MIN (crtl->args.info.sysv_gregno - GP_ARG_MIN_REG,
13106 GP_ARG_NUM_REG);
13107 n_fpr = MIN (crtl->args.info.fregno - FP_ARG_MIN_REG,
13108 FP_ARG_NUM_REG);
13110 if (TARGET_DEBUG_ARG)
13111 fprintf (stderr, "va_start: words = " HOST_WIDE_INT_PRINT_DEC", n_gpr = "
13112 HOST_WIDE_INT_PRINT_DEC", n_fpr = " HOST_WIDE_INT_PRINT_DEC"\n",
13113 words, n_gpr, n_fpr);
13115 if (cfun->va_list_gpr_size)
13117 t = build2 (MODIFY_EXPR, TREE_TYPE (gpr), gpr,
13118 build_int_cst (NULL_TREE, n_gpr));
13119 TREE_SIDE_EFFECTS (t) = 1;
13120 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
13123 if (cfun->va_list_fpr_size)
13125 t = build2 (MODIFY_EXPR, TREE_TYPE (fpr), fpr,
13126 build_int_cst (NULL_TREE, n_fpr));
13127 TREE_SIDE_EFFECTS (t) = 1;
13128 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
13130 #ifdef HAVE_AS_GNU_ATTRIBUTE
13131 if (call_ABI_of_interest (cfun->decl))
13132 rs6000_passes_float = true;
13133 #endif
13136 /* Find the overflow area. */
13137 t = make_tree (TREE_TYPE (ovf), crtl->args.internal_arg_pointer);
13138 if (words != 0)
13139 t = fold_build_pointer_plus_hwi (t, words * MIN_UNITS_PER_WORD);
13140 t = build2 (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
13141 TREE_SIDE_EFFECTS (t) = 1;
13142 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
13144 /* If there were no va_arg invocations, don't set up the register
13145 save area. */
13146 if (!cfun->va_list_gpr_size
13147 && !cfun->va_list_fpr_size
13148 && n_gpr < GP_ARG_NUM_REG
13149 && n_fpr < FP_ARG_V4_MAX_REG)
13150 return;
13152 /* Find the register save area. */
13153 t = make_tree (TREE_TYPE (sav), virtual_stack_vars_rtx);
13154 if (cfun->machine->varargs_save_offset)
13155 t = fold_build_pointer_plus_hwi (t, cfun->machine->varargs_save_offset);
13156 t = build2 (MODIFY_EXPR, TREE_TYPE (sav), sav, t);
13157 TREE_SIDE_EFFECTS (t) = 1;
13158 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
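/* In terms of the record fields above, the trees built here amount to
   this sketch (angle brackets mark values computed at expand time from
   crtl->args.info and the machine function):

     ap->gpr = <GP argument registers already consumed>;
     ap->fpr = <FP argument registers already consumed>;
     ap->overflow_arg_area = <incoming arg pointer> + <words> * 4;
     ap->reg_save_area = <frame base> + <varargs_save_offset>;  */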
13161 /* Implement va_arg. */
13163 static tree
13164 rs6000_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p,
13165 gimple_seq *post_p)
13167 tree f_gpr, f_fpr, f_res, f_ovf, f_sav;
13168 tree gpr, fpr, ovf, sav, reg, t, u;
13169 int size, rsize, n_reg, sav_ofs, sav_scale;
13170 tree lab_false, lab_over, addr;
13171 int align;
13172 tree ptrtype = build_pointer_type_for_mode (type, ptr_mode, true);
13173 int regalign = 0;
13174 gimple *stmt;
13176 if (pass_by_reference (NULL, TYPE_MODE (type), type, false))
13178 t = rs6000_gimplify_va_arg (valist, ptrtype, pre_p, post_p);
13179 return build_va_arg_indirect_ref (t);
13182 /* We need to deal with the fact that the darwin ppc64 ABI is defined by an
13183 earlier version of gcc, with the property that it always applied alignment
13184 adjustments to the va-args (even for zero-sized types). The cheapest way
13185 to deal with this is to replicate the effect of the part of
13186 std_gimplify_va_arg_expr that carries out the align adjust, for the case
13187 of relevance.
13188 We don't need to check for pass-by-reference because of the test above.
13189 We can return a simplified answer, since we know there's no offset to add. */
13191 if (((TARGET_MACHO
13192 && rs6000_darwin64_abi)
13193 || DEFAULT_ABI == ABI_ELFv2
13194 || (DEFAULT_ABI == ABI_AIX && !rs6000_compat_align_parm))
13195 && integer_zerop (TYPE_SIZE (type)))
13197 unsigned HOST_WIDE_INT align, boundary;
13198 tree valist_tmp = get_initialized_tmp_var (valist, pre_p, NULL);
13199 align = PARM_BOUNDARY / BITS_PER_UNIT;
13200 boundary = rs6000_function_arg_boundary (TYPE_MODE (type), type);
13201 if (boundary > MAX_SUPPORTED_STACK_ALIGNMENT)
13202 boundary = MAX_SUPPORTED_STACK_ALIGNMENT;
13203 boundary /= BITS_PER_UNIT;
13204 if (boundary > align)
13206 tree t;
13207 /* This updates arg ptr by the amount that would be necessary
13208 to align the zero-sized (but not zero-alignment) item. */
13209 t = build2 (MODIFY_EXPR, TREE_TYPE (valist), valist_tmp,
13210 fold_build_pointer_plus_hwi (valist_tmp, boundary - 1));
13211 gimplify_and_add (t, pre_p);
13213 t = fold_convert (sizetype, valist_tmp);
13214 t = build2 (MODIFY_EXPR, TREE_TYPE (valist), valist_tmp,
13215 fold_convert (TREE_TYPE (valist),
13216 fold_build2 (BIT_AND_EXPR, sizetype, t,
13217 size_int (-boundary))));
13218 t = build2 (MODIFY_EXPR, TREE_TYPE (valist), valist, t);
13219 gimplify_and_add (t, pre_p);
13221 /* Since it is zero-sized there's no increment for the item itself. */
13222 valist_tmp = fold_convert (build_pointer_type (type), valist_tmp);
13223 return build_va_arg_indirect_ref (valist_tmp);
13226 if (DEFAULT_ABI != ABI_V4)
13228 if (targetm.calls.split_complex_arg && TREE_CODE (type) == COMPLEX_TYPE)
13230 tree elem_type = TREE_TYPE (type);
13231 machine_mode elem_mode = TYPE_MODE (elem_type);
13232 int elem_size = GET_MODE_SIZE (elem_mode);
13234 if (elem_size < UNITS_PER_WORD)
13236 tree real_part, imag_part;
13237 gimple_seq post = NULL;
13239 real_part = rs6000_gimplify_va_arg (valist, elem_type, pre_p,
13240 &post);
13241 /* Copy the value into a temporary, lest the formal temporary
13242 be reused out from under us. */
13243 real_part = get_initialized_tmp_var (real_part, pre_p, &post);
13244 gimple_seq_add_seq (pre_p, post);
13246 imag_part = rs6000_gimplify_va_arg (valist, elem_type, pre_p,
13247 post_p);
13249 return build2 (COMPLEX_EXPR, type, real_part, imag_part);
13253 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
13256 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
13257 f_fpr = DECL_CHAIN (f_gpr);
13258 f_res = DECL_CHAIN (f_fpr);
13259 f_ovf = DECL_CHAIN (f_res);
13260 f_sav = DECL_CHAIN (f_ovf);
13262 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
13263 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), unshare_expr (valist),
13264 f_fpr, NULL_TREE);
13265 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), unshare_expr (valist),
13266 f_ovf, NULL_TREE);
13267 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), unshare_expr (valist),
13268 f_sav, NULL_TREE);
13270 size = int_size_in_bytes (type);
13271 rsize = (size + 3) / 4;
13272 align = 1;
13274 machine_mode mode = TYPE_MODE (type);
13275 if (abi_v4_pass_in_fpr (mode))
13277 /* FP args go in FP registers, if present. */
13278 reg = fpr;
13279 n_reg = (size + 7) / 8;
13280 sav_ofs = ((TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT) ? 8 : 4) * 4;
13281 sav_scale = ((TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT) ? 8 : 4);
13282 if (mode != SFmode && mode != SDmode)
13283 align = 8;
13285 else
13287 /* Otherwise into GP registers. */
13288 reg = gpr;
13289 n_reg = rsize;
13290 sav_ofs = 0;
13291 sav_scale = 4;
13292 if (n_reg == 2)
13293 align = 8;
13296 /* Pull the value out of the saved registers.... */
13298 lab_over = NULL;
13299 addr = create_tmp_var (ptr_type_node, "addr");
13301 /* AltiVec vectors never go in registers when -mabi=altivec. */
13302 if (TARGET_ALTIVEC_ABI && ALTIVEC_VECTOR_MODE (mode))
13303 align = 16;
13304 else
13306 lab_false = create_artificial_label (input_location);
13307 lab_over = create_artificial_label (input_location);
13309 /* Long long and SPE vectors are aligned in the registers.
13310 As are any other 2 gpr item such as complex int due to a
13311 historical mistake. */
13312 u = reg;
13313 if (n_reg == 2 && reg == gpr)
13315 regalign = 1;
13316 u = build2 (BIT_AND_EXPR, TREE_TYPE (reg), unshare_expr (reg),
13317 build_int_cst (TREE_TYPE (reg), n_reg - 1));
13318 u = build2 (POSTINCREMENT_EXPR, TREE_TYPE (reg),
13319 unshare_expr (reg), u);
13321 /* _Decimal128 is passed in even/odd fpr pairs; the stored
13322 reg number is 0 for f1, so we want to make it odd. */
13323 else if (reg == fpr && mode == TDmode)
13325 t = build2 (BIT_IOR_EXPR, TREE_TYPE (reg), unshare_expr (reg),
13326 build_int_cst (TREE_TYPE (reg), 1));
13327 u = build2 (MODIFY_EXPR, void_type_node, unshare_expr (reg), t);
13330 t = fold_convert (TREE_TYPE (reg), size_int (8 - n_reg + 1));
13331 t = build2 (GE_EXPR, boolean_type_node, u, t);
13332 u = build1 (GOTO_EXPR, void_type_node, lab_false);
13333 t = build3 (COND_EXPR, void_type_node, t, u, NULL_TREE);
13334 gimplify_and_add (t, pre_p);
13336 t = sav;
13337 if (sav_ofs)
13338 t = fold_build_pointer_plus_hwi (sav, sav_ofs);
13340 u = build2 (POSTINCREMENT_EXPR, TREE_TYPE (reg), unshare_expr (reg),
13341 build_int_cst (TREE_TYPE (reg), n_reg));
13342 u = fold_convert (sizetype, u);
13343 u = build2 (MULT_EXPR, sizetype, u, size_int (sav_scale));
13344 t = fold_build_pointer_plus (t, u);
13346 /* _Decimal32 varargs are located in the second word of the 64-bit
13347 FP register for 32-bit binaries. */
13348 if (TARGET_32BIT
13349 && TARGET_HARD_FLOAT && TARGET_FPRS
13350 && mode == SDmode)
13351 t = fold_build_pointer_plus_hwi (t, size);
13353 gimplify_assign (addr, t, pre_p);
13355 gimple_seq_add_stmt (pre_p, gimple_build_goto (lab_over));
13357 stmt = gimple_build_label (lab_false);
13358 gimple_seq_add_stmt (pre_p, stmt);
13360 if ((n_reg == 2 && !regalign) || n_reg > 2)
13362 /* Ensure that we don't find any more args in regs.
13363 Alignment has been taken care of for special cases. */
13364 gimplify_assign (reg, build_int_cst (TREE_TYPE (reg), 8), pre_p);
13368 /* ... otherwise out of the overflow area. */
13370 /* Care for on-stack alignment if needed. */
13371 t = ovf;
13372 if (align != 1)
13374 t = fold_build_pointer_plus_hwi (t, align - 1);
13375 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
13376 build_int_cst (TREE_TYPE (t), -align));
13378 gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue);
13380 gimplify_assign (unshare_expr (addr), t, pre_p);
13382 t = fold_build_pointer_plus_hwi (t, size);
13383 gimplify_assign (unshare_expr (ovf), t, pre_p);
13385 if (lab_over)
13387 stmt = gimple_build_label (lab_over);
13388 gimple_seq_add_stmt (pre_p, stmt);
13391 if (STRICT_ALIGNMENT
13392 && (TYPE_ALIGN (type)
13393 > (unsigned) BITS_PER_UNIT * (align < 4 ? 4 : align)))
13395 /* The value (of type complex double, for example) may not be
13396 aligned in memory in the saved registers, so copy via a
13397 temporary. (This is the same code as used for SPARC.) */
13398 tree tmp = create_tmp_var (type, "va_arg_tmp");
13399 tree dest_addr = build_fold_addr_expr (tmp);
13401 tree copy = build_call_expr (builtin_decl_implicit (BUILT_IN_MEMCPY),
13402 3, dest_addr, addr, size_int (rsize * 4));
13404 gimplify_and_add (copy, pre_p);
13405 addr = dest_addr;
13408 addr = fold_convert (ptrtype, addr);
13409 return build_va_arg_indirect_ref (addr);
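/* A stand-alone sketch of the decision the expansion above makes for a
   single-register integer argument under the V4 ABI; the structure and
   field names are hypothetical stand-ins for the gpr, reg_save_area
   and overflow_arg_area trees used above.  */

struct v4_ap_model
{
  unsigned char gpr;
  void *reg_save_area;
  void *overflow_arg_area;
};

static void *
v4_next_int_arg (struct v4_ap_model *ap)
{
  if (ap->gpr < 8)                           /* a GPR slot is left.  */
    return (char *) ap->reg_save_area + 4 * ap->gpr++;
  ap->gpr = 8;                               /* no more register args.  */
  void *p = ap->overflow_arg_area;
  ap->overflow_arg_area = (char *) p + 4;    /* step past the slot.  */
  return p;
}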
13412 /* Builtins. */
13414 static void
13415 def_builtin (const char *name, tree type, enum rs6000_builtins code)
13417 tree t;
13418 unsigned classify = rs6000_builtin_info[(int)code].attr;
13419 const char *attr_string = "";
13421 gcc_assert (name != NULL);
13422 gcc_assert (IN_RANGE ((int)code, 0, (int)RS6000_BUILTIN_COUNT));
13424 if (rs6000_builtin_decls[(int)code])
13425 fatal_error (input_location,
13426 "internal error: builtin function %s already processed", name);
13428 rs6000_builtin_decls[(int)code] = t =
13429 add_builtin_function (name, type, (int)code, BUILT_IN_MD, NULL, NULL_TREE);
13431 /* Set any special attributes. */
13432 if ((classify & RS6000_BTC_CONST) != 0)
13434 /* const function, function only depends on the inputs. */
13435 TREE_READONLY (t) = 1;
13436 TREE_NOTHROW (t) = 1;
13437 attr_string = ", const";
13439 else if ((classify & RS6000_BTC_PURE) != 0)
13441 /* pure function, function can read global memory, but does not set any
13442 external state. */
13443 DECL_PURE_P (t) = 1;
13444 TREE_NOTHROW (t) = 1;
13445 attr_string = ", pure";
13447 else if ((classify & RS6000_BTC_FP) != 0)
13449 /* Function is a math function. If rounding mode is on, then treat the
13450 function as not reading global memory, but it can have arbitrary side
13451 effects. If it is off, then assume the function is a const function.
13452 This mimics the ATTR_MATHFN_FPROUNDING attribute in
13453 builtin-attribute.def that is used for the math functions. */
13454 TREE_NOTHROW (t) = 1;
13455 if (flag_rounding_math)
13457 DECL_PURE_P (t) = 1;
13458 DECL_IS_NOVOPS (t) = 1;
13459 attr_string = ", fp, pure";
13461 else
13463 TREE_READONLY (t) = 1;
13464 attr_string = ", fp, const";
13467 else if ((classify & RS6000_BTC_ATTR_MASK) != 0)
13468 gcc_unreachable ();
13470 if (TARGET_DEBUG_BUILTIN)
13471 fprintf (stderr, "rs6000_builtin, code = %4d, %s%s\n",
13472 (int)code, name, attr_string);
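/* A hypothetical registration, for illustration only (the name, type
   and enumerator below are made up, not real table entries):

     def_builtin ("__builtin_example_vadd", v4si_ftype_v4si_v4si,
                  EXAMPLE_BUILTIN_VADD);

   With RS6000_BTC_CONST in the entry's attributes, the decl comes back
   marked TREE_READONLY and TREE_NOTHROW exactly as set above.  */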
13475 /* Simple ternary operations: VECd = foo (VECa, VECb, VECc). */
13477 #undef RS6000_BUILTIN_0
13478 #undef RS6000_BUILTIN_1
13479 #undef RS6000_BUILTIN_2
13480 #undef RS6000_BUILTIN_3
13481 #undef RS6000_BUILTIN_A
13482 #undef RS6000_BUILTIN_D
13483 #undef RS6000_BUILTIN_E
13484 #undef RS6000_BUILTIN_H
13485 #undef RS6000_BUILTIN_P
13486 #undef RS6000_BUILTIN_Q
13487 #undef RS6000_BUILTIN_S
13488 #undef RS6000_BUILTIN_X
13490 #define RS6000_BUILTIN_0(ENUM, NAME, MASK, ATTR, ICODE)
13491 #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE)
13492 #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE)
13493 #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE) \
13494 { MASK, ICODE, NAME, ENUM },
13496 #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE)
13497 #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE)
13498 #define RS6000_BUILTIN_E(ENUM, NAME, MASK, ATTR, ICODE)
13499 #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE)
13500 #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE)
13501 #define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE)
13502 #define RS6000_BUILTIN_S(ENUM, NAME, MASK, ATTR, ICODE)
13503 #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)
13505 static const struct builtin_description bdesc_3arg[] =
13507 #include "rs6000-builtin.def"
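/* The table above is built with a classic X-macro: every
   RS6000_BUILTIN_* macro is redefined before each #include of
   rs6000-builtin.def so that exactly one builtin class expands to an
   initializer row while the others expand to nothing.  A stand-alone
   miniature of the same technique (hypothetical names):  */

#define EXAMPLE_OPS \
  EXAMPLE_OP (add, 1) \
  EXAMPLE_OP (sub, 2)   /* stands in for the .def file */

#define EXAMPLE_OP(NAME, CODE) { #NAME, CODE },
static const struct { const char *name; int code; } example_ops[] =
{
  EXAMPLE_OPS           /* expands to { "add", 1 }, { "sub", 2 }, */
};
#undef EXAMPLE_OP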
13510 /* DST operations: void foo (void *, const int, const char). */
13512 #undef RS6000_BUILTIN_0
13513 #undef RS6000_BUILTIN_1
13514 #undef RS6000_BUILTIN_2
13515 #undef RS6000_BUILTIN_3
13516 #undef RS6000_BUILTIN_A
13517 #undef RS6000_BUILTIN_D
13518 #undef RS6000_BUILTIN_E
13519 #undef RS6000_BUILTIN_H
13520 #undef RS6000_BUILTIN_P
13521 #undef RS6000_BUILTIN_Q
13522 #undef RS6000_BUILTIN_S
13523 #undef RS6000_BUILTIN_X
13525 #define RS6000_BUILTIN_0(ENUM, NAME, MASK, ATTR, ICODE)
13526 #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE)
13527 #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE)
13528 #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE)
13529 #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE)
13530 #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE) \
13531 { MASK, ICODE, NAME, ENUM },
13533 #define RS6000_BUILTIN_E(ENUM, NAME, MASK, ATTR, ICODE)
13534 #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE)
13535 #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE)
13536 #define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE)
13537 #define RS6000_BUILTIN_S(ENUM, NAME, MASK, ATTR, ICODE)
13538 #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)
13540 static const struct builtin_description bdesc_dst[] =
13542 #include "rs6000-builtin.def"
13545 /* Simple binary operations: VECc = foo (VECa, VECb). */
13547 #undef RS6000_BUILTIN_0
13548 #undef RS6000_BUILTIN_1
13549 #undef RS6000_BUILTIN_2
13550 #undef RS6000_BUILTIN_3
13551 #undef RS6000_BUILTIN_A
13552 #undef RS6000_BUILTIN_D
13553 #undef RS6000_BUILTIN_E
13554 #undef RS6000_BUILTIN_H
13555 #undef RS6000_BUILTIN_P
13556 #undef RS6000_BUILTIN_Q
13557 #undef RS6000_BUILTIN_S
13558 #undef RS6000_BUILTIN_X
13560 #define RS6000_BUILTIN_0(ENUM, NAME, MASK, ATTR, ICODE)
13561 #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE)
13562 #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE) \
13563 { MASK, ICODE, NAME, ENUM },
13565 #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE)
13566 #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE)
13567 #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE)
13568 #define RS6000_BUILTIN_E(ENUM, NAME, MASK, ATTR, ICODE)
13569 #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE)
13570 #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE)
13571 #define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE)
13572 #define RS6000_BUILTIN_S(ENUM, NAME, MASK, ATTR, ICODE)
13573 #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)
13575 static const struct builtin_description bdesc_2arg[] =
13577 #include "rs6000-builtin.def"
13580 #undef RS6000_BUILTIN_0
13581 #undef RS6000_BUILTIN_1
13582 #undef RS6000_BUILTIN_2
13583 #undef RS6000_BUILTIN_3
13584 #undef RS6000_BUILTIN_A
13585 #undef RS6000_BUILTIN_D
13586 #undef RS6000_BUILTIN_E
13587 #undef RS6000_BUILTIN_H
13588 #undef RS6000_BUILTIN_P
13589 #undef RS6000_BUILTIN_Q
13590 #undef RS6000_BUILTIN_S
13591 #undef RS6000_BUILTIN_X
13593 #define RS6000_BUILTIN_0(ENUM, NAME, MASK, ATTR, ICODE)
13594 #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE)
13595 #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE)
13596 #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE)
13597 #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE)
13598 #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE)
13599 #define RS6000_BUILTIN_E(ENUM, NAME, MASK, ATTR, ICODE)
13600 #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE)
13601 #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE) \
13602 { MASK, ICODE, NAME, ENUM },
13604 #define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE)
13605 #define RS6000_BUILTIN_S(ENUM, NAME, MASK, ATTR, ICODE)
13606 #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)
13608 /* AltiVec predicates. */
13610 static const struct builtin_description bdesc_altivec_preds[] =
13612 #include "rs6000-builtin.def"
13615 /* SPE predicates. */
13616 #undef RS6000_BUILTIN_0
13617 #undef RS6000_BUILTIN_1
13618 #undef RS6000_BUILTIN_2
13619 #undef RS6000_BUILTIN_3
13620 #undef RS6000_BUILTIN_A
13621 #undef RS6000_BUILTIN_D
13622 #undef RS6000_BUILTIN_E
13623 #undef RS6000_BUILTIN_H
13624 #undef RS6000_BUILTIN_P
13625 #undef RS6000_BUILTIN_Q
13626 #undef RS6000_BUILTIN_S
13627 #undef RS6000_BUILTIN_X
13629 #define RS6000_BUILTIN_0(ENUM, NAME, MASK, ATTR, ICODE)
13630 #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE)
13631 #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE)
13632 #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE)
13633 #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE)
13634 #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE)
13635 #define RS6000_BUILTIN_E(ENUM, NAME, MASK, ATTR, ICODE)
13636 #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE)
13637 #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE)
13638 #define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE)
13639 #define RS6000_BUILTIN_S(ENUM, NAME, MASK, ATTR, ICODE) \
13640 { MASK, ICODE, NAME, ENUM },
13642 #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)
13644 static const struct builtin_description bdesc_spe_predicates[] =
13646 #include "rs6000-builtin.def"
13649 /* SPE evsel predicates. */
13650 #undef RS6000_BUILTIN_0
13651 #undef RS6000_BUILTIN_1
13652 #undef RS6000_BUILTIN_2
13653 #undef RS6000_BUILTIN_3
13654 #undef RS6000_BUILTIN_A
13655 #undef RS6000_BUILTIN_D
13656 #undef RS6000_BUILTIN_E
13657 #undef RS6000_BUILTIN_H
13658 #undef RS6000_BUILTIN_P
13659 #undef RS6000_BUILTIN_Q
13660 #undef RS6000_BUILTIN_S
13661 #undef RS6000_BUILTIN_X
13663 #define RS6000_BUILTIN_0(ENUM, NAME, MASK, ATTR, ICODE)
13664 #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE)
13665 #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE)
13666 #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE)
13667 #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE)
13668 #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE)
13669 #define RS6000_BUILTIN_E(ENUM, NAME, MASK, ATTR, ICODE) \
13670 { MASK, ICODE, NAME, ENUM },
13672 #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE)
13673 #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE)
13674 #define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE)
13675 #define RS6000_BUILTIN_S(ENUM, NAME, MASK, ATTR, ICODE)
13676 #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)
13678 static const struct builtin_description bdesc_spe_evsel[] =
13680 #include "rs6000-builtin.def"
13683 /* PAIRED predicates. */
13684 #undef RS6000_BUILTIN_0
13685 #undef RS6000_BUILTIN_1
13686 #undef RS6000_BUILTIN_2
13687 #undef RS6000_BUILTIN_3
13688 #undef RS6000_BUILTIN_A
13689 #undef RS6000_BUILTIN_D
13690 #undef RS6000_BUILTIN_E
13691 #undef RS6000_BUILTIN_H
13692 #undef RS6000_BUILTIN_P
13693 #undef RS6000_BUILTIN_Q
13694 #undef RS6000_BUILTIN_S
13695 #undef RS6000_BUILTIN_X
13697 #define RS6000_BUILTIN_0(ENUM, NAME, MASK, ATTR, ICODE)
13698 #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE)
13699 #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE)
13700 #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE)
13701 #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE)
13702 #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE)
13703 #define RS6000_BUILTIN_E(ENUM, NAME, MASK, ATTR, ICODE)
13704 #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE)
13705 #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE)
13706 #define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE) \
13707 { MASK, ICODE, NAME, ENUM },
13709 #define RS6000_BUILTIN_S(ENUM, NAME, MASK, ATTR, ICODE)
13710 #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)
13712 static const struct builtin_description bdesc_paired_preds[] =
13714 #include "rs6000-builtin.def"
13717 /* ABS* operations. */
13719 #undef RS6000_BUILTIN_0
13720 #undef RS6000_BUILTIN_1
13721 #undef RS6000_BUILTIN_2
13722 #undef RS6000_BUILTIN_3
13723 #undef RS6000_BUILTIN_A
13724 #undef RS6000_BUILTIN_D
13725 #undef RS6000_BUILTIN_E
13726 #undef RS6000_BUILTIN_H
13727 #undef RS6000_BUILTIN_P
13728 #undef RS6000_BUILTIN_Q
13729 #undef RS6000_BUILTIN_S
13730 #undef RS6000_BUILTIN_X
13732 #define RS6000_BUILTIN_0(ENUM, NAME, MASK, ATTR, ICODE)
13733 #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE)
13734 #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE)
13735 #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE)
13736 #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE) \
13737 { MASK, ICODE, NAME, ENUM },
13739 #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE)
13740 #define RS6000_BUILTIN_E(ENUM, NAME, MASK, ATTR, ICODE)
13741 #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE)
13742 #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE)
13743 #define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE)
13744 #define RS6000_BUILTIN_S(ENUM, NAME, MASK, ATTR, ICODE)
13745 #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)
13747 static const struct builtin_description bdesc_abs[] =
13749 #include "rs6000-builtin.def"
13752 /* Simple unary operations: VECb = foo (unsigned literal) or VECb =
13753 foo (VECa). */
13755 #undef RS6000_BUILTIN_0
13756 #undef RS6000_BUILTIN_1
13757 #undef RS6000_BUILTIN_2
13758 #undef RS6000_BUILTIN_3
13759 #undef RS6000_BUILTIN_A
13760 #undef RS6000_BUILTIN_D
13761 #undef RS6000_BUILTIN_E
13762 #undef RS6000_BUILTIN_H
13763 #undef RS6000_BUILTIN_P
13764 #undef RS6000_BUILTIN_Q
13765 #undef RS6000_BUILTIN_S
13766 #undef RS6000_BUILTIN_X
13768 #define RS6000_BUILTIN_0(ENUM, NAME, MASK, ATTR, ICODE)
13769 #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE) \
13770 { MASK, ICODE, NAME, ENUM },
13772 #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE)
13773 #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE)
13774 #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE)
13775 #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE)
13776 #define RS6000_BUILTIN_E(ENUM, NAME, MASK, ATTR, ICODE)
13777 #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE)
13778 #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE)
13779 #define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE)
13780 #define RS6000_BUILTIN_S(ENUM, NAME, MASK, ATTR, ICODE)
13781 #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)
13783 static const struct builtin_description bdesc_1arg[] =
13785 #include "rs6000-builtin.def"
13788 /* Simple no-argument operations: result = __builtin_darn_32 () */
13790 #undef RS6000_BUILTIN_0
13791 #undef RS6000_BUILTIN_1
13792 #undef RS6000_BUILTIN_2
13793 #undef RS6000_BUILTIN_3
13794 #undef RS6000_BUILTIN_A
13795 #undef RS6000_BUILTIN_D
13796 #undef RS6000_BUILTIN_E
13797 #undef RS6000_BUILTIN_H
13798 #undef RS6000_BUILTIN_P
13799 #undef RS6000_BUILTIN_Q
13800 #undef RS6000_BUILTIN_S
13801 #undef RS6000_BUILTIN_X
13803 #define RS6000_BUILTIN_0(ENUM, NAME, MASK, ATTR, ICODE) \
13804 { MASK, ICODE, NAME, ENUM },
13806 #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE)
13807 #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE)
13808 #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE)
13809 #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE)
13810 #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE)
13811 #define RS6000_BUILTIN_E(ENUM, NAME, MASK, ATTR, ICODE)
13812 #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE)
13813 #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE)
13814 #define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE)
13815 #define RS6000_BUILTIN_S(ENUM, NAME, MASK, ATTR, ICODE)
13816 #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)
13818 static const struct builtin_description bdesc_0arg[] =
13820 #include "rs6000-builtin.def"
13823 /* HTM builtins. */
13824 #undef RS6000_BUILTIN_0
13825 #undef RS6000_BUILTIN_1
13826 #undef RS6000_BUILTIN_2
13827 #undef RS6000_BUILTIN_3
13828 #undef RS6000_BUILTIN_A
13829 #undef RS6000_BUILTIN_D
13830 #undef RS6000_BUILTIN_E
13831 #undef RS6000_BUILTIN_H
13832 #undef RS6000_BUILTIN_P
13833 #undef RS6000_BUILTIN_Q
13834 #undef RS6000_BUILTIN_S
13835 #undef RS6000_BUILTIN_X
13837 #define RS6000_BUILTIN_0(ENUM, NAME, MASK, ATTR, ICODE)
13838 #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE)
13839 #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE)
13840 #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE)
13841 #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE)
13842 #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE)
13843 #define RS6000_BUILTIN_E(ENUM, NAME, MASK, ATTR, ICODE)
13844 #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE) \
13845 { MASK, ICODE, NAME, ENUM },
13847 #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE)
13848 #define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE)
13849 #define RS6000_BUILTIN_S(ENUM, NAME, MASK, ATTR, ICODE)
13850 #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)
13852 static const struct builtin_description bdesc_htm[] =
13854 #include "rs6000-builtin.def"
13857 #undef RS6000_BUILTIN_0
13858 #undef RS6000_BUILTIN_1
13859 #undef RS6000_BUILTIN_2
13860 #undef RS6000_BUILTIN_3
13861 #undef RS6000_BUILTIN_A
13862 #undef RS6000_BUILTIN_D
13863 #undef RS6000_BUILTIN_E
13864 #undef RS6000_BUILTIN_H
13865 #undef RS6000_BUILTIN_P
13866 #undef RS6000_BUILTIN_Q
13867 #undef RS6000_BUILTIN_S
13868 #undef RS6000_BUILTIN_X
13869 /* Return true if a builtin function is overloaded. */
13870 bool
13871 rs6000_overloaded_builtin_p (enum rs6000_builtins fncode)
13873 return (rs6000_builtin_info[(int)fncode].attr & RS6000_BTC_OVERLOADED) != 0;
13876 const char *
13877 rs6000_overloaded_builtin_name (enum rs6000_builtins fncode)
13879 return rs6000_builtin_info[(int)fncode].name;
13882 /* Expand an expression EXP that calls a builtin without arguments. */
13883 static rtx
13884 rs6000_expand_zeroop_builtin (enum insn_code icode, rtx target)
13886 rtx pat;
13887 machine_mode tmode = insn_data[icode].operand[0].mode;
13889 if (icode == CODE_FOR_nothing)
13890 /* Builtin not supported on this processor. */
13891 return 0;
13893 if (target == 0
13894 || GET_MODE (target) != tmode
13895 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13896 target = gen_reg_rtx (tmode);
13898 pat = GEN_FCN (icode) (target);
13899 if (! pat)
13900 return 0;
13901 emit_insn (pat);
13903 return target;
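/* A stand-alone use of a no-argument builtin routed through the
   expander above, assuming -mcpu=power9 so the darn instruction
   exists:  */

int
get_random_word (void)
{
  return __builtin_darn_32 ();   /* 32-bit hardware random number.  */
}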
13907 static rtx
13908 rs6000_expand_mtfsf_builtin (enum insn_code icode, tree exp)
13910 rtx pat;
13911 tree arg0 = CALL_EXPR_ARG (exp, 0);
13912 tree arg1 = CALL_EXPR_ARG (exp, 1);
13913 rtx op0 = expand_normal (arg0);
13914 rtx op1 = expand_normal (arg1);
13915 machine_mode mode0 = insn_data[icode].operand[0].mode;
13916 machine_mode mode1 = insn_data[icode].operand[1].mode;
13918 if (icode == CODE_FOR_nothing)
13919 /* Builtin not supported on this processor. */
13920 return 0;
13922 /* If we got invalid arguments bail out before generating bad rtl. */
13923 if (arg0 == error_mark_node || arg1 == error_mark_node)
13924 return const0_rtx;
13926 if (GET_CODE (op0) != CONST_INT
13927 || INTVAL (op0) > 255
13928 || INTVAL (op0) < 0)
13930 error ("argument 1 must be an 8-bit field value");
13931 return const0_rtx;
13934 if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
13935 op0 = copy_to_mode_reg (mode0, op0);
13937 if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
13938 op1 = copy_to_mode_reg (mode1, op1);
13940 pat = GEN_FCN (icode) (op0, op1);
13941 if (! pat)
13942 return const0_rtx;
13943 emit_insn (pat);
13945 return NULL_RTX;
13948 static rtx
13949 rs6000_expand_unop_builtin (enum insn_code icode, tree exp, rtx target)
13951 rtx pat;
13952 tree arg0 = CALL_EXPR_ARG (exp, 0);
13953 rtx op0 = expand_normal (arg0);
13954 machine_mode tmode = insn_data[icode].operand[0].mode;
13955 machine_mode mode0 = insn_data[icode].operand[1].mode;
13957 if (icode == CODE_FOR_nothing)
13958 /* Builtin not supported on this processor. */
13959 return 0;
13961 /* If we got invalid arguments bail out before generating bad rtl. */
13962 if (arg0 == error_mark_node)
13963 return const0_rtx;
13965 if (icode == CODE_FOR_altivec_vspltisb
13966 || icode == CODE_FOR_altivec_vspltish
13967 || icode == CODE_FOR_altivec_vspltisw
13968 || icode == CODE_FOR_spe_evsplatfi
13969 || icode == CODE_FOR_spe_evsplati)
13971 /* Only allow 5-bit *signed* literals. */
13972 if (GET_CODE (op0) != CONST_INT
13973 || INTVAL (op0) > 15
13974 || INTVAL (op0) < -16)
13976 error ("argument 1 must be a 5-bit signed literal");
13977 return const0_rtx;
13981 if (target == 0
13982 || GET_MODE (target) != tmode
13983 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13984 target = gen_reg_rtx (tmode);
13986 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13987 op0 = copy_to_mode_reg (mode0, op0);
13989 pat = GEN_FCN (icode) (target, op0);
13990 if (! pat)
13991 return 0;
13992 emit_insn (pat);
13994 return target;
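/* A stand-alone example of the 5-bit literal check above, assuming
   <altivec.h> and -maltivec; vec_splat_s8 expands via
   altivec_vspltisb, so its operand must be a literal in [-16, 15]:  */

#include <altivec.h>

vector signed char
splat_fives (void)
{
  return vec_splat_s8 (5);   /* OK: 5 fits in 5 signed bits.  */
  /* vec_splat_s8 (99) would be rejected with
     "argument 1 must be a 5-bit signed literal".  */
}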
13997 static rtx
13998 altivec_expand_abs_builtin (enum insn_code icode, tree exp, rtx target)
14000 rtx pat, scratch1, scratch2;
14001 tree arg0 = CALL_EXPR_ARG (exp, 0);
14002 rtx op0 = expand_normal (arg0);
14003 machine_mode tmode = insn_data[icode].operand[0].mode;
14004 machine_mode mode0 = insn_data[icode].operand[1].mode;
14006 /* If we have invalid arguments, bail out before generating bad rtl. */
14007 if (arg0 == error_mark_node)
14008 return const0_rtx;
14010 if (target == 0
14011 || GET_MODE (target) != tmode
14012 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
14013 target = gen_reg_rtx (tmode);
14015 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
14016 op0 = copy_to_mode_reg (mode0, op0);
14018 scratch1 = gen_reg_rtx (mode0);
14019 scratch2 = gen_reg_rtx (mode0);
14021 pat = GEN_FCN (icode) (target, op0, scratch1, scratch2);
14022 if (! pat)
14023 return 0;
14024 emit_insn (pat);
14026 return target;
14029 static rtx
14030 rs6000_expand_binop_builtin (enum insn_code icode, tree exp, rtx target)
14032 rtx pat;
14033 tree arg0 = CALL_EXPR_ARG (exp, 0);
14034 tree arg1 = CALL_EXPR_ARG (exp, 1);
14035 rtx op0 = expand_normal (arg0);
14036 rtx op1 = expand_normal (arg1);
14037 machine_mode tmode = insn_data[icode].operand[0].mode;
14038 machine_mode mode0 = insn_data[icode].operand[1].mode;
14039 machine_mode mode1 = insn_data[icode].operand[2].mode;
14041 if (icode == CODE_FOR_nothing)
14042 /* Builtin not supported on this processor. */
14043 return 0;
14045 /* If we got invalid arguments bail out before generating bad rtl. */
14046 if (arg0 == error_mark_node || arg1 == error_mark_node)
14047 return const0_rtx;
14049 if (icode == CODE_FOR_altivec_vcfux
14050 || icode == CODE_FOR_altivec_vcfsx
14051 || icode == CODE_FOR_altivec_vctsxs
14052 || icode == CODE_FOR_altivec_vctuxs
14053 || icode == CODE_FOR_altivec_vspltb
14054 || icode == CODE_FOR_altivec_vsplth
14055 || icode == CODE_FOR_altivec_vspltw
14056 || icode == CODE_FOR_spe_evaddiw
14057 || icode == CODE_FOR_spe_evldd
14058 || icode == CODE_FOR_spe_evldh
14059 || icode == CODE_FOR_spe_evldw
14060 || icode == CODE_FOR_spe_evlhhesplat
14061 || icode == CODE_FOR_spe_evlhhossplat
14062 || icode == CODE_FOR_spe_evlhhousplat
14063 || icode == CODE_FOR_spe_evlwhe
14064 || icode == CODE_FOR_spe_evlwhos
14065 || icode == CODE_FOR_spe_evlwhou
14066 || icode == CODE_FOR_spe_evlwhsplat
14067 || icode == CODE_FOR_spe_evlwwsplat
14068 || icode == CODE_FOR_spe_evrlwi
14069 || icode == CODE_FOR_spe_evslwi
14070 || icode == CODE_FOR_spe_evsrwis
14071 || icode == CODE_FOR_spe_evsubifw
14072 || icode == CODE_FOR_spe_evsrwiu)
14074 /* Only allow 5-bit unsigned literals. */
14075 STRIP_NOPS (arg1);
14076 if (TREE_CODE (arg1) != INTEGER_CST
14077 || TREE_INT_CST_LOW (arg1) & ~0x1f)
14079 error ("argument 2 must be a 5-bit unsigned literal");
14080 return const0_rtx;
14083 else if (icode == CODE_FOR_dfptstsfi_eq_dd
14084 || icode == CODE_FOR_dfptstsfi_lt_dd
14085 || icode == CODE_FOR_dfptstsfi_gt_dd
14086 || icode == CODE_FOR_dfptstsfi_unordered_dd
14087 || icode == CODE_FOR_dfptstsfi_eq_td
14088 || icode == CODE_FOR_dfptstsfi_lt_td
14089 || icode == CODE_FOR_dfptstsfi_gt_td
14090 || icode == CODE_FOR_dfptstsfi_unordered_td)
14092 /* Only allow 6-bit unsigned literals. */
14093 STRIP_NOPS (arg0);
14094 if (TREE_CODE (arg0) != INTEGER_CST
14095 || !IN_RANGE (TREE_INT_CST_LOW (arg0), 0, 63))
14097 error ("argument 1 must be a 6-bit unsigned literal");
14098 return CONST0_RTX (tmode);
14101 else if (icode == CODE_FOR_xststdcdp
14102 || icode == CODE_FOR_xststdcsp
14103 || icode == CODE_FOR_xvtstdcdp
14104 || icode == CODE_FOR_xvtstdcsp)
14106 /* Only allow 7-bit unsigned literals. */
14107 STRIP_NOPS (arg1);
14108 if (TREE_CODE (arg1) != INTEGER_CST
14109 || !IN_RANGE (TREE_INT_CST_LOW (arg1), 0, 127))
14111 error ("argument 2 must be a 7-bit unsigned literal");
14112 return CONST0_RTX (tmode);
14116 if (target == 0
14117 || GET_MODE (target) != tmode
14118 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
14119 target = gen_reg_rtx (tmode);
14121 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
14122 op0 = copy_to_mode_reg (mode0, op0);
14123 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
14124 op1 = copy_to_mode_reg (mode1, op1);
14126 pat = GEN_FCN (icode) (target, op0, op1);
14127 if (! pat)
14128 return 0;
14129 emit_insn (pat);
14131 return target;
14134 static rtx
14135 altivec_expand_predicate_builtin (enum insn_code icode, tree exp, rtx target)
14137 rtx pat, scratch;
14138 tree cr6_form = CALL_EXPR_ARG (exp, 0);
14139 tree arg0 = CALL_EXPR_ARG (exp, 1);
14140 tree arg1 = CALL_EXPR_ARG (exp, 2);
14141 rtx op0 = expand_normal (arg0);
14142 rtx op1 = expand_normal (arg1);
14143 machine_mode tmode = SImode;
14144 machine_mode mode0 = insn_data[icode].operand[1].mode;
14145 machine_mode mode1 = insn_data[icode].operand[2].mode;
14146 int cr6_form_int;
14148 if (TREE_CODE (cr6_form) != INTEGER_CST)
14150 error ("argument 1 of __builtin_altivec_predicate must be a constant");
14151 return const0_rtx;
14153 else
14154 cr6_form_int = TREE_INT_CST_LOW (cr6_form);
14156 gcc_assert (mode0 == mode1);
14158 /* If we have invalid arguments, bail out before generating bad rtl. */
14159 if (arg0 == error_mark_node || arg1 == error_mark_node)
14160 return const0_rtx;
14162 if (target == 0
14163 || GET_MODE (target) != tmode
14164 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
14165 target = gen_reg_rtx (tmode);
14167 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
14168 op0 = copy_to_mode_reg (mode0, op0);
14169 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
14170 op1 = copy_to_mode_reg (mode1, op1);
14172 scratch = gen_reg_rtx (mode0);
14174 pat = GEN_FCN (icode) (scratch, op0, op1);
14175 if (! pat)
14176 return 0;
14177 emit_insn (pat);
14179 /* The vec_any* and vec_all* predicates use the same opcodes for two
14180 different operations, but the bits in CR6 will be different
14181 depending on what information we want. So we have to play tricks
14182 with CR6 to get the right bits out.
14184 If you think this is disgusting, look at the specs for the
14185 AltiVec predicates. */
14187 switch (cr6_form_int)
14189 case 0:
14190 emit_insn (gen_cr6_test_for_zero (target));
14191 break;
14192 case 1:
14193 emit_insn (gen_cr6_test_for_zero_reverse (target));
14194 break;
14195 case 2:
14196 emit_insn (gen_cr6_test_for_lt (target));
14197 break;
14198 case 3:
14199 emit_insn (gen_cr6_test_for_lt_reverse (target));
14200 break;
14201 default:
14202 error ("argument 1 of __builtin_altivec_predicate is out of range");
14203 break;
14206 return target;
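/* Stand-alone examples of the CR6 plumbing above, assuming <altivec.h>
   and -maltivec: vec_all_* and vec_any_* both emit the same dot-form
   compare; only the CR6 bit tested afterwards differs.  */

#include <altivec.h>

int
all_equal (vector signed int a, vector signed int b)
{
  return vec_all_eq (a, b);   /* CR6 "all elements true" test.  */
}

int
any_greater (vector signed int a, vector signed int b)
{
  return vec_any_gt (a, b);   /* reverse of the "none true" test.  */
}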
14209 static rtx
14210 paired_expand_lv_builtin (enum insn_code icode, tree exp, rtx target)
14212 rtx pat, addr;
14213 tree arg0 = CALL_EXPR_ARG (exp, 0);
14214 tree arg1 = CALL_EXPR_ARG (exp, 1);
14215 machine_mode tmode = insn_data[icode].operand[0].mode;
14216 machine_mode mode0 = Pmode;
14217 machine_mode mode1 = Pmode;
14218 rtx op0 = expand_normal (arg0);
14219 rtx op1 = expand_normal (arg1);
14221 if (icode == CODE_FOR_nothing)
14222 /* Builtin not supported on this processor. */
14223 return 0;
14225 /* If we got invalid arguments bail out before generating bad rtl. */
14226 if (arg0 == error_mark_node || arg1 == error_mark_node)
14227 return const0_rtx;
14229 if (target == 0
14230 || GET_MODE (target) != tmode
14231 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
14232 target = gen_reg_rtx (tmode);
14234 op1 = copy_to_mode_reg (mode1, op1);
14236 if (op0 == const0_rtx)
14238 addr = gen_rtx_MEM (tmode, op1);
14240 else
14242 op0 = copy_to_mode_reg (mode0, op0);
14243 addr = gen_rtx_MEM (tmode, gen_rtx_PLUS (Pmode, op0, op1));
14246 pat = GEN_FCN (icode) (target, addr);
14248 if (! pat)
14249 return 0;
14250 emit_insn (pat);
14252 return target;
14255 /* Return a constant vector for use as a little-endian permute control vector
14256 to reverse the order of elements of the given vector mode. */
14257 static rtx
14258 swap_selector_for_mode (machine_mode mode)
14260 /* These are little endian vectors, so their elements are reversed
14261 from what you would normally expect for a permute control vector. */
14262 unsigned int swap2[16] = {7,6,5,4,3,2,1,0,15,14,13,12,11,10,9,8};
14263 unsigned int swap4[16] = {3,2,1,0,7,6,5,4,11,10,9,8,15,14,13,12};
14264 unsigned int swap8[16] = {1,0,3,2,5,4,7,6,9,8,11,10,13,12,15,14};
14265 unsigned int swap16[16] = {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15};
14266 unsigned int *swaparray, i;
14267 rtx perm[16];
14269 switch (mode)
14271 case V2DFmode:
14272 case V2DImode:
14273 swaparray = swap2;
14274 break;
14275 case V4SFmode:
14276 case V4SImode:
14277 swaparray = swap4;
14278 break;
14279 case V8HImode:
14280 swaparray = swap8;
14281 break;
14282 case V16QImode:
14283 swaparray = swap16;
14284 break;
14285 default:
14286 gcc_unreachable ();
14289 for (i = 0; i < 16; ++i)
14290 perm[i] = GEN_INT (swaparray[i]);
14292 return force_reg (V16QImode, gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, perm)));
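/* Each table above is its own inverse as a permutation (sel[sel[i]]
   == i), which is why the same selector serves both the load side
   (permute after lvx) and the store side (permute before stvx).  A
   stand-alone check of that property:  */

static int
selector_is_involution (const unsigned int sel[16])
{
  for (int i = 0; i < 16; i++)
    if (sel[sel[i]] != (unsigned int) i)
      return 0;
  return 1;
}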
14295 /* Generate code for an "lvxl" or "lve*x" built-in for a little endian target
14296 with -maltivec=be specified. Issue the load followed by an element-
14297 reversing permute. */
14298 void
14299 altivec_expand_lvx_be (rtx op0, rtx op1, machine_mode mode, unsigned unspec)
14301 rtx tmp = gen_reg_rtx (mode);
14302 rtx load = gen_rtx_SET (tmp, op1);
14303 rtx lvx = gen_rtx_UNSPEC (mode, gen_rtvec (1, const0_rtx), unspec);
14304 rtx par = gen_rtx_PARALLEL (mode, gen_rtvec (2, load, lvx));
14305 rtx sel = swap_selector_for_mode (mode);
14306 rtx vperm = gen_rtx_UNSPEC (mode, gen_rtvec (3, tmp, tmp, sel), UNSPEC_VPERM);
14308 gcc_assert (REG_P (op0));
14309 emit_insn (par);
14310 emit_insn (gen_rtx_SET (op0, vperm));
14313 /* Generate code for a "stvxl" built-in for a little endian target with
14314 -maltivec=be specified. Issue the store preceded by an element-reversing
14315 permute. */
14316 void
14317 altivec_expand_stvx_be (rtx op0, rtx op1, machine_mode mode, unsigned unspec)
14319 rtx tmp = gen_reg_rtx (mode);
14320 rtx store = gen_rtx_SET (op0, tmp);
14321 rtx stvx = gen_rtx_UNSPEC (mode, gen_rtvec (1, const0_rtx), unspec);
14322 rtx par = gen_rtx_PARALLEL (mode, gen_rtvec (2, store, stvx));
14323 rtx sel = swap_selector_for_mode (mode);
14324 rtx vperm;
14326 gcc_assert (REG_P (op1));
14327 vperm = gen_rtx_UNSPEC (mode, gen_rtvec (3, op1, op1, sel), UNSPEC_VPERM);
14328 emit_insn (gen_rtx_SET (tmp, vperm));
14329 emit_insn (par);
14332 /* Generate code for a "stve*x" built-in for a little endian target with -maltivec=be
14333 specified. Issue the store preceded by an element-reversing permute. */
14334 void
14335 altivec_expand_stvex_be (rtx op0, rtx op1, machine_mode mode, unsigned unspec)
14337 machine_mode inner_mode = GET_MODE_INNER (mode);
14338 rtx tmp = gen_reg_rtx (mode);
14339 rtx stvx = gen_rtx_UNSPEC (inner_mode, gen_rtvec (1, tmp), unspec);
14340 rtx sel = swap_selector_for_mode (mode);
14341 rtx vperm;
14343 gcc_assert (REG_P (op1));
14344 vperm = gen_rtx_UNSPEC (mode, gen_rtvec (3, op1, op1, sel), UNSPEC_VPERM);
14345 emit_insn (gen_rtx_SET (tmp, vperm));
14346 emit_insn (gen_rtx_SET (op0, stvx));
14349 static rtx
14350 altivec_expand_lv_builtin (enum insn_code icode, tree exp, rtx target, bool blk)
14352 rtx pat, addr;
14353 tree arg0 = CALL_EXPR_ARG (exp, 0);
14354 tree arg1 = CALL_EXPR_ARG (exp, 1);
14355 machine_mode tmode = insn_data[icode].operand[0].mode;
14356 machine_mode mode0 = Pmode;
14357 machine_mode mode1 = Pmode;
14358 rtx op0 = expand_normal (arg0);
14359 rtx op1 = expand_normal (arg1);
14361 if (icode == CODE_FOR_nothing)
14362 /* Builtin not supported on this processor. */
14363 return 0;
14365 /* If we got invalid arguments bail out before generating bad rtl. */
14366 if (arg0 == error_mark_node || arg1 == error_mark_node)
14367 return const0_rtx;
14369 if (target == 0
14370 || GET_MODE (target) != tmode
14371 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
14372 target = gen_reg_rtx (tmode);
14374 op1 = copy_to_mode_reg (mode1, op1);
14376 /* For LVX, express the RTL accurately by ANDing the address with -16.
14377 LVXL and LVE*X expand to use UNSPECs to hide their special behavior,
14378 so the raw address is fine. */
14379 if (icode == CODE_FOR_altivec_lvx_v2df_2op
14380 || icode == CODE_FOR_altivec_lvx_v2di_2op
14381 || icode == CODE_FOR_altivec_lvx_v4sf_2op
14382 || icode == CODE_FOR_altivec_lvx_v4si_2op
14383 || icode == CODE_FOR_altivec_lvx_v8hi_2op
14384 || icode == CODE_FOR_altivec_lvx_v16qi_2op)
14386 rtx rawaddr;
14387 if (op0 == const0_rtx)
14388 rawaddr = op1;
14389 else
14391 op0 = copy_to_mode_reg (mode0, op0);
14392 rawaddr = gen_rtx_PLUS (Pmode, op1, op0);
14394 addr = gen_rtx_AND (Pmode, rawaddr, gen_rtx_CONST_INT (Pmode, -16));
14395 addr = gen_rtx_MEM (blk ? BLKmode : tmode, addr);
14397 /* For -maltivec=be, emit the load and follow it up with a
14398 permute to swap the elements. */
14399 if (!BYTES_BIG_ENDIAN && VECTOR_ELT_ORDER_BIG)
14401 rtx temp = gen_reg_rtx (tmode);
14402 emit_insn (gen_rtx_SET (temp, addr));
14404 rtx sel = swap_selector_for_mode (tmode);
14405 rtx vperm = gen_rtx_UNSPEC (tmode, gen_rtvec (3, temp, temp, sel),
14406 UNSPEC_VPERM);
14407 emit_insn (gen_rtx_SET (target, vperm));
14409 else
14410 emit_insn (gen_rtx_SET (target, addr));
14412 else
14414 if (op0 == const0_rtx)
14415 addr = gen_rtx_MEM (blk ? BLKmode : tmode, op1);
14416 else
14418 op0 = copy_to_mode_reg (mode0, op0);
14419 addr = gen_rtx_MEM (blk ? BLKmode : tmode,
14420 gen_rtx_PLUS (Pmode, op1, op0));
14423 pat = GEN_FCN (icode) (target, addr);
14424 if (! pat)
14425 return 0;
14426 emit_insn (pat);
14429 return target;
14432 static rtx
14433 spe_expand_stv_builtin (enum insn_code icode, tree exp)
14435 tree arg0 = CALL_EXPR_ARG (exp, 0);
14436 tree arg1 = CALL_EXPR_ARG (exp, 1);
14437 tree arg2 = CALL_EXPR_ARG (exp, 2);
14438 rtx op0 = expand_normal (arg0);
14439 rtx op1 = expand_normal (arg1);
14440 rtx op2 = expand_normal (arg2);
14441 rtx pat;
14442 machine_mode mode0 = insn_data[icode].operand[0].mode;
14443 machine_mode mode1 = insn_data[icode].operand[1].mode;
14444 machine_mode mode2 = insn_data[icode].operand[2].mode;
14446 /* Invalid arguments. Bail before doing anything stoopid! */
14447 if (arg0 == error_mark_node
14448 || arg1 == error_mark_node
14449 || arg2 == error_mark_node)
14450 return const0_rtx;
14452 if (! (*insn_data[icode].operand[2].predicate) (op0, mode2))
14453 op0 = copy_to_mode_reg (mode2, op0);
14454 if (! (*insn_data[icode].operand[0].predicate) (op1, mode0))
14455 op1 = copy_to_mode_reg (mode0, op1);
14456 if (! (*insn_data[icode].operand[1].predicate) (op2, mode1))
14457 op2 = copy_to_mode_reg (mode1, op2);
14459 pat = GEN_FCN (icode) (op1, op2, op0);
14460 if (pat)
14461 emit_insn (pat);
14462 return NULL_RTX;
14465 static rtx
14466 paired_expand_stv_builtin (enum insn_code icode, tree exp)
14468 tree arg0 = CALL_EXPR_ARG (exp, 0);
14469 tree arg1 = CALL_EXPR_ARG (exp, 1);
14470 tree arg2 = CALL_EXPR_ARG (exp, 2);
14471 rtx op0 = expand_normal (arg0);
14472 rtx op1 = expand_normal (arg1);
14473 rtx op2 = expand_normal (arg2);
14474 rtx pat, addr;
14475 machine_mode tmode = insn_data[icode].operand[0].mode;
14476 machine_mode mode1 = Pmode;
14477 machine_mode mode2 = Pmode;
14479 /* Invalid arguments. Bail before doing anything stupid! */
14480 if (arg0 == error_mark_node
14481 || arg1 == error_mark_node
14482 || arg2 == error_mark_node)
14483 return const0_rtx;
14485 if (! (*insn_data[icode].operand[1].predicate) (op0, tmode))
14486 op0 = copy_to_mode_reg (tmode, op0);
14488 op2 = copy_to_mode_reg (mode2, op2);
14490 if (op1 == const0_rtx)
14492 addr = gen_rtx_MEM (tmode, op2);
14494 else
14496 op1 = copy_to_mode_reg (mode1, op1);
14497 addr = gen_rtx_MEM (tmode, gen_rtx_PLUS (Pmode, op1, op2));
14500 pat = GEN_FCN (icode) (addr, op0);
14501 if (pat)
14502 emit_insn (pat);
14503 return NULL_RTX;
14506 static rtx
14507 altivec_expand_stv_builtin (enum insn_code icode, tree exp)
14509 tree arg0 = CALL_EXPR_ARG (exp, 0);
14510 tree arg1 = CALL_EXPR_ARG (exp, 1);
14511 tree arg2 = CALL_EXPR_ARG (exp, 2);
14512 rtx op0 = expand_normal (arg0);
14513 rtx op1 = expand_normal (arg1);
14514 rtx op2 = expand_normal (arg2);
14515 rtx pat, addr, rawaddr;
14516 machine_mode tmode = insn_data[icode].operand[0].mode;
14517 machine_mode smode = insn_data[icode].operand[1].mode;
14518 machine_mode mode1 = Pmode;
14519 machine_mode mode2 = Pmode;
14521 /* Invalid arguments. Bail before doing anything stupid! */
14522 if (arg0 == error_mark_node
14523 || arg1 == error_mark_node
14524 || arg2 == error_mark_node)
14525 return const0_rtx;
14527 op2 = copy_to_mode_reg (mode2, op2);
14529 /* For STVX, express the RTL accurately by ANDing the address with -16.
14530 STVXL and STVE*X expand to use UNSPECs to hide their special behavior,
14531 so the raw address is fine. */
14532 if (icode == CODE_FOR_altivec_stvx_v2df_2op
14533 || icode == CODE_FOR_altivec_stvx_v2di_2op
14534 || icode == CODE_FOR_altivec_stvx_v4sf_2op
14535 || icode == CODE_FOR_altivec_stvx_v4si_2op
14536 || icode == CODE_FOR_altivec_stvx_v8hi_2op
14537 || icode == CODE_FOR_altivec_stvx_v16qi_2op)
14539 if (op1 == const0_rtx)
14540 rawaddr = op2;
14541 else
14543 op1 = copy_to_mode_reg (mode1, op1);
14544 rawaddr = gen_rtx_PLUS (Pmode, op2, op1);
14547 addr = gen_rtx_AND (Pmode, rawaddr, gen_rtx_CONST_INT (Pmode, -16));
14548 addr = gen_rtx_MEM (tmode, addr);
14550 op0 = copy_to_mode_reg (tmode, op0);
14552 /* For -maltivec=be, emit a permute to swap the elements, followed
14553 by the store. */
14554 if (!BYTES_BIG_ENDIAN && VECTOR_ELT_ORDER_BIG)
14556 rtx temp = gen_reg_rtx (tmode);
14557 rtx sel = swap_selector_for_mode (tmode);
14558 rtx vperm = gen_rtx_UNSPEC (tmode, gen_rtvec (3, op0, op0, sel),
14559 UNSPEC_VPERM);
14560 emit_insn (gen_rtx_SET (temp, vperm));
14561 emit_insn (gen_rtx_SET (addr, temp));
14563 else
14564 emit_insn (gen_rtx_SET (addr, op0));
14566 else
14568 if (! (*insn_data[icode].operand[1].predicate) (op0, smode))
14569 op0 = copy_to_mode_reg (smode, op0);
14571 if (op1 == const0_rtx)
14572 addr = gen_rtx_MEM (tmode, op2);
14573 else
14575 op1 = copy_to_mode_reg (mode1, op1);
14576 addr = gen_rtx_MEM (tmode, gen_rtx_PLUS (Pmode, op2, op1));
14579 pat = GEN_FCN (icode) (addr, op0);
14580 if (pat)
14581 emit_insn (pat);
14584 return NULL_RTX;
14587 /* Return the appropriate SPR number associated with the given builtin. */
14588 static inline HOST_WIDE_INT
14589 htm_spr_num (enum rs6000_builtins code)
14591 if (code == HTM_BUILTIN_GET_TFHAR
14592 || code == HTM_BUILTIN_SET_TFHAR)
14593 return TFHAR_SPR;
14594 else if (code == HTM_BUILTIN_GET_TFIAR
14595 || code == HTM_BUILTIN_SET_TFIAR)
14596 return TFIAR_SPR;
14597 else if (code == HTM_BUILTIN_GET_TEXASR
14598 || code == HTM_BUILTIN_SET_TEXASR)
14599 return TEXASR_SPR;
14600 gcc_assert (code == HTM_BUILTIN_GET_TEXASRU
14601 || code == HTM_BUILTIN_SET_TEXASRU);
14602 return TEXASRU_SPR;
14605 /* Return the appropriate SPR regno associated with the given builtin. */
14606 static inline HOST_WIDE_INT
14607 htm_spr_regno (enum rs6000_builtins code)
14609 if (code == HTM_BUILTIN_GET_TFHAR
14610 || code == HTM_BUILTIN_SET_TFHAR)
14611 return TFHAR_REGNO;
14612 else if (code == HTM_BUILTIN_GET_TFIAR
14613 || code == HTM_BUILTIN_SET_TFIAR)
14614 return TFIAR_REGNO;
14615 gcc_assert (code == HTM_BUILTIN_GET_TEXASR
14616 || code == HTM_BUILTIN_SET_TEXASR
14617 || code == HTM_BUILTIN_GET_TEXASRU
14618 || code == HTM_BUILTIN_SET_TEXASRU);
14619 return TEXASR_REGNO;
14622 /* Return the correct ICODE value depending on whether we are
14623 setting or reading the HTM SPRs. */
14624 static inline enum insn_code
14625 rs6000_htm_spr_icode (bool nonvoid)
14627 if (nonvoid)
14628 return (TARGET_POWERPC64) ? CODE_FOR_htm_mfspr_di : CODE_FOR_htm_mfspr_si;
14629 else
14630 return (TARGET_POWERPC64) ? CODE_FOR_htm_mtspr_di : CODE_FOR_htm_mtspr_si;
14633 /* Expand the HTM builtin in EXP and store the result in TARGET.
14634 Store true in *EXPANDEDP if we found a builtin to expand. */
14635 static rtx
14636 htm_expand_builtin (tree exp, rtx target, bool * expandedp)
14638 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
14639 bool nonvoid = TREE_TYPE (TREE_TYPE (fndecl)) != void_type_node;
14640 enum rs6000_builtins fcode = (enum rs6000_builtins) DECL_FUNCTION_CODE (fndecl);
14641 const struct builtin_description *d;
14642 size_t i;
14644 *expandedp = true;
14646 if (!TARGET_POWERPC64
14647 && (fcode == HTM_BUILTIN_TABORTDC
14648 || fcode == HTM_BUILTIN_TABORTDCI))
14650 size_t uns_fcode = (size_t)fcode;
14651 const char *name = rs6000_builtin_info[uns_fcode].name;
14652 error ("builtin %s is only valid in 64-bit mode", name);
14653 return const0_rtx;
14656 /* Expand the HTM builtins. */
14657 d = bdesc_htm;
14658 for (i = 0; i < ARRAY_SIZE (bdesc_htm); i++, d++)
14659 if (d->code == fcode)
14661 rtx op[MAX_HTM_OPERANDS], pat;
14662 int nopnds = 0;
14663 tree arg;
14664 call_expr_arg_iterator iter;
14665 unsigned attr = rs6000_builtin_info[fcode].attr;
14666 enum insn_code icode = d->icode;
14667 const struct insn_operand_data *insn_op;
14668 bool uses_spr = (attr & RS6000_BTC_SPR);
14669 rtx cr = NULL_RTX;
14671 if (uses_spr)
14672 icode = rs6000_htm_spr_icode (nonvoid);
14673 insn_op = &insn_data[icode].operand[0];
14675 if (nonvoid)
14677 machine_mode tmode = (uses_spr) ? insn_op->mode : SImode;
14678 if (!target
14679 || GET_MODE (target) != tmode
14680 || (uses_spr && !(*insn_op->predicate) (target, tmode)))
14681 target = gen_reg_rtx (tmode);
14682 if (uses_spr)
14683 op[nopnds++] = target;
14686 FOR_EACH_CALL_EXPR_ARG (arg, iter, exp)
14688 if (arg == error_mark_node || nopnds >= MAX_HTM_OPERANDS)
14689 return const0_rtx;
14691 insn_op = &insn_data[icode].operand[nopnds];
14693 op[nopnds] = expand_normal (arg);
14695 if (!(*insn_op->predicate) (op[nopnds], insn_op->mode))
14697 if (!strcmp (insn_op->constraint, "n"))
14699 int arg_num = (nonvoid) ? nopnds : nopnds + 1;
14700 if (!CONST_INT_P (op[nopnds]))
14701 error ("argument %d must be an unsigned literal", arg_num);
14702 else
14703 error ("argument %d is an unsigned literal that is "
14704 "out of range", arg_num);
14705 return const0_rtx;
14707 op[nopnds] = copy_to_mode_reg (insn_op->mode, op[nopnds]);
14710 nopnds++;
14713 /* Handle the builtins for extended mnemonics. These accept
14714 no arguments, but map to builtins that take arguments. */
14715 switch (fcode)
14717 case HTM_BUILTIN_TENDALL: /* Alias for: tend. 1 */
14718 case HTM_BUILTIN_TRESUME: /* Alias for: tsr. 1 */
14719 op[nopnds++] = GEN_INT (1);
14720 if (flag_checking)
14721 attr |= RS6000_BTC_UNARY;
14722 break;
14723 case HTM_BUILTIN_TSUSPEND: /* Alias for: tsr. 0 */
14724 op[nopnds++] = GEN_INT (0);
14725 if (flag_checking)
14726 attr |= RS6000_BTC_UNARY;
14727 break;
14728 default:
14729 break;
14732 /* If this builtin accesses SPRs, then pass in the appropriate
14733 SPR number and SPR regno as the last two operands. */
14734 if (uses_spr)
14736 machine_mode mode = (TARGET_POWERPC64) ? DImode : SImode;
14737 op[nopnds++] = gen_rtx_CONST_INT (mode, htm_spr_num (fcode));
14738 op[nopnds++] = gen_rtx_REG (mode, htm_spr_regno (fcode));
14740 /* If this builtin accesses a CR, then pass in a scratch
14741 CR as the last operand. */
14742 else if (attr & RS6000_BTC_CR)
14743 { cr = gen_reg_rtx (CCmode);
14744 op[nopnds++] = cr;
14747 if (flag_checking)
14749 int expected_nopnds = 0;
14750 if ((attr & RS6000_BTC_TYPE_MASK) == RS6000_BTC_UNARY)
14751 expected_nopnds = 1;
14752 else if ((attr & RS6000_BTC_TYPE_MASK) == RS6000_BTC_BINARY)
14753 expected_nopnds = 2;
14754 else if ((attr & RS6000_BTC_TYPE_MASK) == RS6000_BTC_TERNARY)
14755 expected_nopnds = 3;
14756 if (!(attr & RS6000_BTC_VOID))
14757 expected_nopnds += 1;
14758 if (uses_spr)
14759 expected_nopnds += 2;
14761 gcc_assert (nopnds == expected_nopnds
14762 && nopnds <= MAX_HTM_OPERANDS);
14765 switch (nopnds)
14767 case 1:
14768 pat = GEN_FCN (icode) (op[0]);
14769 break;
14770 case 2:
14771 pat = GEN_FCN (icode) (op[0], op[1]);
14772 break;
14773 case 3:
14774 pat = GEN_FCN (icode) (op[0], op[1], op[2]);
14775 break;
14776 case 4:
14777 pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3]);
14778 break;
14779 default:
14780 gcc_unreachable ();
14782 if (!pat)
14783 return NULL_RTX;
14784 emit_insn (pat);
14786 if (attr & RS6000_BTC_CR)
14788 if (fcode == HTM_BUILTIN_TBEGIN)
14790 /* Emit code to set TARGET to true or false depending on
14791 whether the tbegin. instruction succeeded or failed
14792 to start a transaction. We do this by placing the 1's
14793 complement of CR's EQ bit into TARGET. */
14794 rtx scratch = gen_reg_rtx (SImode);
14795 emit_insn (gen_rtx_SET (scratch,
14796 gen_rtx_EQ (SImode, cr,
14797 const0_rtx)));
14798 emit_insn (gen_rtx_SET (target,
14799 gen_rtx_XOR (SImode, scratch,
14800 GEN_INT (1))));
14802 else
14804 /* Emit code to copy the 4-bit condition register field
14805 CR into the least significant end of register TARGET. */
14806 rtx scratch1 = gen_reg_rtx (SImode);
14807 rtx scratch2 = gen_reg_rtx (SImode);
14808 rtx subreg = simplify_gen_subreg (CCmode, scratch1, SImode, 0);
14809 emit_insn (gen_movcc (subreg, cr));
14810 emit_insn (gen_lshrsi3 (scratch2, scratch1, GEN_INT (28)));
14811 emit_insn (gen_andsi3 (target, scratch2, GEN_INT (0xf)));
14815 if (nonvoid)
14816 return target;
14817 return const0_rtx;
14820 *expandedp = false;
14821 return NULL_RTX;
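/* Editor's note: a scalar model of the tbegin. result mapping emitted
   above (EQ test, then XOR with 1).  Hypothetical helper, assuming, as
   the comment above states, that CR's EQ bit is set when the
   transaction failed to start.  */

static inline int
tbegin_result_model (int cr_eq_bit)
{
  /* scratch = (cr == 0); target = scratch ^ 1: the builtin returns 1
     on success, i.e. the complement of the EQ bit.  */
  return cr_eq_bit ^ 1;
}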
14824 /* Expand the CPU builtin in FCODE and store the result in TARGET. */
14826 static rtx
14827 cpu_expand_builtin (enum rs6000_builtins fcode, tree exp ATTRIBUTE_UNUSED,
14828 rtx target)
14830 /* __builtin_cpu_init () is a nop, so expand to nothing. */
14831 if (fcode == RS6000_BUILTIN_CPU_INIT)
14832 return const0_rtx;
14834 if (target == 0 || GET_MODE (target) != SImode)
14835 target = gen_reg_rtx (SImode);
14837 #ifdef TARGET_LIBC_PROVIDES_HWCAP_IN_TCB
14838 tree arg = TREE_OPERAND (CALL_EXPR_ARG (exp, 0), 0);
14839 if (TREE_CODE (arg) != STRING_CST)
14841 error ("builtin %s only accepts a string argument",
14842 rs6000_builtin_info[(size_t) fcode].name);
14843 return const0_rtx;
14846 if (fcode == RS6000_BUILTIN_CPU_IS)
14848 const char *cpu = TREE_STRING_POINTER (arg);
14849 rtx cpuid = NULL_RTX;
14850 for (size_t i = 0; i < ARRAY_SIZE (cpu_is_info); i++)
14851 if (strcmp (cpu, cpu_is_info[i].cpu) == 0)
14853 /* The CPUID value in the TCB is offset by _DL_FIRST_PLATFORM. */
14854 cpuid = GEN_INT (cpu_is_info[i].cpuid + _DL_FIRST_PLATFORM);
14855 break;
14857 if (cpuid == NULL_RTX)
14859 /* Invalid CPU argument. */
14860 error ("cpu %s is an invalid argument to builtin %s",
14861 cpu, rs6000_builtin_info[(size_t) fcode].name);
14862 return const0_rtx;
14865 rtx platform = gen_reg_rtx (SImode);
14866 rtx tcbmem = gen_const_mem (SImode,
14867 gen_rtx_PLUS (Pmode,
14868 gen_rtx_REG (Pmode, TLS_REGNUM),
14869 GEN_INT (TCB_PLATFORM_OFFSET)));
14870 emit_move_insn (platform, tcbmem);
14871 emit_insn (gen_eqsi3 (target, platform, cpuid));
14873 else if (fcode == RS6000_BUILTIN_CPU_SUPPORTS)
14875 const char *hwcap = TREE_STRING_POINTER (arg);
14876 rtx mask = NULL_RTX;
14877 int hwcap_offset;
14878 for (size_t i = 0; i < ARRAY_SIZE (cpu_supports_info); i++)
14879 if (strcmp (hwcap, cpu_supports_info[i].hwcap) == 0)
14881 mask = GEN_INT (cpu_supports_info[i].mask);
14882 hwcap_offset = TCB_HWCAP_OFFSET (cpu_supports_info[i].id);
14883 break;
14885 if (mask == NULL_RTX)
14887 /* Invalid HWCAP argument. */
14888 error ("hwcap %s is an invalid argument to builtin %s",
14889 hwcap, rs6000_builtin_info[(size_t) fcode].name);
14890 return const0_rtx;
14893 rtx tcb_hwcap = gen_reg_rtx (SImode);
14894 rtx tcbmem = gen_const_mem (SImode,
14895 gen_rtx_PLUS (Pmode,
14896 gen_rtx_REG (Pmode, TLS_REGNUM),
14897 GEN_INT (hwcap_offset)));
14898 emit_move_insn (tcb_hwcap, tcbmem);
14899 rtx scratch1 = gen_reg_rtx (SImode);
14900 emit_insn (gen_rtx_SET (scratch1, gen_rtx_AND (SImode, tcb_hwcap, mask)));
14901 rtx scratch2 = gen_reg_rtx (SImode);
14902 emit_insn (gen_eqsi3 (scratch2, scratch1, const0_rtx));
14903 emit_insn (gen_rtx_SET (target, gen_rtx_XOR (SImode, scratch2, const1_rtx)));
14906 /* Record that we have expanded a CPU builtin, so that we can later
14907 emit a reference to the special symbol exported by LIBC to ensure we
14908 do not link against an old LIBC that doesn't support this feature. */
14909 cpu_builtin_p = true;
14911 #else
14912 /* For old LIBCs, always return FALSE. */
14913 emit_move_insn (target, GEN_INT (0));
14914 #endif /* TARGET_LIBC_PROVIDES_HWCAP_IN_TCB */
14916 return target;
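/* Editor's note: a minimal usage sketch for the CPU builtins expanded
   above.  "power9" and "vsx" are valid cpu/hwcap names; the dispatch
   function itself is hypothetical illustration, not part of this
   file.  */

extern void power9_path (void), vsx_path (void), generic_path (void);

static void
cpu_dispatch_example (void)
{
  __builtin_cpu_init ();		   /* expands to nothing, see above */
  if (__builtin_cpu_is ("power9"))	   /* one TCB load plus a compare */
    power9_path ();
  else if (__builtin_cpu_supports ("vsx")) /* TCB load, AND mask, compare */
    vsx_path ();
  else
    generic_path ();
}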
14919 static rtx
14920 rs6000_expand_ternop_builtin (enum insn_code icode, tree exp, rtx target)
14922 rtx pat;
14923 tree arg0 = CALL_EXPR_ARG (exp, 0);
14924 tree arg1 = CALL_EXPR_ARG (exp, 1);
14925 tree arg2 = CALL_EXPR_ARG (exp, 2);
14926 rtx op0 = expand_normal (arg0);
14927 rtx op1 = expand_normal (arg1);
14928 rtx op2 = expand_normal (arg2);
14929 machine_mode tmode = insn_data[icode].operand[0].mode;
14930 machine_mode mode0 = insn_data[icode].operand[1].mode;
14931 machine_mode mode1 = insn_data[icode].operand[2].mode;
14932 machine_mode mode2 = insn_data[icode].operand[3].mode;
14934 if (icode == CODE_FOR_nothing)
14935 /* Builtin not supported on this processor. */
14936 return 0;
14938 /* If we got invalid arguments bail out before generating bad rtl. */
14939 if (arg0 == error_mark_node
14940 || arg1 == error_mark_node
14941 || arg2 == error_mark_node)
14942 return const0_rtx;
14944 /* Check and prepare argument depending on the instruction code.
14946 Note that a switch statement instead of the sequence of tests
14947 would be incorrect as many of the CODE_FOR values could be
14948 CODE_FOR_nothing and that would yield multiple alternatives
14949 with identical values. We'd never reach here at runtime in
14950 this case. */
14951 if (icode == CODE_FOR_altivec_vsldoi_v4sf
14952 || icode == CODE_FOR_altivec_vsldoi_v4si
14953 || icode == CODE_FOR_altivec_vsldoi_v8hi
14954 || icode == CODE_FOR_altivec_vsldoi_v16qi)
14956 /* Only allow 4-bit unsigned literals. */
14957 STRIP_NOPS (arg2);
14958 if (TREE_CODE (arg2) != INTEGER_CST
14959 || TREE_INT_CST_LOW (arg2) & ~0xf)
14961 error ("argument 3 must be a 4-bit unsigned literal");
14962 return const0_rtx;
14965 else if (icode == CODE_FOR_vsx_xxpermdi_v2df
14966 || icode == CODE_FOR_vsx_xxpermdi_v2di
14967 || icode == CODE_FOR_vsx_xxsldwi_v16qi
14968 || icode == CODE_FOR_vsx_xxsldwi_v8hi
14969 || icode == CODE_FOR_vsx_xxsldwi_v4si
14970 || icode == CODE_FOR_vsx_xxsldwi_v4sf
14971 || icode == CODE_FOR_vsx_xxsldwi_v2di
14972 || icode == CODE_FOR_vsx_xxsldwi_v2df)
14974 /* Only allow 2-bit unsigned literals. */
14975 STRIP_NOPS (arg2);
14976 if (TREE_CODE (arg2) != INTEGER_CST
14977 || TREE_INT_CST_LOW (arg2) & ~0x3)
14979 error ("argument 3 must be a 2-bit unsigned literal");
14980 return const0_rtx;
14983 else if (icode == CODE_FOR_vsx_set_v2df
14984 || icode == CODE_FOR_vsx_set_v2di
14985 || icode == CODE_FOR_bcdadd
14986 || icode == CODE_FOR_bcdadd_lt
14987 || icode == CODE_FOR_bcdadd_eq
14988 || icode == CODE_FOR_bcdadd_gt
14989 || icode == CODE_FOR_bcdsub
14990 || icode == CODE_FOR_bcdsub_lt
14991 || icode == CODE_FOR_bcdsub_eq
14992 || icode == CODE_FOR_bcdsub_gt)
14994 /* Only allow 1-bit unsigned literals. */
14995 STRIP_NOPS (arg2);
14996 if (TREE_CODE (arg2) != INTEGER_CST
14997 || TREE_INT_CST_LOW (arg2) & ~0x1)
14999 error ("argument 3 must be a 1-bit unsigned literal");
15000 return const0_rtx;
15003 else if (icode == CODE_FOR_dfp_ddedpd_dd
15004 || icode == CODE_FOR_dfp_ddedpd_td)
15006 /* Only allow 2-bit unsigned literals where the value is 0 or 2. */
15007 STRIP_NOPS (arg0);
15008 if (TREE_CODE (arg0) != INTEGER_CST
15009 || TREE_INT_CST_LOW (arg0) & ~0x3)
15011 error ("argument 1 must be 0 or 2");
15012 return const0_rtx;
15015 else if (icode == CODE_FOR_dfp_denbcd_dd
15016 || icode == CODE_FOR_dfp_denbcd_td)
15018 /* Only allow 1-bit unsigned literals. */
15019 STRIP_NOPS (arg0);
15020 if (TREE_CODE (arg0) != INTEGER_CST
15021 || TREE_INT_CST_LOW (arg0) & ~0x1)
15023 error ("argument 1 must be a 1-bit unsigned literal");
15024 return const0_rtx;
15027 else if (icode == CODE_FOR_dfp_dscli_dd
15028 || icode == CODE_FOR_dfp_dscli_td
15029 || icode == CODE_FOR_dfp_dscri_dd
15030 || icode == CODE_FOR_dfp_dscri_td)
15032 /* Only allow 6-bit unsigned literals. */
15033 STRIP_NOPS (arg1);
15034 if (TREE_CODE (arg1) != INTEGER_CST
15035 || TREE_INT_CST_LOW (arg1) & ~0x3f)
15037 error ("argument 2 must be a 6-bit unsigned literal");
15038 return const0_rtx;
15041 else if (icode == CODE_FOR_crypto_vshasigmaw
15042 || icode == CODE_FOR_crypto_vshasigmad)
15044 /* Check that the 2nd and 3rd arguments are integer constants in
15045 range, and prepare the arguments. */
15046 STRIP_NOPS (arg1);
15047 if (TREE_CODE (arg1) != INTEGER_CST || wi::geu_p (arg1, 2))
15049 error ("argument 2 must be 0 or 1");
15050 return const0_rtx;
15053 STRIP_NOPS (arg2);
15054 if (TREE_CODE (arg2) != INTEGER_CST || wi::geu_p (arg2, 16))
15056 error ("argument 3 must be in the range 0..15");
15057 return const0_rtx;
15061 if (target == 0
15062 || GET_MODE (target) != tmode
15063 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
15064 target = gen_reg_rtx (tmode);
15066 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
15067 op0 = copy_to_mode_reg (mode0, op0);
15068 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
15069 op1 = copy_to_mode_reg (mode1, op1);
15070 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
15071 op2 = copy_to_mode_reg (mode2, op2);
15073 if (TARGET_PAIRED_FLOAT && icode == CODE_FOR_selv2sf4)
15074 pat = GEN_FCN (icode) (target, op0, op1, op2, CONST0_RTX (SFmode));
15075 else
15076 pat = GEN_FCN (icode) (target, op0, op1, op2);
15077 if (! pat)
15078 return 0;
15079 emit_insn (pat);
15081 return target;
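/* Editor's note: the literal checks above all instantiate one pattern,
   "INTEGER_CST whose value fits in N bits".  A hypothetical generic
   form (not used by this file) would look like:  */

static bool
ternop_literal_fits_p (tree arg, unsigned int nbits)
{
  unsigned HOST_WIDE_INT mask = ((unsigned HOST_WIDE_INT) 1 << nbits) - 1;
  /* Non-constants fail; so does any set bit above the low NBITS.  */
  return (TREE_CODE (arg) == INTEGER_CST
	  && (TREE_INT_CST_LOW (arg) & ~mask) == 0);
}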
15084 /* Expand the lvx builtins. */
15085 static rtx
15086 altivec_expand_ld_builtin (tree exp, rtx target, bool *expandedp)
15088 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
15089 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
15090 tree arg0;
15091 machine_mode tmode, mode0;
15092 rtx pat, op0;
15093 enum insn_code icode;
15095 switch (fcode)
15097 case ALTIVEC_BUILTIN_LD_INTERNAL_16qi:
15098 icode = CODE_FOR_vector_altivec_load_v16qi;
15099 break;
15100 case ALTIVEC_BUILTIN_LD_INTERNAL_8hi:
15101 icode = CODE_FOR_vector_altivec_load_v8hi;
15102 break;
15103 case ALTIVEC_BUILTIN_LD_INTERNAL_4si:
15104 icode = CODE_FOR_vector_altivec_load_v4si;
15105 break;
15106 case ALTIVEC_BUILTIN_LD_INTERNAL_4sf:
15107 icode = CODE_FOR_vector_altivec_load_v4sf;
15108 break;
15109 case ALTIVEC_BUILTIN_LD_INTERNAL_2df:
15110 icode = CODE_FOR_vector_altivec_load_v2df;
15111 break;
15112 case ALTIVEC_BUILTIN_LD_INTERNAL_2di:
15113 icode = CODE_FOR_vector_altivec_load_v2di;
15114 break;
15115 case ALTIVEC_BUILTIN_LD_INTERNAL_1ti:
15116 icode = CODE_FOR_vector_altivec_load_v1ti;
15117 break;
15118 default:
15119 *expandedp = false;
15120 return NULL_RTX;
15123 *expandedp = true;
15125 arg0 = CALL_EXPR_ARG (exp, 0);
15126 op0 = expand_normal (arg0);
15127 tmode = insn_data[icode].operand[0].mode;
15128 mode0 = insn_data[icode].operand[1].mode;
15130 if (target == 0
15131 || GET_MODE (target) != tmode
15132 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
15133 target = gen_reg_rtx (tmode);
15135 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
15136 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
15138 pat = GEN_FCN (icode) (target, op0);
15139 if (! pat)
15140 return 0;
15141 emit_insn (pat);
15142 return target;
15145 /* Expand the stvx builtins. */
15146 static rtx
15147 altivec_expand_st_builtin (tree exp, rtx target ATTRIBUTE_UNUSED,
15148 bool *expandedp)
15150 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
15151 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
15152 tree arg0, arg1;
15153 machine_mode mode0, mode1;
15154 rtx pat, op0, op1;
15155 enum insn_code icode;
15157 switch (fcode)
15159 case ALTIVEC_BUILTIN_ST_INTERNAL_16qi:
15160 icode = CODE_FOR_vector_altivec_store_v16qi;
15161 break;
15162 case ALTIVEC_BUILTIN_ST_INTERNAL_8hi:
15163 icode = CODE_FOR_vector_altivec_store_v8hi;
15164 break;
15165 case ALTIVEC_BUILTIN_ST_INTERNAL_4si:
15166 icode = CODE_FOR_vector_altivec_store_v4si;
15167 break;
15168 case ALTIVEC_BUILTIN_ST_INTERNAL_4sf:
15169 icode = CODE_FOR_vector_altivec_store_v4sf;
15170 break;
15171 case ALTIVEC_BUILTIN_ST_INTERNAL_2df:
15172 icode = CODE_FOR_vector_altivec_store_v2df;
15173 break;
15174 case ALTIVEC_BUILTIN_ST_INTERNAL_2di:
15175 icode = CODE_FOR_vector_altivec_store_v2di;
15176 break;
15177 case ALTIVEC_BUILTIN_ST_INTERNAL_1ti:
15178 icode = CODE_FOR_vector_altivec_store_v1ti;
15179 break;
15180 default:
15181 *expandedp = false;
15182 return NULL_RTX;
15185 arg0 = CALL_EXPR_ARG (exp, 0);
15186 arg1 = CALL_EXPR_ARG (exp, 1);
15187 op0 = expand_normal (arg0);
15188 op1 = expand_normal (arg1);
15189 mode0 = insn_data[icode].operand[0].mode;
15190 mode1 = insn_data[icode].operand[1].mode;
15192 if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
15193 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
15194 if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
15195 op1 = copy_to_mode_reg (mode1, op1);
15197 pat = GEN_FCN (icode) (op0, op1);
15198 if (pat)
15199 emit_insn (pat);
15201 *expandedp = true;
15202 return NULL_RTX;
15205 /* Expand the dst builtins. */
15206 static rtx
15207 altivec_expand_dst_builtin (tree exp, rtx target ATTRIBUTE_UNUSED,
15208 bool *expandedp)
15210 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
15211 enum rs6000_builtins fcode = (enum rs6000_builtins) DECL_FUNCTION_CODE (fndecl);
15212 tree arg0, arg1, arg2;
15213 machine_mode mode0, mode1;
15214 rtx pat, op0, op1, op2;
15215 const struct builtin_description *d;
15216 size_t i;
15218 *expandedp = false;
15220 /* Handle DST variants. */
15221 d = bdesc_dst;
15222 for (i = 0; i < ARRAY_SIZE (bdesc_dst); i++, d++)
15223 if (d->code == fcode)
15225 arg0 = CALL_EXPR_ARG (exp, 0);
15226 arg1 = CALL_EXPR_ARG (exp, 1);
15227 arg2 = CALL_EXPR_ARG (exp, 2);
15228 op0 = expand_normal (arg0);
15229 op1 = expand_normal (arg1);
15230 op2 = expand_normal (arg2);
15231 mode0 = insn_data[d->icode].operand[0].mode;
15232 mode1 = insn_data[d->icode].operand[1].mode;
15234 /* Invalid arguments, bail out before generating bad rtl. */
15235 if (arg0 == error_mark_node
15236 || arg1 == error_mark_node
15237 || arg2 == error_mark_node)
15238 return const0_rtx;
15240 *expandedp = true;
15241 STRIP_NOPS (arg2);
15242 if (TREE_CODE (arg2) != INTEGER_CST
15243 || TREE_INT_CST_LOW (arg2) & ~0x3)
15245 error ("argument to %qs must be a 2-bit unsigned literal", d->name);
15246 return const0_rtx;
15249 if (! (*insn_data[d->icode].operand[0].predicate) (op0, mode0))
15250 op0 = copy_to_mode_reg (Pmode, op0);
15251 if (! (*insn_data[d->icode].operand[1].predicate) (op1, mode1))
15252 op1 = copy_to_mode_reg (mode1, op1);
15254 pat = GEN_FCN (d->icode) (op0, op1, op2);
15255 if (pat != 0)
15256 emit_insn (pat);
15258 return NULL_RTX;
15261 return NULL_RTX;
15264 /* Expand vec_init builtin. */
15265 static rtx
15266 altivec_expand_vec_init_builtin (tree type, tree exp, rtx target)
15268 machine_mode tmode = TYPE_MODE (type);
15269 machine_mode inner_mode = GET_MODE_INNER (tmode);
15270 int i, n_elt = GET_MODE_NUNITS (tmode);
15272 gcc_assert (VECTOR_MODE_P (tmode));
15273 gcc_assert (n_elt == call_expr_nargs (exp));
15275 if (!target || !register_operand (target, tmode))
15276 target = gen_reg_rtx (tmode);
15278 /* If we have a vector comprised of a single element, such as V1TImode, do
15279 the initialization directly. */
15280 if (n_elt == 1 && GET_MODE_SIZE (tmode) == GET_MODE_SIZE (inner_mode))
15282 rtx x = expand_normal (CALL_EXPR_ARG (exp, 0));
15283 emit_move_insn (target, gen_lowpart (tmode, x));
15285 else
15287 rtvec v = rtvec_alloc (n_elt);
15289 for (i = 0; i < n_elt; ++i)
15291 rtx x = expand_normal (CALL_EXPR_ARG (exp, i));
15292 RTVEC_ELT (v, i) = gen_lowpart (inner_mode, x);
15295 rs6000_expand_vector_init (target, gen_rtx_PARALLEL (tmode, v));
15298 return target;
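/* Editor's note: vec_init is what vector constructors reach here as.
   A hypothetical user-level example (compiles with -maltivec; shown
   for illustration, not part of this file):  */

__attribute__ ((unused)) static __vector int
vec_init_example (int a, int b, int c, int d)
{
  /* Each initializer becomes one RTVEC_ELT of the PARALLEL built
     above and handed to rs6000_expand_vector_init.  */
  return (__vector int) { a, b, c, d };
}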
15301 /* Return the integer constant in ARG. Constrain it to be in the range
15302 of the subparts of VEC_TYPE; issue an error if not. */
15304 static int
15305 get_element_number (tree vec_type, tree arg)
15307 unsigned HOST_WIDE_INT elt, max = TYPE_VECTOR_SUBPARTS (vec_type) - 1;
15309 if (!tree_fits_uhwi_p (arg)
15310 || (elt = tree_to_uhwi (arg), elt > max))
15312 error ("selector must be an integer constant in the range 0..%wi", max);
15313 return 0;
15316 return elt;
15319 /* Expand vec_set builtin. */
15320 static rtx
15321 altivec_expand_vec_set_builtin (tree exp)
15323 machine_mode tmode, mode1;
15324 tree arg0, arg1, arg2;
15325 int elt;
15326 rtx op0, op1;
15328 arg0 = CALL_EXPR_ARG (exp, 0);
15329 arg1 = CALL_EXPR_ARG (exp, 1);
15330 arg2 = CALL_EXPR_ARG (exp, 2);
15332 tmode = TYPE_MODE (TREE_TYPE (arg0));
15333 mode1 = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
15334 gcc_assert (VECTOR_MODE_P (tmode));
15336 op0 = expand_expr (arg0, NULL_RTX, tmode, EXPAND_NORMAL);
15337 op1 = expand_expr (arg1, NULL_RTX, mode1, EXPAND_NORMAL);
15338 elt = get_element_number (TREE_TYPE (arg0), arg2);
15340 if (GET_MODE (op1) != mode1 && GET_MODE (op1) != VOIDmode)
15341 op1 = convert_modes (mode1, GET_MODE (op1), op1, true);
15343 op0 = force_reg (tmode, op0);
15344 op1 = force_reg (mode1, op1);
15346 rs6000_expand_vector_set (op0, op1, elt);
15348 return op0;
15351 /* Expand vec_ext builtin. */
15352 static rtx
15353 altivec_expand_vec_ext_builtin (tree exp, rtx target)
15355 machine_mode tmode, mode0;
15356 tree arg0, arg1;
15357 rtx op0;
15358 rtx op1;
15360 arg0 = CALL_EXPR_ARG (exp, 0);
15361 arg1 = CALL_EXPR_ARG (exp, 1);
15363 op0 = expand_normal (arg0);
15364 op1 = expand_normal (arg1);
15366 /* Call get_element_number to validate arg1 if it is a constant. */
15367 if (TREE_CODE (arg1) == INTEGER_CST)
15368 (void) get_element_number (TREE_TYPE (arg0), arg1);
15370 tmode = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
15371 mode0 = TYPE_MODE (TREE_TYPE (arg0));
15372 gcc_assert (VECTOR_MODE_P (mode0));
15374 op0 = force_reg (mode0, op0);
15376 if (optimize || !target || !register_operand (target, tmode))
15377 target = gen_reg_rtx (tmode);
15379 rs6000_expand_vector_extract (target, op0, op1);
15381 return target;
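/* Editor's note: vec_ext accepts both constant and variable selectors;
   get_element_number above range-checks only the constant case.  A
   hypothetical user-level example (altivec.h and -maltivec assumed):  */

__attribute__ ((unused)) static int
vec_ext_example (__vector int v, int i)
{
  /* Constant selector: checked at compile time against 0..3.  */
  int first = vec_extract (v, 0);
  /* Variable selector: expanded by rs6000_expand_vector_extract.  */
  return first + vec_extract (v, i);
}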
15384 /* Expand the builtin in EXP and store the result in TARGET. Store
15385 true in *EXPANDEDP if we found a builtin to expand. */
15386 static rtx
15387 altivec_expand_builtin (tree exp, rtx target, bool *expandedp)
15389 const struct builtin_description *d;
15390 size_t i;
15391 enum insn_code icode;
15392 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
15393 tree arg0;
15394 rtx op0, pat;
15395 machine_mode tmode, mode0;
15396 enum rs6000_builtins fcode
15397 = (enum rs6000_builtins) DECL_FUNCTION_CODE (fndecl);
15399 if (rs6000_overloaded_builtin_p (fcode))
15401 *expandedp = true;
15402 error ("unresolved overload for Altivec builtin %qF", fndecl);
15404 /* Given it is invalid, just generate a normal call. */
15405 return expand_call (exp, target, false);
15408 target = altivec_expand_ld_builtin (exp, target, expandedp);
15409 if (*expandedp)
15410 return target;
15412 target = altivec_expand_st_builtin (exp, target, expandedp);
15413 if (*expandedp)
15414 return target;
15416 target = altivec_expand_dst_builtin (exp, target, expandedp);
15417 if (*expandedp)
15418 return target;
15420 *expandedp = true;
15422 switch (fcode)
15424 case ALTIVEC_BUILTIN_STVX_V2DF:
15425 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvx_v2df_2op, exp);
15426 case ALTIVEC_BUILTIN_STVX_V2DI:
15427 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvx_v2di_2op, exp);
15428 case ALTIVEC_BUILTIN_STVX_V4SF:
15429 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvx_v4sf_2op, exp);
15430 case ALTIVEC_BUILTIN_STVX:
15431 case ALTIVEC_BUILTIN_STVX_V4SI:
15432 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvx_v4si_2op, exp);
15433 case ALTIVEC_BUILTIN_STVX_V8HI:
15434 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvx_v8hi_2op, exp);
15435 case ALTIVEC_BUILTIN_STVX_V16QI:
15436 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvx_v16qi_2op, exp);
15437 case ALTIVEC_BUILTIN_STVEBX:
15438 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvebx, exp);
15439 case ALTIVEC_BUILTIN_STVEHX:
15440 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvehx, exp);
15441 case ALTIVEC_BUILTIN_STVEWX:
15442 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvewx, exp);
15443 case ALTIVEC_BUILTIN_STVXL_V2DF:
15444 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvxl_v2df, exp);
15445 case ALTIVEC_BUILTIN_STVXL_V2DI:
15446 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvxl_v2di, exp);
15447 case ALTIVEC_BUILTIN_STVXL_V4SF:
15448 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvxl_v4sf, exp);
15449 case ALTIVEC_BUILTIN_STVXL:
15450 case ALTIVEC_BUILTIN_STVXL_V4SI:
15451 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvxl_v4si, exp);
15452 case ALTIVEC_BUILTIN_STVXL_V8HI:
15453 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvxl_v8hi, exp);
15454 case ALTIVEC_BUILTIN_STVXL_V16QI:
15455 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvxl_v16qi, exp);
15457 case ALTIVEC_BUILTIN_STVLX:
15458 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvlx, exp);
15459 case ALTIVEC_BUILTIN_STVLXL:
15460 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvlxl, exp);
15461 case ALTIVEC_BUILTIN_STVRX:
15462 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvrx, exp);
15463 case ALTIVEC_BUILTIN_STVRXL:
15464 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvrxl, exp);
15466 case VSX_BUILTIN_STXVD2X_V1TI:
15467 return altivec_expand_stv_builtin (CODE_FOR_vsx_store_v1ti, exp);
15468 case VSX_BUILTIN_STXVD2X_V2DF:
15469 return altivec_expand_stv_builtin (CODE_FOR_vsx_store_v2df, exp);
15470 case VSX_BUILTIN_STXVD2X_V2DI:
15471 return altivec_expand_stv_builtin (CODE_FOR_vsx_store_v2di, exp);
15472 case VSX_BUILTIN_STXVW4X_V4SF:
15473 return altivec_expand_stv_builtin (CODE_FOR_vsx_store_v4sf, exp);
15474 case VSX_BUILTIN_STXVW4X_V4SI:
15475 return altivec_expand_stv_builtin (CODE_FOR_vsx_store_v4si, exp);
15476 case VSX_BUILTIN_STXVW4X_V8HI:
15477 return altivec_expand_stv_builtin (CODE_FOR_vsx_store_v8hi, exp);
15478 case VSX_BUILTIN_STXVW4X_V16QI:
15479 return altivec_expand_stv_builtin (CODE_FOR_vsx_store_v16qi, exp);
15481 /* For the following on big endian, it's ok to use any appropriate
15482 unaligned-supporting store, so use a generic expander. For
15483 little-endian, the exact element-reversing instruction must
15484 be used. */
15485 case VSX_BUILTIN_ST_ELEMREV_V2DF:
15487 enum insn_code code = (BYTES_BIG_ENDIAN ? CODE_FOR_vsx_store_v2df
15488 : CODE_FOR_vsx_st_elemrev_v2df);
15489 return altivec_expand_stv_builtin (code, exp);
15491 case VSX_BUILTIN_ST_ELEMREV_V2DI:
15493 enum insn_code code = (BYTES_BIG_ENDIAN ? CODE_FOR_vsx_store_v2di
15494 : CODE_FOR_vsx_st_elemrev_v2di);
15495 return altivec_expand_stv_builtin (code, exp);
15497 case VSX_BUILTIN_ST_ELEMREV_V4SF:
15499 enum insn_code code = (BYTES_BIG_ENDIAN ? CODE_FOR_vsx_store_v4sf
15500 : CODE_FOR_vsx_st_elemrev_v4sf);
15501 return altivec_expand_stv_builtin (code, exp);
15503 case VSX_BUILTIN_ST_ELEMREV_V4SI:
15505 enum insn_code code = (BYTES_BIG_ENDIAN ? CODE_FOR_vsx_store_v4si
15506 : CODE_FOR_vsx_st_elemrev_v4si);
15507 return altivec_expand_stv_builtin (code, exp);
15509 case VSX_BUILTIN_ST_ELEMREV_V8HI:
15511 enum insn_code code = (BYTES_BIG_ENDIAN ? CODE_FOR_vsx_store_v8hi
15512 : CODE_FOR_vsx_st_elemrev_v8hi);
15513 return altivec_expand_stv_builtin (code, exp);
15515 case VSX_BUILTIN_ST_ELEMREV_V16QI:
15517 enum insn_code code = (BYTES_BIG_ENDIAN ? CODE_FOR_vsx_store_v16qi
15518 : CODE_FOR_vsx_st_elemrev_v16qi);
15519 return altivec_expand_stv_builtin (code, exp);
15522 case ALTIVEC_BUILTIN_MFVSCR:
15523 icode = CODE_FOR_altivec_mfvscr;
15524 tmode = insn_data[icode].operand[0].mode;
15526 if (target == 0
15527 || GET_MODE (target) != tmode
15528 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
15529 target = gen_reg_rtx (tmode);
15531 pat = GEN_FCN (icode) (target);
15532 if (! pat)
15533 return 0;
15534 emit_insn (pat);
15535 return target;
15537 case ALTIVEC_BUILTIN_MTVSCR:
15538 icode = CODE_FOR_altivec_mtvscr;
15539 arg0 = CALL_EXPR_ARG (exp, 0);
15540 op0 = expand_normal (arg0);
15541 mode0 = insn_data[icode].operand[0].mode;
15543 /* If we got invalid arguments bail out before generating bad rtl. */
15544 if (arg0 == error_mark_node)
15545 return const0_rtx;
15547 if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
15548 op0 = copy_to_mode_reg (mode0, op0);
15550 pat = GEN_FCN (icode) (op0);
15551 if (pat)
15552 emit_insn (pat);
15553 return NULL_RTX;
15555 case ALTIVEC_BUILTIN_DSSALL:
15556 emit_insn (gen_altivec_dssall ());
15557 return NULL_RTX;
15559 case ALTIVEC_BUILTIN_DSS:
15560 icode = CODE_FOR_altivec_dss;
15561 arg0 = CALL_EXPR_ARG (exp, 0);
15562 STRIP_NOPS (arg0);
15563 op0 = expand_normal (arg0);
15564 mode0 = insn_data[icode].operand[0].mode;
15566 /* If we got invalid arguments bail out before generating bad rtl. */
15567 if (arg0 == error_mark_node)
15568 return const0_rtx;
15570 if (TREE_CODE (arg0) != INTEGER_CST
15571 || TREE_INT_CST_LOW (arg0) & ~0x3)
15573 error ("argument to dss must be a 2-bit unsigned literal");
15574 return const0_rtx;
15577 if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
15578 op0 = copy_to_mode_reg (mode0, op0);
15580 emit_insn (gen_altivec_dss (op0));
15581 return NULL_RTX;
15583 case ALTIVEC_BUILTIN_VEC_INIT_V4SI:
15584 case ALTIVEC_BUILTIN_VEC_INIT_V8HI:
15585 case ALTIVEC_BUILTIN_VEC_INIT_V16QI:
15586 case ALTIVEC_BUILTIN_VEC_INIT_V4SF:
15587 case VSX_BUILTIN_VEC_INIT_V2DF:
15588 case VSX_BUILTIN_VEC_INIT_V2DI:
15589 case VSX_BUILTIN_VEC_INIT_V1TI:
15590 return altivec_expand_vec_init_builtin (TREE_TYPE (exp), exp, target);
15592 case ALTIVEC_BUILTIN_VEC_SET_V4SI:
15593 case ALTIVEC_BUILTIN_VEC_SET_V8HI:
15594 case ALTIVEC_BUILTIN_VEC_SET_V16QI:
15595 case ALTIVEC_BUILTIN_VEC_SET_V4SF:
15596 case VSX_BUILTIN_VEC_SET_V2DF:
15597 case VSX_BUILTIN_VEC_SET_V2DI:
15598 case VSX_BUILTIN_VEC_SET_V1TI:
15599 return altivec_expand_vec_set_builtin (exp);
15601 case ALTIVEC_BUILTIN_VEC_EXT_V4SI:
15602 case ALTIVEC_BUILTIN_VEC_EXT_V8HI:
15603 case ALTIVEC_BUILTIN_VEC_EXT_V16QI:
15604 case ALTIVEC_BUILTIN_VEC_EXT_V4SF:
15605 case VSX_BUILTIN_VEC_EXT_V2DF:
15606 case VSX_BUILTIN_VEC_EXT_V2DI:
15607 case VSX_BUILTIN_VEC_EXT_V1TI:
15608 return altivec_expand_vec_ext_builtin (exp, target);
15610 default:
15611 break;
15612 /* Fall through. */
15615 /* Expand abs* operations. */
15616 d = bdesc_abs;
15617 for (i = 0; i < ARRAY_SIZE (bdesc_abs); i++, d++)
15618 if (d->code == fcode)
15619 return altivec_expand_abs_builtin (d->icode, exp, target);
15621 /* Expand the AltiVec predicates. */
15622 d = bdesc_altivec_preds;
15623 for (i = 0; i < ARRAY_SIZE (bdesc_altivec_preds); i++, d++)
15624 if (d->code == fcode)
15625 return altivec_expand_predicate_builtin (d->icode, exp, target);
15627 /* LV* are funky. They are initialized differently. */
15628 switch (fcode)
15630 case ALTIVEC_BUILTIN_LVSL:
15631 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvsl,
15632 exp, target, false);
15633 case ALTIVEC_BUILTIN_LVSR:
15634 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvsr,
15635 exp, target, false);
15636 case ALTIVEC_BUILTIN_LVEBX:
15637 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvebx,
15638 exp, target, false);
15639 case ALTIVEC_BUILTIN_LVEHX:
15640 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvehx,
15641 exp, target, false);
15642 case ALTIVEC_BUILTIN_LVEWX:
15643 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvewx,
15644 exp, target, false);
15645 case ALTIVEC_BUILTIN_LVXL_V2DF:
15646 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvxl_v2df,
15647 exp, target, false);
15648 case ALTIVEC_BUILTIN_LVXL_V2DI:
15649 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvxl_v2di,
15650 exp, target, false);
15651 case ALTIVEC_BUILTIN_LVXL_V4SF:
15652 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvxl_v4sf,
15653 exp, target, false);
15654 case ALTIVEC_BUILTIN_LVXL:
15655 case ALTIVEC_BUILTIN_LVXL_V4SI:
15656 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvxl_v4si,
15657 exp, target, false);
15658 case ALTIVEC_BUILTIN_LVXL_V8HI:
15659 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvxl_v8hi,
15660 exp, target, false);
15661 case ALTIVEC_BUILTIN_LVXL_V16QI:
15662 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvxl_v16qi,
15663 exp, target, false);
15664 case ALTIVEC_BUILTIN_LVX_V2DF:
15665 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvx_v2df_2op,
15666 exp, target, false);
15667 case ALTIVEC_BUILTIN_LVX_V2DI:
15668 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvx_v2di_2op,
15669 exp, target, false);
15670 case ALTIVEC_BUILTIN_LVX_V4SF:
15671 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvx_v4sf_2op,
15672 exp, target, false);
15673 case ALTIVEC_BUILTIN_LVX:
15674 case ALTIVEC_BUILTIN_LVX_V4SI:
15675 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvx_v4si_2op,
15676 exp, target, false);
15677 case ALTIVEC_BUILTIN_LVX_V8HI:
15678 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvx_v8hi_2op,
15679 exp, target, false);
15680 case ALTIVEC_BUILTIN_LVX_V16QI:
15681 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvx_v16qi_2op,
15682 exp, target, false);
15683 case ALTIVEC_BUILTIN_LVLX:
15684 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvlx,
15685 exp, target, true);
15686 case ALTIVEC_BUILTIN_LVLXL:
15687 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvlxl,
15688 exp, target, true);
15689 case ALTIVEC_BUILTIN_LVRX:
15690 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvrx,
15691 exp, target, true);
15692 case ALTIVEC_BUILTIN_LVRXL:
15693 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvrxl,
15694 exp, target, true);
15695 case VSX_BUILTIN_LXVD2X_V1TI:
15696 return altivec_expand_lv_builtin (CODE_FOR_vsx_load_v1ti,
15697 exp, target, false);
15698 case VSX_BUILTIN_LXVD2X_V2DF:
15699 return altivec_expand_lv_builtin (CODE_FOR_vsx_load_v2df,
15700 exp, target, false);
15701 case VSX_BUILTIN_LXVD2X_V2DI:
15702 return altivec_expand_lv_builtin (CODE_FOR_vsx_load_v2di,
15703 exp, target, false);
15704 case VSX_BUILTIN_LXVW4X_V4SF:
15705 return altivec_expand_lv_builtin (CODE_FOR_vsx_load_v4sf,
15706 exp, target, false);
15707 case VSX_BUILTIN_LXVW4X_V4SI:
15708 return altivec_expand_lv_builtin (CODE_FOR_vsx_load_v4si,
15709 exp, target, false);
15710 case VSX_BUILTIN_LXVW4X_V8HI:
15711 return altivec_expand_lv_builtin (CODE_FOR_vsx_load_v8hi,
15712 exp, target, false);
15713 case VSX_BUILTIN_LXVW4X_V16QI:
15714 return altivec_expand_lv_builtin (CODE_FOR_vsx_load_v16qi,
15715 exp, target, false);
15716 /* For the following on big endian, it's ok to use any appropriate
15717 unaligned-supporting load, so use a generic expander. For
15718 little-endian, the exact element-reversing instruction must
15719 be used. */
15720 case VSX_BUILTIN_LD_ELEMREV_V2DF:
15722 enum insn_code code = (BYTES_BIG_ENDIAN ? CODE_FOR_vsx_load_v2df
15723 : CODE_FOR_vsx_ld_elemrev_v2df);
15724 return altivec_expand_lv_builtin (code, exp, target, false);
15726 case VSX_BUILTIN_LD_ELEMREV_V2DI:
15728 enum insn_code code = (BYTES_BIG_ENDIAN ? CODE_FOR_vsx_load_v2di
15729 : CODE_FOR_vsx_ld_elemrev_v2di);
15730 return altivec_expand_lv_builtin (code, exp, target, false);
15732 case VSX_BUILTIN_LD_ELEMREV_V4SF:
15734 enum insn_code code = (BYTES_BIG_ENDIAN ? CODE_FOR_vsx_load_v4sf
15735 : CODE_FOR_vsx_ld_elemrev_v4sf);
15736 return altivec_expand_lv_builtin (code, exp, target, false);
15738 case VSX_BUILTIN_LD_ELEMREV_V4SI:
15740 enum insn_code code = (BYTES_BIG_ENDIAN ? CODE_FOR_vsx_load_v4si
15741 : CODE_FOR_vsx_ld_elemrev_v4si);
15742 return altivec_expand_lv_builtin (code, exp, target, false);
15744 case VSX_BUILTIN_LD_ELEMREV_V8HI:
15746 enum insn_code code = (BYTES_BIG_ENDIAN ? CODE_FOR_vsx_load_v8hi
15747 : CODE_FOR_vsx_ld_elemrev_v8hi);
15748 return altivec_expand_lv_builtin (code, exp, target, false);
15750 case VSX_BUILTIN_LD_ELEMREV_V16QI:
15752 enum insn_code code = (BYTES_BIG_ENDIAN ? CODE_FOR_vsx_load_v16qi
15753 : CODE_FOR_vsx_ld_elemrev_v16qi);
15754 return altivec_expand_lv_builtin (code, exp, target, false);
15756 break;
15757 default:
15758 break;
15759 /* Fall through. */
15762 *expandedp = false;
15763 return NULL_RTX;
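/* Editor's note: a scalar model of what the *_elemrev patterns chosen
   above do for four 32-bit lanes.  Hypothetical helper, illustration
   only: on little-endian the raw element order of the underlying
   access must be reversed to preserve vector lane numbering.  */

static inline void
elemrev_v4si_model (const int in[4], int out[4])
{
  out[0] = in[3];
  out[1] = in[2];
  out[2] = in[1];
  out[3] = in[0];
}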
15766 /* Expand the builtin in EXP and store the result in TARGET. Store
15767 true in *EXPANDEDP if we found a builtin to expand. */
15768 static rtx
15769 paired_expand_builtin (tree exp, rtx target, bool * expandedp)
15771 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
15772 enum rs6000_builtins fcode = (enum rs6000_builtins) DECL_FUNCTION_CODE (fndecl);
15773 const struct builtin_description *d;
15774 size_t i;
15776 *expandedp = true;
15778 switch (fcode)
15780 case PAIRED_BUILTIN_STX:
15781 return paired_expand_stv_builtin (CODE_FOR_paired_stx, exp);
15782 case PAIRED_BUILTIN_LX:
15783 return paired_expand_lv_builtin (CODE_FOR_paired_lx, exp, target);
15784 default:
15785 break;
15786 /* Fall through. */
15789 /* Expand the paired predicates. */
15790 d = bdesc_paired_preds;
15791 for (i = 0; i < ARRAY_SIZE (bdesc_paired_preds); i++, d++)
15792 if (d->code == fcode)
15793 return paired_expand_predicate_builtin (d->icode, exp, target);
15795 *expandedp = false;
15796 return NULL_RTX;
15799 /* Binops that need to be initialized manually, but can be expanded
15800 automagically by rs6000_expand_binop_builtin. */
15801 static const struct builtin_description bdesc_2arg_spe[] =
15803 { RS6000_BTM_SPE, CODE_FOR_spe_evlddx, "__builtin_spe_evlddx", SPE_BUILTIN_EVLDDX },
15804 { RS6000_BTM_SPE, CODE_FOR_spe_evldwx, "__builtin_spe_evldwx", SPE_BUILTIN_EVLDWX },
15805 { RS6000_BTM_SPE, CODE_FOR_spe_evldhx, "__builtin_spe_evldhx", SPE_BUILTIN_EVLDHX },
15806 { RS6000_BTM_SPE, CODE_FOR_spe_evlwhex, "__builtin_spe_evlwhex", SPE_BUILTIN_EVLWHEX },
15807 { RS6000_BTM_SPE, CODE_FOR_spe_evlwhoux, "__builtin_spe_evlwhoux", SPE_BUILTIN_EVLWHOUX },
15808 { RS6000_BTM_SPE, CODE_FOR_spe_evlwhosx, "__builtin_spe_evlwhosx", SPE_BUILTIN_EVLWHOSX },
15809 { RS6000_BTM_SPE, CODE_FOR_spe_evlwwsplatx, "__builtin_spe_evlwwsplatx", SPE_BUILTIN_EVLWWSPLATX },
15810 { RS6000_BTM_SPE, CODE_FOR_spe_evlwhsplatx, "__builtin_spe_evlwhsplatx", SPE_BUILTIN_EVLWHSPLATX },
15811 { RS6000_BTM_SPE, CODE_FOR_spe_evlhhesplatx, "__builtin_spe_evlhhesplatx", SPE_BUILTIN_EVLHHESPLATX },
15812 { RS6000_BTM_SPE, CODE_FOR_spe_evlhhousplatx, "__builtin_spe_evlhhousplatx", SPE_BUILTIN_EVLHHOUSPLATX },
15813 { RS6000_BTM_SPE, CODE_FOR_spe_evlhhossplatx, "__builtin_spe_evlhhossplatx", SPE_BUILTIN_EVLHHOSSPLATX },
15814 { RS6000_BTM_SPE, CODE_FOR_spe_evldd, "__builtin_spe_evldd", SPE_BUILTIN_EVLDD },
15815 { RS6000_BTM_SPE, CODE_FOR_spe_evldw, "__builtin_spe_evldw", SPE_BUILTIN_EVLDW },
15816 { RS6000_BTM_SPE, CODE_FOR_spe_evldh, "__builtin_spe_evldh", SPE_BUILTIN_EVLDH },
15817 { RS6000_BTM_SPE, CODE_FOR_spe_evlwhe, "__builtin_spe_evlwhe", SPE_BUILTIN_EVLWHE },
15818 { RS6000_BTM_SPE, CODE_FOR_spe_evlwhou, "__builtin_spe_evlwhou", SPE_BUILTIN_EVLWHOU },
15819 { RS6000_BTM_SPE, CODE_FOR_spe_evlwhos, "__builtin_spe_evlwhos", SPE_BUILTIN_EVLWHOS },
15820 { RS6000_BTM_SPE, CODE_FOR_spe_evlwwsplat, "__builtin_spe_evlwwsplat", SPE_BUILTIN_EVLWWSPLAT },
15821 { RS6000_BTM_SPE, CODE_FOR_spe_evlwhsplat, "__builtin_spe_evlwhsplat", SPE_BUILTIN_EVLWHSPLAT },
15822 { RS6000_BTM_SPE, CODE_FOR_spe_evlhhesplat, "__builtin_spe_evlhhesplat", SPE_BUILTIN_EVLHHESPLAT },
15823 { RS6000_BTM_SPE, CODE_FOR_spe_evlhhousplat, "__builtin_spe_evlhhousplat", SPE_BUILTIN_EVLHHOUSPLAT },
15824 { RS6000_BTM_SPE, CODE_FOR_spe_evlhhossplat, "__builtin_spe_evlhhossplat", SPE_BUILTIN_EVLHHOSSPLAT }
15827 /* Expand the builtin in EXP and store the result in TARGET. Store
15828 true in *EXPANDEDP if we found a builtin to expand.
15830 This expands the SPE builtins that are not simple unary and binary
15831 operations. */
15832 static rtx
15833 spe_expand_builtin (tree exp, rtx target, bool *expandedp)
15835 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
15836 tree arg1, arg0;
15837 enum rs6000_builtins fcode = (enum rs6000_builtins) DECL_FUNCTION_CODE (fndecl);
15838 enum insn_code icode;
15839 machine_mode tmode, mode0;
15840 rtx pat, op0;
15841 const struct builtin_description *d;
15842 size_t i;
15844 *expandedp = true;
15846 /* Syntax check for a 5-bit unsigned immediate. */
15847 switch (fcode)
15849 case SPE_BUILTIN_EVSTDD:
15850 case SPE_BUILTIN_EVSTDH:
15851 case SPE_BUILTIN_EVSTDW:
15852 case SPE_BUILTIN_EVSTWHE:
15853 case SPE_BUILTIN_EVSTWHO:
15854 case SPE_BUILTIN_EVSTWWE:
15855 case SPE_BUILTIN_EVSTWWO:
15856 arg1 = CALL_EXPR_ARG (exp, 2);
15857 if (TREE_CODE (arg1) != INTEGER_CST
15858 || TREE_INT_CST_LOW (arg1) & ~0x1f)
15860 error ("argument 2 must be a 5-bit unsigned literal");
15861 return const0_rtx;
15863 break;
15864 default:
15865 break;
15868 /* The evsplat*i instructions are not quite generic. */
15869 switch (fcode)
15871 case SPE_BUILTIN_EVSPLATFI:
15872 return rs6000_expand_unop_builtin (CODE_FOR_spe_evsplatfi,
15873 exp, target);
15874 case SPE_BUILTIN_EVSPLATI:
15875 return rs6000_expand_unop_builtin (CODE_FOR_spe_evsplati,
15876 exp, target);
15877 default:
15878 break;
15881 d = bdesc_2arg_spe;
15882 for (i = 0; i < ARRAY_SIZE (bdesc_2arg_spe); ++i, ++d)
15883 if (d->code == fcode)
15884 return rs6000_expand_binop_builtin (d->icode, exp, target);
15886 d = bdesc_spe_predicates;
15887 for (i = 0; i < ARRAY_SIZE (bdesc_spe_predicates); ++i, ++d)
15888 if (d->code == fcode)
15889 return spe_expand_predicate_builtin (d->icode, exp, target);
15891 d = bdesc_spe_evsel;
15892 for (i = 0; i < ARRAY_SIZE (bdesc_spe_evsel); ++i, ++d)
15893 if (d->code == fcode)
15894 return spe_expand_evsel_builtin (d->icode, exp, target);
15896 switch (fcode)
15898 case SPE_BUILTIN_EVSTDDX:
15899 return spe_expand_stv_builtin (CODE_FOR_spe_evstddx, exp);
15900 case SPE_BUILTIN_EVSTDHX:
15901 return spe_expand_stv_builtin (CODE_FOR_spe_evstdhx, exp);
15902 case SPE_BUILTIN_EVSTDWX:
15903 return spe_expand_stv_builtin (CODE_FOR_spe_evstdwx, exp);
15904 case SPE_BUILTIN_EVSTWHEX:
15905 return spe_expand_stv_builtin (CODE_FOR_spe_evstwhex, exp);
15906 case SPE_BUILTIN_EVSTWHOX:
15907 return spe_expand_stv_builtin (CODE_FOR_spe_evstwhox, exp);
15908 case SPE_BUILTIN_EVSTWWEX:
15909 return spe_expand_stv_builtin (CODE_FOR_spe_evstwwex, exp);
15910 case SPE_BUILTIN_EVSTWWOX:
15911 return spe_expand_stv_builtin (CODE_FOR_spe_evstwwox, exp);
15912 case SPE_BUILTIN_EVSTDD:
15913 return spe_expand_stv_builtin (CODE_FOR_spe_evstdd, exp);
15914 case SPE_BUILTIN_EVSTDH:
15915 return spe_expand_stv_builtin (CODE_FOR_spe_evstdh, exp);
15916 case SPE_BUILTIN_EVSTDW:
15917 return spe_expand_stv_builtin (CODE_FOR_spe_evstdw, exp);
15918 case SPE_BUILTIN_EVSTWHE:
15919 return spe_expand_stv_builtin (CODE_FOR_spe_evstwhe, exp);
15920 case SPE_BUILTIN_EVSTWHO:
15921 return spe_expand_stv_builtin (CODE_FOR_spe_evstwho, exp);
15922 case SPE_BUILTIN_EVSTWWE:
15923 return spe_expand_stv_builtin (CODE_FOR_spe_evstwwe, exp);
15924 case SPE_BUILTIN_EVSTWWO:
15925 return spe_expand_stv_builtin (CODE_FOR_spe_evstwwo, exp);
15926 case SPE_BUILTIN_MFSPEFSCR:
15927 icode = CODE_FOR_spe_mfspefscr;
15928 tmode = insn_data[icode].operand[0].mode;
15930 if (target == 0
15931 || GET_MODE (target) != tmode
15932 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
15933 target = gen_reg_rtx (tmode);
15935 pat = GEN_FCN (icode) (target);
15936 if (! pat)
15937 return 0;
15938 emit_insn (pat);
15939 return target;
15940 case SPE_BUILTIN_MTSPEFSCR:
15941 icode = CODE_FOR_spe_mtspefscr;
15942 arg0 = CALL_EXPR_ARG (exp, 0);
15943 op0 = expand_normal (arg0);
15944 mode0 = insn_data[icode].operand[0].mode;
15946 if (arg0 == error_mark_node)
15947 return const0_rtx;
15949 if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
15950 op0 = copy_to_mode_reg (mode0, op0);
15952 pat = GEN_FCN (icode) (op0);
15953 if (pat)
15954 emit_insn (pat);
15955 return NULL_RTX;
15956 default:
15957 break;
15960 *expandedp = false;
15961 return NULL_RTX;
15964 static rtx
15965 paired_expand_predicate_builtin (enum insn_code icode, tree exp, rtx target)
15967 rtx pat, scratch, tmp;
15968 tree form = CALL_EXPR_ARG (exp, 0);
15969 tree arg0 = CALL_EXPR_ARG (exp, 1);
15970 tree arg1 = CALL_EXPR_ARG (exp, 2);
15971 rtx op0 = expand_normal (arg0);
15972 rtx op1 = expand_normal (arg1);
15973 machine_mode mode0 = insn_data[icode].operand[1].mode;
15974 machine_mode mode1 = insn_data[icode].operand[2].mode;
15975 int form_int;
15976 enum rtx_code code;
15978 if (TREE_CODE (form) != INTEGER_CST)
15980 error ("argument 1 of __builtin_paired_predicate must be a constant");
15981 return const0_rtx;
15983 else
15984 form_int = TREE_INT_CST_LOW (form);
15986 gcc_assert (mode0 == mode1);
15988 if (arg0 == error_mark_node || arg1 == error_mark_node)
15989 return const0_rtx;
15991 if (target == 0
15992 || GET_MODE (target) != SImode
15993 || !(*insn_data[icode].operand[0].predicate) (target, SImode))
15994 target = gen_reg_rtx (SImode);
15995 if (!(*insn_data[icode].operand[1].predicate) (op0, mode0))
15996 op0 = copy_to_mode_reg (mode0, op0);
15997 if (!(*insn_data[icode].operand[2].predicate) (op1, mode1))
15998 op1 = copy_to_mode_reg (mode1, op1);
16000 scratch = gen_reg_rtx (CCFPmode);
16002 pat = GEN_FCN (icode) (scratch, op0, op1);
16003 if (!pat)
16004 return const0_rtx;
16006 emit_insn (pat);
16008 switch (form_int)
16010 /* LT bit. */
16011 case 0:
16012 code = LT;
16013 break;
16014 /* GT bit. */
16015 case 1:
16016 code = GT;
16017 break;
16018 /* EQ bit. */
16019 case 2:
16020 code = EQ;
16021 break;
16022 /* UN bit. */
16023 case 3:
16024 emit_insn (gen_move_from_CR_ov_bit (target, scratch));
16025 return target;
16026 default:
16027 error ("argument 1 of __builtin_paired_predicate is out of range");
16028 return const0_rtx;
16031 tmp = gen_rtx_fmt_ee (code, SImode, scratch, const0_rtx);
16032 emit_move_insn (target, tmp);
16033 return target;
16036 static rtx
16037 spe_expand_predicate_builtin (enum insn_code icode, tree exp, rtx target)
16039 rtx pat, scratch, tmp;
16040 tree form = CALL_EXPR_ARG (exp, 0);
16041 tree arg0 = CALL_EXPR_ARG (exp, 1);
16042 tree arg1 = CALL_EXPR_ARG (exp, 2);
16043 rtx op0 = expand_normal (arg0);
16044 rtx op1 = expand_normal (arg1);
16045 machine_mode mode0 = insn_data[icode].operand[1].mode;
16046 machine_mode mode1 = insn_data[icode].operand[2].mode;
16047 int form_int;
16048 enum rtx_code code;
16050 if (TREE_CODE (form) != INTEGER_CST)
16052 error ("argument 1 of __builtin_spe_predicate must be a constant");
16053 return const0_rtx;
16055 else
16056 form_int = TREE_INT_CST_LOW (form);
16058 gcc_assert (mode0 == mode1);
16060 if (arg0 == error_mark_node || arg1 == error_mark_node)
16061 return const0_rtx;
16063 if (target == 0
16064 || GET_MODE (target) != SImode
16065 || ! (*insn_data[icode].operand[0].predicate) (target, SImode))
16066 target = gen_reg_rtx (SImode);
16068 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
16069 op0 = copy_to_mode_reg (mode0, op0);
16070 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
16071 op1 = copy_to_mode_reg (mode1, op1);
16073 scratch = gen_reg_rtx (CCmode);
16075 pat = GEN_FCN (icode) (scratch, op0, op1);
16076 if (! pat)
16077 return const0_rtx;
16078 emit_insn (pat);
16080 /* There are 4 variants for each predicate: _any_, _all_, _upper_,
16081 _lower_. We use one compare, but look in different bits of the
16082 CR for each variant.
16084 There are 2 elements in each SPE simd type (upper/lower). The CR
16085 bits are set as follows:
16087 BIT0 | BIT 1 | BIT 2 | BIT 3
16088 U | L | (U | L) | (U & L)
16090 So, for an "all" relationship, BIT 3 would be set.
16091 For an "any" relationship, BIT 2 would be set. Etc.
16093 Following traditional nomenclature, these bits map to:
16095 BIT0 | BIT 1 | BIT 2 | BIT 3
16096 LT | GT | EQ | OV
16098 Later, we will generate rtl to look in the LT/GT/EQ/OV bits.
16101 switch (form_int)
16103 /* All variant. OV bit. */
16104 case 0:
16105 /* We need to get to the OV bit, which is the ORDERED bit. We
16106 could generate (ordered:SI (reg:CC xx) (const_int 0)), but
16107 that's ugly and will make validate_condition_mode die.
16108 So let's just use another pattern. */
16109 emit_insn (gen_move_from_CR_ov_bit (target, scratch));
16110 return target;
16111 /* Any variant. EQ bit. */
16112 case 1:
16113 code = EQ;
16114 break;
16115 /* Upper variant. LT bit. */
16116 case 2:
16117 code = LT;
16118 break;
16119 /* Lower variant. GT bit. */
16120 case 3:
16121 code = GT;
16122 break;
16123 default:
16124 error ("argument 1 of __builtin_spe_predicate is out of range");
16125 return const0_rtx;
16128 tmp = gen_rtx_fmt_ee (code, SImode, scratch, const0_rtx);
16129 emit_move_insn (target, tmp);
16131 return target;
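/* Editor's note: the CR field layout documented above, as plain C.
   U and L are the single-bit upper/lower compare results; the helper
   is hypothetical illustration, not part of this file.  */

static inline unsigned int
spe_pred_cr_field_model (unsigned int u, unsigned int l)
{
  /* BIT0..BIT3 == LT/GT/EQ/OV == U, L, U|L, U&L, with BIT0 the MSB of
     the 4-bit field.  "all" tests OV, "any" tests EQ.  */
  return (u << 3) | (l << 2) | ((u | l) << 1) | (u & l);
}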
16134 /* The evsel builtins look like this:
16136 e = __builtin_spe_evsel_OP (a, b, c, d);
16138 and work like this:
16140 e[upper] = a[upper] *OP* b[upper] ? c[upper] : d[upper];
16141 e[lower] = a[lower] *OP* b[lower] ? c[lower] : d[lower];
16144 static rtx
16145 spe_expand_evsel_builtin (enum insn_code icode, tree exp, rtx target)
16147 rtx pat, scratch;
16148 tree arg0 = CALL_EXPR_ARG (exp, 0);
16149 tree arg1 = CALL_EXPR_ARG (exp, 1);
16150 tree arg2 = CALL_EXPR_ARG (exp, 2);
16151 tree arg3 = CALL_EXPR_ARG (exp, 3);
16152 rtx op0 = expand_normal (arg0);
16153 rtx op1 = expand_normal (arg1);
16154 rtx op2 = expand_normal (arg2);
16155 rtx op3 = expand_normal (arg3);
16156 machine_mode mode0 = insn_data[icode].operand[1].mode;
16157 machine_mode mode1 = insn_data[icode].operand[2].mode;
16159 gcc_assert (mode0 == mode1);
16161 if (arg0 == error_mark_node || arg1 == error_mark_node
16162 || arg2 == error_mark_node || arg3 == error_mark_node)
16163 return const0_rtx;
16165 if (target == 0
16166 || GET_MODE (target) != mode0
16167 || ! (*insn_data[icode].operand[0].predicate) (target, mode0))
16168 target = gen_reg_rtx (mode0);
16170 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
16171 op0 = copy_to_mode_reg (mode0, op0);
16172 if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
16173 op1 = copy_to_mode_reg (mode0, op1);
16174 if (! (*insn_data[icode].operand[1].predicate) (op2, mode1))
16175 op2 = copy_to_mode_reg (mode0, op2);
16176 if (! (*insn_data[icode].operand[1].predicate) (op3, mode1))
16177 op3 = copy_to_mode_reg (mode0, op3);
16179 /* Generate the compare. */
16180 scratch = gen_reg_rtx (CCmode);
16181 pat = GEN_FCN (icode) (scratch, op0, op1);
16182 if (! pat)
16183 return const0_rtx;
16184 emit_insn (pat);
16186 if (mode0 == V2SImode)
16187 emit_insn (gen_spe_evsel (target, op2, op3, scratch));
16188 else
16189 emit_insn (gen_spe_evsel_fs (target, op2, op3, scratch));
16191 return target;
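/* Editor's note: the evsel semantics quoted above, modelled on one
   upper/lower element pair.  Hypothetical helper, illustration only;
   cmp_* stand for the per-element results of the compare emitted into
   SCRATCH above.  */

static inline void
evsel_model (int cmp_upper, int cmp_lower,
	     int c_upper, int c_lower,
	     int d_upper, int d_lower,
	     int *e_upper, int *e_lower)
{
  *e_upper = cmp_upper ? c_upper : d_upper;
  *e_lower = cmp_lower ? c_lower : d_lower;
}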
16194 /* Raise an error message for a builtin function that is called without the
16195 appropriate target options being set. */
16197 static void
16198 rs6000_invalid_builtin (enum rs6000_builtins fncode)
16200 size_t uns_fncode = (size_t)fncode;
16201 const char *name = rs6000_builtin_info[uns_fncode].name;
16202 HOST_WIDE_INT fnmask = rs6000_builtin_info[uns_fncode].mask;
16204 gcc_assert (name != NULL);
16205 if ((fnmask & RS6000_BTM_CELL) != 0)
16206 error ("Builtin function %s is only valid for the cell processor", name);
16207 else if ((fnmask & RS6000_BTM_VSX) != 0)
16208 error ("Builtin function %s requires the -mvsx option", name);
16209 else if ((fnmask & RS6000_BTM_HTM) != 0)
16210 error ("Builtin function %s requires the -mhtm option", name);
16211 else if ((fnmask & RS6000_BTM_ALTIVEC) != 0)
16212 error ("Builtin function %s requires the -maltivec option", name);
16213 else if ((fnmask & RS6000_BTM_PAIRED) != 0)
16214 error ("Builtin function %s requires the -mpaired option", name);
16215 else if ((fnmask & RS6000_BTM_SPE) != 0)
16216 error ("Builtin function %s requires the -mspe option", name);
16217 else if ((fnmask & (RS6000_BTM_DFP | RS6000_BTM_P8_VECTOR))
16218 == (RS6000_BTM_DFP | RS6000_BTM_P8_VECTOR))
16219 error ("Builtin function %s requires the -mhard-dfp and"
16220 " -mpower8-vector options", name);
16221 else if ((fnmask & RS6000_BTM_DFP) != 0)
16222 error ("Builtin function %s requires the -mhard-dfp option", name);
16223 else if ((fnmask & RS6000_BTM_P8_VECTOR) != 0)
16224 error ("Builtin function %s requires the -mpower8-vector option", name);
16225 else if ((fnmask & (RS6000_BTM_P9_VECTOR | RS6000_BTM_64BIT))
16226 == (RS6000_BTM_P9_VECTOR | RS6000_BTM_64BIT))
16227 error ("Builtin function %s requires the -mcpu=power9 and"
16228 " -m64 options", name);
16229 else if ((fnmask & RS6000_BTM_P9_VECTOR) != 0)
16230 error ("Builtin function %s requires the -mcpu=power9 option", name);
16231 else if ((fnmask & (RS6000_BTM_P9_MISC | RS6000_BTM_64BIT))
16232 == (RS6000_BTM_P9_MISC | RS6000_BTM_64BIT))
16233 error ("Builtin function %s requires the -mcpu=power9 and"
16234 " -m64 options", name);
16235 else if ((fnmask & RS6000_BTM_P9_MISC) == RS6000_BTM_P9_MISC)
16236 error ("Builtin function %s requires the -mcpu=power9 option", name);
16237 else if ((fnmask & (RS6000_BTM_HARD_FLOAT | RS6000_BTM_LDBL128))
16238 == (RS6000_BTM_HARD_FLOAT | RS6000_BTM_LDBL128))
16239 error ("Builtin function %s requires the -mhard-float and"
16240 " -mlong-double-128 options", name);
16241 else if ((fnmask & RS6000_BTM_HARD_FLOAT) != 0)
16242 error ("Builtin function %s requires the -mhard-float option", name);
16243 else if ((fnmask & RS6000_BTM_FLOAT128) != 0)
16244 error ("Builtin function %s requires the -mfloat128 option", name);
16245 else
16246 error ("Builtin function %s is not supported with the current options",
16247 name);
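/* Illustration (hypothetical user code): calling, say,
   __builtin_altivec_mtvscr in a translation unit compiled without
   -maltivec lands here, reports "requires the -maltivec option", and
   the caller then expands the call as an ordinary external call.  */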
16250 /* Target hook for early folding of built-ins, shamelessly stolen
16251 from ia64.c. */
16253 static tree
16254 rs6000_fold_builtin (tree fndecl, int n_args ATTRIBUTE_UNUSED,
16255 tree *args, bool ignore ATTRIBUTE_UNUSED)
16257 if (DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_MD)
16259 enum rs6000_builtins fn_code
16260 = (enum rs6000_builtins) DECL_FUNCTION_CODE (fndecl);
16261 switch (fn_code)
16263 case RS6000_BUILTIN_NANQ:
16264 case RS6000_BUILTIN_NANSQ:
16266 tree type = TREE_TYPE (TREE_TYPE (fndecl));
16267 const char *str = c_getstr (*args);
16268 int quiet = fn_code == RS6000_BUILTIN_NANQ;
16269 REAL_VALUE_TYPE real;
16271 if (str && real_nan (&real, str, quiet, TYPE_MODE (type)))
16272 return build_real (type, real);
16273 return NULL_TREE;
16275 case RS6000_BUILTIN_INFQ:
16276 case RS6000_BUILTIN_HUGE_VALQ:
16278 tree type = TREE_TYPE (TREE_TYPE (fndecl));
16279 REAL_VALUE_TYPE inf;
16280 real_inf (&inf);
16281 return build_real (type, inf);
16283 default:
16284 break;
16287 #ifdef SUBTARGET_FOLD_BUILTIN
16288 return SUBTARGET_FOLD_BUILTIN (fndecl, n_args, args, ignore);
16289 #else
16290 return NULL_TREE;
16291 #endif
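/* For instance (a sketch, assuming -mfloat128):

     __float128 x = __builtin_nanq ("");

   folds at compile time via real_nan into a quiet-NaN REAL_CST, so no
   library call survives into the generated code.  */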
16294 /* Expand an expression EXP that calls a built-in function,
16295 with result going to TARGET if that's convenient
16296 (and in mode MODE if that's convenient).
16297 SUBTARGET may be used as the target for computing one of EXP's operands.
16298 IGNORE is nonzero if the value is to be ignored. */
16300 static rtx
16301 rs6000_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
16302 machine_mode mode ATTRIBUTE_UNUSED,
16303 int ignore ATTRIBUTE_UNUSED)
16305 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
16306 enum rs6000_builtins fcode
16307 = (enum rs6000_builtins)DECL_FUNCTION_CODE (fndecl);
16308 size_t uns_fcode = (size_t)fcode;
16309 const struct builtin_description *d;
16310 size_t i;
16311 rtx ret;
16312 bool success;
16313 HOST_WIDE_INT mask = rs6000_builtin_info[uns_fcode].mask;
16314 bool func_valid_p = ((rs6000_builtin_mask & mask) == mask);
16316 if (TARGET_DEBUG_BUILTIN)
16318 enum insn_code icode = rs6000_builtin_info[uns_fcode].icode;
16319 const char *name1 = rs6000_builtin_info[uns_fcode].name;
16320 const char *name2 = ((icode != CODE_FOR_nothing)
16321 ? get_insn_name ((int)icode)
16322 : "nothing");
16323 const char *name3;
16325 switch (rs6000_builtin_info[uns_fcode].attr & RS6000_BTC_TYPE_MASK)
16327 default: name3 = "unknown"; break;
16328 case RS6000_BTC_SPECIAL: name3 = "special"; break;
16329 case RS6000_BTC_UNARY: name3 = "unary"; break;
16330 case RS6000_BTC_BINARY: name3 = "binary"; break;
16331 case RS6000_BTC_TERNARY: name3 = "ternary"; break;
16332 case RS6000_BTC_PREDICATE: name3 = "predicate"; break;
16333 case RS6000_BTC_ABS: name3 = "abs"; break;
16334 case RS6000_BTC_EVSEL: name3 = "evsel"; break;
16335 case RS6000_BTC_DST: name3 = "dst"; break;
16339 fprintf (stderr,
16340 "rs6000_expand_builtin, %s (%d), insn = %s (%d), type=%s%s\n",
16341 (name1) ? name1 : "---", fcode,
16342 (name2) ? name2 : "---", (int)icode,
16343 name3,
16344 func_valid_p ? "" : ", not valid");
16347 if (!func_valid_p)
16349 rs6000_invalid_builtin (fcode);
16351 /* Given it is invalid, just generate a normal call. */
16352 return expand_call (exp, target, ignore);
16355 switch (fcode)
16357 case RS6000_BUILTIN_RECIP:
16358 return rs6000_expand_binop_builtin (CODE_FOR_recipdf3, exp, target);
16360 case RS6000_BUILTIN_RECIPF:
16361 return rs6000_expand_binop_builtin (CODE_FOR_recipsf3, exp, target);
16363 case RS6000_BUILTIN_RSQRTF:
16364 return rs6000_expand_unop_builtin (CODE_FOR_rsqrtsf2, exp, target);
16366 case RS6000_BUILTIN_RSQRT:
16367 return rs6000_expand_unop_builtin (CODE_FOR_rsqrtdf2, exp, target);
16369 case POWER7_BUILTIN_BPERMD:
16370 return rs6000_expand_binop_builtin (((TARGET_64BIT)
16371 ? CODE_FOR_bpermd_di
16372 : CODE_FOR_bpermd_si), exp, target);
16374 case RS6000_BUILTIN_GET_TB:
16375 return rs6000_expand_zeroop_builtin (CODE_FOR_rs6000_get_timebase,
16376 target);
16378 case RS6000_BUILTIN_MFTB:
16379 return rs6000_expand_zeroop_builtin (((TARGET_64BIT)
16380 ? CODE_FOR_rs6000_mftb_di
16381 : CODE_FOR_rs6000_mftb_si),
16382 target);
16384 case RS6000_BUILTIN_MFFS:
16385 return rs6000_expand_zeroop_builtin (CODE_FOR_rs6000_mffs, target);
16387 case RS6000_BUILTIN_MTFSF:
16388 return rs6000_expand_mtfsf_builtin (CODE_FOR_rs6000_mtfsf, exp);
16390 case RS6000_BUILTIN_CPU_INIT:
16391 case RS6000_BUILTIN_CPU_IS:
16392 case RS6000_BUILTIN_CPU_SUPPORTS:
16393 return cpu_expand_builtin (fcode, exp, target);
16395 case ALTIVEC_BUILTIN_MASK_FOR_LOAD:
16396 case ALTIVEC_BUILTIN_MASK_FOR_STORE:
16398 int icode = (BYTES_BIG_ENDIAN ? (int) CODE_FOR_altivec_lvsr_direct
16399 : (int) CODE_FOR_altivec_lvsl_direct);
16400 machine_mode tmode = insn_data[icode].operand[0].mode;
16401 machine_mode mode = insn_data[icode].operand[1].mode;
16402 tree arg;
16403 rtx op, addr, pat;
16405 gcc_assert (TARGET_ALTIVEC);
16407 arg = CALL_EXPR_ARG (exp, 0);
16408 gcc_assert (POINTER_TYPE_P (TREE_TYPE (arg)));
16409 op = expand_expr (arg, NULL_RTX, Pmode, EXPAND_NORMAL);
16410 addr = memory_address (mode, op);
16411 if (fcode == ALTIVEC_BUILTIN_MASK_FOR_STORE)
16412 op = addr;
16413 else
16415 /* For the load case we need to negate the address. */
16416 op = gen_reg_rtx (GET_MODE (addr));
16417 emit_insn (gen_rtx_SET (op, gen_rtx_NEG (GET_MODE (addr), addr)));
16419 op = gen_rtx_MEM (mode, op);
16421 if (target == 0
16422 || GET_MODE (target) != tmode
16423 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
16424 target = gen_reg_rtx (tmode);
16426 pat = GEN_FCN (icode) (target, op);
16427 if (!pat)
16428 return 0;
16429 emit_insn (pat);
16431 return target;
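/* The mask computed here drives vectorized realignment.  The classic
   (big-endian, illustrative) use of such a permute mask is:

     mask = vec_lvsl (0, addr);
     lo   = vec_ld (0, addr);
     hi   = vec_ld (15, addr);
     v    = vec_perm (lo, hi, mask);

   Taking lvsl/lvsr of the negated address, as above, yields the
   control vector in the form the vectorizer's realign-load pattern
   expects.  */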
16434 case ALTIVEC_BUILTIN_VCFUX:
16435 case ALTIVEC_BUILTIN_VCFSX:
16436 case ALTIVEC_BUILTIN_VCTUXS:
16437 case ALTIVEC_BUILTIN_VCTSXS:
16438 /* FIXME: There's got to be a nicer way to handle this case than
16439 constructing a new CALL_EXPR. */
16440 if (call_expr_nargs (exp) == 1)
16442 exp = build_call_nary (TREE_TYPE (exp), CALL_EXPR_FN (exp),
16443 2, CALL_EXPR_ARG (exp, 0), integer_zero_node);
16445 break;
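/* E.g. a unary use such as __builtin_altivec_vcfsx (v) is rewritten
   here into the two-argument form __builtin_altivec_vcfsx (v, 0),
   i.e. a conversion with a scale factor of zero.  */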
16447 default:
16448 break;
16451 if (TARGET_ALTIVEC)
16453 ret = altivec_expand_builtin (exp, target, &success);
16455 if (success)
16456 return ret;
16458 if (TARGET_SPE)
16460 ret = spe_expand_builtin (exp, target, &success);
16462 if (success)
16463 return ret;
16465 if (TARGET_PAIRED_FLOAT)
16467 ret = paired_expand_builtin (exp, target, &success);
16469 if (success)
16470 return ret;
16472 if (TARGET_HTM)
16474 ret = htm_expand_builtin (exp, target, &success);
16476 if (success)
16477 return ret;
16480 unsigned attr = rs6000_builtin_info[uns_fcode].attr & RS6000_BTC_TYPE_MASK;
16481 /* RS6000_BTC_SPECIAL represents no-operand operators. */
16482 gcc_assert (attr == RS6000_BTC_UNARY
16483 || attr == RS6000_BTC_BINARY
16484 || attr == RS6000_BTC_TERNARY
16485 || attr == RS6000_BTC_SPECIAL);
16487 /* Handle simple unary operations. */
16488 d = bdesc_1arg;
16489 for (i = 0; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
16490 if (d->code == fcode)
16491 return rs6000_expand_unop_builtin (d->icode, exp, target);
16493 /* Handle simple binary operations. */
16494 d = bdesc_2arg;
16495 for (i = 0; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
16496 if (d->code == fcode)
16497 return rs6000_expand_binop_builtin (d->icode, exp, target);
16499 /* Handle simple ternary operations. */
16500 d = bdesc_3arg;
16501 for (i = 0; i < ARRAY_SIZE (bdesc_3arg); i++, d++)
16502 if (d->code == fcode)
16503 return rs6000_expand_ternop_builtin (d->icode, exp, target);
16505 /* Handle simple no-argument operations. */
16506 d = bdesc_0arg;
16507 for (i = 0; i < ARRAY_SIZE (bdesc_0arg); i++, d++)
16508 if (d->code == fcode)
16509 return rs6000_expand_zeroop_builtin (d->icode, target);
16511 gcc_unreachable ();
16514 static void
16515 rs6000_init_builtins (void)
16517 tree tdecl;
16518 tree ftype;
16519 machine_mode mode;
16521 if (TARGET_DEBUG_BUILTIN)
16522 fprintf (stderr, "rs6000_init_builtins%s%s%s%s\n",
16523 (TARGET_PAIRED_FLOAT) ? ", paired" : "",
16524 (TARGET_SPE) ? ", spe" : "",
16525 (TARGET_ALTIVEC) ? ", altivec" : "",
16526 (TARGET_VSX) ? ", vsx" : "");
16528 V2SI_type_node = build_vector_type (intSI_type_node, 2);
16529 V2SF_type_node = build_vector_type (float_type_node, 2);
16530 V2DI_type_node = build_vector_type (intDI_type_node, 2);
16531 V2DF_type_node = build_vector_type (double_type_node, 2);
16532 V4HI_type_node = build_vector_type (intHI_type_node, 4);
16533 V4SI_type_node = build_vector_type (intSI_type_node, 4);
16534 V4SF_type_node = build_vector_type (float_type_node, 4);
16535 V8HI_type_node = build_vector_type (intHI_type_node, 8);
16536 V16QI_type_node = build_vector_type (intQI_type_node, 16);
16538 unsigned_V16QI_type_node = build_vector_type (unsigned_intQI_type_node, 16);
16539 unsigned_V8HI_type_node = build_vector_type (unsigned_intHI_type_node, 8);
16540 unsigned_V4SI_type_node = build_vector_type (unsigned_intSI_type_node, 4);
16541 unsigned_V2DI_type_node = build_vector_type (unsigned_intDI_type_node, 2);
16543 opaque_V2SF_type_node = build_opaque_vector_type (float_type_node, 2);
16544 opaque_V2SI_type_node = build_opaque_vector_type (intSI_type_node, 2);
16545 opaque_p_V2SI_type_node = build_pointer_type (opaque_V2SI_type_node);
16546 opaque_V4SI_type_node = build_opaque_vector_type (intSI_type_node, 4);
16548 const_str_type_node
16549 = build_pointer_type (build_qualified_type (char_type_node,
16550 TYPE_QUAL_CONST));
16552 /* We use V1TI mode as a special container to hold __int128_t items that
16553 must live in VSX registers. */
16554 if (intTI_type_node)
16556 V1TI_type_node = build_vector_type (intTI_type_node, 1);
16557 unsigned_V1TI_type_node = build_vector_type (unsigned_intTI_type_node, 1);
16560 /* The 'vector bool ...' types must be kept distinct from 'vector unsigned ...'
16561 types, especially in C++ land. Similarly, 'vector pixel' is distinct from
16562 'vector unsigned short'. */
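/* For example, C++ overload resolution and name mangling must be able
   to tell 'vector bool int' apart from 'vector unsigned int', so a
   plain typedef would not do; hence the distinct type copies below.  */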
16564 bool_char_type_node = build_distinct_type_copy (unsigned_intQI_type_node);
16565 bool_short_type_node = build_distinct_type_copy (unsigned_intHI_type_node);
16566 bool_int_type_node = build_distinct_type_copy (unsigned_intSI_type_node);
16567 bool_long_type_node = build_distinct_type_copy (unsigned_intDI_type_node);
16568 pixel_type_node = build_distinct_type_copy (unsigned_intHI_type_node);
16570 long_integer_type_internal_node = long_integer_type_node;
16571 long_unsigned_type_internal_node = long_unsigned_type_node;
16572 long_long_integer_type_internal_node = long_long_integer_type_node;
16573 long_long_unsigned_type_internal_node = long_long_unsigned_type_node;
16574 intQI_type_internal_node = intQI_type_node;
16575 uintQI_type_internal_node = unsigned_intQI_type_node;
16576 intHI_type_internal_node = intHI_type_node;
16577 uintHI_type_internal_node = unsigned_intHI_type_node;
16578 intSI_type_internal_node = intSI_type_node;
16579 uintSI_type_internal_node = unsigned_intSI_type_node;
16580 intDI_type_internal_node = intDI_type_node;
16581 uintDI_type_internal_node = unsigned_intDI_type_node;
16582 intTI_type_internal_node = intTI_type_node;
16583 uintTI_type_internal_node = unsigned_intTI_type_node;
16584 float_type_internal_node = float_type_node;
16585 double_type_internal_node = double_type_node;
16586 long_double_type_internal_node = long_double_type_node;
16587 dfloat64_type_internal_node = dfloat64_type_node;
16588 dfloat128_type_internal_node = dfloat128_type_node;
16589 void_type_internal_node = void_type_node;
16591 /* 128-bit floating point support. KFmode is IEEE 128-bit floating point.
16592 IFmode is the IBM extended 128-bit format that is a pair of doubles.
16593 TFmode will be either IEEE 128-bit floating point or the IBM double-double
16594 format that uses a pair of doubles, depending on the switches and
16595 defaults.
16597 We do not enable the actual __float128 keyword unless the user explicitly
16598 asks for it, because the library support is not yet complete.
16600 If we don't support either 128-bit IBM double double or IEEE 128-bit
16601 floating point, we need to make sure the type is non-zero or else the
16602 self-test fails during bootstrap.
16604 We don't register a built-in type for __ibm128 if the type is the same as
16605 long double. Instead we add a #define for __ibm128 in
16606 rs6000_cpu_cpp_builtins to long double. */
16607 if (TARGET_LONG_DOUBLE_128 && FLOAT128_IEEE_P (TFmode))
16609 ibm128_float_type_node = make_node (REAL_TYPE);
16610 TYPE_PRECISION (ibm128_float_type_node) = 128;
16611 layout_type (ibm128_float_type_node);
16612 SET_TYPE_MODE (ibm128_float_type_node, IFmode);
16614 lang_hooks.types.register_builtin_type (ibm128_float_type_node,
16615 "__ibm128");
16617 else
16618 ibm128_float_type_node = long_double_type_node;
16620 if (TARGET_FLOAT128_KEYWORD)
16622 ieee128_float_type_node = float128_type_node;
16623 lang_hooks.types.register_builtin_type (ieee128_float_type_node,
16624 "__float128");
16627 else if (TARGET_FLOAT128_TYPE)
16629 ieee128_float_type_node = make_node (REAL_TYPE);
16630 TYPE_PRECISION (ieee128_float_type_node) = 128;
16631 layout_type (ieee128_float_type_node);
16632 SET_TYPE_MODE (ieee128_float_type_node, KFmode);
16634 /* If we are not exporting the __float128/_Float128 keywords, we need a
16635 keyword to get the types created. Use __ieee128 as the dummy
16636 keyword. */
16637 lang_hooks.types.register_builtin_type (ieee128_float_type_node,
16638 "__ieee128");
16641 else
16642 ieee128_float_type_node = long_double_type_node;
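/* Summarizing the keyword plumbing above:

     __ibm128   the IBM double-double type (IFmode) when long double is
                IEEE 128-bit; otherwise just another name for long double.
     __float128 the IEEE 128-bit type (KFmode), registered only under
                -mfloat128.
     __ieee128  a fallback keyword used to create the KFmode type when
                TARGET_FLOAT128_TYPE is set but the __float128 keyword
                itself is not enabled.  */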
16644 /* Initialize the modes for builtin_function_type, mapping a machine mode to
16645 tree type node. */
16646 builtin_mode_to_type[QImode][0] = integer_type_node;
16647 builtin_mode_to_type[HImode][0] = integer_type_node;
16648 builtin_mode_to_type[SImode][0] = intSI_type_node;
16649 builtin_mode_to_type[SImode][1] = unsigned_intSI_type_node;
16650 builtin_mode_to_type[DImode][0] = intDI_type_node;
16651 builtin_mode_to_type[DImode][1] = unsigned_intDI_type_node;
16652 builtin_mode_to_type[TImode][0] = intTI_type_node;
16653 builtin_mode_to_type[TImode][1] = unsigned_intTI_type_node;
16654 builtin_mode_to_type[SFmode][0] = float_type_node;
16655 builtin_mode_to_type[DFmode][0] = double_type_node;
16656 builtin_mode_to_type[IFmode][0] = ibm128_float_type_node;
16657 builtin_mode_to_type[KFmode][0] = ieee128_float_type_node;
16658 builtin_mode_to_type[TFmode][0] = long_double_type_node;
16659 builtin_mode_to_type[DDmode][0] = dfloat64_type_node;
16660 builtin_mode_to_type[TDmode][0] = dfloat128_type_node;
16661 builtin_mode_to_type[V1TImode][0] = V1TI_type_node;
16662 builtin_mode_to_type[V1TImode][1] = unsigned_V1TI_type_node;
16663 builtin_mode_to_type[V2SImode][0] = V2SI_type_node;
16664 builtin_mode_to_type[V2SFmode][0] = V2SF_type_node;
16665 builtin_mode_to_type[V2DImode][0] = V2DI_type_node;
16666 builtin_mode_to_type[V2DImode][1] = unsigned_V2DI_type_node;
16667 builtin_mode_to_type[V2DFmode][0] = V2DF_type_node;
16668 builtin_mode_to_type[V4HImode][0] = V4HI_type_node;
16669 builtin_mode_to_type[V4SImode][0] = V4SI_type_node;
16670 builtin_mode_to_type[V4SImode][1] = unsigned_V4SI_type_node;
16671 builtin_mode_to_type[V4SFmode][0] = V4SF_type_node;
16672 builtin_mode_to_type[V8HImode][0] = V8HI_type_node;
16673 builtin_mode_to_type[V8HImode][1] = unsigned_V8HI_type_node;
16674 builtin_mode_to_type[V16QImode][0] = V16QI_type_node;
16675 builtin_mode_to_type[V16QImode][1] = unsigned_V16QI_type_node;
16677 tdecl = add_builtin_type ("__bool char", bool_char_type_node);
16678 TYPE_NAME (bool_char_type_node) = tdecl;
16680 tdecl = add_builtin_type ("__bool short", bool_short_type_node);
16681 TYPE_NAME (bool_short_type_node) = tdecl;
16683 tdecl = add_builtin_type ("__bool int", bool_int_type_node);
16684 TYPE_NAME (bool_int_type_node) = tdecl;
16686 tdecl = add_builtin_type ("__pixel", pixel_type_node);
16687 TYPE_NAME (pixel_type_node) = tdecl;
16689 bool_V16QI_type_node = build_vector_type (bool_char_type_node, 16);
16690 bool_V8HI_type_node = build_vector_type (bool_short_type_node, 8);
16691 bool_V4SI_type_node = build_vector_type (bool_int_type_node, 4);
16692 bool_V2DI_type_node = build_vector_type (bool_long_type_node, 2);
16693 pixel_V8HI_type_node = build_vector_type (pixel_type_node, 8);
16695 tdecl = add_builtin_type ("__vector unsigned char", unsigned_V16QI_type_node);
16696 TYPE_NAME (unsigned_V16QI_type_node) = tdecl;
16698 tdecl = add_builtin_type ("__vector signed char", V16QI_type_node);
16699 TYPE_NAME (V16QI_type_node) = tdecl;
16701 tdecl = add_builtin_type ("__vector __bool char", bool_V16QI_type_node);
16702 TYPE_NAME (bool_V16QI_type_node) = tdecl;
16704 tdecl = add_builtin_type ("__vector unsigned short", unsigned_V8HI_type_node);
16705 TYPE_NAME (unsigned_V8HI_type_node) = tdecl;
16707 tdecl = add_builtin_type ("__vector signed short", V8HI_type_node);
16708 TYPE_NAME (V8HI_type_node) = tdecl;
16710 tdecl = add_builtin_type ("__vector __bool short", bool_V8HI_type_node);
16711 TYPE_NAME (bool_V8HI_type_node) = tdecl;
16713 tdecl = add_builtin_type ("__vector unsigned int", unsigned_V4SI_type_node);
16714 TYPE_NAME (unsigned_V4SI_type_node) = tdecl;
16716 tdecl = add_builtin_type ("__vector signed int", V4SI_type_node);
16717 TYPE_NAME (V4SI_type_node) = tdecl;
16719 tdecl = add_builtin_type ("__vector __bool int", bool_V4SI_type_node);
16720 TYPE_NAME (bool_V4SI_type_node) = tdecl;
16722 tdecl = add_builtin_type ("__vector float", V4SF_type_node);
16723 TYPE_NAME (V4SF_type_node) = tdecl;
16725 tdecl = add_builtin_type ("__vector __pixel", pixel_V8HI_type_node);
16726 TYPE_NAME (pixel_V8HI_type_node) = tdecl;
16728 tdecl = add_builtin_type ("__vector double", V2DF_type_node);
16729 TYPE_NAME (V2DF_type_node) = tdecl;
16731 if (TARGET_POWERPC64)
16733 tdecl = add_builtin_type ("__vector long", V2DI_type_node);
16734 TYPE_NAME (V2DI_type_node) = tdecl;
16736 tdecl = add_builtin_type ("__vector unsigned long",
16737 unsigned_V2DI_type_node);
16738 TYPE_NAME (unsigned_V2DI_type_node) = tdecl;
16740 tdecl = add_builtin_type ("__vector __bool long", bool_V2DI_type_node);
16741 TYPE_NAME (bool_V2DI_type_node) = tdecl;
16743 else
16745 tdecl = add_builtin_type ("__vector long long", V2DI_type_node);
16746 TYPE_NAME (V2DI_type_node) = tdecl;
16748 tdecl = add_builtin_type ("__vector unsigned long long",
16749 unsigned_V2DI_type_node);
16750 TYPE_NAME (unsigned_V2DI_type_node) = tdecl;
16752 tdecl = add_builtin_type ("__vector __bool long long",
16753 bool_V2DI_type_node);
16754 TYPE_NAME (bool_V2DI_type_node) = tdecl;
16757 if (V1TI_type_node)
16759 tdecl = add_builtin_type ("__vector __int128", V1TI_type_node);
16760 TYPE_NAME (V1TI_type_node) = tdecl;
16762 tdecl = add_builtin_type ("__vector unsigned __int128",
16763 unsigned_V1TI_type_node);
16764 TYPE_NAME (unsigned_V1TI_type_node) = tdecl;
16767 /* Paired and SPE builtins are only available if the compiler was built
16768 with the appropriate options, so only create those builtins when the
16769 corresponding option is enabled. Create AltiVec and VSX builtins on
16770 machines with at least the general purpose extensions (970 and newer)
16771 to allow the use of the target attribute. */
16772 if (TARGET_PAIRED_FLOAT)
16773 paired_init_builtins ();
16774 if (TARGET_SPE)
16775 spe_init_builtins ();
16776 if (TARGET_EXTRA_BUILTINS)
16777 altivec_init_builtins ();
16778 if (TARGET_HTM)
16779 htm_init_builtins ();
16781 if (TARGET_EXTRA_BUILTINS || TARGET_SPE || TARGET_PAIRED_FLOAT)
16782 rs6000_common_init_builtins ();
16784 ftype = build_function_type_list (ieee128_float_type_node,
16785 const_str_type_node, NULL_TREE);
16786 def_builtin ("__builtin_nanq", ftype, RS6000_BUILTIN_NANQ);
16787 def_builtin ("__builtin_nansq", ftype, RS6000_BUILTIN_NANSQ);
16789 ftype = build_function_type_list (ieee128_float_type_node, NULL_TREE);
16790 def_builtin ("__builtin_infq", ftype, RS6000_BUILTIN_INFQ);
16791 def_builtin ("__builtin_huge_valq", ftype, RS6000_BUILTIN_HUGE_VALQ);
16793 ftype = builtin_function_type (DFmode, DFmode, DFmode, VOIDmode,
16794 RS6000_BUILTIN_RECIP, "__builtin_recipdiv");
16795 def_builtin ("__builtin_recipdiv", ftype, RS6000_BUILTIN_RECIP);
16797 ftype = builtin_function_type (SFmode, SFmode, SFmode, VOIDmode,
16798 RS6000_BUILTIN_RECIPF, "__builtin_recipdivf");
16799 def_builtin ("__builtin_recipdivf", ftype, RS6000_BUILTIN_RECIPF);
16801 ftype = builtin_function_type (DFmode, DFmode, VOIDmode, VOIDmode,
16802 RS6000_BUILTIN_RSQRT, "__builtin_rsqrt");
16803 def_builtin ("__builtin_rsqrt", ftype, RS6000_BUILTIN_RSQRT);
16805 ftype = builtin_function_type (SFmode, SFmode, VOIDmode, VOIDmode,
16806 RS6000_BUILTIN_RSQRTF, "__builtin_rsqrtf");
16807 def_builtin ("__builtin_rsqrtf", ftype, RS6000_BUILTIN_RSQRTF);
16809 mode = (TARGET_64BIT) ? DImode : SImode;
16810 ftype = builtin_function_type (mode, mode, mode, VOIDmode,
16811 POWER7_BUILTIN_BPERMD, "__builtin_bpermd");
16812 def_builtin ("__builtin_bpermd", ftype, POWER7_BUILTIN_BPERMD);
16814 ftype = build_function_type_list (unsigned_intDI_type_node,
16815 NULL_TREE);
16816 def_builtin ("__builtin_ppc_get_timebase", ftype, RS6000_BUILTIN_GET_TB);
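/* Illustrative use (hypothetical user code):

     unsigned long long t0 = __builtin_ppc_get_timebase ();
     ... work ...
     unsigned long long dt = __builtin_ppc_get_timebase () - t0;

   This always yields the full 64-bit time base, even on 32-bit
   targets, whereas __builtin_ppc_mftb below returns only the register
   width.  */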
16818 if (TARGET_64BIT)
16819 ftype = build_function_type_list (unsigned_intDI_type_node,
16820 NULL_TREE);
16821 else
16822 ftype = build_function_type_list (unsigned_intSI_type_node,
16823 NULL_TREE);
16824 def_builtin ("__builtin_ppc_mftb", ftype, RS6000_BUILTIN_MFTB);
16826 ftype = build_function_type_list (double_type_node, NULL_TREE);
16827 def_builtin ("__builtin_mffs", ftype, RS6000_BUILTIN_MFFS);
16829 ftype = build_function_type_list (void_type_node,
16830 intSI_type_node, double_type_node,
16831 NULL_TREE);
16832 def_builtin ("__builtin_mtfsf", ftype, RS6000_BUILTIN_MTFSF);
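/* E.g. (sketch) __builtin_mtfsf (0xff, __builtin_mffs ()) rewrites
   the whole FPSCR with the value just read: the first operand is the
   8-bit field mask, the second the new contents passed as a double.  */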
16834 ftype = build_function_type_list (void_type_node, NULL_TREE);
16835 def_builtin ("__builtin_cpu_init", ftype, RS6000_BUILTIN_CPU_INIT);
16837 ftype = build_function_type_list (bool_int_type_node, const_ptr_type_node,
16838 NULL_TREE);
16839 def_builtin ("__builtin_cpu_is", ftype, RS6000_BUILTIN_CPU_IS);
16840 def_builtin ("__builtin_cpu_supports", ftype, RS6000_BUILTIN_CPU_SUPPORTS);
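/* Typical use (hypothetical user code; the strings are matched against
   the AT_PLATFORM / AT_HWCAP data described in ppc-auxv.h):

     __builtin_cpu_init ();
     if (__builtin_cpu_is ("power9"))
       ...;
     else if (__builtin_cpu_supports ("arch_2_07"))
       ...;  */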
16842 #if TARGET_XCOFF
16843 /* AIX libm provides clog as __clog. */
16844 if ((tdecl = builtin_decl_explicit (BUILT_IN_CLOG)) != NULL_TREE)
16845 set_user_assembler_name (tdecl, "__clog");
16846 #endif
16848 #ifdef SUBTARGET_INIT_BUILTINS
16849 SUBTARGET_INIT_BUILTINS;
16850 #endif
16853 /* Returns the rs6000 builtin decl for CODE. */
16855 static tree
16856 rs6000_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED)
16858 HOST_WIDE_INT fnmask;
16860 if (code >= RS6000_BUILTIN_COUNT)
16861 return error_mark_node;
16863 fnmask = rs6000_builtin_info[code].mask;
16864 if ((fnmask & rs6000_builtin_mask) != fnmask)
16866 rs6000_invalid_builtin ((enum rs6000_builtins)code);
16867 return error_mark_node;
16870 return rs6000_builtin_decls[code];
16873 static void
16874 spe_init_builtins (void)
16876 tree puint_type_node = build_pointer_type (unsigned_type_node);
16877 tree pushort_type_node = build_pointer_type (short_unsigned_type_node);
16878 const struct builtin_description *d;
16879 size_t i;
16881 tree v2si_ftype_4_v2si
16882 = build_function_type_list (opaque_V2SI_type_node,
16883 opaque_V2SI_type_node,
16884 opaque_V2SI_type_node,
16885 opaque_V2SI_type_node,
16886 opaque_V2SI_type_node,
16887 NULL_TREE);
16889 tree v2sf_ftype_4_v2sf
16890 = build_function_type_list (opaque_V2SF_type_node,
16891 opaque_V2SF_type_node,
16892 opaque_V2SF_type_node,
16893 opaque_V2SF_type_node,
16894 opaque_V2SF_type_node,
16895 NULL_TREE);
16897 tree int_ftype_int_v2si_v2si
16898 = build_function_type_list (integer_type_node,
16899 integer_type_node,
16900 opaque_V2SI_type_node,
16901 opaque_V2SI_type_node,
16902 NULL_TREE);
16904 tree int_ftype_int_v2sf_v2sf
16905 = build_function_type_list (integer_type_node,
16906 integer_type_node,
16907 opaque_V2SF_type_node,
16908 opaque_V2SF_type_node,
16909 NULL_TREE);
16911 tree void_ftype_v2si_puint_int
16912 = build_function_type_list (void_type_node,
16913 opaque_V2SI_type_node,
16914 puint_type_node,
16915 integer_type_node,
16916 NULL_TREE);
16918 tree void_ftype_v2si_puint_char
16919 = build_function_type_list (void_type_node,
16920 opaque_V2SI_type_node,
16921 puint_type_node,
16922 char_type_node,
16923 NULL_TREE);
16925 tree void_ftype_v2si_pv2si_int
16926 = build_function_type_list (void_type_node,
16927 opaque_V2SI_type_node,
16928 opaque_p_V2SI_type_node,
16929 integer_type_node,
16930 NULL_TREE);
16932 tree void_ftype_v2si_pv2si_char
16933 = build_function_type_list (void_type_node,
16934 opaque_V2SI_type_node,
16935 opaque_p_V2SI_type_node,
16936 char_type_node,
16937 NULL_TREE);
16939 tree void_ftype_int
16940 = build_function_type_list (void_type_node, integer_type_node, NULL_TREE);
16942 tree int_ftype_void
16943 = build_function_type_list (integer_type_node, NULL_TREE);
16945 tree v2si_ftype_pv2si_int
16946 = build_function_type_list (opaque_V2SI_type_node,
16947 opaque_p_V2SI_type_node,
16948 integer_type_node,
16949 NULL_TREE);
16951 tree v2si_ftype_puint_int
16952 = build_function_type_list (opaque_V2SI_type_node,
16953 puint_type_node,
16954 integer_type_node,
16955 NULL_TREE);
16957 tree v2si_ftype_pushort_int
16958 = build_function_type_list (opaque_V2SI_type_node,
16959 pushort_type_node,
16960 integer_type_node,
16961 NULL_TREE);
16963 tree v2si_ftype_signed_char
16964 = build_function_type_list (opaque_V2SI_type_node,
16965 signed_char_type_node,
16966 NULL_TREE);
16968 add_builtin_type ("__ev64_opaque__", opaque_V2SI_type_node);
16970 /* Initialize irregular SPE builtins. */
16972 def_builtin ("__builtin_spe_mtspefscr", void_ftype_int, SPE_BUILTIN_MTSPEFSCR);
16973 def_builtin ("__builtin_spe_mfspefscr", int_ftype_void, SPE_BUILTIN_MFSPEFSCR);
16974 def_builtin ("__builtin_spe_evstddx", void_ftype_v2si_pv2si_int, SPE_BUILTIN_EVSTDDX);
16975 def_builtin ("__builtin_spe_evstdhx", void_ftype_v2si_pv2si_int, SPE_BUILTIN_EVSTDHX);
16976 def_builtin ("__builtin_spe_evstdwx", void_ftype_v2si_pv2si_int, SPE_BUILTIN_EVSTDWX);
16977 def_builtin ("__builtin_spe_evstwhex", void_ftype_v2si_puint_int, SPE_BUILTIN_EVSTWHEX);
16978 def_builtin ("__builtin_spe_evstwhox", void_ftype_v2si_puint_int, SPE_BUILTIN_EVSTWHOX);
16979 def_builtin ("__builtin_spe_evstwwex", void_ftype_v2si_puint_int, SPE_BUILTIN_EVSTWWEX);
16980 def_builtin ("__builtin_spe_evstwwox", void_ftype_v2si_puint_int, SPE_BUILTIN_EVSTWWOX);
16981 def_builtin ("__builtin_spe_evstdd", void_ftype_v2si_pv2si_char, SPE_BUILTIN_EVSTDD);
16982 def_builtin ("__builtin_spe_evstdh", void_ftype_v2si_pv2si_char, SPE_BUILTIN_EVSTDH);
16983 def_builtin ("__builtin_spe_evstdw", void_ftype_v2si_pv2si_char, SPE_BUILTIN_EVSTDW);
16984 def_builtin ("__builtin_spe_evstwhe", void_ftype_v2si_puint_char, SPE_BUILTIN_EVSTWHE);
16985 def_builtin ("__builtin_spe_evstwho", void_ftype_v2si_puint_char, SPE_BUILTIN_EVSTWHO);
16986 def_builtin ("__builtin_spe_evstwwe", void_ftype_v2si_puint_char, SPE_BUILTIN_EVSTWWE);
16987 def_builtin ("__builtin_spe_evstwwo", void_ftype_v2si_puint_char, SPE_BUILTIN_EVSTWWO);
16988 def_builtin ("__builtin_spe_evsplatfi", v2si_ftype_signed_char, SPE_BUILTIN_EVSPLATFI);
16989 def_builtin ("__builtin_spe_evsplati", v2si_ftype_signed_char, SPE_BUILTIN_EVSPLATI);
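/* E.g. (sketch, with -mspe) __builtin_spe_evsplati (5) materializes
   the V2SI pair {5, 5}; both splat forms take a 5-bit signed
   immediate, hence the signed-char argument type.  */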
16991 /* Loads. */
16992 def_builtin ("__builtin_spe_evlddx", v2si_ftype_pv2si_int, SPE_BUILTIN_EVLDDX);
16993 def_builtin ("__builtin_spe_evldwx", v2si_ftype_pv2si_int, SPE_BUILTIN_EVLDWX);
16994 def_builtin ("__builtin_spe_evldhx", v2si_ftype_pv2si_int, SPE_BUILTIN_EVLDHX);
16995 def_builtin ("__builtin_spe_evlwhex", v2si_ftype_puint_int, SPE_BUILTIN_EVLWHEX);
16996 def_builtin ("__builtin_spe_evlwhoux", v2si_ftype_puint_int, SPE_BUILTIN_EVLWHOUX);
16997 def_builtin ("__builtin_spe_evlwhosx", v2si_ftype_puint_int, SPE_BUILTIN_EVLWHOSX);
16998 def_builtin ("__builtin_spe_evlwwsplatx", v2si_ftype_puint_int, SPE_BUILTIN_EVLWWSPLATX);
16999 def_builtin ("__builtin_spe_evlwhsplatx", v2si_ftype_puint_int, SPE_BUILTIN_EVLWHSPLATX);
17000 def_builtin ("__builtin_spe_evlhhesplatx", v2si_ftype_pushort_int, SPE_BUILTIN_EVLHHESPLATX);
17001 def_builtin ("__builtin_spe_evlhhousplatx", v2si_ftype_pushort_int, SPE_BUILTIN_EVLHHOUSPLATX);
17002 def_builtin ("__builtin_spe_evlhhossplatx", v2si_ftype_pushort_int, SPE_BUILTIN_EVLHHOSSPLATX);
17003 def_builtin ("__builtin_spe_evldd", v2si_ftype_pv2si_int, SPE_BUILTIN_EVLDD);
17004 def_builtin ("__builtin_spe_evldw", v2si_ftype_pv2si_int, SPE_BUILTIN_EVLDW);
17005 def_builtin ("__builtin_spe_evldh", v2si_ftype_pv2si_int, SPE_BUILTIN_EVLDH);
17006 def_builtin ("__builtin_spe_evlhhesplat", v2si_ftype_pushort_int, SPE_BUILTIN_EVLHHESPLAT);
17007 def_builtin ("__builtin_spe_evlhhossplat", v2si_ftype_pushort_int, SPE_BUILTIN_EVLHHOSSPLAT);
17008 def_builtin ("__builtin_spe_evlhhousplat", v2si_ftype_pushort_int, SPE_BUILTIN_EVLHHOUSPLAT);
17009 def_builtin ("__builtin_spe_evlwhe", v2si_ftype_puint_int, SPE_BUILTIN_EVLWHE);
17010 def_builtin ("__builtin_spe_evlwhos", v2si_ftype_puint_int, SPE_BUILTIN_EVLWHOS);
17011 def_builtin ("__builtin_spe_evlwhou", v2si_ftype_puint_int, SPE_BUILTIN_EVLWHOU);
17012 def_builtin ("__builtin_spe_evlwhsplat", v2si_ftype_puint_int, SPE_BUILTIN_EVLWHSPLAT);
17013 def_builtin ("__builtin_spe_evlwwsplat", v2si_ftype_puint_int, SPE_BUILTIN_EVLWWSPLAT);
17015 /* Predicates. */
17016 d = bdesc_spe_predicates;
17017 for (i = 0; i < ARRAY_SIZE (bdesc_spe_predicates); ++i, d++)
17019 tree type;
17021 switch (insn_data[d->icode].operand[1].mode)
17023 case V2SImode:
17024 type = int_ftype_int_v2si_v2si;
17025 break;
17026 case V2SFmode:
17027 type = int_ftype_int_v2sf_v2sf;
17028 break;
17029 default:
17030 gcc_unreachable ();
17033 def_builtin (d->name, type, d->code);
17036 /* Evsel predicates. */
17037 d = bdesc_spe_evsel;
17038 for (i = 0; i < ARRAY_SIZE (bdesc_spe_evsel); ++i, d++)
17040 tree type;
17042 switch (insn_data[d->icode].operand[1].mode)
17044 case V2SImode:
17045 type = v2si_ftype_4_v2si;
17046 break;
17047 case V2SFmode:
17048 type = v2sf_ftype_4_v2sf;
17049 break;
17050 default:
17051 gcc_unreachable ();
17054 def_builtin (d->name, type, d->code);
17058 static void
17059 paired_init_builtins (void)
17061 const struct builtin_description *d;
17062 size_t i;
17064 tree int_ftype_int_v2sf_v2sf
17065 = build_function_type_list (integer_type_node,
17066 integer_type_node,
17067 V2SF_type_node,
17068 V2SF_type_node,
17069 NULL_TREE);
17070 tree pcfloat_type_node =
17071 build_pointer_type (build_qualified_type
17072 (float_type_node, TYPE_QUAL_CONST));
17074 tree v2sf_ftype_long_pcfloat = build_function_type_list (V2SF_type_node,
17075 long_integer_type_node,
17076 pcfloat_type_node,
17077 NULL_TREE);
17078 tree void_ftype_v2sf_long_pcfloat =
17079 build_function_type_list (void_type_node,
17080 V2SF_type_node,
17081 long_integer_type_node,
17082 pcfloat_type_node,
17083 NULL_TREE);
17086 def_builtin ("__builtin_paired_lx", v2sf_ftype_long_pcfloat,
17087 PAIRED_BUILTIN_LX);
17090 def_builtin ("__builtin_paired_stx", void_ftype_v2sf_long_pcfloat,
17091 PAIRED_BUILTIN_STX);
17093 /* Predicates. */
17094 d = bdesc_paired_preds;
17095 for (i = 0; i < ARRAY_SIZE (bdesc_paired_preds); ++i, d++)
17097 tree type;
17099 if (TARGET_DEBUG_BUILTIN)
17100 fprintf (stderr, "paired pred #%d, insn = %s [%d], mode = %s\n",
17101 (int)i, get_insn_name (d->icode), (int)d->icode,
17102 GET_MODE_NAME (insn_data[d->icode].operand[1].mode));
17104 switch (insn_data[d->icode].operand[1].mode)
17106 case V2SFmode:
17107 type = int_ftype_int_v2sf_v2sf;
17108 break;
17109 default:
17110 gcc_unreachable ();
17113 def_builtin (d->name, type, d->code);
17117 static void
17118 altivec_init_builtins (void)
17120 const struct builtin_description *d;
17121 size_t i;
17122 tree ftype;
17123 tree decl;
17125 tree pvoid_type_node = build_pointer_type (void_type_node);
17127 tree pcvoid_type_node
17128 = build_pointer_type (build_qualified_type (void_type_node,
17129 TYPE_QUAL_CONST));
17131 tree int_ftype_opaque
17132 = build_function_type_list (integer_type_node,
17133 opaque_V4SI_type_node, NULL_TREE);
17134 tree opaque_ftype_opaque
17135 = build_function_type_list (integer_type_node, NULL_TREE);
17136 tree opaque_ftype_opaque_int
17137 = build_function_type_list (opaque_V4SI_type_node,
17138 opaque_V4SI_type_node, integer_type_node, NULL_TREE);
17139 tree opaque_ftype_opaque_opaque_int
17140 = build_function_type_list (opaque_V4SI_type_node,
17141 opaque_V4SI_type_node, opaque_V4SI_type_node,
17142 integer_type_node, NULL_TREE);
17143 tree opaque_ftype_opaque_opaque_opaque
17144 = build_function_type_list (opaque_V4SI_type_node,
17145 opaque_V4SI_type_node, opaque_V4SI_type_node,
17146 opaque_V4SI_type_node, NULL_TREE);
17147 tree opaque_ftype_opaque_opaque
17148 = build_function_type_list (opaque_V4SI_type_node,
17149 opaque_V4SI_type_node, opaque_V4SI_type_node,
17150 NULL_TREE);
17151 tree int_ftype_int_opaque_opaque
17152 = build_function_type_list (integer_type_node,
17153 integer_type_node, opaque_V4SI_type_node,
17154 opaque_V4SI_type_node, NULL_TREE);
17155 tree int_ftype_int_v4si_v4si
17156 = build_function_type_list (integer_type_node,
17157 integer_type_node, V4SI_type_node,
17158 V4SI_type_node, NULL_TREE);
17159 tree int_ftype_int_v2di_v2di
17160 = build_function_type_list (integer_type_node,
17161 integer_type_node, V2DI_type_node,
17162 V2DI_type_node, NULL_TREE);
17163 tree void_ftype_v4si
17164 = build_function_type_list (void_type_node, V4SI_type_node, NULL_TREE);
17165 tree v8hi_ftype_void
17166 = build_function_type_list (V8HI_type_node, NULL_TREE);
17167 tree void_ftype_void
17168 = build_function_type_list (void_type_node, NULL_TREE);
17169 tree void_ftype_int
17170 = build_function_type_list (void_type_node, integer_type_node, NULL_TREE);
17172 tree opaque_ftype_long_pcvoid
17173 = build_function_type_list (opaque_V4SI_type_node,
17174 long_integer_type_node, pcvoid_type_node,
17175 NULL_TREE);
17176 tree v16qi_ftype_long_pcvoid
17177 = build_function_type_list (V16QI_type_node,
17178 long_integer_type_node, pcvoid_type_node,
17179 NULL_TREE);
17180 tree v8hi_ftype_long_pcvoid
17181 = build_function_type_list (V8HI_type_node,
17182 long_integer_type_node, pcvoid_type_node,
17183 NULL_TREE);
17184 tree v4si_ftype_long_pcvoid
17185 = build_function_type_list (V4SI_type_node,
17186 long_integer_type_node, pcvoid_type_node,
17187 NULL_TREE);
17188 tree v4sf_ftype_long_pcvoid
17189 = build_function_type_list (V4SF_type_node,
17190 long_integer_type_node, pcvoid_type_node,
17191 NULL_TREE);
17192 tree v2df_ftype_long_pcvoid
17193 = build_function_type_list (V2DF_type_node,
17194 long_integer_type_node, pcvoid_type_node,
17195 NULL_TREE);
17196 tree v2di_ftype_long_pcvoid
17197 = build_function_type_list (V2DI_type_node,
17198 long_integer_type_node, pcvoid_type_node,
17199 NULL_TREE);
17201 tree void_ftype_opaque_long_pvoid
17202 = build_function_type_list (void_type_node,
17203 opaque_V4SI_type_node, long_integer_type_node,
17204 pvoid_type_node, NULL_TREE);
17205 tree void_ftype_v4si_long_pvoid
17206 = build_function_type_list (void_type_node,
17207 V4SI_type_node, long_integer_type_node,
17208 pvoid_type_node, NULL_TREE);
17209 tree void_ftype_v16qi_long_pvoid
17210 = build_function_type_list (void_type_node,
17211 V16QI_type_node, long_integer_type_node,
17212 pvoid_type_node, NULL_TREE);
17213 tree void_ftype_v8hi_long_pvoid
17214 = build_function_type_list (void_type_node,
17215 V8HI_type_node, long_integer_type_node,
17216 pvoid_type_node, NULL_TREE);
17217 tree void_ftype_v4sf_long_pvoid
17218 = build_function_type_list (void_type_node,
17219 V4SF_type_node, long_integer_type_node,
17220 pvoid_type_node, NULL_TREE);
17221 tree void_ftype_v2df_long_pvoid
17222 = build_function_type_list (void_type_node,
17223 V2DF_type_node, long_integer_type_node,
17224 pvoid_type_node, NULL_TREE);
17225 tree void_ftype_v2di_long_pvoid
17226 = build_function_type_list (void_type_node,
17227 V2DI_type_node, long_integer_type_node,
17228 pvoid_type_node, NULL_TREE);
17229 tree int_ftype_int_v8hi_v8hi
17230 = build_function_type_list (integer_type_node,
17231 integer_type_node, V8HI_type_node,
17232 V8HI_type_node, NULL_TREE);
17233 tree int_ftype_int_v16qi_v16qi
17234 = build_function_type_list (integer_type_node,
17235 integer_type_node, V16QI_type_node,
17236 V16QI_type_node, NULL_TREE);
17237 tree int_ftype_int_v4sf_v4sf
17238 = build_function_type_list (integer_type_node,
17239 integer_type_node, V4SF_type_node,
17240 V4SF_type_node, NULL_TREE);
17241 tree int_ftype_int_v2df_v2df
17242 = build_function_type_list (integer_type_node,
17243 integer_type_node, V2DF_type_node,
17244 V2DF_type_node, NULL_TREE);
17245 tree v2di_ftype_v2di
17246 = build_function_type_list (V2DI_type_node, V2DI_type_node, NULL_TREE);
17247 tree v4si_ftype_v4si
17248 = build_function_type_list (V4SI_type_node, V4SI_type_node, NULL_TREE);
17249 tree v8hi_ftype_v8hi
17250 = build_function_type_list (V8HI_type_node, V8HI_type_node, NULL_TREE);
17251 tree v16qi_ftype_v16qi
17252 = build_function_type_list (V16QI_type_node, V16QI_type_node, NULL_TREE);
17253 tree v4sf_ftype_v4sf
17254 = build_function_type_list (V4SF_type_node, V4SF_type_node, NULL_TREE);
17255 tree v2df_ftype_v2df
17256 = build_function_type_list (V2DF_type_node, V2DF_type_node, NULL_TREE);
17257 tree void_ftype_pcvoid_int_int
17258 = build_function_type_list (void_type_node,
17259 pcvoid_type_node, integer_type_node,
17260 integer_type_node, NULL_TREE);
17262 def_builtin ("__builtin_altivec_mtvscr", void_ftype_v4si, ALTIVEC_BUILTIN_MTVSCR);
17263 def_builtin ("__builtin_altivec_mfvscr", v8hi_ftype_void, ALTIVEC_BUILTIN_MFVSCR);
17264 def_builtin ("__builtin_altivec_dssall", void_ftype_void, ALTIVEC_BUILTIN_DSSALL);
17265 def_builtin ("__builtin_altivec_dss", void_ftype_int, ALTIVEC_BUILTIN_DSS);
17266 def_builtin ("__builtin_altivec_lvsl", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVSL);
17267 def_builtin ("__builtin_altivec_lvsr", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVSR);
17268 def_builtin ("__builtin_altivec_lvebx", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVEBX);
17269 def_builtin ("__builtin_altivec_lvehx", v8hi_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVEHX);
17270 def_builtin ("__builtin_altivec_lvewx", v4si_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVEWX);
17271 def_builtin ("__builtin_altivec_lvxl", v4si_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVXL);
17272 def_builtin ("__builtin_altivec_lvxl_v2df", v2df_ftype_long_pcvoid,
17273 ALTIVEC_BUILTIN_LVXL_V2DF);
17274 def_builtin ("__builtin_altivec_lvxl_v2di", v2di_ftype_long_pcvoid,
17275 ALTIVEC_BUILTIN_LVXL_V2DI);
17276 def_builtin ("__builtin_altivec_lvxl_v4sf", v4sf_ftype_long_pcvoid,
17277 ALTIVEC_BUILTIN_LVXL_V4SF);
17278 def_builtin ("__builtin_altivec_lvxl_v4si", v4si_ftype_long_pcvoid,
17279 ALTIVEC_BUILTIN_LVXL_V4SI);
17280 def_builtin ("__builtin_altivec_lvxl_v8hi", v8hi_ftype_long_pcvoid,
17281 ALTIVEC_BUILTIN_LVXL_V8HI);
17282 def_builtin ("__builtin_altivec_lvxl_v16qi", v16qi_ftype_long_pcvoid,
17283 ALTIVEC_BUILTIN_LVXL_V16QI);
17284 def_builtin ("__builtin_altivec_lvx", v4si_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVX);
17285 def_builtin ("__builtin_altivec_lvx_v2df", v2df_ftype_long_pcvoid,
17286 ALTIVEC_BUILTIN_LVX_V2DF);
17287 def_builtin ("__builtin_altivec_lvx_v2di", v2di_ftype_long_pcvoid,
17288 ALTIVEC_BUILTIN_LVX_V2DI);
17289 def_builtin ("__builtin_altivec_lvx_v4sf", v4sf_ftype_long_pcvoid,
17290 ALTIVEC_BUILTIN_LVX_V4SF);
17291 def_builtin ("__builtin_altivec_lvx_v4si", v4si_ftype_long_pcvoid,
17292 ALTIVEC_BUILTIN_LVX_V4SI);
17293 def_builtin ("__builtin_altivec_lvx_v8hi", v8hi_ftype_long_pcvoid,
17294 ALTIVEC_BUILTIN_LVX_V8HI);
17295 def_builtin ("__builtin_altivec_lvx_v16qi", v16qi_ftype_long_pcvoid,
17296 ALTIVEC_BUILTIN_LVX_V16QI);
17297 def_builtin ("__builtin_altivec_stvx", void_ftype_v4si_long_pvoid, ALTIVEC_BUILTIN_STVX);
17298 def_builtin ("__builtin_altivec_stvx_v2df", void_ftype_v2df_long_pvoid,
17299 ALTIVEC_BUILTIN_STVX_V2DF);
17300 def_builtin ("__builtin_altivec_stvx_v2di", void_ftype_v2di_long_pvoid,
17301 ALTIVEC_BUILTIN_STVX_V2DI);
17302 def_builtin ("__builtin_altivec_stvx_v4sf", void_ftype_v4sf_long_pvoid,
17303 ALTIVEC_BUILTIN_STVX_V4SF);
17304 def_builtin ("__builtin_altivec_stvx_v4si", void_ftype_v4si_long_pvoid,
17305 ALTIVEC_BUILTIN_STVX_V4SI);
17306 def_builtin ("__builtin_altivec_stvx_v8hi", void_ftype_v8hi_long_pvoid,
17307 ALTIVEC_BUILTIN_STVX_V8HI);
17308 def_builtin ("__builtin_altivec_stvx_v16qi", void_ftype_v16qi_long_pvoid,
17309 ALTIVEC_BUILTIN_STVX_V16QI);
17310 def_builtin ("__builtin_altivec_stvewx", void_ftype_v4si_long_pvoid, ALTIVEC_BUILTIN_STVEWX);
17311 def_builtin ("__builtin_altivec_stvxl", void_ftype_v4si_long_pvoid, ALTIVEC_BUILTIN_STVXL);
17312 def_builtin ("__builtin_altivec_stvxl_v2df", void_ftype_v2df_long_pvoid,
17313 ALTIVEC_BUILTIN_STVXL_V2DF);
17314 def_builtin ("__builtin_altivec_stvxl_v2di", void_ftype_v2di_long_pvoid,
17315 ALTIVEC_BUILTIN_STVXL_V2DI);
17316 def_builtin ("__builtin_altivec_stvxl_v4sf", void_ftype_v4sf_long_pvoid,
17317 ALTIVEC_BUILTIN_STVXL_V4SF);
17318 def_builtin ("__builtin_altivec_stvxl_v4si", void_ftype_v4si_long_pvoid,
17319 ALTIVEC_BUILTIN_STVXL_V4SI);
17320 def_builtin ("__builtin_altivec_stvxl_v8hi", void_ftype_v8hi_long_pvoid,
17321 ALTIVEC_BUILTIN_STVXL_V8HI);
17322 def_builtin ("__builtin_altivec_stvxl_v16qi", void_ftype_v16qi_long_pvoid,
17323 ALTIVEC_BUILTIN_STVXL_V16QI);
17324 def_builtin ("__builtin_altivec_stvebx", void_ftype_v16qi_long_pvoid, ALTIVEC_BUILTIN_STVEBX);
17325 def_builtin ("__builtin_altivec_stvehx", void_ftype_v8hi_long_pvoid, ALTIVEC_BUILTIN_STVEHX);
17326 def_builtin ("__builtin_vec_ld", opaque_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LD);
17327 def_builtin ("__builtin_vec_lde", opaque_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LDE);
17328 def_builtin ("__builtin_vec_ldl", opaque_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LDL);
17329 def_builtin ("__builtin_vec_lvsl", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LVSL);
17330 def_builtin ("__builtin_vec_lvsr", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LVSR);
17331 def_builtin ("__builtin_vec_lvebx", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LVEBX);
17332 def_builtin ("__builtin_vec_lvehx", v8hi_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LVEHX);
17333 def_builtin ("__builtin_vec_lvewx", v4si_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LVEWX);
17334 def_builtin ("__builtin_vec_st", void_ftype_opaque_long_pvoid, ALTIVEC_BUILTIN_VEC_ST);
17335 def_builtin ("__builtin_vec_ste", void_ftype_opaque_long_pvoid, ALTIVEC_BUILTIN_VEC_STE);
17336 def_builtin ("__builtin_vec_stl", void_ftype_opaque_long_pvoid, ALTIVEC_BUILTIN_VEC_STL);
17337 def_builtin ("__builtin_vec_stvewx", void_ftype_opaque_long_pvoid, ALTIVEC_BUILTIN_VEC_STVEWX);
17338 def_builtin ("__builtin_vec_stvebx", void_ftype_opaque_long_pvoid, ALTIVEC_BUILTIN_VEC_STVEBX);
17339 def_builtin ("__builtin_vec_stvehx", void_ftype_opaque_long_pvoid, ALTIVEC_BUILTIN_VEC_STVEHX);
17341 def_builtin ("__builtin_vsx_lxvd2x_v2df", v2df_ftype_long_pcvoid,
17342 VSX_BUILTIN_LXVD2X_V2DF);
17343 def_builtin ("__builtin_vsx_lxvd2x_v2di", v2di_ftype_long_pcvoid,
17344 VSX_BUILTIN_LXVD2X_V2DI);
17345 def_builtin ("__builtin_vsx_lxvw4x_v4sf", v4sf_ftype_long_pcvoid,
17346 VSX_BUILTIN_LXVW4X_V4SF);
17347 def_builtin ("__builtin_vsx_lxvw4x_v4si", v4si_ftype_long_pcvoid,
17348 VSX_BUILTIN_LXVW4X_V4SI);
17349 def_builtin ("__builtin_vsx_lxvw4x_v8hi", v8hi_ftype_long_pcvoid,
17350 VSX_BUILTIN_LXVW4X_V8HI);
17351 def_builtin ("__builtin_vsx_lxvw4x_v16qi", v16qi_ftype_long_pcvoid,
17352 VSX_BUILTIN_LXVW4X_V16QI);
17353 def_builtin ("__builtin_vsx_stxvd2x_v2df", void_ftype_v2df_long_pvoid,
17354 VSX_BUILTIN_STXVD2X_V2DF);
17355 def_builtin ("__builtin_vsx_stxvd2x_v2di", void_ftype_v2di_long_pvoid,
17356 VSX_BUILTIN_STXVD2X_V2DI);
17357 def_builtin ("__builtin_vsx_stxvw4x_v4sf", void_ftype_v4sf_long_pvoid,
17358 VSX_BUILTIN_STXVW4X_V4SF);
17359 def_builtin ("__builtin_vsx_stxvw4x_v4si", void_ftype_v4si_long_pvoid,
17360 VSX_BUILTIN_STXVW4X_V4SI);
17361 def_builtin ("__builtin_vsx_stxvw4x_v8hi", void_ftype_v8hi_long_pvoid,
17362 VSX_BUILTIN_STXVW4X_V8HI);
17363 def_builtin ("__builtin_vsx_stxvw4x_v16qi", void_ftype_v16qi_long_pvoid,
17364 VSX_BUILTIN_STXVW4X_V16QI);
17366 def_builtin ("__builtin_vsx_ld_elemrev_v2df", v2df_ftype_long_pcvoid,
17367 VSX_BUILTIN_LD_ELEMREV_V2DF);
17368 def_builtin ("__builtin_vsx_ld_elemrev_v2di", v2di_ftype_long_pcvoid,
17369 VSX_BUILTIN_LD_ELEMREV_V2DI);
17370 def_builtin ("__builtin_vsx_ld_elemrev_v4sf", v4sf_ftype_long_pcvoid,
17371 VSX_BUILTIN_LD_ELEMREV_V4SF);
17372 def_builtin ("__builtin_vsx_ld_elemrev_v4si", v4si_ftype_long_pcvoid,
17373 VSX_BUILTIN_LD_ELEMREV_V4SI);
17374 def_builtin ("__builtin_vsx_st_elemrev_v2df", void_ftype_v2df_long_pvoid,
17375 VSX_BUILTIN_ST_ELEMREV_V2DF);
17376 def_builtin ("__builtin_vsx_st_elemrev_v2di", void_ftype_v2di_long_pvoid,
17377 VSX_BUILTIN_ST_ELEMREV_V2DI);
17378 def_builtin ("__builtin_vsx_st_elemrev_v4sf", void_ftype_v4sf_long_pvoid,
17379 VSX_BUILTIN_ST_ELEMREV_V4SF);
17380 def_builtin ("__builtin_vsx_st_elemrev_v4si", void_ftype_v4si_long_pvoid,
17381 VSX_BUILTIN_ST_ELEMREV_V4SI);
17383 if (TARGET_P9_VECTOR)
17385 def_builtin ("__builtin_vsx_ld_elemrev_v8hi", v8hi_ftype_long_pcvoid,
17386 VSX_BUILTIN_LD_ELEMREV_V8HI);
17387 def_builtin ("__builtin_vsx_ld_elemrev_v16qi", v16qi_ftype_long_pcvoid,
17388 VSX_BUILTIN_LD_ELEMREV_V16QI);
17389 def_builtin ("__builtin_vsx_st_elemrev_v8hi",
17390 void_ftype_v8hi_long_pvoid, VSX_BUILTIN_ST_ELEMREV_V8HI);
17391 def_builtin ("__builtin_vsx_st_elemrev_v16qi",
17392 void_ftype_v16qi_long_pvoid, VSX_BUILTIN_ST_ELEMREV_V16QI);
17395 def_builtin ("__builtin_vec_vsx_ld", opaque_ftype_long_pcvoid,
17396 VSX_BUILTIN_VEC_LD);
17397 def_builtin ("__builtin_vec_vsx_st", void_ftype_opaque_long_pvoid,
17398 VSX_BUILTIN_VEC_ST);
17399 def_builtin ("__builtin_vec_xl", opaque_ftype_long_pcvoid,
17400 VSX_BUILTIN_VEC_XL);
17401 def_builtin ("__builtin_vec_xst", void_ftype_opaque_long_pvoid,
17402 VSX_BUILTIN_VEC_XST);
17404 def_builtin ("__builtin_vec_step", int_ftype_opaque, ALTIVEC_BUILTIN_VEC_STEP);
17405 def_builtin ("__builtin_vec_splats", opaque_ftype_opaque, ALTIVEC_BUILTIN_VEC_SPLATS);
17406 def_builtin ("__builtin_vec_promote", opaque_ftype_opaque, ALTIVEC_BUILTIN_VEC_PROMOTE);
17408 def_builtin ("__builtin_vec_sld", opaque_ftype_opaque_opaque_int, ALTIVEC_BUILTIN_VEC_SLD);
17409 def_builtin ("__builtin_vec_splat", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_SPLAT);
17410 def_builtin ("__builtin_vec_extract", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_EXTRACT);
17411 def_builtin ("__builtin_vec_insert", opaque_ftype_opaque_opaque_int, ALTIVEC_BUILTIN_VEC_INSERT);
17412 def_builtin ("__builtin_vec_vspltw", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_VSPLTW);
17413 def_builtin ("__builtin_vec_vsplth", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_VSPLTH);
17414 def_builtin ("__builtin_vec_vspltb", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_VSPLTB);
17415 def_builtin ("__builtin_vec_ctf", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_CTF);
17416 def_builtin ("__builtin_vec_vcfsx", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_VCFSX);
17417 def_builtin ("__builtin_vec_vcfux", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_VCFUX);
17418 def_builtin ("__builtin_vec_cts", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_CTS);
17419 def_builtin ("__builtin_vec_ctu", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_CTU);
17421 def_builtin ("__builtin_vec_adde", opaque_ftype_opaque_opaque_opaque,
17422 ALTIVEC_BUILTIN_VEC_ADDE);
17423 def_builtin ("__builtin_vec_addec", opaque_ftype_opaque_opaque_opaque,
17424 ALTIVEC_BUILTIN_VEC_ADDEC);
17425 def_builtin ("__builtin_vec_cmpne", opaque_ftype_opaque_opaque,
17426 ALTIVEC_BUILTIN_VEC_CMPNE);
17427 def_builtin ("__builtin_vec_mul", opaque_ftype_opaque_opaque,
17428 ALTIVEC_BUILTIN_VEC_MUL);
17430 /* Cell builtins. */
17431 def_builtin ("__builtin_altivec_lvlx", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVLX);
17432 def_builtin ("__builtin_altivec_lvlxl", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVLXL);
17433 def_builtin ("__builtin_altivec_lvrx", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVRX);
17434 def_builtin ("__builtin_altivec_lvrxl", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVRXL);
17436 def_builtin ("__builtin_vec_lvlx", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LVLX);
17437 def_builtin ("__builtin_vec_lvlxl", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LVLXL);
17438 def_builtin ("__builtin_vec_lvrx", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LVRX);
17439 def_builtin ("__builtin_vec_lvrxl", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LVRXL);
17441 def_builtin ("__builtin_altivec_stvlx", void_ftype_v16qi_long_pvoid, ALTIVEC_BUILTIN_STVLX);
17442 def_builtin ("__builtin_altivec_stvlxl", void_ftype_v16qi_long_pvoid, ALTIVEC_BUILTIN_STVLXL);
17443 def_builtin ("__builtin_altivec_stvrx", void_ftype_v16qi_long_pvoid, ALTIVEC_BUILTIN_STVRX);
17444 def_builtin ("__builtin_altivec_stvrxl", void_ftype_v16qi_long_pvoid, ALTIVEC_BUILTIN_STVRXL);
17446 def_builtin ("__builtin_vec_stvlx", void_ftype_v16qi_long_pvoid, ALTIVEC_BUILTIN_VEC_STVLX);
17447 def_builtin ("__builtin_vec_stvlxl", void_ftype_v16qi_long_pvoid, ALTIVEC_BUILTIN_VEC_STVLXL);
17448 def_builtin ("__builtin_vec_stvrx", void_ftype_v16qi_long_pvoid, ALTIVEC_BUILTIN_VEC_STVRX);
17449 def_builtin ("__builtin_vec_stvrxl", void_ftype_v16qi_long_pvoid, ALTIVEC_BUILTIN_VEC_STVRXL);
17451 /* Add the DST variants. */
17452 d = bdesc_dst;
17453 for (i = 0; i < ARRAY_SIZE (bdesc_dst); i++, d++)
17454 def_builtin (d->name, void_ftype_pcvoid_int_int, d->code);
17456 /* Initialize the predicates. */
17457 d = bdesc_altivec_preds;
17458 for (i = 0; i < ARRAY_SIZE (bdesc_altivec_preds); i++, d++)
17460 machine_mode mode1;
17461 tree type;
17463 if (rs6000_overloaded_builtin_p (d->code))
17464 mode1 = VOIDmode;
17465 else
17466 mode1 = insn_data[d->icode].operand[1].mode;
17468 switch (mode1)
17470 case VOIDmode:
17471 type = int_ftype_int_opaque_opaque;
17472 break;
17473 case V2DImode:
17474 type = int_ftype_int_v2di_v2di;
17475 break;
17476 case V4SImode:
17477 type = int_ftype_int_v4si_v4si;
17478 break;
17479 case V8HImode:
17480 type = int_ftype_int_v8hi_v8hi;
17481 break;
17482 case V16QImode:
17483 type = int_ftype_int_v16qi_v16qi;
17484 break;
17485 case V4SFmode:
17486 type = int_ftype_int_v4sf_v4sf;
17487 break;
17488 case V2DFmode:
17489 type = int_ftype_int_v2df_v2df;
17490 break;
17491 default:
17492 gcc_unreachable ();
17495 def_builtin (d->name, type, d->code);
17498 /* Initialize the abs* operators. */
17499 d = bdesc_abs;
17500 for (i = 0; i < ARRAY_SIZE (bdesc_abs); i++, d++)
17502 machine_mode mode0;
17503 tree type;
17505 mode0 = insn_data[d->icode].operand[0].mode;
17507 switch (mode0)
17509 case V2DImode:
17510 type = v2di_ftype_v2di;
17511 break;
17512 case V4SImode:
17513 type = v4si_ftype_v4si;
17514 break;
17515 case V8HImode:
17516 type = v8hi_ftype_v8hi;
17517 break;
17518 case V16QImode:
17519 type = v16qi_ftype_v16qi;
17520 break;
17521 case V4SFmode:
17522 type = v4sf_ftype_v4sf;
17523 break;
17524 case V2DFmode:
17525 type = v2df_ftype_v2df;
17526 break;
17527 default:
17528 gcc_unreachable ();
17531 def_builtin (d->name, type, d->code);
17534 /* Initialize target builtin that implements
17535 targetm.vectorize.builtin_mask_for_load. */
17537 decl = add_builtin_function ("__builtin_altivec_mask_for_load",
17538 v16qi_ftype_long_pcvoid,
17539 ALTIVEC_BUILTIN_MASK_FOR_LOAD,
17540 BUILT_IN_MD, NULL, NULL_TREE);
17541 TREE_READONLY (decl) = 1;
17542 /* Record the decl. Will be used by rs6000_builtin_mask_for_load. */
17543 altivec_builtin_mask_for_load = decl;
17545 /* Access to the vec_init patterns. */
17546 ftype = build_function_type_list (V4SI_type_node, integer_type_node,
17547 integer_type_node, integer_type_node,
17548 integer_type_node, NULL_TREE);
17549 def_builtin ("__builtin_vec_init_v4si", ftype, ALTIVEC_BUILTIN_VEC_INIT_V4SI);
17551 ftype = build_function_type_list (V8HI_type_node, short_integer_type_node,
17552 short_integer_type_node,
17553 short_integer_type_node,
17554 short_integer_type_node,
17555 short_integer_type_node,
17556 short_integer_type_node,
17557 short_integer_type_node,
17558 short_integer_type_node, NULL_TREE);
17559 def_builtin ("__builtin_vec_init_v8hi", ftype, ALTIVEC_BUILTIN_VEC_INIT_V8HI);
17561 ftype = build_function_type_list (V16QI_type_node, char_type_node,
17562 char_type_node, char_type_node,
17563 char_type_node, char_type_node,
17564 char_type_node, char_type_node,
17565 char_type_node, char_type_node,
17566 char_type_node, char_type_node,
17567 char_type_node, char_type_node,
17568 char_type_node, char_type_node,
17569 char_type_node, NULL_TREE);
17570 def_builtin ("__builtin_vec_init_v16qi", ftype,
17571 ALTIVEC_BUILTIN_VEC_INIT_V16QI);
17573 ftype = build_function_type_list (V4SF_type_node, float_type_node,
17574 float_type_node, float_type_node,
17575 float_type_node, NULL_TREE);
17576 def_builtin ("__builtin_vec_init_v4sf", ftype, ALTIVEC_BUILTIN_VEC_INIT_V4SF);
17578 /* VSX builtins. */
17579 ftype = build_function_type_list (V2DF_type_node, double_type_node,
17580 double_type_node, NULL_TREE);
17581 def_builtin ("__builtin_vec_init_v2df", ftype, VSX_BUILTIN_VEC_INIT_V2DF);
17583 ftype = build_function_type_list (V2DI_type_node, intDI_type_node,
17584 intDI_type_node, NULL_TREE);
17585 def_builtin ("__builtin_vec_init_v2di", ftype, VSX_BUILTIN_VEC_INIT_V2DI);
17587 /* Access to the vec_set patterns. */
17588 ftype = build_function_type_list (V4SI_type_node, V4SI_type_node,
17589 intSI_type_node,
17590 integer_type_node, NULL_TREE);
17591 def_builtin ("__builtin_vec_set_v4si", ftype, ALTIVEC_BUILTIN_VEC_SET_V4SI);
17593 ftype = build_function_type_list (V8HI_type_node, V8HI_type_node,
17594 intHI_type_node,
17595 integer_type_node, NULL_TREE);
17596 def_builtin ("__builtin_vec_set_v8hi", ftype, ALTIVEC_BUILTIN_VEC_SET_V8HI);
17598 ftype = build_function_type_list (V16QI_type_node, V16QI_type_node,
17599 intQI_type_node,
17600 integer_type_node, NULL_TREE);
17601 def_builtin ("__builtin_vec_set_v16qi", ftype, ALTIVEC_BUILTIN_VEC_SET_V16QI);
17603 ftype = build_function_type_list (V4SF_type_node, V4SF_type_node,
17604 float_type_node,
17605 integer_type_node, NULL_TREE);
17606 def_builtin ("__builtin_vec_set_v4sf", ftype, ALTIVEC_BUILTIN_VEC_SET_V4SF);
17608 ftype = build_function_type_list (V2DF_type_node, V2DF_type_node,
17609 double_type_node,
17610 integer_type_node, NULL_TREE);
17611 def_builtin ("__builtin_vec_set_v2df", ftype, VSX_BUILTIN_VEC_SET_V2DF);
17613 ftype = build_function_type_list (V2DI_type_node, V2DI_type_node,
17614 intDI_type_node,
17615 integer_type_node, NULL_TREE);
17616 def_builtin ("__builtin_vec_set_v2di", ftype, VSX_BUILTIN_VEC_SET_V2DI);
17618 /* Access to the vec_extract patterns. */
17619 ftype = build_function_type_list (intSI_type_node, V4SI_type_node,
17620 integer_type_node, NULL_TREE);
17621 def_builtin ("__builtin_vec_ext_v4si", ftype, ALTIVEC_BUILTIN_VEC_EXT_V4SI);
17623 ftype = build_function_type_list (intHI_type_node, V8HI_type_node,
17624 integer_type_node, NULL_TREE);
17625 def_builtin ("__builtin_vec_ext_v8hi", ftype, ALTIVEC_BUILTIN_VEC_EXT_V8HI);
17627 ftype = build_function_type_list (intQI_type_node, V16QI_type_node,
17628 integer_type_node, NULL_TREE);
17629 def_builtin ("__builtin_vec_ext_v16qi", ftype, ALTIVEC_BUILTIN_VEC_EXT_V16QI);
17631 ftype = build_function_type_list (float_type_node, V4SF_type_node,
17632 integer_type_node, NULL_TREE);
17633 def_builtin ("__builtin_vec_ext_v4sf", ftype, ALTIVEC_BUILTIN_VEC_EXT_V4SF);
17635 ftype = build_function_type_list (double_type_node, V2DF_type_node,
17636 integer_type_node, NULL_TREE);
17637 def_builtin ("__builtin_vec_ext_v2df", ftype, VSX_BUILTIN_VEC_EXT_V2DF);
17639 ftype = build_function_type_list (intDI_type_node, V2DI_type_node,
17640 integer_type_node, NULL_TREE);
17641 def_builtin ("__builtin_vec_ext_v2di", ftype, VSX_BUILTIN_VEC_EXT_V2DI);
17644 if (V1TI_type_node)
17646 tree v1ti_ftype_long_pcvoid
17647 = build_function_type_list (V1TI_type_node,
17648 long_integer_type_node, pcvoid_type_node,
17649 NULL_TREE);
17650 tree void_ftype_v1ti_long_pvoid
17651 = build_function_type_list (void_type_node,
17652 V1TI_type_node, long_integer_type_node,
17653 pvoid_type_node, NULL_TREE);
17654 def_builtin ("__builtin_vsx_lxvd2x_v1ti", v1ti_ftype_long_pcvoid,
17655 VSX_BUILTIN_LXVD2X_V1TI);
17656 def_builtin ("__builtin_vsx_stxvd2x_v1ti", void_ftype_v1ti_long_pvoid,
17657 VSX_BUILTIN_STXVD2X_V1TI);
17658 ftype = build_function_type_list (V1TI_type_node, intTI_type_node,
17659 NULL_TREE, NULL_TREE);
17660 def_builtin ("__builtin_vec_init_v1ti", ftype, VSX_BUILTIN_VEC_INIT_V1TI);
17661 ftype = build_function_type_list (V1TI_type_node, V1TI_type_node,
17662 intTI_type_node,
17663 integer_type_node, NULL_TREE);
17664 def_builtin ("__builtin_vec_set_v1ti", ftype, VSX_BUILTIN_VEC_SET_V1TI);
17665 ftype = build_function_type_list (intTI_type_node, V1TI_type_node,
17666 integer_type_node, NULL_TREE);
17667 def_builtin ("__builtin_vec_ext_v1ti", ftype, VSX_BUILTIN_VEC_EXT_V1TI);
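/* Editorial aside -- GNU C vector subscripting is the usual route to the
   vec_set and vec_extract patterns registered above.  A minimal sketch
   (fenced out of compilation):  */
#if 0
#include <altivec.h>

static vector int
set_lane (vector int v, int x, unsigned i)
{
  v[i & 3] = x;       /* lowered through the vec_set pattern      */
  return v;
}

static int
get_lane (vector int v, unsigned i)
{
  return v[i & 3];    /* lowered through the vec_extract pattern  */
}
#endif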
17672 static void
17673 htm_init_builtins (void)
17675 HOST_WIDE_INT builtin_mask = rs6000_builtin_mask;
17676 const struct builtin_description *d;
17677 size_t i;
17679 d = bdesc_htm;
17680 for (i = 0; i < ARRAY_SIZE (bdesc_htm); i++, d++)
17682 tree op[MAX_HTM_OPERANDS], type;
17683 HOST_WIDE_INT mask = d->mask;
17684 unsigned attr = rs6000_builtin_info[d->code].attr;
17685 bool void_func = (attr & RS6000_BTC_VOID);
17686 int attr_args = (attr & RS6000_BTC_TYPE_MASK);
17687 int nopnds = 0;
17688 tree gpr_type_node;
17689 tree rettype;
17690 tree argtype;
17692 if (TARGET_32BIT && TARGET_POWERPC64)
17693 gpr_type_node = long_long_unsigned_type_node;
17694 else
17695 gpr_type_node = long_unsigned_type_node;
17697 if (attr & RS6000_BTC_SPR)
17699 rettype = gpr_type_node;
17700 argtype = gpr_type_node;
17702 else if (d->code == HTM_BUILTIN_TABORTDC
17703 || d->code == HTM_BUILTIN_TABORTDCI)
17705 rettype = unsigned_type_node;
17706 argtype = gpr_type_node;
17708 else
17710 rettype = unsigned_type_node;
17711 argtype = unsigned_type_node;
17714 if ((mask & builtin_mask) != mask)
17716 if (TARGET_DEBUG_BUILTIN)
17717 fprintf (stderr, "htm_builtin, skip htm %s\n", d->name);
17718 continue;
17721 if (d->name == 0)
17723 if (TARGET_DEBUG_BUILTIN)
17724 fprintf (stderr, "htm_builtin, bdesc_htm[%lu] no name\n",
17725 (long unsigned) i);
17726 continue;
17729 op[nopnds++] = (void_func) ? void_type_node : rettype;
17731 if (attr_args == RS6000_BTC_UNARY)
17732 op[nopnds++] = argtype;
17733 else if (attr_args == RS6000_BTC_BINARY)
17735 op[nopnds++] = argtype;
17736 op[nopnds++] = argtype;
17738 else if (attr_args == RS6000_BTC_TERNARY)
17740 op[nopnds++] = argtype;
17741 op[nopnds++] = argtype;
17742 op[nopnds++] = argtype;
17745 switch (nopnds)
17747 case 1:
17748 type = build_function_type_list (op[0], NULL_TREE);
17749 break;
17750 case 2:
17751 type = build_function_type_list (op[0], op[1], NULL_TREE);
17752 break;
17753 case 3:
17754 type = build_function_type_list (op[0], op[1], op[2], NULL_TREE);
17755 break;
17756 case 4:
17757 type = build_function_type_list (op[0], op[1], op[2], op[3],
17758 NULL_TREE);
17759 break;
17760 default:
17761 gcc_unreachable ();
17764 def_builtin (d->name, type, d->code);
17768 /* Hash function for builtin functions with up to 3 arguments and a return
17769 type. */
17770 hashval_t
17771 builtin_hasher::hash (builtin_hash_struct *bh)
17773 unsigned ret = 0;
17774 int i;
17776 for (i = 0; i < 4; i++)
17778 ret = (ret * (unsigned)MAX_MACHINE_MODE) + ((unsigned)bh->mode[i]);
17779 ret = (ret * 2) + bh->uns_p[i];
17782 return ret;
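/* Editorial aside -- the fold above treats the four (mode, uns_p) pairs as
   digits in a mixed radix of MAX_MACHINE_MODE and 2.  A standalone model
   (fenced out of compilation; N_MODES is a hypothetical stand-in for
   MAX_MACHINE_MODE):  */
#if 0
#define N_MODES 200

static unsigned
fold_signature (const unsigned char mode[4], const unsigned char uns[4])
{
  unsigned ret = 0;
  int i;
  for (i = 0; i < 4; i++)
    {
      ret = ret * N_MODES + mode[i];  /* base-N_MODES digit */
      ret = ret * 2 + uns[i];         /* base-2 digit       */
    }
  return ret;
}
#endif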
17785 /* Compare builtin hash entries H1 and H2 for equivalence. */
17786 bool
17787 builtin_hasher::equal (builtin_hash_struct *p1, builtin_hash_struct *p2)
17789 return ((p1->mode[0] == p2->mode[0])
17790 && (p1->mode[1] == p2->mode[1])
17791 && (p1->mode[2] == p2->mode[2])
17792 && (p1->mode[3] == p2->mode[3])
17793 && (p1->uns_p[0] == p2->uns_p[0])
17794 && (p1->uns_p[1] == p2->uns_p[1])
17795 && (p1->uns_p[2] == p2->uns_p[2])
17796 && (p1->uns_p[3] == p2->uns_p[3]));
17799 /* Map types for builtin functions with an explicit return type and up to 3
17800 arguments. Functions with fewer than 3 arguments use VOIDmode as the type
17801 of each unused argument. */
17802 static tree
17803 builtin_function_type (machine_mode mode_ret, machine_mode mode_arg0,
17804 machine_mode mode_arg1, machine_mode mode_arg2,
17805 enum rs6000_builtins builtin, const char *name)
17807 struct builtin_hash_struct h;
17808 struct builtin_hash_struct *h2;
17809 int num_args = 3;
17810 int i;
17811 tree ret_type = NULL_TREE;
17812 tree arg_type[3] = { NULL_TREE, NULL_TREE, NULL_TREE };
17814 /* Create builtin_hash_table. */
17815 if (builtin_hash_table == NULL)
17816 builtin_hash_table = hash_table<builtin_hasher>::create_ggc (1500);
17818 h.type = NULL_TREE;
17819 h.mode[0] = mode_ret;
17820 h.mode[1] = mode_arg0;
17821 h.mode[2] = mode_arg1;
17822 h.mode[3] = mode_arg2;
17823 h.uns_p[0] = 0;
17824 h.uns_p[1] = 0;
17825 h.uns_p[2] = 0;
17826 h.uns_p[3] = 0;
17828 /* If the builtin produces unsigned results or takes unsigned arguments,
17829 and it is returned as a decl for the vectorizer (such as
17830 widening multiplies, permute), make sure the arguments and return value
17831 are type correct. */
17832 switch (builtin)
17834 /* unsigned 1 argument functions. */
17835 case CRYPTO_BUILTIN_VSBOX:
17836 case P8V_BUILTIN_VGBBD:
17837 case MISC_BUILTIN_CDTBCD:
17838 case MISC_BUILTIN_CBCDTD:
17839 h.uns_p[0] = 1;
17840 h.uns_p[1] = 1;
17841 break;
17843 /* unsigned 2 argument functions. */
17844 case ALTIVEC_BUILTIN_VMULEUB_UNS:
17845 case ALTIVEC_BUILTIN_VMULEUH_UNS:
17846 case ALTIVEC_BUILTIN_VMULOUB_UNS:
17847 case ALTIVEC_BUILTIN_VMULOUH_UNS:
17848 case CRYPTO_BUILTIN_VCIPHER:
17849 case CRYPTO_BUILTIN_VCIPHERLAST:
17850 case CRYPTO_BUILTIN_VNCIPHER:
17851 case CRYPTO_BUILTIN_VNCIPHERLAST:
17852 case CRYPTO_BUILTIN_VPMSUMB:
17853 case CRYPTO_BUILTIN_VPMSUMH:
17854 case CRYPTO_BUILTIN_VPMSUMW:
17855 case CRYPTO_BUILTIN_VPMSUMD:
17856 case CRYPTO_BUILTIN_VPMSUM:
17857 case MISC_BUILTIN_ADDG6S:
17858 case MISC_BUILTIN_DIVWEU:
17859 case MISC_BUILTIN_DIVWEUO:
17860 case MISC_BUILTIN_DIVDEU:
17861 case MISC_BUILTIN_DIVDEUO:
17862 h.uns_p[0] = 1;
17863 h.uns_p[1] = 1;
17864 h.uns_p[2] = 1;
17865 break;
17867 /* unsigned 3 argument functions. */
17868 case ALTIVEC_BUILTIN_VPERM_16QI_UNS:
17869 case ALTIVEC_BUILTIN_VPERM_8HI_UNS:
17870 case ALTIVEC_BUILTIN_VPERM_4SI_UNS:
17871 case ALTIVEC_BUILTIN_VPERM_2DI_UNS:
17872 case ALTIVEC_BUILTIN_VSEL_16QI_UNS:
17873 case ALTIVEC_BUILTIN_VSEL_8HI_UNS:
17874 case ALTIVEC_BUILTIN_VSEL_4SI_UNS:
17875 case ALTIVEC_BUILTIN_VSEL_2DI_UNS:
17876 case VSX_BUILTIN_VPERM_16QI_UNS:
17877 case VSX_BUILTIN_VPERM_8HI_UNS:
17878 case VSX_BUILTIN_VPERM_4SI_UNS:
17879 case VSX_BUILTIN_VPERM_2DI_UNS:
17880 case VSX_BUILTIN_XXSEL_16QI_UNS:
17881 case VSX_BUILTIN_XXSEL_8HI_UNS:
17882 case VSX_BUILTIN_XXSEL_4SI_UNS:
17883 case VSX_BUILTIN_XXSEL_2DI_UNS:
17884 case CRYPTO_BUILTIN_VPERMXOR:
17885 case CRYPTO_BUILTIN_VPERMXOR_V2DI:
17886 case CRYPTO_BUILTIN_VPERMXOR_V4SI:
17887 case CRYPTO_BUILTIN_VPERMXOR_V8HI:
17888 case CRYPTO_BUILTIN_VPERMXOR_V16QI:
17889 case CRYPTO_BUILTIN_VSHASIGMAW:
17890 case CRYPTO_BUILTIN_VSHASIGMAD:
17891 case CRYPTO_BUILTIN_VSHASIGMA:
17892 h.uns_p[0] = 1;
17893 h.uns_p[1] = 1;
17894 h.uns_p[2] = 1;
17895 h.uns_p[3] = 1;
17896 break;
17898 /* signed permute functions with unsigned char mask. */
17899 case ALTIVEC_BUILTIN_VPERM_16QI:
17900 case ALTIVEC_BUILTIN_VPERM_8HI:
17901 case ALTIVEC_BUILTIN_VPERM_4SI:
17902 case ALTIVEC_BUILTIN_VPERM_4SF:
17903 case ALTIVEC_BUILTIN_VPERM_2DI:
17904 case ALTIVEC_BUILTIN_VPERM_2DF:
17905 case VSX_BUILTIN_VPERM_16QI:
17906 case VSX_BUILTIN_VPERM_8HI:
17907 case VSX_BUILTIN_VPERM_4SI:
17908 case VSX_BUILTIN_VPERM_4SF:
17909 case VSX_BUILTIN_VPERM_2DI:
17910 case VSX_BUILTIN_VPERM_2DF:
17911 h.uns_p[3] = 1;
17912 break;
17914 /* unsigned args, signed return. */
17915 case VSX_BUILTIN_XVCVUXDDP_UNS:
17916 case ALTIVEC_BUILTIN_UNSFLOAT_V4SI_V4SF:
17917 h.uns_p[1] = 1;
17918 break;
17920 /* signed args, unsigned return. */
17921 case VSX_BUILTIN_XVCVDPUXDS_UNS:
17922 case ALTIVEC_BUILTIN_FIXUNS_V4SF_V4SI:
17923 case MISC_BUILTIN_UNPACK_TD:
17924 case MISC_BUILTIN_UNPACK_V1TI:
17925 h.uns_p[0] = 1;
17926 break;
17928 /* unsigned arguments for 128-bit pack instructions. */
17929 case MISC_BUILTIN_PACK_TD:
17930 case MISC_BUILTIN_PACK_V1TI:
17931 h.uns_p[1] = 1;
17932 h.uns_p[2] = 1;
17933 break;
17935 default:
17936 break;
17939 /* Figure out how many args are present. */
17940 while (num_args > 0 && h.mode[num_args] == VOIDmode)
17941 num_args--;
17943 ret_type = builtin_mode_to_type[h.mode[0]][h.uns_p[0]];
17944 if (!ret_type && h.uns_p[0])
17945 ret_type = builtin_mode_to_type[h.mode[0]][0];
17947 if (!ret_type)
17948 fatal_error (input_location,
17949 "internal error: builtin function %s had an unexpected "
17950 "return type %s", name, GET_MODE_NAME (h.mode[0]));
17952 for (i = 0; i < (int) ARRAY_SIZE (arg_type); i++)
17953 arg_type[i] = NULL_TREE;
17955 for (i = 0; i < num_args; i++)
17957 int m = (int) h.mode[i+1];
17958 int uns_p = h.uns_p[i+1];
17960 arg_type[i] = builtin_mode_to_type[m][uns_p];
17961 if (!arg_type[i] && uns_p)
17962 arg_type[i] = builtin_mode_to_type[m][0];
17964 if (!arg_type[i])
17965 fatal_error (input_location,
17966 "internal error: builtin function %s, argument %d "
17967 "had unexpected argument type %s", name, i,
17968 GET_MODE_NAME (m));
17971 builtin_hash_struct **found = builtin_hash_table->find_slot (&h, INSERT);
17972 if (*found == NULL)
17974 h2 = ggc_alloc<builtin_hash_struct> ();
17975 *h2 = h;
17976 *found = h2;
17978 h2->type = build_function_type_list (ret_type, arg_type[0], arg_type[1],
17979 arg_type[2], NULL_TREE);
17982 return (*found)->type;
17985 static void
17986 rs6000_common_init_builtins (void)
17988 const struct builtin_description *d;
17989 size_t i;
17991 tree opaque_ftype_opaque = NULL_TREE;
17992 tree opaque_ftype_opaque_opaque = NULL_TREE;
17993 tree opaque_ftype_opaque_opaque_opaque = NULL_TREE;
17994 tree v2si_ftype = NULL_TREE;
17995 tree v2si_ftype_qi = NULL_TREE;
17996 tree v2si_ftype_v2si_qi = NULL_TREE;
17997 tree v2si_ftype_int_qi = NULL_TREE;
17998 HOST_WIDE_INT builtin_mask = rs6000_builtin_mask;
18000 if (!TARGET_PAIRED_FLOAT)
18002 builtin_mode_to_type[V2SImode][0] = opaque_V2SI_type_node;
18003 builtin_mode_to_type[V2SFmode][0] = opaque_V2SF_type_node;
18006 /* Paired and SPE builtins are only available if you build a compiler with
18007 the appropriate options, so only create those builtins when those
18008 options are in effect. Create Altivec and VSX builtins on machines
18009 with at least the general purpose extensions (970 and newer) to allow the
18010 use of the target attribute.  */
18012 if (TARGET_EXTRA_BUILTINS)
18013 builtin_mask |= RS6000_BTM_COMMON;
18015 /* Add the ternary operators. */
18016 d = bdesc_3arg;
18017 for (i = 0; i < ARRAY_SIZE (bdesc_3arg); i++, d++)
18019 tree type;
18020 HOST_WIDE_INT mask = d->mask;
18022 if ((mask & builtin_mask) != mask)
18024 if (TARGET_DEBUG_BUILTIN)
18025 fprintf (stderr, "rs6000_builtin, skip ternary %s\n", d->name);
18026 continue;
18029 if (rs6000_overloaded_builtin_p (d->code))
18031 if (! (type = opaque_ftype_opaque_opaque_opaque))
18032 type = opaque_ftype_opaque_opaque_opaque
18033 = build_function_type_list (opaque_V4SI_type_node,
18034 opaque_V4SI_type_node,
18035 opaque_V4SI_type_node,
18036 opaque_V4SI_type_node,
18037 NULL_TREE);
18039 else
18041 enum insn_code icode = d->icode;
18042 if (d->name == 0)
18044 if (TARGET_DEBUG_BUILTIN)
18045 fprintf (stderr, "rs6000_builtin, bdesc_3arg[%lu] no name\n",
18046 (long unsigned)i);
18048 continue;
18051 if (icode == CODE_FOR_nothing)
18053 if (TARGET_DEBUG_BUILTIN)
18054 fprintf (stderr, "rs6000_builtin, skip ternary %s (no code)\n",
18055 d->name);
18057 continue;
18060 type = builtin_function_type (insn_data[icode].operand[0].mode,
18061 insn_data[icode].operand[1].mode,
18062 insn_data[icode].operand[2].mode,
18063 insn_data[icode].operand[3].mode,
18064 d->code, d->name);
18067 def_builtin (d->name, type, d->code);
18070 /* Add the binary operators. */
18071 d = bdesc_2arg;
18072 for (i = 0; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
18074 machine_mode mode0, mode1, mode2;
18075 tree type;
18076 HOST_WIDE_INT mask = d->mask;
18078 if ((mask & builtin_mask) != mask)
18080 if (TARGET_DEBUG_BUILTIN)
18081 fprintf (stderr, "rs6000_builtin, skip binary %s\n", d->name);
18082 continue;
18085 if (rs6000_overloaded_builtin_p (d->code))
18087 if (! (type = opaque_ftype_opaque_opaque))
18088 type = opaque_ftype_opaque_opaque
18089 = build_function_type_list (opaque_V4SI_type_node,
18090 opaque_V4SI_type_node,
18091 opaque_V4SI_type_node,
18092 NULL_TREE);
18094 else
18096 enum insn_code icode = d->icode;
18097 if (d->name == 0)
18099 if (TARGET_DEBUG_BUILTIN)
18100 fprintf (stderr, "rs6000_builtin, bdesc_2arg[%lu] no name\n",
18101 (long unsigned)i);
18103 continue;
18106 if (icode == CODE_FOR_nothing)
18108 if (TARGET_DEBUG_BUILTIN)
18109 fprintf (stderr, "rs6000_builtin, skip binary %s (no code)\n",
18110 d->name);
18112 continue;
18115 mode0 = insn_data[icode].operand[0].mode;
18116 mode1 = insn_data[icode].operand[1].mode;
18117 mode2 = insn_data[icode].operand[2].mode;
18119 if (mode0 == V2SImode && mode1 == V2SImode && mode2 == QImode)
18121 if (! (type = v2si_ftype_v2si_qi))
18122 type = v2si_ftype_v2si_qi
18123 = build_function_type_list (opaque_V2SI_type_node,
18124 opaque_V2SI_type_node,
18125 char_type_node,
18126 NULL_TREE);
18129 else if (mode0 == V2SImode && GET_MODE_CLASS (mode1) == MODE_INT
18130 && mode2 == QImode)
18132 if (! (type = v2si_ftype_int_qi))
18133 type = v2si_ftype_int_qi
18134 = build_function_type_list (opaque_V2SI_type_node,
18135 integer_type_node,
18136 char_type_node,
18137 NULL_TREE);
18140 else
18141 type = builtin_function_type (mode0, mode1, mode2, VOIDmode,
18142 d->code, d->name);
18145 def_builtin (d->name, type, d->code);
18148 /* Add the simple unary operators. */
18149 d = bdesc_1arg;
18150 for (i = 0; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
18152 machine_mode mode0, mode1;
18153 tree type;
18154 HOST_WIDE_INT mask = d->mask;
18156 if ((mask & builtin_mask) != mask)
18158 if (TARGET_DEBUG_BUILTIN)
18159 fprintf (stderr, "rs6000_builtin, skip unary %s\n", d->name);
18160 continue;
18163 if (rs6000_overloaded_builtin_p (d->code))
18165 if (! (type = opaque_ftype_opaque))
18166 type = opaque_ftype_opaque
18167 = build_function_type_list (opaque_V4SI_type_node,
18168 opaque_V4SI_type_node,
18169 NULL_TREE);
18171 else
18173 enum insn_code icode = d->icode;
18174 if (d->name == 0)
18176 if (TARGET_DEBUG_BUILTIN)
18177 fprintf (stderr, "rs6000_builtin, bdesc_1arg[%lu] no name\n",
18178 (long unsigned)i);
18180 continue;
18183 if (icode == CODE_FOR_nothing)
18185 if (TARGET_DEBUG_BUILTIN)
18186 fprintf (stderr, "rs6000_builtin, skip unary %s (no code)\n",
18187 d->name);
18189 continue;
18192 mode0 = insn_data[icode].operand[0].mode;
18193 mode1 = insn_data[icode].operand[1].mode;
18195 if (mode0 == V2SImode && mode1 == QImode)
18197 if (! (type = v2si_ftype_qi))
18198 type = v2si_ftype_qi
18199 = build_function_type_list (opaque_V2SI_type_node,
18200 char_type_node,
18201 NULL_TREE);
18204 else
18205 type = builtin_function_type (mode0, mode1, VOIDmode, VOIDmode,
18206 d->code, d->name);
18209 def_builtin (d->name, type, d->code);
18212 /* Add the simple no-argument operators. */
18213 d = bdesc_0arg;
18214 for (i = 0; i < ARRAY_SIZE (bdesc_0arg); i++, d++)
18216 machine_mode mode0;
18217 tree type;
18218 HOST_WIDE_INT mask = d->mask;
18220 if ((mask & builtin_mask) != mask)
18222 if (TARGET_DEBUG_BUILTIN)
18223 fprintf (stderr, "rs6000_builtin, skip no-argument %s\n", d->name);
18224 continue;
18226 if (rs6000_overloaded_builtin_p (d->code))
18228 if (!opaque_ftype_opaque)
18229 opaque_ftype_opaque
18230 = build_function_type_list (opaque_V4SI_type_node, NULL_TREE);
18231 type = opaque_ftype_opaque;
18233 else
18235 enum insn_code icode = d->icode;
18236 if (d->name == 0)
18238 if (TARGET_DEBUG_BUILTIN)
18239 fprintf (stderr, "rs6000_builtin, bdesc_0arg[%lu] no name\n",
18240 (long unsigned) i);
18241 continue;
18243 if (icode == CODE_FOR_nothing)
18245 if (TARGET_DEBUG_BUILTIN)
18246 fprintf (stderr,
18247 "rs6000_builtin, skip no-argument %s (no code)\n",
18248 d->name);
18249 continue;
18251 mode0 = insn_data[icode].operand[0].mode;
18252 if (mode0 == V2SImode)
18254 /* code for SPE */
18255 if (! (type = v2si_ftype))
18257 v2si_ftype
18258 = build_function_type_list (opaque_V2SI_type_node,
18259 NULL_TREE);
18260 type = v2si_ftype;
18263 else
18264 type = builtin_function_type (mode0, VOIDmode, VOIDmode, VOIDmode,
18265 d->code, d->name);
18267 def_builtin (d->name, type, d->code);
18271 /* Set up AIX/Darwin/64-bit Linux quad floating point routines. */
18272 static void
18273 init_float128_ibm (machine_mode mode)
18275 if (!TARGET_XL_COMPAT)
18277 set_optab_libfunc (add_optab, mode, "__gcc_qadd");
18278 set_optab_libfunc (sub_optab, mode, "__gcc_qsub");
18279 set_optab_libfunc (smul_optab, mode, "__gcc_qmul");
18280 set_optab_libfunc (sdiv_optab, mode, "__gcc_qdiv");
18282 if (!(TARGET_HARD_FLOAT && (TARGET_FPRS || TARGET_E500_DOUBLE)))
18284 set_optab_libfunc (neg_optab, mode, "__gcc_qneg");
18285 set_optab_libfunc (eq_optab, mode, "__gcc_qeq");
18286 set_optab_libfunc (ne_optab, mode, "__gcc_qne");
18287 set_optab_libfunc (gt_optab, mode, "__gcc_qgt");
18288 set_optab_libfunc (ge_optab, mode, "__gcc_qge");
18289 set_optab_libfunc (lt_optab, mode, "__gcc_qlt");
18290 set_optab_libfunc (le_optab, mode, "__gcc_qle");
18292 set_conv_libfunc (sext_optab, mode, SFmode, "__gcc_stoq");
18293 set_conv_libfunc (sext_optab, mode, DFmode, "__gcc_dtoq");
18294 set_conv_libfunc (trunc_optab, SFmode, mode, "__gcc_qtos");
18295 set_conv_libfunc (trunc_optab, DFmode, mode, "__gcc_qtod");
18296 set_conv_libfunc (sfix_optab, SImode, mode, "__gcc_qtoi");
18297 set_conv_libfunc (ufix_optab, SImode, mode, "__gcc_qtou");
18298 set_conv_libfunc (sfloat_optab, mode, SImode, "__gcc_itoq");
18299 set_conv_libfunc (ufloat_optab, mode, SImode, "__gcc_utoq");
18302 if (!(TARGET_HARD_FLOAT && TARGET_FPRS))
18303 set_optab_libfunc (unord_optab, mode, "__gcc_qunord");
18305 else
18307 set_optab_libfunc (add_optab, mode, "_xlqadd");
18308 set_optab_libfunc (sub_optab, mode, "_xlqsub");
18309 set_optab_libfunc (smul_optab, mode, "_xlqmul");
18310 set_optab_libfunc (sdiv_optab, mode, "_xlqdiv");
18313 /* Add various conversions for IFmode to use the traditional TFmode
18314 names. */
18315 if (mode == IFmode)
18317 set_conv_libfunc (sext_optab, mode, SDmode, "__dpd_extendsdtf2");
18318 set_conv_libfunc (sext_optab, mode, DDmode, "__dpd_extendddtf2");
18319 set_conv_libfunc (trunc_optab, mode, TDmode, "__dpd_trunctftd2");
18320 set_conv_libfunc (trunc_optab, SDmode, mode, "__dpd_trunctfsd2");
18321 set_conv_libfunc (trunc_optab, DDmode, mode, "__dpd_trunctfdd2");
18322 set_conv_libfunc (sext_optab, TDmode, mode, "__dpd_extendtdtf2");
18324 if (TARGET_POWERPC64)
18326 set_conv_libfunc (sfix_optab, TImode, mode, "__fixtfti");
18327 set_conv_libfunc (ufix_optab, TImode, mode, "__fixunstfti");
18328 set_conv_libfunc (sfloat_optab, mode, TImode, "__floattitf");
18329 set_conv_libfunc (ufloat_optab, mode, TImode, "__floatuntitf");
18334 /* Set up IEEE 128-bit floating point routines. Use different names if the
18335 arguments can be passed in a vector register. The historical PowerPC
18336 implementation of IEEE 128-bit floating point used _q_<op> for the names, so
18337 continue to use that if we aren't using vector registers to pass IEEE
18338 128-bit floating point. */
18340 static void
18341 init_float128_ieee (machine_mode mode)
18343 if (FLOAT128_VECTOR_P (mode))
18345 set_optab_libfunc (add_optab, mode, "__addkf3");
18346 set_optab_libfunc (sub_optab, mode, "__subkf3");
18347 set_optab_libfunc (neg_optab, mode, "__negkf2");
18348 set_optab_libfunc (smul_optab, mode, "__mulkf3");
18349 set_optab_libfunc (sdiv_optab, mode, "__divkf3");
18350 set_optab_libfunc (sqrt_optab, mode, "__sqrtkf2");
18351 set_optab_libfunc (abs_optab, mode, "__abskf2");
18353 set_optab_libfunc (eq_optab, mode, "__eqkf2");
18354 set_optab_libfunc (ne_optab, mode, "__nekf2");
18355 set_optab_libfunc (gt_optab, mode, "__gtkf2");
18356 set_optab_libfunc (ge_optab, mode, "__gekf2");
18357 set_optab_libfunc (lt_optab, mode, "__ltkf2");
18358 set_optab_libfunc (le_optab, mode, "__lekf2");
18359 set_optab_libfunc (unord_optab, mode, "__unordkf2");
18361 set_conv_libfunc (sext_optab, mode, SFmode, "__extendsfkf2");
18362 set_conv_libfunc (sext_optab, mode, DFmode, "__extenddfkf2");
18363 set_conv_libfunc (trunc_optab, SFmode, mode, "__trunckfsf2");
18364 set_conv_libfunc (trunc_optab, DFmode, mode, "__trunckfdf2");
18366 set_conv_libfunc (sext_optab, mode, IFmode, "__extendtfkf2");
18367 if (mode != TFmode && FLOAT128_IBM_P (TFmode))
18368 set_conv_libfunc (sext_optab, mode, TFmode, "__extendtfkf2");
18370 set_conv_libfunc (trunc_optab, IFmode, mode, "__trunckftf2");
18371 if (mode != TFmode && FLOAT128_IBM_P (TFmode))
18372 set_conv_libfunc (trunc_optab, TFmode, mode, "__trunckftf2");
18374 set_conv_libfunc (sext_optab, mode, SDmode, "__dpd_extendsdkf2");
18375 set_conv_libfunc (sext_optab, mode, DDmode, "__dpd_extendddkf2");
18376 set_conv_libfunc (trunc_optab, mode, TDmode, "__dpd_trunckftd2");
18377 set_conv_libfunc (trunc_optab, SDmode, mode, "__dpd_trunckfsd2");
18378 set_conv_libfunc (trunc_optab, DDmode, mode, "__dpd_trunckfdd2");
18379 set_conv_libfunc (sext_optab, TDmode, mode, "__dpd_extendtdkf2");
18381 set_conv_libfunc (sfix_optab, SImode, mode, "__fixkfsi");
18382 set_conv_libfunc (ufix_optab, SImode, mode, "__fixunskfsi");
18383 set_conv_libfunc (sfix_optab, DImode, mode, "__fixkfdi");
18384 set_conv_libfunc (ufix_optab, DImode, mode, "__fixunskfdi");
18386 set_conv_libfunc (sfloat_optab, mode, SImode, "__floatsikf");
18387 set_conv_libfunc (ufloat_optab, mode, SImode, "__floatunsikf");
18388 set_conv_libfunc (sfloat_optab, mode, DImode, "__floatdikf");
18389 set_conv_libfunc (ufloat_optab, mode, DImode, "__floatundikf");
18391 if (TARGET_POWERPC64)
18393 set_conv_libfunc (sfix_optab, TImode, mode, "__fixkfti");
18394 set_conv_libfunc (ufix_optab, TImode, mode, "__fixunskfti");
18395 set_conv_libfunc (sfloat_optab, mode, TImode, "__floattikf");
18396 set_conv_libfunc (ufloat_optab, mode, TImode, "__floatuntikf");
18400 else
18402 set_optab_libfunc (add_optab, mode, "_q_add");
18403 set_optab_libfunc (sub_optab, mode, "_q_sub");
18404 set_optab_libfunc (neg_optab, mode, "_q_neg");
18405 set_optab_libfunc (smul_optab, mode, "_q_mul");
18406 set_optab_libfunc (sdiv_optab, mode, "_q_div");
18407 if (TARGET_PPC_GPOPT)
18408 set_optab_libfunc (sqrt_optab, mode, "_q_sqrt");
18410 set_optab_libfunc (eq_optab, mode, "_q_feq");
18411 set_optab_libfunc (ne_optab, mode, "_q_fne");
18412 set_optab_libfunc (gt_optab, mode, "_q_fgt");
18413 set_optab_libfunc (ge_optab, mode, "_q_fge");
18414 set_optab_libfunc (lt_optab, mode, "_q_flt");
18415 set_optab_libfunc (le_optab, mode, "_q_fle");
18417 set_conv_libfunc (sext_optab, mode, SFmode, "_q_stoq");
18418 set_conv_libfunc (sext_optab, mode, DFmode, "_q_dtoq");
18419 set_conv_libfunc (trunc_optab, SFmode, mode, "_q_qtos");
18420 set_conv_libfunc (trunc_optab, DFmode, mode, "_q_qtod");
18421 set_conv_libfunc (sfix_optab, SImode, mode, "_q_qtoi");
18422 set_conv_libfunc (ufix_optab, SImode, mode, "_q_qtou");
18423 set_conv_libfunc (sfloat_optab, mode, SImode, "_q_itoq");
18424 set_conv_libfunc (ufloat_optab, mode, SImode, "_q_utoq");
18428 static void
18429 rs6000_init_libfuncs (void)
18431 /* __float128 support. */
18432 if (TARGET_FLOAT128_TYPE)
18434 init_float128_ibm (IFmode);
18435 init_float128_ieee (KFmode);
18438 /* AIX/Darwin/64-bit Linux quad floating point routines. */
18439 if (TARGET_LONG_DOUBLE_128)
18441 if (!TARGET_IEEEQUAD)
18442 init_float128_ibm (TFmode);
18444 /* IEEE 128-bit including 32-bit SVR4 quad floating point routines. */
18445 else
18446 init_float128_ieee (TFmode);
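/* Editorial aside -- the practical effect of the KFmode registrations in
   init_float128_ieee: with no hardware IEEE-128 support, a plain __float128
   operation becomes a call to the named libfunc.  A minimal sketch (fenced
   out of compilation):  */
#if 0
static __float128
f128_add (__float128 a, __float128 b)
{
  return a + b;   /* emitted as a call to __addkf3 on the soft path  */
}
#endif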
18451 /* Expand a block clear operation, and return 1 if successful. Return 0
18452 if we should let the compiler generate normal code.
18454 operands[0] is the destination
18455 operands[1] is the length
18456 operands[3] is the alignment */
18459 expand_block_clear (rtx operands[])
18461 rtx orig_dest = operands[0];
18462 rtx bytes_rtx = operands[1];
18463 rtx align_rtx = operands[3];
18464 bool constp = (GET_CODE (bytes_rtx) == CONST_INT);
18465 HOST_WIDE_INT align;
18466 HOST_WIDE_INT bytes;
18467 int offset;
18468 int clear_bytes;
18469 int clear_step;
18471 /* If this is not a fixed size clear, just call memset */
18472 if (! constp)
18473 return 0;
18475 /* This must be a fixed size alignment */
18476 gcc_assert (GET_CODE (align_rtx) == CONST_INT);
18477 align = INTVAL (align_rtx) * BITS_PER_UNIT;
18479 /* Anything to clear? */
18480 bytes = INTVAL (bytes_rtx);
18481 if (bytes <= 0)
18482 return 1;
18484 /* Use the builtin memset after a point, to avoid huge code bloat.
18485 When optimize_size, avoid any significant code bloat; calling
18486 memset is about 4 instructions, so allow for one instruction to
18487 load zero and three to do clearing. */
18488 if (TARGET_ALTIVEC && align >= 128)
18489 clear_step = 16;
18490 else if (TARGET_POWERPC64 && (align >= 64 || !STRICT_ALIGNMENT))
18491 clear_step = 8;
18492 else if (TARGET_SPE && align >= 64)
18493 clear_step = 8;
18494 else
18495 clear_step = 4;
18497 if (optimize_size && bytes > 3 * clear_step)
18498 return 0;
18499 if (! optimize_size && bytes > 8 * clear_step)
18500 return 0;
18502 for (offset = 0; bytes > 0; offset += clear_bytes, bytes -= clear_bytes)
18504 machine_mode mode = BLKmode;
18505 rtx dest;
18507 if (bytes >= 16 && TARGET_ALTIVEC && align >= 128)
18509 clear_bytes = 16;
18510 mode = V4SImode;
18512 else if (bytes >= 8 && TARGET_SPE && align >= 64)
18514 clear_bytes = 8;
18515 mode = V2SImode;
18517 else if (bytes >= 8 && TARGET_POWERPC64
18518 && (align >= 64 || !STRICT_ALIGNMENT))
18520 clear_bytes = 8;
18521 mode = DImode;
18522 if (offset == 0 && align < 64)
18524 rtx addr;
18526 /* If the address form is reg+offset with offset not a
18527 multiple of four, reload into reg indirect form here
18528 rather than waiting for reload. This way we get one
18529 reload, not one per store. */
18530 addr = XEXP (orig_dest, 0);
18531 if ((GET_CODE (addr) == PLUS || GET_CODE (addr) == LO_SUM)
18532 && GET_CODE (XEXP (addr, 1)) == CONST_INT
18533 && (INTVAL (XEXP (addr, 1)) & 3) != 0)
18535 addr = copy_addr_to_reg (addr);
18536 orig_dest = replace_equiv_address (orig_dest, addr);
18540 else if (bytes >= 4 && (align >= 32 || !STRICT_ALIGNMENT))
18541 { /* clear 4 bytes */
18542 clear_bytes = 4;
18543 mode = SImode;
18545 else if (bytes >= 2 && (align >= 16 || !STRICT_ALIGNMENT))
18546 { /* clear 2 bytes */
18547 clear_bytes = 2;
18548 mode = HImode;
18550 else /* clear 1 byte at a time */
18552 clear_bytes = 1;
18553 mode = QImode;
18556 dest = adjust_address (orig_dest, mode, offset);
18558 emit_move_insn (dest, CONST0_RTX (mode));
18561 return 1;
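/* Editorial aside -- a standalone model (fenced out of compilation) of the
   chunk schedule the loop above picks for a word-aligned 11-byte clear on
   a 32-bit target without AltiVec: stores of 4, 4, 2 and 1 bytes.  */
#if 0
#include <assert.h>

static void
check_clear_schedule (void)
{
  int bytes = 11, offset = 0, step = 0;
  int expect[4] = { 4, 4, 2, 1 };
  while (bytes > 0)
    {
      int clear_bytes = bytes >= 4 ? 4 : bytes >= 2 ? 2 : 1;
      assert (clear_bytes == expect[step++]);
      offset += clear_bytes;
      bytes -= clear_bytes;
    }
  assert (offset == 11 && step == 4);
}
#endif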
18564 /* Emit a potentially record-form instruction, setting DST from SRC.
18565 If DOT is 0, that is all; otherwise, set CCREG to the result of the
18566 signed comparison of DST with zero. If DOT is 1, the generated RTL
18567 doesn't care about the DST result; if DOT is 2, it does. If CCREG
18568 is CR0 do a single dot insn (as a PARALLEL); otherwise, do a SET and
18569 a separate COMPARE. */
18571 static void
18572 rs6000_emit_dot_insn (rtx dst, rtx src, int dot, rtx ccreg)
18574 if (dot == 0)
18576 emit_move_insn (dst, src);
18577 return;
18580 if (cc_reg_not_cr0_operand (ccreg, CCmode))
18582 emit_move_insn (dst, src);
18583 emit_move_insn (ccreg, gen_rtx_COMPARE (CCmode, dst, const0_rtx));
18584 return;
18587 rtx ccset = gen_rtx_SET (ccreg, gen_rtx_COMPARE (CCmode, src, const0_rtx));
18588 if (dot == 1)
18590 rtx clobber = gen_rtx_CLOBBER (VOIDmode, dst);
18591 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, ccset, clobber)));
18593 else
18595 rtx set = gen_rtx_SET (dst, src);
18596 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, ccset, set)));
18601 /* Figure out the correct instructions to generate to load data for
18602 block compare. MODE is used for the read from memory, and
18603 data is zero extended if REG is wider than MODE. If LE code
18604 is being generated, bswap loads are used.
18606 REG is the destination register to move the data into.
18607 MEM is the memory block being read.
18608 MODE is the mode of memory to use for the read. */
18609 static void
18610 do_load_for_compare (rtx reg, rtx mem, machine_mode mode)
18612 switch (GET_MODE (reg))
18614 case DImode:
18615 switch (mode)
18617 case QImode:
18618 emit_insn (gen_zero_extendqidi2 (reg, mem));
18619 break;
18620 case HImode:
18622 rtx src = mem;
18623 if (!BYTES_BIG_ENDIAN)
18625 src = gen_reg_rtx (HImode);
18626 emit_insn (gen_bswaphi2 (src, mem));
18628 emit_insn (gen_zero_extendhidi2 (reg, src));
18629 break;
18631 case SImode:
18633 rtx src = mem;
18634 if (!BYTES_BIG_ENDIAN)
18636 src = gen_reg_rtx (SImode);
18637 emit_insn (gen_bswapsi2 (src, mem));
18639 emit_insn (gen_zero_extendsidi2 (reg, src));
18641 break;
18642 case DImode:
18643 if (!BYTES_BIG_ENDIAN)
18644 emit_insn (gen_bswapdi2 (reg, mem));
18645 else
18646 emit_insn (gen_movdi (reg, mem));
18647 break;
18648 default:
18649 gcc_unreachable ();
18651 break;
18653 case SImode:
18654 switch (mode)
18656 case QImode:
18657 emit_insn (gen_zero_extendqisi2 (reg, mem));
18658 break;
18659 case HImode:
18661 rtx src = mem;
18662 if (!BYTES_BIG_ENDIAN)
18664 src = gen_reg_rtx (HImode);
18665 emit_insn (gen_bswaphi2 (src, mem));
18667 emit_insn (gen_zero_extendhisi2 (reg, src));
18668 break;
18670 case SImode:
18671 if (!BYTES_BIG_ENDIAN)
18672 emit_insn (gen_bswapsi2 (reg, mem));
18673 else
18674 emit_insn (gen_movsi (reg, mem));
18675 break;
18676 case DImode:
18677 /* DImode is larger than the destination reg, so it is not expected. */
18678 gcc_unreachable ();
18679 break;
18680 default:
18681 gcc_unreachable ();
18683 break;
18684 default:
18685 gcc_unreachable ();
18686 break;
18690 /* Select the mode to be used for reading the next chunk of bytes
18691 in the compare.
18693 OFFSET is the current read offset from the beginning of the block.
18694 BYTES is the number of bytes remaining to be read.
18695 ALIGN is the minimum alignment of the memory blocks being compared in bytes.
18696 WORD_MODE_OK indicates using WORD_MODE is allowed, else SImode is
18697 the largest allowable mode. */
18698 static machine_mode
18699 select_block_compare_mode (HOST_WIDE_INT offset, HOST_WIDE_INT bytes,
18700 HOST_WIDE_INT align, bool word_mode_ok)
18702 /* First see if we can do a whole load unit
18703 as that will be more efficient than a larger load + shift. */
18705 /* If big, use biggest chunk.
18706 If exactly chunk size, use that size.
18707 If remainder can be done in one piece with shifting, do that.
18708 Do largest chunk possible without violating alignment rules. */
18710 /* The most we can read without potential page crossing. */
18711 HOST_WIDE_INT maxread = ROUND_UP (bytes, align);
18713 if (word_mode_ok && bytes >= UNITS_PER_WORD)
18714 return word_mode;
18715 else if (bytes == GET_MODE_SIZE (SImode))
18716 return SImode;
18717 else if (bytes == GET_MODE_SIZE (HImode))
18718 return HImode;
18719 else if (bytes == GET_MODE_SIZE (QImode))
18720 return QImode;
18721 else if (bytes < GET_MODE_SIZE (SImode)
18722 && offset >= GET_MODE_SIZE (SImode) - bytes)
18723 This matches the case where we have SImode and 3 bytes
18724 and offset >= 1 and permits us to move back one and overlap
18725 with the previous read, thus avoiding having to shift
18726 unwanted bytes off the input. */
18727 return SImode;
18728 else if (word_mode_ok && bytes < UNITS_PER_WORD
18729 && offset >= UNITS_PER_WORD-bytes)
18730 /* Similarly, if we can use DImode it will get matched here and
18731 can do an overlapping read that ends at the end of the block. */
18732 return word_mode;
18733 else if (word_mode_ok && maxread >= UNITS_PER_WORD)
18734 /* It is safe to do all remaining in one load of largest size,
18735 possibly with a shift to get rid of unwanted bytes. */
18736 return word_mode;
18737 else if (maxread >= GET_MODE_SIZE (SImode))
18738 /* It is safe to do all remaining in one SImode load,
18739 possibly with a shift to get rid of unwanted bytes. */
18740 return SImode;
18741 else if (bytes > GET_MODE_SIZE (SImode))
18742 return SImode;
18743 else if (bytes > GET_MODE_SIZE (HImode))
18744 return HImode;
18746 /* final fallback is to do one byte */
18747 return QImode;
18750 /* Compute the alignment of pointer+OFFSET where the original alignment
18751 of pointer was BASE_ALIGN. */
18752 static HOST_WIDE_INT
18753 compute_current_alignment (HOST_WIDE_INT base_align, HOST_WIDE_INT offset)
18755 if (offset == 0)
18756 return base_align;
18757 return min (base_align, offset & -offset);
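/* Editorial aside -- OFFSET & -OFFSET isolates the least significant set
   bit of OFFSET, which bounds the alignment of pointer+OFFSET: a
   16-byte-aligned base plus 12 is only 4-byte aligned.  A minimal check
   (fenced out of compilation):  */
#if 0
#include <assert.h>

static void
check_lowest_set_bit (void)
{
  assert ((12 & -12) == 4);
  assert ((8 & -8) == 8);
  assert ((6 & -6) == 2);
}
#endif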
18760 /* Expand a block compare operation, and return true if successful.
18761 Return false if we should let the compiler generate normal code,
18762 probably a memcmp call.
18764 OPERANDS[0] is the target (result).
18765 OPERANDS[1] is the first source.
18766 OPERANDS[2] is the second source.
18767 OPERANDS[3] is the length.
18768 OPERANDS[4] is the alignment. */
18769 bool
18770 expand_block_compare (rtx operands[])
18772 rtx target = operands[0];
18773 rtx orig_src1 = operands[1];
18774 rtx orig_src2 = operands[2];
18775 rtx bytes_rtx = operands[3];
18776 rtx align_rtx = operands[4];
18777 HOST_WIDE_INT cmp_bytes = 0;
18778 rtx src1 = orig_src1;
18779 rtx src2 = orig_src2;
18781 /* If this is not a fixed size compare, just call memcmp */
18782 if (!CONST_INT_P (bytes_rtx))
18783 return false;
18785 /* This must be a fixed size alignment */
18786 if (!CONST_INT_P (align_rtx))
18787 return false;
18789 int base_align = INTVAL (align_rtx) / BITS_PER_UNIT;
18791 /* If SLOW_UNALIGNED_ACCESS, don't generate unaligned word accesses. */
18792 if (SLOW_UNALIGNED_ACCESS (word_mode, MEM_ALIGN (orig_src1))
18793 || SLOW_UNALIGNED_ACCESS (word_mode, MEM_ALIGN (orig_src2)))
18794 return false;
18796 gcc_assert (GET_MODE (target) == SImode);
18798 /* Anything to compare? */
18799 HOST_WIDE_INT bytes = INTVAL (bytes_rtx);
18800 if (bytes <= 0)
18801 return true;
18803 /* The code generated for P7 and older is not faster than glibc
18804 memcmp if alignment is small and length is not short, so bail
18805 out to avoid those conditions. */
18806 if (!TARGET_EFFICIENT_OVERLAPPING_UNALIGNED
18807 && ((base_align == 1 && bytes > 16)
18808 || (base_align == 2 && bytes > 32)))
18809 return false;
18811 rtx tmp_reg_src1 = gen_reg_rtx (word_mode);
18812 rtx tmp_reg_src2 = gen_reg_rtx (word_mode);
18814 /* If we have an LE target without ldbrx and word_mode is DImode,
18815 then we must avoid using word_mode. */
18816 int word_mode_ok = !(!BYTES_BIG_ENDIAN && !TARGET_LDBRX
18817 && word_mode == DImode);
18819 /* Strategy phase. How many ops will this take and should we expand it? */
18821 int offset = 0;
18822 machine_mode load_mode =
18823 select_block_compare_mode (offset, bytes, base_align, word_mode_ok);
18824 int load_mode_size = GET_MODE_SIZE (load_mode);
18826 /* We don't want to generate too much code. */
18827 if (ROUND_UP (bytes, load_mode_size) / load_mode_size
18828 > rs6000_block_compare_inline_limit)
18829 return false;
18831 bool generate_6432_conversion = false;
18832 rtx convert_label = NULL;
18833 rtx final_label = NULL;
18835 /* Example of generated code for an 11-byte compare with 1-byte alignment:
18836 .L10:
18837 ldbrx 10,6,9
18838 ldbrx 9,7,9
18839 subf. 9,9,10
18840 bne 0,.L8
18841 addi 9,4,7
18842 lwbrx 10,0,9
18843 addi 9,5,7
18844 lwbrx 9,0,9
18845 subf 9,9,10
18846 b .L9
18847 .L8: # convert_label
18848 cntlzd 9,9
18849 addi 9,9,-1
18850 xori 9,9,0x3f
18851 .L9: # final_label
18853 If we start off with DImode and have a compare/branch to something
18854 with a smaller mode, then we will need a block with the DI->SI conversion
18855 that may or may not be executed. */
18857 while (bytes > 0)
18859 int align = compute_current_alignment (base_align, offset);
18860 if (TARGET_EFFICIENT_OVERLAPPING_UNALIGNED)
18861 load_mode = select_block_compare_mode (offset, bytes, align,
18862 word_mode_ok);
18863 else
18864 load_mode = select_block_compare_mode (0, bytes, align, word_mode_ok);
18865 load_mode_size = GET_MODE_SIZE (load_mode);
18866 if (bytes >= load_mode_size)
18867 cmp_bytes = load_mode_size;
18868 else if (TARGET_EFFICIENT_OVERLAPPING_UNALIGNED)
18870 /* Move this load back so it doesn't go past the end.
18871 P8/P9 can do this efficiently. */
18872 int extra_bytes = load_mode_size - bytes;
18873 cmp_bytes = bytes;
18874 if (extra_bytes < offset)
18876 offset -= extra_bytes;
18877 cmp_bytes = load_mode_size;
18878 bytes = cmp_bytes;
18881 else
18882 /* P7 and earlier can't do the overlapping load trick fast,
18883 so this forces a non-overlapping load and a shift to get
18884 rid of the extra bytes. */
18885 cmp_bytes = bytes;
18887 src1 = adjust_address (orig_src1, load_mode, offset);
18888 src2 = adjust_address (orig_src2, load_mode, offset);
18890 if (!REG_P (XEXP (src1, 0)))
18892 rtx src1_reg = copy_addr_to_reg (XEXP (src1, 0));
18893 src1 = replace_equiv_address (src1, src1_reg);
18895 set_mem_size (src1, cmp_bytes);
18897 if (!REG_P (XEXP (src2, 0)))
18899 rtx src2_reg = copy_addr_to_reg (XEXP (src2, 0));
18900 src2 = replace_equiv_address (src2, src2_reg);
18902 set_mem_size (src2, cmp_bytes);
18904 do_load_for_compare (tmp_reg_src1, src1, load_mode);
18905 do_load_for_compare (tmp_reg_src2, src2, load_mode);
18907 if (cmp_bytes < load_mode_size)
18909 /* Shift unneeded bytes off. */
18910 rtx sh = GEN_INT (BITS_PER_UNIT * (load_mode_size - cmp_bytes));
18911 if (word_mode == DImode)
18913 emit_insn (gen_lshrdi3 (tmp_reg_src1, tmp_reg_src1, sh));
18914 emit_insn (gen_lshrdi3 (tmp_reg_src2, tmp_reg_src2, sh));
18916 else
18918 emit_insn (gen_lshrsi3 (tmp_reg_src1, tmp_reg_src1, sh));
18919 emit_insn (gen_lshrsi3 (tmp_reg_src2, tmp_reg_src2, sh));
18923 /* We previously did a block that needed 64->32 conversion but
18924 the current block does not, so a label is needed to jump
18925 to the end. */
18926 if (generate_6432_conversion && !final_label
18927 && GET_MODE_SIZE (GET_MODE (target)) >= load_mode_size)
18928 final_label = gen_label_rtx ();
18930 /* Do we need a 64->32 conversion block? */
18931 int remain = bytes - cmp_bytes;
18932 if (GET_MODE_SIZE (GET_MODE (target)) < GET_MODE_SIZE (load_mode))
18934 generate_6432_conversion = true;
18935 if (remain > 0 && !convert_label)
18936 convert_label = gen_label_rtx ();
18939 if (GET_MODE_SIZE (GET_MODE (target)) >= GET_MODE_SIZE (load_mode))
18941 /* Target is larger than load size so we don't need to
18942 reduce result size. */
18943 if (remain > 0)
18945 /* This is not the last block, branch to the end if the result
18946 of this subtract is not zero. */
18947 if (!final_label)
18948 final_label = gen_label_rtx ();
18949 rtx fin_ref = gen_rtx_LABEL_REF (VOIDmode, final_label);
18950 rtx cond = gen_reg_rtx (CCmode);
18951 rtx tmp = gen_rtx_MINUS (word_mode, tmp_reg_src1, tmp_reg_src2);
18952 rs6000_emit_dot_insn (tmp_reg_src2, tmp, 2, cond);
18953 emit_insn (gen_movsi (target, gen_lowpart (SImode, tmp_reg_src2)));
18954 rtx ne_rtx = gen_rtx_NE (VOIDmode, cond, const0_rtx);
18955 rtx ifelse = gen_rtx_IF_THEN_ELSE (VOIDmode, ne_rtx,
18956 fin_ref, pc_rtx);
18957 rtx j = emit_jump_insn (gen_rtx_SET (pc_rtx, ifelse));
18958 JUMP_LABEL (j) = final_label;
18959 LABEL_NUSES (final_label) += 1;
18961 else
18963 if (word_mode == DImode)
18965 emit_insn (gen_subdi3 (tmp_reg_src2, tmp_reg_src1,
18966 tmp_reg_src2));
18967 emit_insn (gen_movsi (target,
18968 gen_lowpart (SImode, tmp_reg_src2)));
18970 else
18971 emit_insn (gen_subsi3 (target, tmp_reg_src1, tmp_reg_src2));
18973 if (final_label)
18975 rtx fin_ref = gen_rtx_LABEL_REF (VOIDmode, final_label);
18976 rtx j = emit_jump_insn (gen_rtx_SET (pc_rtx, fin_ref));
18977 JUMP_LABEL (j) = final_label;
18978 LABEL_NUSES (final_label) += 1;
18979 emit_barrier ();
18983 else
18985 generate_6432_conversion = true;
18986 if (remain > 0)
18988 if (!convert_label)
18989 convert_label = gen_label_rtx ();
18991 /* Compare to zero and branch to convert_label if not zero. */
18992 rtx cvt_ref = gen_rtx_LABEL_REF (VOIDmode, convert_label);
18993 rtx cond = gen_reg_rtx (CCmode);
18994 rtx tmp = gen_rtx_MINUS (DImode, tmp_reg_src1, tmp_reg_src2);
18995 rs6000_emit_dot_insn (tmp_reg_src2, tmp, 2, cond);
18996 rtx ne_rtx = gen_rtx_NE (VOIDmode, cond, const0_rtx);
18997 rtx ifelse = gen_rtx_IF_THEN_ELSE (VOIDmode, ne_rtx,
18998 cvt_ref, pc_rtx);
18999 rtx j = emit_jump_insn (gen_rtx_SET (pc_rtx, ifelse));
19000 JUMP_LABEL (j) = convert_label;
19001 LABEL_NUSES (convert_label) += 1;
19003 else
19005 /* Just do the subtract. Since this is the last block the
19006 convert code will be generated immediately following. */
19007 emit_insn (gen_subdi3 (tmp_reg_src2, tmp_reg_src1,
19008 tmp_reg_src2));
19012 offset += cmp_bytes;
19013 bytes -= cmp_bytes;
19016 if (generate_6432_conversion)
19018 if (convert_label)
19019 emit_label (convert_label);
19021 /* We need to produce a DI result from the subtract, then convert it to
19022 the target SI while maintaining the <0 / ==0 / >0 properties.
19023 Segher's sequence: cntlzd 3,3 ; addi 3,3,-1 ; xori 3,3,63 */
19024 emit_insn (gen_clzdi2 (tmp_reg_src2, tmp_reg_src2));
19025 emit_insn (gen_adddi3 (tmp_reg_src2, tmp_reg_src2, GEN_INT (-1)));
19026 emit_insn (gen_xordi3 (tmp_reg_src2, tmp_reg_src2, GEN_INT (63)));
19027 emit_insn (gen_movsi (target, gen_lowpart (SImode, tmp_reg_src2)));
19030 if (final_label)
19031 emit_label (final_label);
19033 gcc_assert (bytes == 0);
19034 return true;
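/* Editorial aside -- a standalone model (fenced out of compilation) of the
   cntlzd/addi/xori sequence used above to squash a 64-bit difference D into
   32 bits with the same <0 / ==0 / >0 classification: clz(0) == 64 gives
   63^63 == 0; clz(D) in [1,63] for D > 0 gives the positive value
   64-clz(D); clz(D) == 0 for D < 0 gives (uint64_t)-1 ^ 63, whose low word
   is negative.  */
#if 0
#include <stdint.h>

static int32_t
squash_di_to_si (uint64_t d)
{
  uint64_t n = d ? __builtin_clzll (d) : 64;  /* cntlzd   */
  n = n - 1;                                  /* addi -1  */
  n = n ^ 63;                                 /* xori 63  */
  return (int32_t) n;                         /* low word */
}
#endif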
19038 /* Expand a block move operation, and return 1 if successful. Return 0
19039 if we should let the compiler generate normal code.
19041 operands[0] is the destination
19042 operands[1] is the source
19043 operands[2] is the length
19044 operands[3] is the alignment */
19046 #define MAX_MOVE_REG 4
19049 expand_block_move (rtx operands[])
19051 rtx orig_dest = operands[0];
19052 rtx orig_src = operands[1];
19053 rtx bytes_rtx = operands[2];
19054 rtx align_rtx = operands[3];
19055 int constp = (GET_CODE (bytes_rtx) == CONST_INT);
19056 int align;
19057 int bytes;
19058 int offset;
19059 int move_bytes;
19060 rtx stores[MAX_MOVE_REG];
19061 int num_reg = 0;
19063 /* If this is not a fixed size move, just call memcpy */
19064 if (! constp)
19065 return 0;
19067 /* This must be a fixed size alignment */
19068 gcc_assert (GET_CODE (align_rtx) == CONST_INT);
19069 align = INTVAL (align_rtx) * BITS_PER_UNIT;
19071 /* Anything to move? */
19072 bytes = INTVAL (bytes_rtx);
19073 if (bytes <= 0)
19074 return 1;
19076 if (bytes > rs6000_block_move_inline_limit)
19077 return 0;
19079 for (offset = 0; bytes > 0; offset += move_bytes, bytes -= move_bytes)
19081 union {
19082 rtx (*movmemsi) (rtx, rtx, rtx, rtx);
19083 rtx (*mov) (rtx, rtx);
19084 } gen_func;
19085 machine_mode mode = BLKmode;
19086 rtx src, dest;
19088 /* Altivec first, since it will be faster than a string move
19089 when it applies, and usually not significantly larger. */
19090 if (TARGET_ALTIVEC && bytes >= 16 && align >= 128)
19092 move_bytes = 16;
19093 mode = V4SImode;
19094 gen_func.mov = gen_movv4si;
19096 else if (TARGET_SPE && bytes >= 8 && align >= 64)
19098 move_bytes = 8;
19099 mode = V2SImode;
19100 gen_func.mov = gen_movv2si;
19102 else if (TARGET_STRING
19103 && bytes > 24 /* move up to 32 bytes at a time */
19104 && ! fixed_regs[5]
19105 && ! fixed_regs[6]
19106 && ! fixed_regs[7]
19107 && ! fixed_regs[8]
19108 && ! fixed_regs[9]
19109 && ! fixed_regs[10]
19110 && ! fixed_regs[11]
19111 && ! fixed_regs[12])
19113 move_bytes = (bytes > 32) ? 32 : bytes;
19114 gen_func.movmemsi = gen_movmemsi_8reg;
19116 else if (TARGET_STRING
19117 && bytes > 16 /* move up to 24 bytes at a time */
19118 && ! fixed_regs[5]
19119 && ! fixed_regs[6]
19120 && ! fixed_regs[7]
19121 && ! fixed_regs[8]
19122 && ! fixed_regs[9]
19123 && ! fixed_regs[10])
19125 move_bytes = (bytes > 24) ? 24 : bytes;
19126 gen_func.movmemsi = gen_movmemsi_6reg;
19128 else if (TARGET_STRING
19129 && bytes > 8 /* move up to 16 bytes at a time */
19130 && ! fixed_regs[5]
19131 && ! fixed_regs[6]
19132 && ! fixed_regs[7]
19133 && ! fixed_regs[8])
19135 move_bytes = (bytes > 16) ? 16 : bytes;
19136 gen_func.movmemsi = gen_movmemsi_4reg;
19138 else if (bytes >= 8 && TARGET_POWERPC64
19139 && (align >= 64 || !STRICT_ALIGNMENT))
19141 move_bytes = 8;
19142 mode = DImode;
19143 gen_func.mov = gen_movdi;
19144 if (offset == 0 && align < 64)
19146 rtx addr;
19148 /* If the address form is reg+offset with offset not a
19149 multiple of four, reload into reg indirect form here
19150 rather than waiting for reload. This way we get one
19151 reload, not one per load and/or store. */
19152 addr = XEXP (orig_dest, 0);
19153 if ((GET_CODE (addr) == PLUS || GET_CODE (addr) == LO_SUM)
19154 && GET_CODE (XEXP (addr, 1)) == CONST_INT
19155 && (INTVAL (XEXP (addr, 1)) & 3) != 0)
19157 addr = copy_addr_to_reg (addr);
19158 orig_dest = replace_equiv_address (orig_dest, addr);
19160 addr = XEXP (orig_src, 0);
19161 if ((GET_CODE (addr) == PLUS || GET_CODE (addr) == LO_SUM)
19162 && GET_CODE (XEXP (addr, 1)) == CONST_INT
19163 && (INTVAL (XEXP (addr, 1)) & 3) != 0)
19165 addr = copy_addr_to_reg (addr);
19166 orig_src = replace_equiv_address (orig_src, addr);
19170 else if (TARGET_STRING && bytes > 4 && !TARGET_POWERPC64)
19171 { /* move up to 8 bytes at a time */
19172 move_bytes = (bytes > 8) ? 8 : bytes;
19173 gen_func.movmemsi = gen_movmemsi_2reg;
19175 else if (bytes >= 4 && (align >= 32 || !STRICT_ALIGNMENT))
19176 { /* move 4 bytes */
19177 move_bytes = 4;
19178 mode = SImode;
19179 gen_func.mov = gen_movsi;
19181 else if (bytes >= 2 && (align >= 16 || !STRICT_ALIGNMENT))
19182 { /* move 2 bytes */
19183 move_bytes = 2;
19184 mode = HImode;
19185 gen_func.mov = gen_movhi;
19187 else if (TARGET_STRING && bytes > 1)
19188 { /* move up to 4 bytes at a time */
19189 move_bytes = (bytes > 4) ? 4 : bytes;
19190 gen_func.movmemsi = gen_movmemsi_1reg;
19192 else /* move 1 byte at a time */
19194 move_bytes = 1;
19195 mode = QImode;
19196 gen_func.mov = gen_movqi;
19199 src = adjust_address (orig_src, mode, offset);
19200 dest = adjust_address (orig_dest, mode, offset);
19202 if (mode != BLKmode)
19204 rtx tmp_reg = gen_reg_rtx (mode);
19206 emit_insn ((*gen_func.mov) (tmp_reg, src));
19207 stores[num_reg++] = (*gen_func.mov) (dest, tmp_reg);
19210 if (mode == BLKmode || num_reg >= MAX_MOVE_REG || bytes == move_bytes)
19212 int i;
19213 for (i = 0; i < num_reg; i++)
19214 emit_insn (stores[i]);
19215 num_reg = 0;
19218 if (mode == BLKmode)
19220 /* Move the address into scratch registers. The movmemsi
19221 patterns require zero offset. */
19222 if (!REG_P (XEXP (src, 0)))
19224 rtx src_reg = copy_addr_to_reg (XEXP (src, 0));
19225 src = replace_equiv_address (src, src_reg);
19227 set_mem_size (src, move_bytes);
19229 if (!REG_P (XEXP (dest, 0)))
19231 rtx dest_reg = copy_addr_to_reg (XEXP (dest, 0));
19232 dest = replace_equiv_address (dest, dest_reg);
19234 set_mem_size (dest, move_bytes);
19236 emit_insn ((*gen_func.movmemsi) (dest, src,
19237 GEN_INT (move_bytes & 31),
19238 align_rtx));
19242 return 1;
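/* Editorial aside -- a standalone model (fenced out of compilation) of the
   MAX_MOVE_REG batching above: loads go to a small queue and the matching
   stores are flushed in groups, so each load gets scheduling distance from
   its dependent store.  */
#if 0
#include <stddef.h>
#include <stdint.h>

static void
copy_batched (uint32_t *dst, const uint32_t *src, size_t words)
{
  uint32_t pend[4];
  size_t n = 0, base = 0, i;
  for (i = 0; i < words; i++)
    {
      pend[n++] = src[i];            /* "emit" the load          */
      if (n == 4 || i + 1 == words)  /* flush the queued stores  */
        {
          size_t j;
          for (j = 0; j < n; j++)
            dst[base + j] = pend[j];
          base += n;
          n = 0;
        }
    }
}
#endif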
19246 /* Return a string to perform a load_multiple operation.
19247 operands[0] is the vector.
19248 operands[1] is the source address.
19249 operands[2] is the first destination register. */
19251 const char *
19252 rs6000_output_load_multiple (rtx operands[3])
19254 /* We have to handle the case where the pseudo used to contain the address
19255 is assigned to one of the output registers. */
19256 int i, j;
19257 int words = XVECLEN (operands[0], 0);
19258 rtx xop[10];
19260 if (XVECLEN (operands[0], 0) == 1)
19261 return "lwz %2,0(%1)";
19263 for (i = 0; i < words; i++)
19264 if (refers_to_regno_p (REGNO (operands[2]) + i, operands[1]))
19266 if (i == words-1)
19268 xop[0] = GEN_INT (4 * (words-1));
19269 xop[1] = operands[1];
19270 xop[2] = operands[2];
19271 output_asm_insn ("lswi %2,%1,%0\n\tlwz %1,%0(%1)", xop);
19272 return "";
19274 else if (i == 0)
19276 xop[0] = GEN_INT (4 * (words-1));
19277 xop[1] = operands[1];
19278 xop[2] = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
19279 output_asm_insn ("addi %1,%1,4\n\tlswi %2,%1,%0\n\tlwz %1,-4(%1)", xop);
19280 return "";
19282 else
19284 for (j = 0; j < words; j++)
19285 if (j != i)
19287 xop[0] = GEN_INT (j * 4);
19288 xop[1] = operands[1];
19289 xop[2] = gen_rtx_REG (SImode, REGNO (operands[2]) + j);
19290 output_asm_insn ("lwz %2,%0(%1)", xop);
19292 xop[0] = GEN_INT (i * 4);
19293 xop[1] = operands[1];
19294 output_asm_insn ("lwz %1,%0(%1)", xop);
19295 return "";
19299 return "lswi %2,%1,%N0";
19303 /* A validation routine: say whether CODE, a condition code, and MODE
19304 match. The other alternatives either don't make sense or should
19305 never be generated. */
19307 void
19308 validate_condition_mode (enum rtx_code code, machine_mode mode)
19310 gcc_assert ((GET_RTX_CLASS (code) == RTX_COMPARE
19311 || GET_RTX_CLASS (code) == RTX_COMM_COMPARE)
19312 && GET_MODE_CLASS (mode) == MODE_CC);
19314 /* These don't make sense. */
19315 gcc_assert ((code != GT && code != LT && code != GE && code != LE)
19316 || mode != CCUNSmode);
19318 gcc_assert ((code != GTU && code != LTU && code != GEU && code != LEU)
19319 || mode == CCUNSmode);
19321 gcc_assert (mode == CCFPmode
19322 || (code != ORDERED && code != UNORDERED
19323 && code != UNEQ && code != LTGT
19324 && code != UNGT && code != UNLT
19325 && code != UNGE && code != UNLE));
19327 /* These should never be generated except for
19328 flag_finite_math_only. */
19329 gcc_assert (mode != CCFPmode
19330 || flag_finite_math_only
19331 || (code != LE && code != GE
19332 && code != UNEQ && code != LTGT
19333 && code != UNGT && code != UNLT));
19335 /* These are invalid; the information is not there. */
19336 gcc_assert (mode != CCEQmode || code == EQ || code == NE);
19340 /* Return whether MASK (a CONST_INT) is a valid mask for any rlwinm,
19341 rldicl, rldicr, or rldic instruction in mode MODE. If so, if E is
19342 not zero, store there the bit offset (counted from the right) where
19343 the single stretch of 1 bits begins; and similarly for B, the bit
19344 offset where it ends. */
19346 bool
19347 rs6000_is_valid_mask (rtx mask, int *b, int *e, machine_mode mode)
19349 unsigned HOST_WIDE_INT val = INTVAL (mask);
19350 unsigned HOST_WIDE_INT bit;
19351 int nb, ne;
19352 int n = GET_MODE_PRECISION (mode);
19354 if (mode != DImode && mode != SImode)
19355 return false;
19357 if (INTVAL (mask) >= 0)
19359 bit = val & -val;
19360 ne = exact_log2 (bit);
19361 nb = exact_log2 (val + bit);
19363 else if (val + 1 == 0)
19365 nb = n;
19366 ne = 0;
19368 else if (val & 1)
19370 val = ~val;
19371 bit = val & -val;
19372 nb = exact_log2 (bit);
19373 ne = exact_log2 (val + bit);
19375 else
19377 bit = val & -val;
19378 ne = exact_log2 (bit);
19379 if (val + bit == 0)
19380 nb = n;
19381 else
19382 nb = 0;
19385 nb--;
19387 if (nb < 0 || ne < 0 || nb >= n || ne >= n)
19388 return false;
19390 if (b)
19391 *b = nb;
19392 if (e)
19393 *e = ne;
19395 return true;
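/* Editorial aside -- a standalone model (fenced out of compilation) of the
   property rs6000_is_valid_mask recognizes: the 1 bits of VAL must form a
   single contiguous run, possibly wrapping around the word.  */
#if 0
#include <stdint.h>
#include <stdbool.h>

static bool
is_contiguous_or_wrapped (uint64_t v)
{
  uint64_t low;
  if (v == 0)
    return false;
  if ((v & 1) && (v >> 63))
    v = ~v;                     /* turn a wrapped run into a plain one */
  if (v == 0)
    return true;                /* all ones */
  low = v & -v;                 /* lowest set bit */
  return ((v + low) & v) == 0;  /* adding LOW must clear the whole run */
}
#endif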
19398 /* Return whether MASK (a CONST_INT) is a valid mask for any rlwinm, rldicl,
19399 or rldicr instruction, to implement an AND with it in mode MODE. */
19401 bool
19402 rs6000_is_valid_and_mask (rtx mask, machine_mode mode)
19404 int nb, ne;
19406 if (!rs6000_is_valid_mask (mask, &nb, &ne, mode))
19407 return false;
19409 /* For DImode, we need a rldicl, rldicr, or a rlwinm with mask that
19410 does not wrap. */
19411 if (mode == DImode)
19412 return (ne == 0 || nb == 63 || (nb < 32 && ne <= nb));
19414 /* For SImode, rlwinm can do everything. */
19415 if (mode == SImode)
19416 return (nb < 32 && ne < 32);
19418 return false;
19421 /* Return the instruction template for an AND with mask in mode MODE, with
19422 operands OPERANDS. If DOT is true, make it a record-form instruction. */
19424 const char *
19425 rs6000_insn_for_and_mask (machine_mode mode, rtx *operands, bool dot)
19427 int nb, ne;
19429 if (!rs6000_is_valid_mask (operands[2], &nb, &ne, mode))
19430 gcc_unreachable ();
19432 if (mode == DImode && ne == 0)
19434 operands[3] = GEN_INT (63 - nb);
19435 if (dot)
19436 return "rldicl. %0,%1,0,%3";
19437 return "rldicl %0,%1,0,%3";
19440 if (mode == DImode && nb == 63)
19442 operands[3] = GEN_INT (63 - ne);
19443 if (dot)
19444 return "rldicr. %0,%1,0,%3";
19445 return "rldicr %0,%1,0,%3";
19448 if (nb < 32 && ne < 32)
19450 operands[3] = GEN_INT (31 - nb);
19451 operands[4] = GEN_INT (31 - ne);
19452 if (dot)
19453 return "rlwinm. %0,%1,0,%3,%4";
19454 return "rlwinm %0,%1,0,%3,%4";
19457 gcc_unreachable ();
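/* Editorial aside -- a worked instance (fenced out of compilation) of the
   template selection above: an AND with 0xff has its run of 1 bits from
   bit 0 (ne) to bit 7 (nb), so the DImode rldicl arm prints
   "rldicl %0,%1,0,56" with 56 == 63 - nb.  The nb/ne computation below
   mirrors the exact_log2 logic in rs6000_is_valid_mask.  */
#if 0
#include <assert.h>
#include <stdint.h>

static void
check_mask_0xff (void)
{
  uint64_t val = 0xff;
  uint64_t bit = val & -val;                 /* lowest set bit, 0x1 */
  int ne = __builtin_ctzll (bit);            /* run starts at bit 0 */
  int nb = __builtin_ctzll (val + bit) - 1;  /* run ends at bit 7   */
  assert (ne == 0 && nb == 7);
  assert (63 - nb == 56);                    /* operand %3 printed  */
}
#endif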
19460 /* Return whether MASK (a CONST_INT) is a valid mask for any rlw[i]nm,
19461 rld[i]cl, rld[i]cr, or rld[i]c instruction, to implement an AND with
19462 shift SHIFT (a ROTATE, ASHIFT, or LSHIFTRT) in mode MODE. */
19464 bool
19465 rs6000_is_valid_shift_mask (rtx mask, rtx shift, machine_mode mode)
19467 int nb, ne;
19469 if (!rs6000_is_valid_mask (mask, &nb, &ne, mode))
19470 return false;
19472 int n = GET_MODE_PRECISION (mode);
19473 int sh = -1;
19475 if (CONST_INT_P (XEXP (shift, 1)))
19477 sh = INTVAL (XEXP (shift, 1));
19478 if (sh < 0 || sh >= n)
19479 return false;
19482 rtx_code code = GET_CODE (shift);
19484 /* Convert any shift by 0 to a rotate, to simplify below code. */
19485 if (sh == 0)
19486 code = ROTATE;
19488 /* Convert rotate to simple shift if we can, to make analysis simpler. */
19489 if (code == ROTATE && sh >= 0 && nb >= ne && ne >= sh)
19490 code = ASHIFT;
19491 if (code == ROTATE && sh >= 0 && nb >= ne && nb < sh)
19493 code = LSHIFTRT;
19494 sh = n - sh;
19497 /* DImode rotates need rld*. */
19498 if (mode == DImode && code == ROTATE)
19499 return (nb == 63 || ne == 0 || ne == sh);
19501 /* SImode rotates need rlw*. */
19502 if (mode == SImode && code == ROTATE)
19503 return (nb < 32 && ne < 32 && sh < 32);
19505 /* Wrap-around masks are only okay for rotates. */
19506 if (ne > nb)
19507 return false;
19509 /* Variable shifts are only okay for rotates. */
19510 if (sh < 0)
19511 return false;
19513 /* Don't allow ASHIFT if the mask is wrong for that. */
19514 if (code == ASHIFT && ne < sh)
19515 return false;
19517 /* If we can do it with an rlw*, we can do it. Don't allow LSHIFTRT
19518 if the mask is wrong for that. */
19519 if (nb < 32 && ne < 32 && sh < 32
19520 && !(code == LSHIFTRT && nb >= 32 - sh))
19521 return true;
19523 /* If we can do it with an rld*, we can do it. Don't allow LSHIFTRT
19524 if the mask is wrong for that. */
19525 if (code == LSHIFTRT)
19526 sh = 64 - sh;
19527 if (nb == 63 || ne == 0 || ne == sh)
19528 return !(code == LSHIFTRT && nb >= sh);
19530 return false;
19533 /* Return the instruction template for a shift with mask in mode MODE, with
19534 operands OPERANDS. If DOT is true, make it a record-form instruction. */
19536 const char *
19537 rs6000_insn_for_shift_mask (machine_mode mode, rtx *operands, bool dot)
19539 int nb, ne;
19541 if (!rs6000_is_valid_mask (operands[3], &nb, &ne, mode))
19542 gcc_unreachable ();
19544 if (mode == DImode && ne == 0)
19546 if (GET_CODE (operands[4]) == LSHIFTRT && INTVAL (operands[2]))
19547 operands[2] = GEN_INT (64 - INTVAL (operands[2]));
19548 operands[3] = GEN_INT (63 - nb);
19549 if (dot)
19550 return "rld%I2cl. %0,%1,%2,%3";
19551 return "rld%I2cl %0,%1,%2,%3";
19554 if (mode == DImode && nb == 63)
19556 operands[3] = GEN_INT (63 - ne);
19557 if (dot)
19558 return "rld%I2cr. %0,%1,%2,%3";
19559 return "rld%I2cr %0,%1,%2,%3";
19562 if (mode == DImode
19563 && GET_CODE (operands[4]) != LSHIFTRT
19564 && CONST_INT_P (operands[2])
19565 && ne == INTVAL (operands[2]))
19567 operands[3] = GEN_INT (63 - nb);
19568 if (dot)
19569 return "rld%I2c. %0,%1,%2,%3";
19570 return "rld%I2c %0,%1,%2,%3";
19573 if (nb < 32 && ne < 32)
19575 if (GET_CODE (operands[4]) == LSHIFTRT && INTVAL (operands[2]))
19576 operands[2] = GEN_INT (32 - INTVAL (operands[2]));
19577 operands[3] = GEN_INT (31 - nb);
19578 operands[4] = GEN_INT (31 - ne);
19579 /* This insn can also be a 64-bit rotate with mask that really makes
19580 it just a shift right (with mask); the %h operands below adjust for
19581 that situation (the shift count is >= 32 in that case). */
19582 if (dot)
19583 return "rlw%I2nm. %0,%1,%h2,%3,%4";
19584 return "rlw%I2nm %0,%1,%h2,%3,%4";
19587 gcc_unreachable ();
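/* For example, SImode (lshiftrt X 8) under mask 0x00ffffff has nb = 23,
   ne = 0; the shift count is rewritten to 32 - 8 = 24 and the rlwinm arm
   produces "rlwinm %0,%1,24,8,31", the canonical expansion of srwi.  */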
19590 /* Return whether MASK (a CONST_INT) is a valid mask for any rlwimi or
19591 rldimi instruction, to implement an insert with shift SHIFT (a ROTATE,
19592 ASHIFT, or LSHIFTRT) in mode MODE. */
19594 bool
19595 rs6000_is_valid_insert_mask (rtx mask, rtx shift, machine_mode mode)
19597 int nb, ne;
19599 if (!rs6000_is_valid_mask (mask, &nb, &ne, mode))
19600 return false;
19602 int n = GET_MODE_PRECISION (mode);
19604 int sh = INTVAL (XEXP (shift, 1));
19605 if (sh < 0 || sh >= n)
19606 return false;
19608 rtx_code code = GET_CODE (shift);
19610 /* Convert any shift by 0 to a rotate, to simplify the code below. */
19611 if (sh == 0)
19612 code = ROTATE;
19614 /* Convert rotate to simple shift if we can, to make analysis simpler. */
19615 if (code == ROTATE && sh >= 0 && nb >= ne && ne >= sh)
19616 code = ASHIFT;
19617 if (code == ROTATE && sh >= 0 && nb >= ne && nb < sh)
19619 code = LSHIFTRT;
19620 sh = n - sh;
19623 /* DImode rotates need rldimi. */
19624 if (mode == DImode && code == ROTATE)
19625 return (ne == sh);
19627 /* SImode rotates need rlwimi. */
19628 if (mode == SImode && code == ROTATE)
19629 return (nb < 32 && ne < 32 && sh < 32);
19631 /* Wrap-around masks are only okay for rotates. */
19632 if (ne > nb)
19633 return false;
19635 /* Don't allow ASHIFT if the mask is wrong for that. */
19636 if (code == ASHIFT && ne < sh)
19637 return false;
19639 /* If we can do it with an rlwimi, we can do it. Don't allow LSHIFTRT
19640 if the mask is wrong for that. */
19641 if (nb < 32 && ne < 32 && sh < 32
19642 && !(code == LSHIFTRT && nb >= 32 - sh))
19643 return true;
19645 /* If we can do it with an rldimi, we can do it. Don't allow LSHIFTRT
19646 if the mask is wrong for that. */
19647 if (code == LSHIFTRT)
19648 sh = 64 - sh;
19649 if (ne == sh)
19650 return !(code == LSHIFTRT && nb >= sh);
19652 return false;
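/* Worked example: (ashift X 32) under DImode mask 0xffffffff00000000 gives
   nb = 63, ne = 32, sh = 32.  Since ne == sh, the rldimi test above accepts
   it: insert the low word of X into the high word of the destination.  */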
19655 /* Return the instruction template for an insert with mask in mode MODE, with
19656 operands OPERANDS. If DOT is true, make it a record-form instruction. */
19658 const char *
19659 rs6000_insn_for_insert_mask (machine_mode mode, rtx *operands, bool dot)
19661 int nb, ne;
19663 if (!rs6000_is_valid_mask (operands[3], &nb, &ne, mode))
19664 gcc_unreachable ();
19666 /* Prefer rldimi because rlwimi is cracked. */
19667 if (TARGET_POWERPC64
19668 && (!dot || mode == DImode)
19669 && GET_CODE (operands[4]) != LSHIFTRT
19670 && ne == INTVAL (operands[2]))
19672 operands[3] = GEN_INT (63 - nb);
19673 if (dot)
19674 return "rldimi. %0,%1,%2,%3";
19675 return "rldimi %0,%1,%2,%3";
19678 if (nb < 32 && ne < 32)
19680 if (GET_CODE (operands[4]) == LSHIFTRT && INTVAL (operands[2]))
19681 operands[2] = GEN_INT (32 - INTVAL (operands[2]));
19682 operands[3] = GEN_INT (31 - nb);
19683 operands[4] = GEN_INT (31 - ne);
19684 if (dot)
19685 return "rlwimi. %0,%1,%2,%3,%4";
19686 return "rlwimi %0,%1,%2,%3,%4";
19689 gcc_unreachable ();
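/* For example, inserting under mask 0x0000ff00 with (ashift X 8) has
   nb = 15 and ne = 8 == the shift count, so a 64-bit target uses
   "rldimi %0,%1,8,48", while a 32-bit target (or a record-form SImode
   case) falls through to "rlwimi %0,%1,8,16,23".  */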
19692 /* Return whether an AND with C (a CONST_INT) in mode MODE can be done
19693 using two machine instructions. */
19695 bool
19696 rs6000_is_valid_2insn_and (rtx c, machine_mode mode)
19698 /* There are two kinds of AND we can handle with two insns:
19699 1) those we can do with two rl* insns;
19700 2) ori[s];xori[s].
19702 We do not handle that last case yet. */
19704 /* If there is just one stretch of ones, we can do it. */
19705 if (rs6000_is_valid_mask (c, NULL, NULL, mode))
19706 return true;
19708 /* Otherwise, fill in the lowest "hole"; if we can do the result with
19709 one insn, we can do the whole thing with two. */
19710 unsigned HOST_WIDE_INT val = INTVAL (c);
19711 unsigned HOST_WIDE_INT bit1 = val & -val;
19712 unsigned HOST_WIDE_INT bit2 = (val + bit1) & ~val;
19713 unsigned HOST_WIDE_INT val1 = (val + bit1) & val;
19714 unsigned HOST_WIDE_INT bit3 = val1 & -val1;
19715 return rs6000_is_valid_and_mask (GEN_INT (val + bit3 - bit2), mode);
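/* Worked example of the hole-filling arithmetic, for
   val = 0x0000ffff0000ffff:

     bit1 = val & -val           = 0x0000000000000001  lowest set bit
     bit2 = (val + bit1) & ~val  = 0x0000000000010000  lowest clear bit
                                                       above bit1
     val1 = (val + bit1) & val   = 0x0000ffff00000000  the ones above the
                                                       hole
     bit3 = val1 & -val1         = 0x0000000100000000  first one above the
                                                       hole

   val + bit3 - bit2 = 0x0000ffffffffffff is a single stretch of ones, so
   this AND can be done in two insns.  */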
19718 /* Emit the two insns to do an AND in mode MODE, with operands OPERANDS.
19719 If EXPAND is true, split rotate-and-mask instructions we generate to
19720 their constituent parts as well (this is used during expand); if DOT
19721 is 1, make the last insn a record-form instruction clobbering the
19722 destination GPR and setting the CC reg (from operands[3]); if 2, set
19723 that GPR as well as the CC reg. */
19725 void
19726 rs6000_emit_2insn_and (machine_mode mode, rtx *operands, bool expand, int dot)
19728 gcc_assert (!(expand && dot));
19730 unsigned HOST_WIDE_INT val = INTVAL (operands[2]);
19732 /* If it is one stretch of ones, it is DImode; shift left, mask, then
19733 shift right. This generates better code than doing the masks without
19734 shifts, or shifting first right and then left. */
19735 int nb, ne;
19736 if (rs6000_is_valid_mask (operands[2], &nb, &ne, mode) && nb >= ne)
19738 gcc_assert (mode == DImode);
19740 int shift = 63 - nb;
19741 if (expand)
19743 rtx tmp1 = gen_reg_rtx (DImode);
19744 rtx tmp2 = gen_reg_rtx (DImode);
19745 emit_insn (gen_ashldi3 (tmp1, operands[1], GEN_INT (shift)));
19746 emit_insn (gen_anddi3 (tmp2, tmp1, GEN_INT (val << shift)));
19747 emit_insn (gen_lshrdi3 (operands[0], tmp2, GEN_INT (shift)));
19749 else
19751 rtx tmp = gen_rtx_ASHIFT (mode, operands[1], GEN_INT (shift));
19752 tmp = gen_rtx_AND (mode, tmp, GEN_INT (val << shift));
19753 emit_move_insn (operands[0], tmp);
19754 tmp = gen_rtx_LSHIFTRT (mode, operands[0], GEN_INT (shift));
19755 rs6000_emit_dot_insn (operands[0], tmp, dot, dot ? operands[3] : 0);
19757 return;
19760 /* Otherwise, make a mask2 that cuts out the lowest "hole", and a mask1
19761 that does the rest. */
19762 unsigned HOST_WIDE_INT bit1 = val & -val;
19763 unsigned HOST_WIDE_INT bit2 = (val + bit1) & ~val;
19764 unsigned HOST_WIDE_INT val1 = (val + bit1) & val;
19765 unsigned HOST_WIDE_INT bit3 = val1 & -val1;
19767 unsigned HOST_WIDE_INT mask1 = -bit3 + bit2 - 1;
19768 unsigned HOST_WIDE_INT mask2 = val + bit3 - bit2;
19770 gcc_assert (rs6000_is_valid_and_mask (GEN_INT (mask2), mode));
19772 /* Two "no-rotate"-and-mask instructions, for SImode. */
19773 if (rs6000_is_valid_and_mask (GEN_INT (mask1), mode))
19775 gcc_assert (mode == SImode);
19777 rtx reg = expand ? gen_reg_rtx (mode) : operands[0];
19778 rtx tmp = gen_rtx_AND (mode, operands[1], GEN_INT (mask1));
19779 emit_move_insn (reg, tmp);
19780 tmp = gen_rtx_AND (mode, reg, GEN_INT (mask2));
19781 rs6000_emit_dot_insn (operands[0], tmp, dot, dot ? operands[3] : 0);
19782 return;
19785 gcc_assert (mode == DImode);
19787 /* Two "no-rotate"-and-mask instructions, for DImode: both are rlwinm
19788 insns; we have to do the first in SImode, because it wraps. */
19789 if (mask2 <= 0xffffffff
19790 && rs6000_is_valid_and_mask (GEN_INT (mask1), SImode))
19792 rtx reg = expand ? gen_reg_rtx (mode) : operands[0];
19793 rtx tmp = gen_rtx_AND (SImode, gen_lowpart (SImode, operands[1]),
19794 GEN_INT (mask1));
19795 rtx reg_low = gen_lowpart (SImode, reg);
19796 emit_move_insn (reg_low, tmp);
19797 tmp = gen_rtx_AND (mode, reg, GEN_INT (mask2));
19798 rs6000_emit_dot_insn (operands[0], tmp, dot, dot ? operands[3] : 0);
19799 return;
19802 /* Two rld* insns: rotate, clear the hole in the middle (which now is
19803 at the top end), rotate back and clear the other hole. */
19804 int right = exact_log2 (bit3);
19805 int left = 64 - right;
19807 /* Rotate the mask too. */
19808 mask1 = (mask1 >> right) | ((bit2 - 1) << left);
19810 if (expand)
19812 rtx tmp1 = gen_reg_rtx (DImode);
19813 rtx tmp2 = gen_reg_rtx (DImode);
19814 rtx tmp3 = gen_reg_rtx (DImode);
19815 emit_insn (gen_rotldi3 (tmp1, operands[1], GEN_INT (left)));
19816 emit_insn (gen_anddi3 (tmp2, tmp1, GEN_INT (mask1)));
19817 emit_insn (gen_rotldi3 (tmp3, tmp2, GEN_INT (right)));
19818 emit_insn (gen_anddi3 (operands[0], tmp3, GEN_INT (mask2)));
19820 else
19822 rtx tmp = gen_rtx_ROTATE (mode, operands[1], GEN_INT (left));
19823 tmp = gen_rtx_AND (mode, tmp, GEN_INT (mask1));
19824 emit_move_insn (operands[0], tmp);
19825 tmp = gen_rtx_ROTATE (mode, operands[0], GEN_INT (right));
19826 tmp = gen_rtx_AND (mode, tmp, GEN_INT (mask2));
19827 rs6000_emit_dot_insn (operands[0], tmp, dot, dot ? operands[3] : 0);
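/* Continuing the example above: for val = 0x0000ffff0000ffff we get
   mask1 = 0xffffffff0000ffff, mask2 = 0x0000ffffffffffff, and
   left = right = 32; mask1 rotated by 32 is 0x0000ffffffffffff as well,
   so the emitted sequence is equivalent to

       rldicl rD,rS,32,16    (rotate left 32, clear the high 16 bits)
       rldicl rD,rD,32,16    (rotate back, clear the other hole)

   modulo the exact insns that the splitters and combine settle on.  */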
19831 /* Return 1 if REGNO (reg1) == REGNO (reg2) - 1, making them candidates
19832 for lfq and stfq insns iff the registers are hard registers. */
19834 int
19835 registers_ok_for_quad_peep (rtx reg1, rtx reg2)
19837 /* We might have been passed a SUBREG. */
19838 if (GET_CODE (reg1) != REG || GET_CODE (reg2) != REG)
19839 return 0;
19841 /* We might have been passed non floating point registers. */
19842 if (!FP_REGNO_P (REGNO (reg1))
19843 || !FP_REGNO_P (REGNO (reg2)))
19844 return 0;
19846 return (REGNO (reg1) == REGNO (reg2) - 1);
19849 /* Return 1 if addr1 and addr2 are suitable for lfq or stfq insn.
19850 addr1 and addr2 must be in consecutive memory locations
19851 (addr2 == addr1 + 8). */
19853 int
19854 mems_ok_for_quad_peep (rtx mem1, rtx mem2)
19856 rtx addr1, addr2;
19857 unsigned int reg1, reg2;
19858 int offset1, offset2;
19860 /* The mems cannot be volatile. */
19861 if (MEM_VOLATILE_P (mem1) || MEM_VOLATILE_P (mem2))
19862 return 0;
19864 addr1 = XEXP (mem1, 0);
19865 addr2 = XEXP (mem2, 0);
19867 /* Extract an offset (if used) from the first addr. */
19868 if (GET_CODE (addr1) == PLUS)
19870 /* If not a REG, return zero. */
19871 if (GET_CODE (XEXP (addr1, 0)) != REG)
19872 return 0;
19873 else
19875 reg1 = REGNO (XEXP (addr1, 0));
19876 /* The offset must be constant! */
19877 if (GET_CODE (XEXP (addr1, 1)) != CONST_INT)
19878 return 0;
19879 offset1 = INTVAL (XEXP (addr1, 1));
19882 else if (GET_CODE (addr1) != REG)
19883 return 0;
19884 else
19886 reg1 = REGNO (addr1);
19887 /* This was a simple (mem (reg)) expression. Offset is 0. */
19888 offset1 = 0;
19891 /* And now for the second addr. */
19892 if (GET_CODE (addr2) == PLUS)
19894 /* If not a REG, return zero. */
19895 if (GET_CODE (XEXP (addr2, 0)) != REG)
19896 return 0;
19897 else
19899 reg2 = REGNO (XEXP (addr2, 0));
19900 /* The offset must be constant. */
19901 if (GET_CODE (XEXP (addr2, 1)) != CONST_INT)
19902 return 0;
19903 offset2 = INTVAL (XEXP (addr2, 1));
19906 else if (GET_CODE (addr2) != REG)
19907 return 0;
19908 else
19910 reg2 = REGNO (addr2);
19911 /* This was a simple (mem (reg)) expression. Offset is 0. */
19912 offset2 = 0;
19915 /* Both of these must have the same base register. */
19916 if (reg1 != reg2)
19917 return 0;
19919 /* The offset for the second addr must be 8 more than the first addr. */
19920 if (offset2 != offset1 + 8)
19921 return 0;
19923 /* All the tests passed. addr1 and addr2 are valid for lfq or stfq
19924 instructions. */
19925 return 1;
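/* For example, the peephole can turn

       lfd f14,16(r3)
       lfd f15,24(r3)

   into a single "lfq f14,16(r3)" once both the register check and the
   address check above succeed.  */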
19929 rtx
19930 rs6000_secondary_memory_needed_rtx (machine_mode mode)
19932 static bool eliminated = false;
19933 rtx ret;
19935 if (mode != SDmode || TARGET_NO_SDMODE_STACK)
19936 ret = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
19937 else
19939 rtx mem = cfun->machine->sdmode_stack_slot;
19940 gcc_assert (mem != NULL_RTX);
19942 if (!eliminated)
19944 mem = eliminate_regs (mem, VOIDmode, NULL_RTX);
19945 cfun->machine->sdmode_stack_slot = mem;
19946 eliminated = true;
19948 ret = mem;
19951 if (TARGET_DEBUG_ADDR)
19953 fprintf (stderr, "\nrs6000_secondary_memory_needed_rtx, mode %s, rtx:\n",
19954 GET_MODE_NAME (mode));
19955 if (!ret)
19956 fprintf (stderr, "\tNULL_RTX\n");
19957 else
19958 debug_rtx (ret);
19961 return ret;
19964 /* Return the mode to be used for memory when a secondary memory
19965 location is needed. For SDmode values we need to use DDmode; in
19966 all other cases we can use the same mode. */
19967 machine_mode
19968 rs6000_secondary_memory_needed_mode (machine_mode mode)
19970 if (lra_in_progress && mode == SDmode)
19971 return DDmode;
19972 return mode;
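/* For example, an SDmode value spilled during LRA gets a DDmode (8-byte)
   stack slot; this mirrors the DDmode slot that
   rs6000_alloc_sdmode_stack_slot below creates for the non-LRA path.  */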
19975 static tree
19976 rs6000_check_sdmode (tree *tp, int *walk_subtrees, void *data ATTRIBUTE_UNUSED)
19978 /* Don't walk into types. */
19979 if (*tp == NULL_TREE || *tp == error_mark_node || TYPE_P (*tp))
19981 *walk_subtrees = 0;
19982 return NULL_TREE;
19985 switch (TREE_CODE (*tp))
19987 case VAR_DECL:
19988 case PARM_DECL:
19989 case FIELD_DECL:
19990 case RESULT_DECL:
19991 case SSA_NAME:
19992 case REAL_CST:
19993 case MEM_REF:
19994 case VIEW_CONVERT_EXPR:
19995 if (TYPE_MODE (TREE_TYPE (*tp)) == SDmode)
19996 return *tp;
19997 break;
19998 default:
19999 break;
20002 return NULL_TREE;
20005 /* Classify a register type. Because the FMRGOW/FMRGEW instructions only work
20006 on traditional floating point registers, and the VMRGOW/VMRGEW instructions
20007 only work on the traditional altivec registers, note if an altivec register
20008 was chosen. */
20010 static enum rs6000_reg_type
20011 register_to_reg_type (rtx reg, bool *is_altivec)
20013 HOST_WIDE_INT regno;
20014 enum reg_class rclass;
20016 if (GET_CODE (reg) == SUBREG)
20017 reg = SUBREG_REG (reg);
20019 if (!REG_P (reg))
20020 return NO_REG_TYPE;
20022 regno = REGNO (reg);
20023 if (regno >= FIRST_PSEUDO_REGISTER)
20025 if (!lra_in_progress && !reload_in_progress && !reload_completed)
20026 return PSEUDO_REG_TYPE;
20028 regno = true_regnum (reg);
20029 if (regno < 0 || regno >= FIRST_PSEUDO_REGISTER)
20030 return PSEUDO_REG_TYPE;
20033 gcc_assert (regno >= 0);
20035 if (is_altivec && ALTIVEC_REGNO_P (regno))
20036 *is_altivec = true;
20038 rclass = rs6000_regno_regclass[regno];
20039 return reg_class_to_reg_type[(int)rclass];
20042 /* Helper function to return the cost of adding a TOC entry address. */
20044 static inline int
20045 rs6000_secondary_reload_toc_costs (addr_mask_type addr_mask)
20047 int ret;
20049 if (TARGET_CMODEL != CMODEL_SMALL)
20050 ret = ((addr_mask & RELOAD_REG_OFFSET) == 0) ? 1 : 2;
20052 else
20053 ret = (TARGET_MINIMAL_TOC) ? 6 : 3;
20055 return ret;
20058 /* Helper function for rs6000_secondary_reload to determine whether the memory
20059 address (ADDR) with a given register class (RCLASS) and machine mode (MODE)
20060 needs reloading. Return negative if the memory is not handled by the memory
20061 helper functions and a different reload method should be tried, 0 if no
20062 additional instructions are needed, and positive to give the extra cost
20063 for the memory. */
20065 static int
20066 rs6000_secondary_reload_memory (rtx addr,
20067 enum reg_class rclass,
20068 machine_mode mode)
20070 int extra_cost = 0;
20071 rtx reg, and_arg, plus_arg0, plus_arg1;
20072 addr_mask_type addr_mask;
20073 const char *type = NULL;
20074 const char *fail_msg = NULL;
20076 if (GPR_REG_CLASS_P (rclass))
20077 addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_GPR];
20079 else if (rclass == FLOAT_REGS)
20080 addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_FPR];
20082 else if (rclass == ALTIVEC_REGS)
20083 addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_VMX];
20085 /* For the combined VSX_REGS, turn off Altivec AND -16. */
20086 else if (rclass == VSX_REGS)
20087 addr_mask = (reg_addr[mode].addr_mask[RELOAD_REG_VMX]
20088 & ~RELOAD_REG_AND_M16);
20090 /* If the register allocator hasn't made up its mind yet on the register
20091 class to use, settle on defaults to use. */
20092 else if (rclass == NO_REGS)
20094 addr_mask = (reg_addr[mode].addr_mask[RELOAD_REG_ANY]
20095 & ~RELOAD_REG_AND_M16);
20097 if ((addr_mask & RELOAD_REG_MULTIPLE) != 0)
20098 addr_mask &= ~(RELOAD_REG_INDEXED
20099 | RELOAD_REG_PRE_INCDEC
20100 | RELOAD_REG_PRE_MODIFY);
20103 else
20104 addr_mask = 0;
20106 /* If the register isn't valid in this register class, just return now. */
20107 if ((addr_mask & RELOAD_REG_VALID) == 0)
20109 if (TARGET_DEBUG_ADDR)
20111 fprintf (stderr,
20112 "rs6000_secondary_reload_memory: mode = %s, class = %s, "
20113 "not valid in class\n",
20114 GET_MODE_NAME (mode), reg_class_names[rclass]);
20115 debug_rtx (addr);
20118 return -1;
20121 switch (GET_CODE (addr))
20123 /* Does the register class support auto update forms for this mode? We
20124 don't need a scratch register, since the powerpc only supports
20125 PRE_INC, PRE_DEC, and PRE_MODIFY. */
20126 case PRE_INC:
20127 case PRE_DEC:
20128 reg = XEXP (addr, 0);
20129 if (!base_reg_operand (addr, GET_MODE (reg)))
20131 fail_msg = "no base register #1";
20132 extra_cost = -1;
20135 else if ((addr_mask & RELOAD_REG_PRE_INCDEC) == 0)
20137 extra_cost = 1;
20138 type = "update";
20140 break;
20142 case PRE_MODIFY:
20143 reg = XEXP (addr, 0);
20144 plus_arg1 = XEXP (addr, 1);
20145 if (!base_reg_operand (reg, GET_MODE (reg))
20146 || GET_CODE (plus_arg1) != PLUS
20147 || !rtx_equal_p (reg, XEXP (plus_arg1, 0)))
20149 fail_msg = "bad PRE_MODIFY";
20150 extra_cost = -1;
20153 else if ((addr_mask & RELOAD_REG_PRE_MODIFY) == 0)
20155 extra_cost = 1;
20156 type = "update";
20158 break;
20160 /* Do we need to simulate AND -16 to clear the bottom address bits used
20161 in VMX load/stores? Only allow the AND for vector sizes. */
20162 case AND:
20163 and_arg = XEXP (addr, 0);
20164 if (GET_MODE_SIZE (mode) != 16
20165 || GET_CODE (XEXP (addr, 1)) != CONST_INT
20166 || INTVAL (XEXP (addr, 1)) != -16)
20168 fail_msg = "bad Altivec AND #1";
20169 extra_cost = -1;
20172 if (rclass != ALTIVEC_REGS)
20174 if (legitimate_indirect_address_p (and_arg, false))
20175 extra_cost = 1;
20177 else if (legitimate_indexed_address_p (and_arg, false))
20178 extra_cost = 2;
20180 else
20182 fail_msg = "bad Altivec AND #2";
20183 extra_cost = -1;
20186 type = "and";
20188 break;
20190 /* If this is an indirect address, make sure it is a base register. */
20191 case REG:
20192 case SUBREG:
20193 if (!legitimate_indirect_address_p (addr, false))
20195 extra_cost = 1;
20196 type = "move";
20198 break;
20200 /* If this is an indexed address, make sure the register class can handle
20201 indexed addresses for this mode. */
20202 case PLUS:
20203 plus_arg0 = XEXP (addr, 0);
20204 plus_arg1 = XEXP (addr, 1);
20206 /* (plus (plus (reg) (constant)) (constant)) is generated during
20207 push_reload processing, so handle it now. */
20208 if (GET_CODE (plus_arg0) == PLUS && CONST_INT_P (plus_arg1))
20210 if ((addr_mask & RELOAD_REG_OFFSET) == 0)
20212 extra_cost = 1;
20213 type = "offset";
20217 /* (plus (plus (reg) (constant)) (reg)) is also generated during
20218 push_reload processing, so handle it now. */
20219 else if (GET_CODE (plus_arg0) == PLUS && REG_P (plus_arg1))
20221 if ((addr_mask & RELOAD_REG_INDEXED) == 0)
20223 extra_cost = 1;
20224 type = "indexed #2";
20228 else if (!base_reg_operand (plus_arg0, GET_MODE (plus_arg0)))
20230 fail_msg = "no base register #2";
20231 extra_cost = -1;
20234 else if (int_reg_operand (plus_arg1, GET_MODE (plus_arg1)))
20236 if ((addr_mask & RELOAD_REG_INDEXED) == 0
20237 || !legitimate_indexed_address_p (addr, false))
20239 extra_cost = 1;
20240 type = "indexed";
20244 else if ((addr_mask & RELOAD_REG_QUAD_OFFSET) != 0
20245 && CONST_INT_P (plus_arg1))
20247 if (!quad_address_offset_p (INTVAL (plus_arg1)))
20249 extra_cost = 1;
20250 type = "vector d-form offset";
20254 /* Make sure the register class can handle offset addresses. */
20255 else if (rs6000_legitimate_offset_address_p (mode, addr, false, true))
20257 if ((addr_mask & RELOAD_REG_OFFSET) == 0)
20259 extra_cost = 1;
20260 type = "offset #2";
20264 else
20266 fail_msg = "bad PLUS";
20267 extra_cost = -1;
20270 break;
20272 case LO_SUM:
20273 /* Quad offsets are restricted and can't handle normal addresses. */
20274 if ((addr_mask & RELOAD_REG_QUAD_OFFSET) != 0)
20276 extra_cost = -1;
20277 type = "vector d-form lo_sum";
20280 else if (!legitimate_lo_sum_address_p (mode, addr, false))
20282 fail_msg = "bad LO_SUM";
20283 extra_cost = -1;
20286 if ((addr_mask & RELOAD_REG_OFFSET) == 0)
20288 extra_cost = 1;
20289 type = "lo_sum";
20291 break;
20293 /* Static addresses need to create a TOC entry. */
20294 case CONST:
20295 case SYMBOL_REF:
20296 case LABEL_REF:
20297 if ((addr_mask & RELOAD_REG_QUAD_OFFSET) != 0)
20299 extra_cost = -1;
20300 type = "vector d-form lo_sum #2";
20303 else
20305 type = "address";
20306 extra_cost = rs6000_secondary_reload_toc_costs (addr_mask);
20308 break;
20310 /* TOC references look like offsetable memory. */
20311 case UNSPEC:
20312 if (TARGET_CMODEL == CMODEL_SMALL || XINT (addr, 1) != UNSPEC_TOCREL)
20314 fail_msg = "bad UNSPEC";
20315 extra_cost = -1;
20318 else if ((addr_mask & RELOAD_REG_QUAD_OFFSET) != 0)
20320 extra_cost = -1;
20321 type = "vector d-form lo_sum #3";
20324 else if ((addr_mask & RELOAD_REG_OFFSET) == 0)
20326 extra_cost = 1;
20327 type = "toc reference";
20329 break;
20331 default:
20333 fail_msg = "bad address";
20334 extra_cost = -1;
20338 if (TARGET_DEBUG_ADDR /* && extra_cost != 0 */)
20340 if (extra_cost < 0)
20341 fprintf (stderr,
20342 "rs6000_secondary_reload_memory error: mode = %s, "
20343 "class = %s, addr_mask = '%s', %s\n",
20344 GET_MODE_NAME (mode),
20345 reg_class_names[rclass],
20346 rs6000_debug_addr_mask (addr_mask, false),
20347 (fail_msg != NULL) ? fail_msg : "<bad address>");
20349 else
20350 fprintf (stderr,
20351 "rs6000_secondary_reload_memory: mode = %s, class = %s, "
20352 "addr_mask = '%s', extra cost = %d, %s\n",
20353 GET_MODE_NAME (mode),
20354 reg_class_names[rclass],
20355 rs6000_debug_addr_mask (addr_mask, false),
20356 extra_cost,
20357 (type) ? type : "<none>");
20359 debug_rtx (addr);
20362 return extra_cost;
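/* To summarize the contract with an example: a return of 0 means the
   address is directly usable for this register class; a positive value,
   such as the 1 for a simulated AltiVec "and -16" on an indirect base, is
   roughly the number of extra insns the reload helper will emit; and -1
   tells the caller to fall back to another reload strategy.  */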
20365 /* Helper function for rs6000_secondary_reload to return true if a move to a
20366 different register class is really a simple move. */
20368 static bool
20369 rs6000_secondary_reload_simple_move (enum rs6000_reg_type to_type,
20370 enum rs6000_reg_type from_type,
20371 machine_mode mode)
20373 int size;
20375 /* Add support for various direct moves available. In this function, we only
20376 look at cases where we don't need any extra registers, and one or more
20377 simple move insns are issued. At present, 32-bit integers are not allowed
20378 in FPR/VSX registers. Single precision binary floating point is not a simple
20379 move because we need to convert to the single precision memory layout.
20380 The 4-byte SDmode can be moved. TDmode values are disallowed since they
20381 need special direct move handling, which we do not support yet. */
20382 size = GET_MODE_SIZE (mode);
20383 if (TARGET_DIRECT_MOVE
20384 && ((mode == SDmode) || (TARGET_POWERPC64 && size == 8))
20385 && ((to_type == GPR_REG_TYPE && from_type == VSX_REG_TYPE)
20386 || (to_type == VSX_REG_TYPE && from_type == GPR_REG_TYPE)))
20387 return true;
20389 else if (TARGET_DIRECT_MOVE_128 && size == 16 && mode != TDmode
20390 && ((to_type == VSX_REG_TYPE && from_type == GPR_REG_TYPE)
20391 || (to_type == GPR_REG_TYPE && from_type == VSX_REG_TYPE)))
20392 return true;
20394 else if (TARGET_MFPGPR && TARGET_POWERPC64 && size == 8
20395 && ((to_type == GPR_REG_TYPE && from_type == FPR_REG_TYPE)
20396 || (to_type == FPR_REG_TYPE && from_type == GPR_REG_TYPE)))
20397 return true;
20399 else if ((size == 4 || (TARGET_POWERPC64 && size == 8))
20400 && ((to_type == GPR_REG_TYPE && from_type == SPR_REG_TYPE)
20401 || (to_type == SPR_REG_TYPE && from_type == GPR_REG_TYPE)))
20402 return true;
20404 return false;
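/* For example, assuming -mcpu=power8 in 64-bit mode (TARGET_DIRECT_MOVE
   and TARGET_POWERPC64 set), a DImode move between a GPR and a VSX
   register is "simple": a single mtvsrd or mfvsrd does it.  An SFmode
   move is not, because the value must be converted to or from its memory
   format first; that case is handled by the direct-move helper below.  */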
20407 /* Direct move helper function for rs6000_secondary_reload, handle all of the
20408 special direct moves that involve allocating an extra register. Return
20409 true and store the helper's insn code and cost in SRI if there is such a
20410 function, false if not. */
20412 static bool
20413 rs6000_secondary_reload_direct_move (enum rs6000_reg_type to_type,
20414 enum rs6000_reg_type from_type,
20415 machine_mode mode,
20416 secondary_reload_info *sri,
20417 bool altivec_p)
20419 bool ret = false;
20420 enum insn_code icode = CODE_FOR_nothing;
20421 int cost = 0;
20422 int size = GET_MODE_SIZE (mode);
20424 if (TARGET_POWERPC64 && size == 16)
20426 /* Handle moving 128-bit values from GPRs to VSX registers on
20427 ISA 2.07 (power8, power9) when running in 64-bit mode using
20428 XXPERMDI to glue the two 64-bit values back together. */
20429 if (to_type == VSX_REG_TYPE && from_type == GPR_REG_TYPE)
20431 cost = 3; /* 2 mtvsrd's, 1 xxpermdi. */
20432 icode = reg_addr[mode].reload_vsx_gpr;
20435 /* Handle moving 128-bit values from VSX registers to GPRs on
20436 ISA 2.07 when running in 64-bit mode using XXPERMDI to get access to the
20437 bottom 64-bit value. */
20438 else if (to_type == GPR_REG_TYPE && from_type == VSX_REG_TYPE)
20440 cost = 3; /* 2 mfvsrd's, 1 xxpermdi. */
20441 icode = reg_addr[mode].reload_gpr_vsx;
20445 else if (TARGET_POWERPC64 && mode == SFmode)
20447 if (to_type == GPR_REG_TYPE && from_type == VSX_REG_TYPE)
20449 cost = 3; /* xscvdpspn, mfvsrd, and. */
20450 icode = reg_addr[mode].reload_gpr_vsx;
20453 else if (to_type == VSX_REG_TYPE && from_type == GPR_REG_TYPE)
20455 cost = 2; /* mtvsrz, xscvspdpn. */
20456 icode = reg_addr[mode].reload_vsx_gpr;
20460 else if (!TARGET_POWERPC64 && size == 8)
20462 /* Handle moving 64-bit values from GPRs to floating point registers on
20463 ISA 2.07 when running in 32-bit mode using FMRGOW to glue the two
20464 32-bit values back together. Altivec register classes must be handled
20465 specially since a different instruction is used, and the secondary
20466 reload support requires a single instruction class in the scratch
20467 register constraint. However, right now TFmode is not allowed in
20468 Altivec registers, so the pattern will never match. */
20469 if (to_type == VSX_REG_TYPE && from_type == GPR_REG_TYPE && !altivec_p)
20471 cost = 3; /* 2 mtvsrwz's, 1 fmrgow. */
20472 icode = reg_addr[mode].reload_fpr_gpr;
20476 if (icode != CODE_FOR_nothing)
20478 ret = true;
20479 if (sri)
20481 sri->icode = icode;
20482 sri->extra_cost = cost;
20486 return ret;
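/* For example, moving a 128-bit value from two GPRs into a VSX register
   on a 64-bit ISA 2.07 machine uses the reload_vsx_gpr pattern: two
   mtvsrd's plus one xxpermdi to glue the halves together, which is where
   the cost of 3 above comes from.  */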
20489 /* Return whether a move between two register classes can be done either
20490 directly (simple move) or via a pattern that uses a single extra temporary
20491 (using ISA 2.07's direct move in this case). */
20493 static bool
20494 rs6000_secondary_reload_move (enum rs6000_reg_type to_type,
20495 enum rs6000_reg_type from_type,
20496 machine_mode mode,
20497 secondary_reload_info *sri,
20498 bool altivec_p)
20500 /* Fall back to load/store reloads if either type is not a register. */
20501 if (to_type == NO_REG_TYPE || from_type == NO_REG_TYPE)
20502 return false;
20504 /* If we haven't allocated registers yet, assume the move can be done for the
20505 standard register types. */
20506 if ((to_type == PSEUDO_REG_TYPE && from_type == PSEUDO_REG_TYPE)
20507 || (to_type == PSEUDO_REG_TYPE && IS_STD_REG_TYPE (from_type))
20508 || (from_type == PSEUDO_REG_TYPE && IS_STD_REG_TYPE (to_type)))
20509 return true;
20511 /* A move within the same set of registers is a simple move for non-specialized
20512 registers. */
20513 if (to_type == from_type && IS_STD_REG_TYPE (to_type))
20514 return true;
20516 /* Check whether a simple move can be done directly. */
20517 if (rs6000_secondary_reload_simple_move (to_type, from_type, mode))
20519 if (sri)
20521 sri->icode = CODE_FOR_nothing;
20522 sri->extra_cost = 0;
20524 return true;
20527 /* Now check if we can do it in a few steps. */
20528 return rs6000_secondary_reload_direct_move (to_type, from_type, mode, sri,
20529 altivec_p);
20532 /* Inform reload about cases where moving X with a mode MODE to a register in
20533 RCLASS requires an extra scratch or immediate register. Return the class
20534 needed for the immediate register.
20536 For VSX and Altivec, we may need a register to convert sp+offset into
20537 reg+sp.
20539 For misaligned 64-bit gpr loads and stores we need a register to
20540 convert an offset address to indirect. */
20542 static reg_class_t
20543 rs6000_secondary_reload (bool in_p,
20544 rtx x,
20545 reg_class_t rclass_i,
20546 machine_mode mode,
20547 secondary_reload_info *sri)
20549 enum reg_class rclass = (enum reg_class) rclass_i;
20550 reg_class_t ret = ALL_REGS;
20551 enum insn_code icode;
20552 bool default_p = false;
20553 bool done_p = false;
20555 /* Allow subreg of memory before/during reload. */
20556 bool memory_p = (MEM_P (x)
20557 || (!reload_completed && GET_CODE (x) == SUBREG
20558 && MEM_P (SUBREG_REG (x))));
20560 sri->icode = CODE_FOR_nothing;
20561 sri->t_icode = CODE_FOR_nothing;
20562 sri->extra_cost = 0;
20563 icode = ((in_p)
20564 ? reg_addr[mode].reload_load
20565 : reg_addr[mode].reload_store);
20567 if (REG_P (x) || register_operand (x, mode))
20569 enum rs6000_reg_type to_type = reg_class_to_reg_type[(int)rclass];
20570 bool altivec_p = (rclass == ALTIVEC_REGS);
20571 enum rs6000_reg_type from_type = register_to_reg_type (x, &altivec_p);
20573 if (!in_p)
20575 enum rs6000_reg_type exchange = to_type;
20576 to_type = from_type;
20577 from_type = exchange;
20580 /* Can we do a direct move of some sort? */
20581 if (rs6000_secondary_reload_move (to_type, from_type, mode, sri,
20582 altivec_p))
20584 icode = (enum insn_code)sri->icode;
20585 default_p = false;
20586 done_p = true;
20587 ret = NO_REGS;
20591 /* Make sure 0.0 is not reloaded or forced into memory. */
20592 if (x == CONST0_RTX (mode) && VSX_REG_CLASS_P (rclass))
20594 ret = NO_REGS;
20595 default_p = false;
20596 done_p = true;
20599 /* If this is a scalar floating point value and we want to load it into the
20600 traditional Altivec registers, do it via a traditional floating
20601 point register, unless we have D-form addressing. Also make sure that
20602 non-zero constants use an FPR. */
20603 if (!done_p && reg_addr[mode].scalar_in_vmx_p
20604 && !mode_supports_vmx_dform (mode)
20605 && (rclass == VSX_REGS || rclass == ALTIVEC_REGS)
20606 && (memory_p || (GET_CODE (x) == CONST_DOUBLE)))
20608 ret = FLOAT_REGS;
20609 default_p = false;
20610 done_p = true;
20613 /* Handle reload of load/stores if we have reload helper functions. */
20614 if (!done_p && icode != CODE_FOR_nothing && memory_p)
20616 int extra_cost = rs6000_secondary_reload_memory (XEXP (x, 0), rclass,
20617 mode);
20619 if (extra_cost >= 0)
20621 done_p = true;
20622 ret = NO_REGS;
20623 if (extra_cost > 0)
20625 sri->extra_cost = extra_cost;
20626 sri->icode = icode;
20631 /* Handle unaligned loads and stores of integer registers. */
20632 if (!done_p && TARGET_POWERPC64
20633 && reg_class_to_reg_type[(int)rclass] == GPR_REG_TYPE
20634 && memory_p
20635 && GET_MODE_SIZE (GET_MODE (x)) >= UNITS_PER_WORD)
20637 rtx addr = XEXP (x, 0);
20638 rtx off = address_offset (addr);
20640 if (off != NULL_RTX)
20642 unsigned int extra = GET_MODE_SIZE (GET_MODE (x)) - UNITS_PER_WORD;
20643 unsigned HOST_WIDE_INT offset = INTVAL (off);
20645 /* We need a secondary reload when our legitimate_address_p
20646 says the address is good (as otherwise the entire address
20647 will be reloaded), and the offset is not a multiple of
20648 four or we have an address wrap. Address wrap will only
20649 occur for LO_SUMs since legitimate_offset_address_p
20650 rejects addresses for 16-byte mems that will wrap. */
20651 if (GET_CODE (addr) == LO_SUM
20652 ? (1 /* legitimate_address_p allows any offset for lo_sum */
20653 && ((offset & 3) != 0
20654 || ((offset & 0xffff) ^ 0x8000) >= 0x10000 - extra))
20655 : (offset + 0x8000 < 0x10000 - extra /* legitimate_address_p */
20656 && (offset & 3) != 0))
20658 /* -m32 -mpowerpc64 needs to use a 32-bit scratch register. */
20659 if (in_p)
20660 sri->icode = ((TARGET_32BIT) ? CODE_FOR_reload_si_load
20661 : CODE_FOR_reload_di_load);
20662 else
20663 sri->icode = ((TARGET_32BIT) ? CODE_FOR_reload_si_store
20664 : CODE_FOR_reload_di_store);
20665 sri->extra_cost = 2;
20666 ret = NO_REGS;
20667 done_p = true;
20669 else
20670 default_p = true;
20672 else
20673 default_p = true;
20676 if (!done_p && !TARGET_POWERPC64
20677 && reg_class_to_reg_type[(int)rclass] == GPR_REG_TYPE
20678 && memory_p
20679 && GET_MODE_SIZE (GET_MODE (x)) > UNITS_PER_WORD)
20681 rtx addr = XEXP (x, 0);
20682 rtx off = address_offset (addr);
20684 if (off != NULL_RTX)
20686 unsigned int extra = GET_MODE_SIZE (GET_MODE (x)) - UNITS_PER_WORD;
20687 unsigned HOST_WIDE_INT offset = INTVAL (off);
20689 /* We need a secondary reload when our legitimate_address_p
20690 says the address is good (as otherwise the entire address
20691 will be reloaded), and we have a wrap.
20693 legitimate_lo_sum_address_p allows LO_SUM addresses to
20694 have any offset so test for wrap in the low 16 bits.
20696 legitimate_offset_address_p checks for the range
20697 [-0x8000,0x7fff] for mode size of 8 and [-0x8000,0x7ff7]
20698 for mode size of 16. We wrap at [0x7ffc,0x7fff] and
20699 [0x7ff4,0x7fff] respectively, so test for the
20700 intersection of these ranges, [0x7ffc,0x7fff] and
20701 [0x7ff4,0x7ff7] respectively.
20703 Note that the address we see here may have been
20704 manipulated by legitimize_reload_address. */
20705 if (GET_CODE (addr) == LO_SUM
20706 ? ((offset & 0xffff) ^ 0x8000) >= 0x10000 - extra
20707 : offset - (0x8000 - extra) < UNITS_PER_WORD)
20709 if (in_p)
20710 sri->icode = CODE_FOR_reload_si_load;
20711 else
20712 sri->icode = CODE_FOR_reload_si_store;
20713 sri->extra_cost = 2;
20714 ret = NO_REGS;
20715 done_p = true;
20717 else
20718 default_p = true;
20720 else
20721 default_p = true;
20724 if (!done_p)
20725 default_p = true;
20727 if (default_p)
20728 ret = default_secondary_reload (in_p, x, rclass, mode, sri);
20730 gcc_assert (ret != ALL_REGS);
20732 if (TARGET_DEBUG_ADDR)
20734 fprintf (stderr,
20735 "\nrs6000_secondary_reload, return %s, in_p = %s, rclass = %s, "
20736 "mode = %s",
20737 reg_class_names[ret],
20738 in_p ? "true" : "false",
20739 reg_class_names[rclass],
20740 GET_MODE_NAME (mode));
20742 if (reload_completed)
20743 fputs (", after reload", stderr);
20745 if (!done_p)
20746 fputs (", done_p not set", stderr);
20748 if (default_p)
20749 fputs (", default secondary reload", stderr);
20751 if (sri->icode != CODE_FOR_nothing)
20752 fprintf (stderr, ", reload func = %s, extra cost = %d",
20753 insn_data[sri->icode].name, sri->extra_cost);
20755 else if (sri->extra_cost > 0)
20756 fprintf (stderr, ", extra cost = %d", sri->extra_cost);
20758 fputs ("\n", stderr);
20759 debug_rtx (x);
20762 return ret;
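/* For example, loading a DImode value from (mem (plus r3 5)) on a 64-bit
   target: the DS-form offset must be a multiple of 4, so (assuming the
   generic memory helpers above declined the address) the misaligned-offset
   branch selects CODE_FOR_reload_di_load with extra cost 2, and reload
   allocates a GPR scratch to build the address.  */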
20765 /* Better tracing for rs6000_secondary_reload_inner. */
20767 static void
20768 rs6000_secondary_reload_trace (int line, rtx reg, rtx mem, rtx scratch,
20769 bool store_p)
20771 rtx set, clobber;
20773 gcc_assert (reg != NULL_RTX && mem != NULL_RTX && scratch != NULL_RTX);
20775 fprintf (stderr, "rs6000_secondary_reload_inner:%d, type = %s\n", line,
20776 store_p ? "store" : "load");
20778 if (store_p)
20779 set = gen_rtx_SET (mem, reg);
20780 else
20781 set = gen_rtx_SET (reg, mem);
20783 clobber = gen_rtx_CLOBBER (VOIDmode, scratch);
20784 debug_rtx (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, set, clobber)));
20787 static void rs6000_secondary_reload_fail (int, rtx, rtx, rtx, bool)
20788 ATTRIBUTE_NORETURN;
20790 static void
20791 rs6000_secondary_reload_fail (int line, rtx reg, rtx mem, rtx scratch,
20792 bool store_p)
20794 rs6000_secondary_reload_trace (line, reg, mem, scratch, store_p);
20795 gcc_unreachable ();
20798 /* Fixup reload addresses for values in GPR, FPR, and VMX registers that have
20799 reload helper functions. These were identified in
20800 rs6000_secondary_reload_memory, and if reload decided to use the secondary
20801 reload, it calls the insns:
20802 reload_<RELOAD:mode>_<P:mptrsize>_store
20803 reload_<RELOAD:mode>_<P:mptrsize>_load
20805 which in turn call this function to do whatever is necessary to create
20806 valid addresses. */
20808 void
20809 rs6000_secondary_reload_inner (rtx reg, rtx mem, rtx scratch, bool store_p)
20811 int regno = true_regnum (reg);
20812 machine_mode mode = GET_MODE (reg);
20813 addr_mask_type addr_mask;
20814 rtx addr;
20815 rtx new_addr;
20816 rtx op_reg, op0, op1;
20817 rtx and_op;
20818 rtx cc_clobber;
20819 rtvec rv;
20821 if (regno < 0 || regno >= FIRST_PSEUDO_REGISTER || !MEM_P (mem)
20822 || !base_reg_operand (scratch, GET_MODE (scratch)))
20823 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
20825 if (IN_RANGE (regno, FIRST_GPR_REGNO, LAST_GPR_REGNO))
20826 addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_GPR];
20828 else if (IN_RANGE (regno, FIRST_FPR_REGNO, LAST_FPR_REGNO))
20829 addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_FPR];
20831 else if (IN_RANGE (regno, FIRST_ALTIVEC_REGNO, LAST_ALTIVEC_REGNO))
20832 addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_VMX];
20834 else
20835 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
20837 /* Make sure the mode is valid in this register class. */
20838 if ((addr_mask & RELOAD_REG_VALID) == 0)
20839 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
20841 if (TARGET_DEBUG_ADDR)
20842 rs6000_secondary_reload_trace (__LINE__, reg, mem, scratch, store_p);
20844 new_addr = addr = XEXP (mem, 0);
20845 switch (GET_CODE (addr))
20847 /* Does the register class support auto update forms for this mode? If
20848 not, do the update now. We don't need a scratch register, since the
20849 powerpc only supports PRE_INC, PRE_DEC, and PRE_MODIFY. */
20850 case PRE_INC:
20851 case PRE_DEC:
20852 op_reg = XEXP (addr, 0);
20853 if (!base_reg_operand (op_reg, Pmode))
20854 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
20856 if ((addr_mask & RELOAD_REG_PRE_INCDEC) == 0)
20858 emit_insn (gen_add2_insn (op_reg, GEN_INT (GET_MODE_SIZE (mode))));
20859 new_addr = op_reg;
20861 break;
20863 case PRE_MODIFY:
20864 op0 = XEXP (addr, 0);
20865 op1 = XEXP (addr, 1);
20866 if (!base_reg_operand (op0, Pmode)
20867 || GET_CODE (op1) != PLUS
20868 || !rtx_equal_p (op0, XEXP (op1, 0)))
20869 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
20871 if ((addr_mask & RELOAD_REG_PRE_MODIFY) == 0)
20873 emit_insn (gen_rtx_SET (op0, op1));
20874 new_addr = reg;
20876 break;
20878 /* Do we need to simulate AND -16 to clear the bottom address bits used
20879 in VMX load/stores? */
20880 case AND:
20881 op0 = XEXP (addr, 0);
20882 op1 = XEXP (addr, 1);
20883 if ((addr_mask & RELOAD_REG_AND_M16) == 0)
20885 if (REG_P (op0) || GET_CODE (op0) == SUBREG)
20886 op_reg = op0;
20888 else if (GET_CODE (op1) == PLUS)
20890 emit_insn (gen_rtx_SET (scratch, op1));
20891 op_reg = scratch;
20894 else
20895 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
20897 and_op = gen_rtx_AND (GET_MODE (scratch), op_reg, op1);
20898 cc_clobber = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (CCmode));
20899 rv = gen_rtvec (2, gen_rtx_SET (scratch, and_op), cc_clobber);
20900 emit_insn (gen_rtx_PARALLEL (VOIDmode, rv));
20901 new_addr = scratch;
20903 break;
20905 /* If this is an indirect address, make sure it is a base register. */
20906 case REG:
20907 case SUBREG:
20908 if (!base_reg_operand (addr, GET_MODE (addr)))
20910 emit_insn (gen_rtx_SET (scratch, addr));
20911 new_addr = scratch;
20913 break;
20915 /* If this is an indexed address, make sure the register class can handle
20916 indexed addresses for this mode. */
20917 case PLUS:
20918 op0 = XEXP (addr, 0);
20919 op1 = XEXP (addr, 1);
20920 if (!base_reg_operand (op0, Pmode))
20921 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
20923 else if (int_reg_operand (op1, Pmode))
20925 if ((addr_mask & RELOAD_REG_INDEXED) == 0)
20927 emit_insn (gen_rtx_SET (scratch, addr));
20928 new_addr = scratch;
20932 else if (mode_supports_vsx_dform_quad (mode) && CONST_INT_P (op1))
20934 if (((addr_mask & RELOAD_REG_QUAD_OFFSET) == 0)
20935 || !quad_address_p (addr, mode, false))
20937 emit_insn (gen_rtx_SET (scratch, addr));
20938 new_addr = scratch;
20942 /* Make sure the register class can handle offset addresses. */
20943 else if (rs6000_legitimate_offset_address_p (mode, addr, false, true))
20945 if ((addr_mask & RELOAD_REG_OFFSET) == 0)
20947 emit_insn (gen_rtx_SET (scratch, addr));
20948 new_addr = scratch;
20952 else
20953 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
20955 break;
20957 case LO_SUM:
20958 op0 = XEXP (addr, 0);
20959 op1 = XEXP (addr, 1);
20960 if (!base_reg_operand (op0, Pmode))
20961 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
20963 else if (int_reg_operand (op1, Pmode))
20965 if ((addr_mask & RELOAD_REG_INDEXED) == 0)
20967 emit_insn (gen_rtx_SET (scratch, addr));
20968 new_addr = scratch;
20972 /* Quad offsets are restricted and can't handle normal addresses. */
20973 else if (mode_supports_vsx_dform_quad (mode))
20975 emit_insn (gen_rtx_SET (scratch, addr));
20976 new_addr = scratch;
20979 /* Make sure the register class can handle offset addresses. */
20980 else if (legitimate_lo_sum_address_p (mode, addr, false))
20982 if ((addr_mask & RELOAD_REG_OFFSET) == 0)
20984 emit_insn (gen_rtx_SET (scratch, addr));
20985 new_addr = scratch;
20989 else
20990 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
20992 break;
20994 case SYMBOL_REF:
20995 case CONST:
20996 case LABEL_REF:
20997 rs6000_emit_move (scratch, addr, Pmode);
20998 new_addr = scratch;
20999 break;
21001 default:
21002 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
21005 /* Adjust the address if it changed. */
21006 if (addr != new_addr)
21008 mem = replace_equiv_address_nv (mem, new_addr);
21009 if (TARGET_DEBUG_ADDR)
21010 fprintf (stderr, "\nrs6000_secondary_reload_inner, mem adjusted.\n");
21013 /* Now create the move. */
21014 if (store_p)
21015 emit_insn (gen_rtx_SET (mem, reg));
21016 else
21017 emit_insn (gen_rtx_SET (reg, mem));
21019 return;
21022 /* Convert reloads involving 64-bit gprs and misaligned offset
21023 addressing, or multiple 32-bit gprs and offsets that are too large,
21024 to use indirect addressing. */
21026 void
21027 rs6000_secondary_reload_gpr (rtx reg, rtx mem, rtx scratch, bool store_p)
21029 int regno = true_regnum (reg);
21030 enum reg_class rclass;
21031 rtx addr;
21032 rtx scratch_or_premodify = scratch;
21034 if (TARGET_DEBUG_ADDR)
21036 fprintf (stderr, "\nrs6000_secondary_reload_gpr, type = %s\n",
21037 store_p ? "store" : "load");
21038 fprintf (stderr, "reg:\n");
21039 debug_rtx (reg);
21040 fprintf (stderr, "mem:\n");
21041 debug_rtx (mem);
21042 fprintf (stderr, "scratch:\n");
21043 debug_rtx (scratch);
21046 gcc_assert (regno >= 0 && regno < FIRST_PSEUDO_REGISTER);
21047 gcc_assert (GET_CODE (mem) == MEM);
21048 rclass = REGNO_REG_CLASS (regno);
21049 gcc_assert (rclass == GENERAL_REGS || rclass == BASE_REGS);
21050 addr = XEXP (mem, 0);
21052 if (GET_CODE (addr) == PRE_MODIFY)
21054 gcc_assert (REG_P (XEXP (addr, 0))
21055 && GET_CODE (XEXP (addr, 1)) == PLUS
21056 && XEXP (XEXP (addr, 1), 0) == XEXP (addr, 0));
21057 scratch_or_premodify = XEXP (addr, 0);
21058 if (!HARD_REGISTER_P (scratch_or_premodify))
21059 /* If we have a pseudo here then reload will have arranged
21060 to have it replaced, but only in the original insn.
21061 Use the replacement here too. */
21062 scratch_or_premodify = find_replacement (&XEXP (addr, 0));
21064 /* RTL emitted by rs6000_secondary_reload_gpr uses RTL
21065 expressions from the original insn, without unsharing them.
21066 Any RTL that points into the original insn will of course
21067 have register replacements applied. That is why we don't
21068 need to look for replacements under the PLUS. */
21069 addr = XEXP (addr, 1);
21071 gcc_assert (GET_CODE (addr) == PLUS || GET_CODE (addr) == LO_SUM);
21073 rs6000_emit_move (scratch_or_premodify, addr, Pmode);
21075 mem = replace_equiv_address_nv (mem, scratch_or_premodify);
21077 /* Now create the move. */
21078 if (store_p)
21079 emit_insn (gen_rtx_SET (mem, reg));
21080 else
21081 emit_insn (gen_rtx_SET (reg, mem));
21083 return;
21086 /* Allocate a 64-bit stack slot to be used for copying SDmode values through if
21087 this function has any SDmode references. If we are on a power7 or later, we
21088 don't need the 64-bit stack slot since the LFIWZX and STFIWX instructions
21089 can load/store the value. */
21091 static void
21092 rs6000_alloc_sdmode_stack_slot (void)
21094 tree t;
21095 basic_block bb;
21096 gimple_stmt_iterator gsi;
21098 gcc_assert (cfun->machine->sdmode_stack_slot == NULL_RTX);
21099 /* We use a different approach for dealing with the secondary
21100 memory in LRA. */
21101 if (ira_use_lra_p)
21102 return;
21104 if (TARGET_NO_SDMODE_STACK)
21105 return;
21107 FOR_EACH_BB_FN (bb, cfun)
21108 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
21110 tree ret = walk_gimple_op (gsi_stmt (gsi), rs6000_check_sdmode, NULL);
21111 if (ret)
21113 rtx stack = assign_stack_local (DDmode, GET_MODE_SIZE (DDmode), 0);
21114 cfun->machine->sdmode_stack_slot = adjust_address_nv (stack,
21115 SDmode, 0);
21116 return;
21120 /* Check for any SDmode parameters of the function. */
21121 for (t = DECL_ARGUMENTS (cfun->decl); t; t = DECL_CHAIN (t))
21123 if (TREE_TYPE (t) == error_mark_node)
21124 continue;
21126 if (TYPE_MODE (TREE_TYPE (t)) == SDmode
21127 || TYPE_MODE (DECL_ARG_TYPE (t)) == SDmode)
21129 rtx stack = assign_stack_local (DDmode, GET_MODE_SIZE (DDmode), 0);
21130 cfun->machine->sdmode_stack_slot = adjust_address_nv (stack,
21131 SDmode, 0);
21132 return;
21137 static void
21138 rs6000_instantiate_decls (void)
21140 if (cfun->machine->sdmode_stack_slot != NULL_RTX)
21141 instantiate_decl_rtl (cfun->machine->sdmode_stack_slot);
21144 /* Given an rtx X being reloaded into a reg required to be
21145 in class CLASS, return the class of reg to actually use.
21146 In general this is just CLASS; but on some machines
21147 in some cases it is preferable to use a more restrictive class.
21149 On the RS/6000, we have to return NO_REGS when we want to reload a
21150 floating-point CONST_DOUBLE to force it to be copied to memory.
21152 We also don't want to reload integer values into floating-point
21153 registers if we can at all help it. In fact, this can
21154 cause reload to die, if it tries to generate a reload of CTR
21155 into a FP register and discovers it doesn't have the memory location
21156 required.
21158 ??? Would it be a good idea to have reload do the converse, that is
21159 try to reload floating modes into FP registers if possible?
21162 static enum reg_class
21163 rs6000_preferred_reload_class (rtx x, enum reg_class rclass)
21165 machine_mode mode = GET_MODE (x);
21166 bool is_constant = CONSTANT_P (x);
21168 /* If a mode can't go in FPR/ALTIVEC/VSX registers, don't return a preferred
21169 reload class for it. */
21170 if ((rclass == ALTIVEC_REGS || rclass == VSX_REGS)
21171 && (reg_addr[mode].addr_mask[RELOAD_REG_VMX] & RELOAD_REG_VALID) == 0)
21172 return NO_REGS;
21174 if ((rclass == FLOAT_REGS || rclass == VSX_REGS)
21175 && (reg_addr[mode].addr_mask[RELOAD_REG_FPR] & RELOAD_REG_VALID) == 0)
21176 return NO_REGS;
21178 /* For VSX, see if we should prefer FLOAT_REGS or ALTIVEC_REGS. Do not allow
21179 the reloading of address expressions using PLUS into floating point
21180 registers. */
21181 if (TARGET_VSX && VSX_REG_CLASS_P (rclass) && GET_CODE (x) != PLUS)
21183 if (is_constant)
21185 /* Zero is always allowed in all VSX registers. */
21186 if (x == CONST0_RTX (mode))
21187 return rclass;
21189 /* If this is a vector constant that can be formed with a few Altivec
21190 instructions, we want altivec registers. */
21191 if (GET_CODE (x) == CONST_VECTOR && easy_vector_constant (x, mode))
21192 return ALTIVEC_REGS;
21194 /* Force constant to memory. */
21195 return NO_REGS;
21198 /* D-form addressing can easily reload the value. */
21199 if (mode_supports_vmx_dform (mode)
21200 || mode_supports_vsx_dform_quad (mode))
21201 return rclass;
21203 /* If this is a scalar floating point value and we don't have D-form
21204 addressing, prefer the traditional floating point registers so that we
21205 can use D-form (register+offset) addressing. */
21206 if (GET_MODE_SIZE (mode) < 16 && rclass == VSX_REGS)
21207 return FLOAT_REGS;
21209 /* Prefer the Altivec registers if Altivec is handling the vector
21210 operations (i.e. V16QI, V8HI, and V4SI), or if we prefer Altivec
21211 loads. */
21212 if (VECTOR_UNIT_ALTIVEC_P (mode) || VECTOR_MEM_ALTIVEC_P (mode)
21213 || mode == V1TImode)
21214 return ALTIVEC_REGS;
21216 return rclass;
21219 if (is_constant || GET_CODE (x) == PLUS)
21221 if (reg_class_subset_p (GENERAL_REGS, rclass))
21222 return GENERAL_REGS;
21223 if (reg_class_subset_p (BASE_REGS, rclass))
21224 return BASE_REGS;
21225 return NO_REGS;
21228 if (GET_MODE_CLASS (mode) == MODE_INT && rclass == NON_SPECIAL_REGS)
21229 return GENERAL_REGS;
21231 return rclass;
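/* Two examples of the logic above: reloading the constant 0.0 into
   VSX_REGS returns VSX_REGS unchanged, since zero can be generated
   directly in any VSX register; a DFmode value with a VSX_REGS preference
   instead comes back as FLOAT_REGS (on targets without D-form VSX
   addressing) so that reg+offset addressing stays usable.  */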
21234 /* Debug version of rs6000_preferred_reload_class. */
21235 static enum reg_class
21236 rs6000_debug_preferred_reload_class (rtx x, enum reg_class rclass)
21238 enum reg_class ret = rs6000_preferred_reload_class (x, rclass);
21240 fprintf (stderr,
21241 "\nrs6000_preferred_reload_class, return %s, rclass = %s, "
21242 "mode = %s, x:\n",
21243 reg_class_names[ret], reg_class_names[rclass],
21244 GET_MODE_NAME (GET_MODE (x)));
21245 debug_rtx (x);
21247 return ret;
21250 /* If we are copying between FP or AltiVec registers and anything else, we need
21251 a memory location. The exception is when we are targeting ppc64 and the
21252 instructions to move between FPRs and GPRs are available. Also, under VSX, you
21253 can copy vector registers from the FP register set to the Altivec register
21254 set and vice versa. */
21256 static bool
21257 rs6000_secondary_memory_needed (enum reg_class from_class,
21258 enum reg_class to_class,
21259 machine_mode mode)
21261 enum rs6000_reg_type from_type, to_type;
21262 bool altivec_p = ((from_class == ALTIVEC_REGS)
21263 || (to_class == ALTIVEC_REGS));
21265 /* If a simple/direct move is available, we don't need secondary memory. */
21266 from_type = reg_class_to_reg_type[(int)from_class];
21267 to_type = reg_class_to_reg_type[(int)to_class];
21269 if (rs6000_secondary_reload_move (to_type, from_type, mode,
21270 (secondary_reload_info *)0, altivec_p))
21271 return false;
21273 /* If we have a floating point or vector register class, we need to use
21274 memory to transfer the data. */
21275 if (IS_FP_VECT_REG_TYPE (from_type) || IS_FP_VECT_REG_TYPE (to_type))
21276 return true;
21278 return false;
21281 /* Debug version of rs6000_secondary_memory_needed. */
21282 static bool
21283 rs6000_debug_secondary_memory_needed (enum reg_class from_class,
21284 enum reg_class to_class,
21285 machine_mode mode)
21287 bool ret = rs6000_secondary_memory_needed (from_class, to_class, mode);
21289 fprintf (stderr,
21290 "rs6000_secondary_memory_needed, return: %s, from_class = %s, "
21291 "to_class = %s, mode = %s\n",
21292 ret ? "true" : "false",
21293 reg_class_names[from_class],
21294 reg_class_names[to_class],
21295 GET_MODE_NAME (mode));
21297 return ret;
21300 /* Return the register class of a scratch register needed to copy IN into
21301 or out of a register in RCLASS in MODE. If it can be done directly,
21302 NO_REGS is returned. */
21304 static enum reg_class
21305 rs6000_secondary_reload_class (enum reg_class rclass, machine_mode mode,
21306 rtx in)
21308 int regno;
21310 if (TARGET_ELF || (DEFAULT_ABI == ABI_DARWIN
21311 #if TARGET_MACHO
21312 && MACHOPIC_INDIRECT
21313 #endif
21316 /* We cannot copy a symbolic operand directly into anything
21317 other than BASE_REGS for TARGET_ELF. So indicate that a
21318 register from BASE_REGS is needed as an intermediate
21319 register.
21321 On Darwin, pic addresses require a load from memory, which
21322 needs a base register. */
21323 if (rclass != BASE_REGS
21324 && (GET_CODE (in) == SYMBOL_REF
21325 || GET_CODE (in) == HIGH
21326 || GET_CODE (in) == LABEL_REF
21327 || GET_CODE (in) == CONST))
21328 return BASE_REGS;
21331 if (GET_CODE (in) == REG)
21333 regno = REGNO (in);
21334 if (regno >= FIRST_PSEUDO_REGISTER)
21336 regno = true_regnum (in);
21337 if (regno >= FIRST_PSEUDO_REGISTER)
21338 regno = -1;
21341 else if (GET_CODE (in) == SUBREG)
21343 regno = true_regnum (in);
21344 if (regno >= FIRST_PSEUDO_REGISTER)
21345 regno = -1;
21347 else
21348 regno = -1;
21350 /* If we have VSX register moves, prefer moving scalar values between
21351 Altivec registers and GPRs by going via an FPR (and then via memory)
21352 instead of reloading the secondary memory address for Altivec moves. */
21353 if (TARGET_VSX
21354 && GET_MODE_SIZE (mode) < 16
21355 && !mode_supports_vmx_dform (mode)
21356 && (((rclass == GENERAL_REGS || rclass == BASE_REGS)
21357 && (regno >= 0 && ALTIVEC_REGNO_P (regno)))
21358 || ((rclass == VSX_REGS || rclass == ALTIVEC_REGS)
21359 && (regno >= 0 && INT_REGNO_P (regno)))))
21360 return FLOAT_REGS;
21362 /* We can place anything into GENERAL_REGS and can put GENERAL_REGS
21363 into anything. */
21364 if (rclass == GENERAL_REGS || rclass == BASE_REGS
21365 || (regno >= 0 && INT_REGNO_P (regno)))
21366 return NO_REGS;
21368 /* Constants, memory, and VSX registers can go into VSX registers (both the
21369 traditional floating point and the altivec registers). */
21370 if (rclass == VSX_REGS
21371 && (regno == -1 || VSX_REGNO_P (regno)))
21372 return NO_REGS;
21374 /* Constants, memory, and FP registers can go into FP registers. */
21375 if ((regno == -1 || FP_REGNO_P (regno))
21376 && (rclass == FLOAT_REGS || rclass == NON_SPECIAL_REGS))
21377 return (mode != SDmode || lra_in_progress) ? NO_REGS : GENERAL_REGS;
21379 /* Memory and AltiVec registers can go into AltiVec registers. */
21380 if ((regno == -1 || ALTIVEC_REGNO_P (regno))
21381 && rclass == ALTIVEC_REGS)
21382 return NO_REGS;
21384 /* We can copy among the CR registers. */
21385 if ((rclass == CR_REGS || rclass == CR0_REGS)
21386 && regno >= 0 && CR_REGNO_P (regno))
21387 return NO_REGS;
21389 /* Otherwise, we need GENERAL_REGS. */
21390 return GENERAL_REGS;
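/* For example, under TARGET_ELF a SYMBOL_REF headed for FLOAT_REGS
   returns BASE_REGS above: the address is first formed in a base register
   and only then stored or loaded.  A GPR-to-GPR copy falls through to
   NO_REGS, meaning no scratch register is needed.  */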
21393 /* Debug version of rs6000_secondary_reload_class. */
21394 static enum reg_class
21395 rs6000_debug_secondary_reload_class (enum reg_class rclass,
21396 machine_mode mode, rtx in)
21398 enum reg_class ret = rs6000_secondary_reload_class (rclass, mode, in);
21399 fprintf (stderr,
21400 "\nrs6000_secondary_reload_class, return %s, rclass = %s, "
21401 "mode = %s, input rtx:\n",
21402 reg_class_names[ret], reg_class_names[rclass],
21403 GET_MODE_NAME (mode));
21404 debug_rtx (in);
21406 return ret;
21409 /* Return true if a mode change from FROM to TO is invalid for register class RCLASS. */
21411 static bool
21412 rs6000_cannot_change_mode_class (machine_mode from,
21413 machine_mode to,
21414 enum reg_class rclass)
21416 unsigned from_size = GET_MODE_SIZE (from);
21417 unsigned to_size = GET_MODE_SIZE (to);
21419 if (from_size != to_size)
21421 enum reg_class xclass = (TARGET_VSX) ? VSX_REGS : FLOAT_REGS;
21423 if (reg_classes_intersect_p (xclass, rclass))
21425 unsigned to_nregs = hard_regno_nregs[FIRST_FPR_REGNO][to];
21426 unsigned from_nregs = hard_regno_nregs[FIRST_FPR_REGNO][from];
21427 bool to_float128_vector_p = FLOAT128_VECTOR_P (to);
21428 bool from_float128_vector_p = FLOAT128_VECTOR_P (from);
21430 /* Don't allow 64-bit types to overlap with 128-bit types that take a
21431 single register under VSX because the scalar part of the register
21432 is in the upper 64-bits, and not the lower 64-bits. Types like
21433 TFmode/TDmode that take 2 scalar register can overlap. 128-bit
21434 IEEE floating point can't overlap, and neither can small
21435 values. */
21437 if (to_float128_vector_p && from_float128_vector_p)
21438 return false;
21440 else if (to_float128_vector_p || from_float128_vector_p)
21441 return true;
21443 /* TDmode in floating-point registers must always go into a register
21444 pair with the most significant word in the even-numbered register
21445 to match ISA requirements. In little-endian mode, this does not
21446 match subreg numbering, so we cannot allow subregs. */
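/* Roughly: on little-endian the subreg at byte offset 0 names the least
   significant word, but the ISA keeps that word in the odd register of the
   pair, so the mapping cannot be expressed as a subreg.  */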
21447 if (!BYTES_BIG_ENDIAN && (to == TDmode || from == TDmode))
21448 return true;
21450 if (from_size < 8 || to_size < 8)
21451 return true;
21453 if (from_size == 8 && (8 * to_nregs) != to_size)
21454 return true;
21456 if (to_size == 8 && (8 * from_nregs) != from_size)
21457 return true;
21459 return false;
21461 else
21462 return false;
21465 if (TARGET_E500_DOUBLE
21466 && ((((to) == DFmode) + ((from) == DFmode)) == 1
21467 || (((to) == TFmode) + ((from) == TFmode)) == 1
21468 || (((to) == IFmode) + ((from) == IFmode)) == 1
21469 || (((to) == KFmode) + ((from) == KFmode)) == 1
21470 || (((to) == DDmode) + ((from) == DDmode)) == 1
21471 || (((to) == TDmode) + ((from) == TDmode)) == 1
21472 || (((to) == DImode) + ((from) == DImode)) == 1))
21473 return true;
21475 /* Since the VSX register set includes traditional floating point registers
21476 and altivec registers, just check for the size being different instead of
21477 trying to check whether the modes are vector modes. Otherwise it won't
21478 allow, say, DF and DI to change classes. For types like TFmode and TDmode
21479 that take 2 64-bit registers, rather than a single 128-bit register, don't
21480 allow subregs of those types to other 128-bit types. */
21481 if (TARGET_VSX && VSX_REG_CLASS_P (rclass))
21483 unsigned num_regs = (from_size + 15) / 16;
21484 if (hard_regno_nregs[FIRST_FPR_REGNO][to] > num_regs
21485 || hard_regno_nregs[FIRST_FPR_REGNO][from] > num_regs)
21486 return true;
21488 return (from_size != 8 && from_size != 16);
21491 if (TARGET_ALTIVEC && rclass == ALTIVEC_REGS
21492 && (ALTIVEC_VECTOR_MODE (from) + ALTIVEC_VECTOR_MODE (to)) == 1)
21493 return true;
21495 if (TARGET_SPE && (SPE_VECTOR_MODE (from) + SPE_VECTOR_MODE (to)) == 1
21496 && reg_classes_intersect_p (GENERAL_REGS, rclass))
21497 return true;
21499 return false;
21502 /* Debug version of rs6000_cannot_change_mode_class. */
21503 static bool
21504 rs6000_debug_cannot_change_mode_class (machine_mode from,
21505 machine_mode to,
21506 enum reg_class rclass)
21508 bool ret = rs6000_cannot_change_mode_class (from, to, rclass);
21510 fprintf (stderr,
21511 "rs6000_cannot_change_mode_class, return %s, from = %s, "
21512 "to = %s, rclass = %s\n",
21513 ret ? "true" : "false",
21514 GET_MODE_NAME (from), GET_MODE_NAME (to),
21515 reg_class_names[rclass]);
21517 return ret;
21520 /* Return a string to do a move operation of 128 bits of data. */
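/* A returned "#" is not an instruction: it tells final that the insn must
   first be broken up by the matching define_split / define_insn_and_split
   pattern before any code is emitted.  */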
21522 const char *
21523 rs6000_output_move_128bit (rtx operands[])
21525 rtx dest = operands[0];
21526 rtx src = operands[1];
21527 machine_mode mode = GET_MODE (dest);
21528 int dest_regno;
21529 int src_regno;
21530 bool dest_gpr_p, dest_fp_p, dest_vmx_p, dest_vsx_p;
21531 bool src_gpr_p, src_fp_p, src_vmx_p, src_vsx_p;
21533 if (REG_P (dest))
21535 dest_regno = REGNO (dest);
21536 dest_gpr_p = INT_REGNO_P (dest_regno);
21537 dest_fp_p = FP_REGNO_P (dest_regno);
21538 dest_vmx_p = ALTIVEC_REGNO_P (dest_regno);
21539 dest_vsx_p = dest_fp_p | dest_vmx_p;
21541 else
21543 dest_regno = -1;
21544 dest_gpr_p = dest_fp_p = dest_vmx_p = dest_vsx_p = false;
21547 if (REG_P (src))
21549 src_regno = REGNO (src);
21550 src_gpr_p = INT_REGNO_P (src_regno);
21551 src_fp_p = FP_REGNO_P (src_regno);
21552 src_vmx_p = ALTIVEC_REGNO_P (src_regno);
21553 src_vsx_p = src_fp_p | src_vmx_p;
21555 else
21557 src_regno = -1;
21558 src_gpr_p = src_fp_p = src_vmx_p = src_vsx_p = false;
21561 /* Register moves. */
21562 if (dest_regno >= 0 && src_regno >= 0)
21564 if (dest_gpr_p)
21566 if (src_gpr_p)
21567 return "#";
21569 if (TARGET_DIRECT_MOVE_128 && src_vsx_p)
21570 return (WORDS_BIG_ENDIAN
21571 ? "mfvsrd %0,%x1\n\tmfvsrld %L0,%x1"
21572 : "mfvsrd %L0,%x1\n\tmfvsrld %0,%x1");
21574 else if (TARGET_VSX && TARGET_DIRECT_MOVE && src_vsx_p)
21575 return "#";
21578 else if (TARGET_VSX && dest_vsx_p)
21580 if (src_vsx_p)
21581 return "xxlor %x0,%x1,%x1";
21583 else if (TARGET_DIRECT_MOVE_128 && src_gpr_p)
21584 return (WORDS_BIG_ENDIAN
21585 ? "mtvsrdd %x0,%1,%L1"
21586 : "mtvsrdd %x0,%L1,%1");
21588 else if (TARGET_DIRECT_MOVE && src_gpr_p)
21589 return "#";
21592 else if (TARGET_ALTIVEC && dest_vmx_p && src_vmx_p)
21593 return "vor %0,%1,%1";
21595 else if (dest_fp_p && src_fp_p)
21596 return "#";
21599 /* Loads. */
21600 else if (dest_regno >= 0 && MEM_P (src))
21602 if (dest_gpr_p)
21604 if (TARGET_QUAD_MEMORY && quad_load_store_p (dest, src))
21605 return "lq %0,%1";
21606 else
21607 return "#";
21610 else if (TARGET_ALTIVEC && dest_vmx_p
21611 && altivec_indexed_or_indirect_operand (src, mode))
21612 return "lvx %0,%y1";
21614 else if (TARGET_VSX && dest_vsx_p)
21616 if (mode_supports_vsx_dform_quad (mode)
21617 && quad_address_p (XEXP (src, 0), mode, true))
21618 return "lxv %x0,%1";
21620 else if (TARGET_P9_VECTOR)
21621 return "lxvx %x0,%y1";
21623 else if (mode == V16QImode || mode == V8HImode || mode == V4SImode)
21624 return "lxvw4x %x0,%y1";
21626 else
21627 return "lxvd2x %x0,%y1";
21630 else if (TARGET_ALTIVEC && dest_vmx_p)
21631 return "lvx %0,%y1";
21633 else if (dest_fp_p)
21634 return "#";
21637 /* Stores. */
21638 else if (src_regno >= 0 && MEM_P (dest))
21640 if (src_gpr_p)
21642 if (TARGET_QUAD_MEMORY && quad_load_store_p (dest, src))
21643 return "stq %1,%0";
21644 else
21645 return "#";
21648 else if (TARGET_ALTIVEC && src_vmx_p
21649 && altivec_indexed_or_indirect_operand (dest, mode))
21650 return "stvx %1,%y0";
21652 else if (TARGET_VSX && src_vsx_p)
21654 if (mode_supports_vsx_dform_quad (mode)
21655 && quad_address_p (XEXP (dest, 0), mode, true))
21656 return "stxv %x1,%0";
21658 else if (TARGET_P9_VECTOR)
21659 return "stxvx %x1,%y0";
21661 else if (mode == V16QImode || mode == V8HImode || mode == V4SImode)
21662 return "stxvw4x %x1,%y0";
21664 else
21665 return "stxvd2x %x1,%y0";
21668 else if (TARGET_ALTIVEC && src_vmx_p)
21669 return "stvx %1,%y0";
21671 else if (src_fp_p)
21672 return "#";
21675 /* Constants. */
21676 else if (dest_regno >= 0
21677 && (GET_CODE (src) == CONST_INT
21678 || GET_CODE (src) == CONST_WIDE_INT
21679 || GET_CODE (src) == CONST_DOUBLE
21680 || GET_CODE (src) == CONST_VECTOR))
21682 if (dest_gpr_p)
21683 return "#";
21685 else if ((dest_vmx_p && TARGET_ALTIVEC)
21686 || (dest_vsx_p && TARGET_VSX))
21687 return output_vec_const_move (operands);
21690 fatal_insn ("Bad 128-bit move", gen_rtx_SET (dest, src));
21693 /* Validate a 128-bit move. */
21694 bool
21695 rs6000_move_128bit_ok_p (rtx operands[])
21697 machine_mode mode = GET_MODE (operands[0]);
21698 return (gpc_reg_operand (operands[0], mode)
21699 || gpc_reg_operand (operands[1], mode));
21702 /* Return true if a 128-bit move needs to be split. */
21703 bool
21704 rs6000_split_128bit_ok_p (rtx operands[])
21706 if (!reload_completed)
21707 return false;
21709 if (!gpr_or_gpr_p (operands[0], operands[1]))
21710 return false;
21712 if (quad_load_store_p (operands[0], operands[1]))
21713 return false;
21715 return true;
21719 /* Given a comparison operation, return the bit number in CCR to test. We
21720 know this is a valid comparison.
21722 SCC_P is 1 if this is for an scc. That means that %D will have been
21723 used instead of %C, so the bits will be in different places.
21725 Return -1 if OP isn't a valid comparison for some reason. */
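/* Worked example (from the switch below): a GT test on CR2 (cc_regnum ==
   CR0_REGNO + 2) gives base_bit 4 * 2 = 8, and GT is bit 1 within the
   field, so the result is 9.  A GE test on the same field returns 8 for a
   branch but 11 for an scc, since the scc sequence has cror'ed the result
   into the unordered bit.  */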
21727 int
21728 ccr_bit (rtx op, int scc_p)
21730 enum rtx_code code = GET_CODE (op);
21731 machine_mode cc_mode;
21732 int cc_regnum;
21733 int base_bit;
21734 rtx reg;
21736 if (!COMPARISON_P (op))
21737 return -1;
21739 reg = XEXP (op, 0);
21741 gcc_assert (GET_CODE (reg) == REG && CR_REGNO_P (REGNO (reg)));
21743 cc_mode = GET_MODE (reg);
21744 cc_regnum = REGNO (reg);
21745 base_bit = 4 * (cc_regnum - CR0_REGNO);
21747 validate_condition_mode (code, cc_mode);
21749 /* When generating a sCOND operation, only positive conditions are
21750 allowed. */
21751 gcc_assert (!scc_p
21752 || code == EQ || code == GT || code == LT || code == UNORDERED
21753 || code == GTU || code == LTU);
21755 switch (code)
21757 case NE:
21758 return scc_p ? base_bit + 3 : base_bit + 2;
21759 case EQ:
21760 return base_bit + 2;
21761 case GT: case GTU: case UNLE:
21762 return base_bit + 1;
21763 case LT: case LTU: case UNGE:
21764 return base_bit;
21765 case ORDERED: case UNORDERED:
21766 return base_bit + 3;
21768 case GE: case GEU:
21769 /* If scc, we will have done a cror to put the bit in the
21770 unordered position. So test that bit. For integer, this is ! LT
21771 unless this is an scc insn. */
21772 return scc_p ? base_bit + 3 : base_bit;
21774 case LE: case LEU:
21775 return scc_p ? base_bit + 3 : base_bit + 1;
21777 default:
21778 gcc_unreachable ();
21782 /* Return the GOT register. */
21784 rtx
21785 rs6000_got_register (rtx value ATTRIBUTE_UNUSED)
21787 /* The second flow pass currently (June 1999) can't update
21788 regs_ever_live without disturbing other parts of the compiler, so
21789 update it here to make the prolog/epilogue code happy. */
21790 if (!can_create_pseudo_p ()
21791 && !df_regs_ever_live_p (RS6000_PIC_OFFSET_TABLE_REGNUM))
21792 df_set_regs_ever_live (RS6000_PIC_OFFSET_TABLE_REGNUM, true);
21794 crtl->uses_pic_offset_table = 1;
21796 return pic_offset_table_rtx;
21799 static rs6000_stack_t stack_info;
21801 /* Function to init struct machine_function.
21802 This will be called, via a pointer variable,
21803 from push_function_context. */
21805 static struct machine_function *
21806 rs6000_init_machine_status (void)
21808 stack_info.reload_completed = 0;
21809 return ggc_cleared_alloc<machine_function> ();
21812 #define INT_P(X) (GET_CODE (X) == CONST_INT && GET_MODE (X) == VOIDmode)
21814 /* Write out a function code label. */
21816 void
21817 rs6000_output_function_entry (FILE *file, const char *fname)
21819 if (fname[0] != '.')
21821 switch (DEFAULT_ABI)
21823 default:
21824 gcc_unreachable ();
21826 case ABI_AIX:
21827 if (DOT_SYMBOLS)
21828 putc ('.', file);
21829 else
21830 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "L.");
21831 break;
21833 case ABI_ELFv2:
21834 case ABI_V4:
21835 case ABI_DARWIN:
21836 break;
21840 RS6000_OUTPUT_BASENAME (file, fname);
21843 /* Print an operand. Recognize special options, documented below. */
21845 #if TARGET_ELF
21846 #define SMALL_DATA_RELOC ((rs6000_sdata == SDATA_EABI) ? "sda21" : "sdarel")
21847 #define SMALL_DATA_REG ((rs6000_sdata == SDATA_EABI) ? 0 : 13)
21848 #else
21849 #define SMALL_DATA_RELOC "sda21"
21850 #define SMALL_DATA_REG 0
21851 #endif
21853 void
21854 print_operand (FILE *file, rtx x, int code)
21856 int i;
21857 unsigned HOST_WIDE_INT uval;
21859 switch (code)
21861 /* %a is output_address. */
21863 /* %c is output_addr_const if a CONSTANT_ADDRESS_P, otherwise
21864 output_operand. */
21866 case 'D':
21867 /* Like 'J' but get to the GT bit only. */
21868 gcc_assert (REG_P (x));
21870 /* Bit 1 is GT bit. */
21871 i = 4 * (REGNO (x) - CR0_REGNO) + 1;
21873 /* Add one for shift count in rlinm for scc. */
21874 fprintf (file, "%d", i + 1);
21875 return;
21877 case 'e':
21878 /* If the low 16 bits are 0, but some other bit is set, write 's'. */
21879 if (! INT_P (x))
21881 output_operand_lossage ("invalid %%e value");
21882 return;
21885 uval = INTVAL (x);
21886 if ((uval & 0xffff) == 0 && uval != 0)
21887 putc ('s', file);
21888 return;
21890 case 'E':
21891 /* X is a CR register. Print the number of the EQ bit of the CR */
21892 if (GET_CODE (x) != REG || ! CR_REGNO_P (REGNO (x)))
21893 output_operand_lossage ("invalid %%E value");
21894 else
21895 fprintf (file, "%d", 4 * (REGNO (x) - CR0_REGNO) + 2);
21896 return;
21898 case 'f':
21899 /* X is a CR register. Print the shift count needed to move it
21900 to the high-order four bits. */
21901 if (GET_CODE (x) != REG || ! CR_REGNO_P (REGNO (x)))
21902 output_operand_lossage ("invalid %%f value");
21903 else
21904 fprintf (file, "%d", 4 * (REGNO (x) - CR0_REGNO));
21905 return;
21907 case 'F':
21908 /* Similar, but print the count for the rotate in the opposite
21909 direction. */
21910 if (GET_CODE (x) != REG || ! CR_REGNO_P (REGNO (x)))
21911 output_operand_lossage ("invalid %%F value");
21912 else
21913 fprintf (file, "%d", 32 - 4 * (REGNO (x) - CR0_REGNO));
21914 return;
21916 case 'G':
21917 /* X is a constant integer. If it is negative, print "m",
21918 otherwise print "z". This is to make an aze or ame insn. */
21919 if (GET_CODE (x) != CONST_INT)
21920 output_operand_lossage ("invalid %%G value");
21921 else if (INTVAL (x) >= 0)
21922 putc ('z', file);
21923 else
21924 putc ('m', file);
21925 return;
21927 case 'h':
21928 /* If constant, output low-order five bits. Otherwise, write
21929 normally. */
21930 if (INT_P (x))
21931 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x) & 31);
21932 else
21933 print_operand (file, x, 0);
21934 return;
21936 case 'H':
21937 /* If constant, output low-order six bits. Otherwise, write
21938 normally. */
21939 if (INT_P (x))
21940 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x) & 63);
21941 else
21942 print_operand (file, x, 0);
21943 return;
21945 case 'I':
21946 /* Print `i' if this is a constant, else nothing. */
21947 if (INT_P (x))
21948 putc ('i', file);
21949 return;
21951 case 'j':
21952 /* Write the bit number in CCR for jump. */
21953 i = ccr_bit (x, 0);
21954 if (i == -1)
21955 output_operand_lossage ("invalid %%j code");
21956 else
21957 fprintf (file, "%d", i);
21958 return;
21960 case 'J':
21961 /* Similar, but add one for shift count in rlinm for scc and pass
21962 scc flag to `ccr_bit'. */
21963 i = ccr_bit (x, 1);
21964 if (i == -1)
21965 output_operand_lossage ("invalid %%J code");
21966 else
21967 /* If we want bit 31, write a shift count of zero, not 32. */
21968 fprintf (file, "%d", i == 31 ? 0 : i + 1);
21969 return;
21971 case 'k':
21972 /* X must be a constant. Write the 1's complement of the
21973 constant. */
21974 if (! INT_P (x))
21975 output_operand_lossage ("invalid %%k value");
21976 else
21977 fprintf (file, HOST_WIDE_INT_PRINT_DEC, ~ INTVAL (x));
21978 return;
21980 case 'K':
21981 /* X must be a symbolic constant on ELF. Write an
21982 expression suitable for an 'addi' that adds in the low 16
21983 bits of the MEM. */
21984 if (GET_CODE (x) == CONST)
21986 if (GET_CODE (XEXP (x, 0)) != PLUS
21987 || (GET_CODE (XEXP (XEXP (x, 0), 0)) != SYMBOL_REF
21988 && GET_CODE (XEXP (XEXP (x, 0), 0)) != LABEL_REF)
21989 || GET_CODE (XEXP (XEXP (x, 0), 1)) != CONST_INT)
21990 output_operand_lossage ("invalid %%K value");
21992 print_operand_address (file, x);
21993 fputs ("@l", file);
21994 return;
21996 /* %l is output_asm_label. */
21998 case 'L':
21999 /* Write second word of DImode or DFmode reference. Works on register
22000 or non-indexed memory only. */
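/* Together with 'Y' and 'Z' below this gives word-by-word access to a
   multi-register value: for a TImode quantity held in four consecutive
   registers, %0 names the first register, %L0 the second, %Y0 the third
   and %Z0 the fourth.  */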
22001 if (REG_P (x))
22002 fputs (reg_names[REGNO (x) + 1], file);
22003 else if (MEM_P (x))
22005 machine_mode mode = GET_MODE (x);
22006 /* Handle possible auto-increment. Since it is pre-increment and
22007 we have already done it, we can just use an offset of one word. */
22008 if (GET_CODE (XEXP (x, 0)) == PRE_INC
22009 || GET_CODE (XEXP (x, 0)) == PRE_DEC)
22010 output_address (mode, plus_constant (Pmode, XEXP (XEXP (x, 0), 0),
22011 UNITS_PER_WORD));
22012 else if (GET_CODE (XEXP (x, 0)) == PRE_MODIFY)
22013 output_address (mode, plus_constant (Pmode, XEXP (XEXP (x, 0), 0),
22014 UNITS_PER_WORD));
22015 else
22016 output_address (mode, XEXP (adjust_address_nv (x, SImode,
22017 UNITS_PER_WORD),
22018 0));
22020 if (small_data_operand (x, GET_MODE (x)))
22021 fprintf (file, "@%s(%s)", SMALL_DATA_RELOC,
22022 reg_names[SMALL_DATA_REG]);
22024 return;
22026 case 'N':
22027 /* Write the number of elements in the vector times 4. */
22028 if (GET_CODE (x) != PARALLEL)
22029 output_operand_lossage ("invalid %%N value");
22030 else
22031 fprintf (file, "%d", XVECLEN (x, 0) * 4);
22032 return;
22034 case 'O':
22035 /* Similar, but subtract 1 first. */
22036 if (GET_CODE (x) != PARALLEL)
22037 output_operand_lossage ("invalid %%O value");
22038 else
22039 fprintf (file, "%d", (XVECLEN (x, 0) - 1) * 4);
22040 return;
22042 case 'p':
22043 /* X is a CONST_INT that is a power of two. Output the logarithm. */
22044 if (! INT_P (x)
22045 || INTVAL (x) < 0
22046 || (i = exact_log2 (INTVAL (x))) < 0)
22047 output_operand_lossage ("invalid %%p value");
22048 else
22049 fprintf (file, "%d", i);
22050 return;
22052 case 'P':
22053 /* The operand must be an indirect memory reference. The result
22054 is the register name. */
22055 if (GET_CODE (x) != MEM || GET_CODE (XEXP (x, 0)) != REG
22056 || REGNO (XEXP (x, 0)) >= 32)
22057 output_operand_lossage ("invalid %%P value");
22058 else
22059 fputs (reg_names[REGNO (XEXP (x, 0))], file);
22060 return;
22062 case 'q':
22063 /* This outputs the logical code corresponding to a boolean
22064 expression. The expression may have one or both operands
22065 negated (if one, only the first one). For condition register
22066 logical operations, it will also treat the negated
22067 CR codes as NOTs, but not handle NOTs of them. */
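/* For example, (and (not a) b) prints "andc", (and (not a) (not b))
   prints "nor", and (ior (not a) (not b)) prints "nand".  */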
22069 const char *const *t = 0;
22070 const char *s;
22071 enum rtx_code code = GET_CODE (x);
22072 static const char * const tbl[3][3] = {
22073 { "and", "andc", "nor" },
22074 { "or", "orc", "nand" },
22075 { "xor", "eqv", "xor" } };
22077 if (code == AND)
22078 t = tbl[0];
22079 else if (code == IOR)
22080 t = tbl[1];
22081 else if (code == XOR)
22082 t = tbl[2];
22083 else
22084 output_operand_lossage ("invalid %%q value");
22086 if (GET_CODE (XEXP (x, 0)) != NOT)
22087 s = t[0];
22088 else
22090 if (GET_CODE (XEXP (x, 1)) == NOT)
22091 s = t[2];
22092 else
22093 s = t[1];
22096 fputs (s, file);
22098 return;
22100 case 'Q':
22101 if (! TARGET_MFCRF)
22102 return;
22103 fputc (',', file);
22104 /* FALLTHRU */
22106 case 'R':
22107 /* X is a CR register. Print the mask for `mtcrf'. */
22108 if (GET_CODE (x) != REG || ! CR_REGNO_P (REGNO (x)))
22109 output_operand_lossage ("invalid %%R value");
22110 else
22111 fprintf (file, "%d", 128 >> (REGNO (x) - CR0_REGNO));
22112 return;
22114 case 's':
22115 /* Low 5 bits of 32 - value */
22116 if (! INT_P (x))
22117 output_operand_lossage ("invalid %%s value");
22118 else
22119 fprintf (file, HOST_WIDE_INT_PRINT_DEC, (32 - INTVAL (x)) & 31);
22120 return;
22122 case 't':
22123 /* Like 'J' but get to the OVERFLOW/UNORDERED bit. */
22124 gcc_assert (REG_P (x) && GET_MODE (x) == CCmode);
22126 /* Bit 3 is OV bit. */
22127 i = 4 * (REGNO (x) - CR0_REGNO) + 3;
22129 /* If we want bit 31, write a shift count of zero, not 32. */
22130 fprintf (file, "%d", i == 31 ? 0 : i + 1);
22131 return;
22133 case 'T':
22134 /* Print the symbolic name of a branch target register. */
22135 if (GET_CODE (x) != REG || (REGNO (x) != LR_REGNO
22136 && REGNO (x) != CTR_REGNO))
22137 output_operand_lossage ("invalid %%T value");
22138 else if (REGNO (x) == LR_REGNO)
22139 fputs ("lr", file);
22140 else
22141 fputs ("ctr", file);
22142 return;
22144 case 'u':
22145 /* High-order or low-order 16 bits of constant, whichever is non-zero,
22146 for use in unsigned operand. */
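/* For example, 0x12340000 prints as 0x1234, while 0x1234 prints
   unchanged.  */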
22147 if (! INT_P (x))
22149 output_operand_lossage ("invalid %%u value");
22150 return;
22153 uval = INTVAL (x);
22154 if ((uval & 0xffff) == 0)
22155 uval >>= 16;
22157 fprintf (file, HOST_WIDE_INT_PRINT_HEX, uval & 0xffff);
22158 return;
22160 case 'v':
22161 /* High-order 16 bits of constant for use in signed operand. */
22162 if (! INT_P (x))
22163 output_operand_lossage ("invalid %%v value");
22164 else
22165 fprintf (file, HOST_WIDE_INT_PRINT_HEX,
22166 (INTVAL (x) >> 16) & 0xffff);
22167 return;
22169 case 'U':
22170 /* Print `u' if this has an auto-increment or auto-decrement. */
22171 if (MEM_P (x)
22172 && (GET_CODE (XEXP (x, 0)) == PRE_INC
22173 || GET_CODE (XEXP (x, 0)) == PRE_DEC
22174 || GET_CODE (XEXP (x, 0)) == PRE_MODIFY))
22175 putc ('u', file);
22176 return;
22178 case 'V':
22179 /* Print the trap code for this operand. */
22180 switch (GET_CODE (x))
22182 case EQ:
22183 fputs ("eq", file); /* 4 */
22184 break;
22185 case NE:
22186 fputs ("ne", file); /* 24 */
22187 break;
22188 case LT:
22189 fputs ("lt", file); /* 16 */
22190 break;
22191 case LE:
22192 fputs ("le", file); /* 20 */
22193 break;
22194 case GT:
22195 fputs ("gt", file); /* 8 */
22196 break;
22197 case GE:
22198 fputs ("ge", file); /* 12 */
22199 break;
22200 case LTU:
22201 fputs ("llt", file); /* 2 */
22202 break;
22203 case LEU:
22204 fputs ("lle", file); /* 6 */
22205 break;
22206 case GTU:
22207 fputs ("lgt", file); /* 1 */
22208 break;
22209 case GEU:
22210 fputs ("lge", file); /* 5 */
22211 break;
22212 default:
22213 gcc_unreachable ();
22215 break;
22217 case 'w':
22218 /* If constant, low-order 16 bits of constant, signed. Otherwise, write
22219 normally. */
22220 if (INT_P (x))
22221 fprintf (file, HOST_WIDE_INT_PRINT_DEC,
22222 ((INTVAL (x) & 0xffff) ^ 0x8000) - 0x8000);
22223 else
22224 print_operand (file, x, 0);
22225 return;
22227 case 'x':
22228 /* X is a FPR or Altivec register used in a VSX context. */
22229 if (GET_CODE (x) != REG || !VSX_REGNO_P (REGNO (x)))
22230 output_operand_lossage ("invalid %%x value");
22231 else
22233 int reg = REGNO (x);
22234 int vsx_reg = (FP_REGNO_P (reg)
22235 ? reg - 32
22236 : reg - FIRST_ALTIVEC_REGNO + 32);
22238 #ifdef TARGET_REGNAMES
22239 if (TARGET_REGNAMES)
22240 fprintf (file, "%%vs%d", vsx_reg);
22241 else
22242 #endif
22243 fprintf (file, "%d", vsx_reg);
22245 return;
22247 case 'X':
22248 if (MEM_P (x)
22249 && (legitimate_indexed_address_p (XEXP (x, 0), 0)
22250 || (GET_CODE (XEXP (x, 0)) == PRE_MODIFY
22251 && legitimate_indexed_address_p (XEXP (XEXP (x, 0), 1), 0))))
22252 putc ('x', file);
22253 return;
22255 case 'Y':
22256 /* Like 'L', for third word of TImode/PTImode */
22257 if (REG_P (x))
22258 fputs (reg_names[REGNO (x) + 2], file);
22259 else if (MEM_P (x))
22261 machine_mode mode = GET_MODE (x);
22262 if (GET_CODE (XEXP (x, 0)) == PRE_INC
22263 || GET_CODE (XEXP (x, 0)) == PRE_DEC)
22264 output_address (mode, plus_constant (Pmode,
22265 XEXP (XEXP (x, 0), 0), 8));
22266 else if (GET_CODE (XEXP (x, 0)) == PRE_MODIFY)
22267 output_address (mode, plus_constant (Pmode,
22268 XEXP (XEXP (x, 0), 0), 8));
22269 else
22270 output_address (mode, XEXP (adjust_address_nv (x, SImode, 8), 0));
22271 if (small_data_operand (x, GET_MODE (x)))
22272 fprintf (file, "@%s(%s)", SMALL_DATA_RELOC,
22273 reg_names[SMALL_DATA_REG]);
22275 return;
22277 case 'z':
22278 /* X is a SYMBOL_REF. Write out the name preceded by a
22279 period and without any trailing data in brackets. Used for function
22280 names. If we are configured for System V (or the embedded ABI) on
22281 the PowerPC, do not emit the period, since those systems do not use
22282 TOCs and the like. */
22283 gcc_assert (GET_CODE (x) == SYMBOL_REF);
22285 /* For macho, check to see if we need a stub. */
22286 if (TARGET_MACHO)
22288 const char *name = XSTR (x, 0);
22289 #if TARGET_MACHO
22290 if (darwin_emit_branch_islands
22291 && MACHOPIC_INDIRECT
22292 && machopic_classify_symbol (x) == MACHOPIC_UNDEFINED_FUNCTION)
22293 name = machopic_indirection_name (x, /*stub_p=*/true);
22294 #endif
22295 assemble_name (file, name);
22297 else if (!DOT_SYMBOLS)
22298 assemble_name (file, XSTR (x, 0));
22299 else
22300 rs6000_output_function_entry (file, XSTR (x, 0));
22301 return;
22303 case 'Z':
22304 /* Like 'L', for last word of TImode/PTImode. */
22305 if (REG_P (x))
22306 fputs (reg_names[REGNO (x) + 3], file);
22307 else if (MEM_P (x))
22309 machine_mode mode = GET_MODE (x);
22310 if (GET_CODE (XEXP (x, 0)) == PRE_INC
22311 || GET_CODE (XEXP (x, 0)) == PRE_DEC)
22312 output_address (mode, plus_constant (Pmode,
22313 XEXP (XEXP (x, 0), 0), 12));
22314 else if (GET_CODE (XEXP (x, 0)) == PRE_MODIFY)
22315 output_address (mode, plus_constant (Pmode,
22316 XEXP (XEXP (x, 0), 0), 12));
22317 else
22318 output_address (mode, XEXP (adjust_address_nv (x, SImode, 12), 0));
22319 if (small_data_operand (x, GET_MODE (x)))
22320 fprintf (file, "@%s(%s)", SMALL_DATA_RELOC,
22321 reg_names[SMALL_DATA_REG]);
22323 return;
22325 /* Print AltiVec or SPE memory operand. */
22326 case 'y':
22328 rtx tmp;
22330 gcc_assert (MEM_P (x));
22332 tmp = XEXP (x, 0);
22334 /* Ugly hack because %y is overloaded. */
22335 if ((TARGET_SPE || TARGET_E500_DOUBLE)
22336 && (GET_MODE_SIZE (GET_MODE (x)) == 8
22337 || FLOAT128_2REG_P (GET_MODE (x))
22338 || GET_MODE (x) == TImode
22339 || GET_MODE (x) == PTImode))
22341 /* Handle [reg]. */
22342 if (REG_P (tmp))
22344 fprintf (file, "0(%s)", reg_names[REGNO (tmp)]);
22345 break;
22347 /* Handle [reg+UIMM]. */
22348 else if (GET_CODE (tmp) == PLUS &&
22349 GET_CODE (XEXP (tmp, 1)) == CONST_INT)
22351 int x;
22353 gcc_assert (REG_P (XEXP (tmp, 0)));
22355 x = INTVAL (XEXP (tmp, 1));
22356 fprintf (file, "%d(%s)", x, reg_names[REGNO (XEXP (tmp, 0))]);
22357 break;
22360 /* Fall through. Must be [reg+reg]. */
22362 if (VECTOR_MEM_ALTIVEC_P (GET_MODE (x))
22363 && GET_CODE (tmp) == AND
22364 && GET_CODE (XEXP (tmp, 1)) == CONST_INT
22365 && INTVAL (XEXP (tmp, 1)) == -16)
22366 tmp = XEXP (tmp, 0);
22367 else if (VECTOR_MEM_VSX_P (GET_MODE (x))
22368 && GET_CODE (tmp) == PRE_MODIFY)
22369 tmp = XEXP (tmp, 1);
22370 if (REG_P (tmp))
22371 fprintf (file, "0,%s", reg_names[REGNO (tmp)]);
22372 else
22374 if (GET_CODE (tmp) != PLUS
22375 || !REG_P (XEXP (tmp, 0))
22376 || !REG_P (XEXP (tmp, 1)))
22378 output_operand_lossage ("invalid %%y value, try using the 'Z' constraint");
22379 break;
22382 if (REGNO (XEXP (tmp, 0)) == 0)
22383 fprintf (file, "%s,%s", reg_names[ REGNO (XEXP (tmp, 1)) ],
22384 reg_names[ REGNO (XEXP (tmp, 0)) ]);
22385 else
22386 fprintf (file, "%s,%s", reg_names[ REGNO (XEXP (tmp, 0)) ],
22387 reg_names[ REGNO (XEXP (tmp, 1)) ]);
22389 break;
22392 case 0:
22393 if (REG_P (x))
22394 fprintf (file, "%s", reg_names[REGNO (x)]);
22395 else if (MEM_P (x))
22397 /* We need to handle PRE_INC and PRE_DEC here, since we need to
22398 know the width from the mode. */
22399 if (GET_CODE (XEXP (x, 0)) == PRE_INC)
22400 fprintf (file, "%d(%s)", GET_MODE_SIZE (GET_MODE (x)),
22401 reg_names[REGNO (XEXP (XEXP (x, 0), 0))]);
22402 else if (GET_CODE (XEXP (x, 0)) == PRE_DEC)
22403 fprintf (file, "%d(%s)", - GET_MODE_SIZE (GET_MODE (x)),
22404 reg_names[REGNO (XEXP (XEXP (x, 0), 0))]);
22405 else if (GET_CODE (XEXP (x, 0)) == PRE_MODIFY)
22406 output_address (GET_MODE (x), XEXP (XEXP (x, 0), 1));
22407 else
22408 output_address (GET_MODE (x), XEXP (x, 0));
22410 else
22412 if (toc_relative_expr_p (x, false))
22413 /* This hack along with a corresponding hack in
22414 rs6000_output_addr_const_extra arranges to output addends
22415 where the assembler expects to find them, e.g.
22416 (plus (unspec [(symbol_ref ("x")) (reg 2)] tocrel) 4)
22417 without this hack would be output as "x@toc+4". We
22418 want "x+4@toc". */
22419 output_addr_const (file, CONST_CAST_RTX (tocrel_base));
22420 else
22421 output_addr_const (file, x);
22423 return;
22425 case '&':
22426 if (const char *name = get_some_local_dynamic_name ())
22427 assemble_name (file, name);
22428 else
22429 output_operand_lossage ("'%%&' used without any "
22430 "local dynamic TLS references");
22431 return;
22433 default:
22434 output_operand_lossage ("invalid %%xn code");
22438 /* Print the address of an operand. */
22440 void
22441 print_operand_address (FILE *file, rtx x)
22443 if (REG_P (x))
22444 fprintf (file, "0(%s)", reg_names[ REGNO (x) ]);
22445 else if (GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == CONST
22446 || GET_CODE (x) == LABEL_REF)
22448 output_addr_const (file, x);
22449 if (small_data_operand (x, GET_MODE (x)))
22450 fprintf (file, "@%s(%s)", SMALL_DATA_RELOC,
22451 reg_names[SMALL_DATA_REG]);
22452 else
22453 gcc_assert (!TARGET_TOC);
22455 else if (GET_CODE (x) == PLUS && REG_P (XEXP (x, 0))
22456 && REG_P (XEXP (x, 1)))
22458 if (REGNO (XEXP (x, 0)) == 0)
22459 fprintf (file, "%s,%s", reg_names[ REGNO (XEXP (x, 1)) ],
22460 reg_names[ REGNO (XEXP (x, 0)) ]);
22461 else
22462 fprintf (file, "%s,%s", reg_names[ REGNO (XEXP (x, 0)) ],
22463 reg_names[ REGNO (XEXP (x, 1)) ]);
22465 else if (GET_CODE (x) == PLUS && REG_P (XEXP (x, 0))
22466 && GET_CODE (XEXP (x, 1)) == CONST_INT)
22467 fprintf (file, HOST_WIDE_INT_PRINT_DEC "(%s)",
22468 INTVAL (XEXP (x, 1)), reg_names[ REGNO (XEXP (x, 0)) ]);
22469 #if TARGET_MACHO
22470 else if (GET_CODE (x) == LO_SUM && REG_P (XEXP (x, 0))
22471 && CONSTANT_P (XEXP (x, 1)))
22473 fprintf (file, "lo16(");
22474 output_addr_const (file, XEXP (x, 1));
22475 fprintf (file, ")(%s)", reg_names[ REGNO (XEXP (x, 0)) ]);
22477 #endif
22478 #if TARGET_ELF
22479 else if (GET_CODE (x) == LO_SUM && REG_P (XEXP (x, 0))
22480 && CONSTANT_P (XEXP (x, 1)))
22482 output_addr_const (file, XEXP (x, 1));
22483 fprintf (file, "@l(%s)", reg_names[ REGNO (XEXP (x, 0)) ]);
22485 #endif
22486 else if (toc_relative_expr_p (x, false))
22488 /* This hack along with a corresponding hack in
22489 rs6000_output_addr_const_extra arranges to output addends
22490 where the assembler expects to find them, e.g.
22491 (lo_sum (reg 9)
22492 . (plus (unspec [(symbol_ref ("x")) (reg 2)] tocrel) 8))
22493 without this hack would be output as "x@toc+8@l(9)". We
22494 want "x+8@toc@l(9)". */
22495 output_addr_const (file, CONST_CAST_RTX (tocrel_base));
22496 if (GET_CODE (x) == LO_SUM)
22497 fprintf (file, "@l(%s)", reg_names[REGNO (XEXP (x, 0))]);
22498 else
22499 fprintf (file, "(%s)", reg_names[REGNO (XVECEXP (tocrel_base, 0, 1))]);
22501 else
22502 gcc_unreachable ();
22505 /* Implement TARGET_OUTPUT_ADDR_CONST_EXTRA. */
22507 static bool
22508 rs6000_output_addr_const_extra (FILE *file, rtx x)
22510 if (GET_CODE (x) == UNSPEC)
22511 switch (XINT (x, 1))
22513 case UNSPEC_TOCREL:
22514 gcc_checking_assert (GET_CODE (XVECEXP (x, 0, 0)) == SYMBOL_REF
22515 && REG_P (XVECEXP (x, 0, 1))
22516 && REGNO (XVECEXP (x, 0, 1)) == TOC_REGISTER);
22517 output_addr_const (file, XVECEXP (x, 0, 0));
22518 if (x == tocrel_base && tocrel_offset != const0_rtx)
22520 if (INTVAL (tocrel_offset) >= 0)
22521 fprintf (file, "+");
22522 output_addr_const (file, CONST_CAST_RTX (tocrel_offset));
22524 if (!TARGET_AIX || (TARGET_ELF && TARGET_MINIMAL_TOC))
22526 putc ('-', file);
22527 assemble_name (file, toc_label_name);
22528 need_toc_init = 1;
22530 else if (TARGET_ELF)
22531 fputs ("@toc", file);
22532 return true;
22534 #if TARGET_MACHO
22535 case UNSPEC_MACHOPIC_OFFSET:
22536 output_addr_const (file, XVECEXP (x, 0, 0));
22537 putc ('-', file);
22538 machopic_output_function_base_name (file);
22539 return true;
22540 #endif
22542 return false;
22545 /* Target hook for assembling integer objects. The PowerPC version has
22546 to handle fixup entries for relocatable code if RELOCATABLE_NEEDS_FIXUP
22547 is defined. It also needs to handle DI-mode objects on 64-bit
22548 targets. */
22550 static bool
22551 rs6000_assemble_integer (rtx x, unsigned int size, int aligned_p)
22553 #ifdef RELOCATABLE_NEEDS_FIXUP
22554 /* Special handling for SI values. */
22555 if (RELOCATABLE_NEEDS_FIXUP && size == 4 && aligned_p)
22557 static int recurse = 0;
22559 /* For -mrelocatable, we mark all addresses that need to be fixed up in
22560 the .fixup section. Since the TOC section is already relocated, we
22561 don't need to mark it here. We used to skip the text section, but it
22562 should never be valid for relocated addresses to be placed in the text
22563 section. */
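/* For a constant SYM the code below emits, roughly:

	.LCP<n>:
		.long (SYM)@fixup
		.section ".fixup","aw"
		.align 2
		.long .LCP<n>
		.previous

   where <n> is the running fixup label number.  */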
22564 if (DEFAULT_ABI == ABI_V4
22565 && (TARGET_RELOCATABLE || flag_pic > 1)
22566 && in_section != toc_section
22567 && !recurse
22568 && !CONST_SCALAR_INT_P (x)
22569 && CONSTANT_P (x))
22571 char buf[256];
22573 recurse = 1;
22574 ASM_GENERATE_INTERNAL_LABEL (buf, "LCP", fixuplabelno);
22575 fixuplabelno++;
22576 ASM_OUTPUT_LABEL (asm_out_file, buf);
22577 fprintf (asm_out_file, "\t.long\t(");
22578 output_addr_const (asm_out_file, x);
22579 fprintf (asm_out_file, ")@fixup\n");
22580 fprintf (asm_out_file, "\t.section\t\".fixup\",\"aw\"\n");
22581 ASM_OUTPUT_ALIGN (asm_out_file, 2);
22582 fprintf (asm_out_file, "\t.long\t");
22583 assemble_name (asm_out_file, buf);
22584 fprintf (asm_out_file, "\n\t.previous\n");
22585 recurse = 0;
22586 return true;
22588 /* Remove initial .'s to turn a -mcall-aixdesc function
22589 address into the address of the descriptor, not the function
22590 itself. */
22591 else if (GET_CODE (x) == SYMBOL_REF
22592 && XSTR (x, 0)[0] == '.'
22593 && DEFAULT_ABI == ABI_AIX)
22595 const char *name = XSTR (x, 0);
22596 while (*name == '.')
22597 name++;
22599 fprintf (asm_out_file, "\t.long\t%s\n", name);
22600 return true;
22603 #endif /* RELOCATABLE_NEEDS_FIXUP */
22604 return default_assemble_integer (x, size, aligned_p);
22607 #if defined (HAVE_GAS_HIDDEN) && !TARGET_MACHO
22608 /* Emit an assembler directive to set symbol visibility for DECL to
22609 VISIBILITY_TYPE. */
22611 static void
22612 rs6000_assemble_visibility (tree decl, int vis)
22614 if (TARGET_XCOFF)
22615 return;
22617 /* Functions need to have their entry point symbol visibility set as
22618 well as their descriptor symbol visibility. */
22619 if (DEFAULT_ABI == ABI_AIX
22620 && DOT_SYMBOLS
22621 && TREE_CODE (decl) == FUNCTION_DECL)
22623 static const char * const visibility_types[] = {
22624 NULL, "internal", "hidden", "protected"
22627 const char *name, *type;
22629 name = ((* targetm.strip_name_encoding)
22630 (IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl))));
22631 type = visibility_types[vis];
22633 fprintf (asm_out_file, "\t.%s\t%s\n", type, name);
22634 fprintf (asm_out_file, "\t.%s\t.%s\n", type, name);
22636 else
22637 default_assemble_visibility (decl, vis);
22639 #endif
22641 enum rtx_code
22642 rs6000_reverse_condition (machine_mode mode, enum rtx_code code)
22644 /* Reversal of FP compares needs care -- an ordered compare
22645 becomes an unordered compare and vice versa. */
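/* For example, (ge ...) under CCFPmode reverses to (unlt ...) rather than
   (lt ...), so the reversed test remains true when an operand is a NaN.  */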
22646 if (mode == CCFPmode
22647 && (!flag_finite_math_only
22648 || code == UNLT || code == UNLE || code == UNGT || code == UNGE
22649 || code == UNEQ || code == LTGT))
22650 return reverse_condition_maybe_unordered (code);
22651 else
22652 return reverse_condition (code);
22655 /* Generate a compare for CODE. Return a brand-new rtx that
22656 represents the result of the compare. */
22658 static rtx
22659 rs6000_generate_compare (rtx cmp, machine_mode mode)
22661 machine_mode comp_mode;
22662 rtx compare_result;
22663 enum rtx_code code = GET_CODE (cmp);
22664 rtx op0 = XEXP (cmp, 0);
22665 rtx op1 = XEXP (cmp, 1);
22667 if (!TARGET_FLOAT128_HW && FLOAT128_VECTOR_P (mode))
22668 comp_mode = CCmode;
22669 else if (FLOAT_MODE_P (mode))
22670 comp_mode = CCFPmode;
22671 else if (code == GTU || code == LTU
22672 || code == GEU || code == LEU)
22673 comp_mode = CCUNSmode;
22674 else if ((code == EQ || code == NE)
22675 && unsigned_reg_p (op0)
22676 && (unsigned_reg_p (op1)
22677 || (CONST_INT_P (op1) && INTVAL (op1) != 0)))
22678 /* These are unsigned values, perhaps there will be a later
22679 ordering compare that can be shared with this one. */
22680 comp_mode = CCUNSmode;
22681 else
22682 comp_mode = CCmode;
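/* For example, (gtu r3 r4) selects CCUNSmode above, which is what later
   makes the comparison come out as cmpl[dw] rather than cmp[dw].  */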
22684 /* If we have an unsigned compare, make sure we don't have a signed value as
22685 an immediate. */
22686 if (comp_mode == CCUNSmode && GET_CODE (op1) == CONST_INT
22687 && INTVAL (op1) < 0)
22689 op0 = copy_rtx_if_shared (op0);
22690 op1 = force_reg (GET_MODE (op0), op1);
22691 cmp = gen_rtx_fmt_ee (code, GET_MODE (cmp), op0, op1);
22694 /* First, the compare. */
22695 compare_result = gen_reg_rtx (comp_mode);
22697 /* E500 FP compare instructions on the GPRs. Yuck! */
22698 if ((!TARGET_FPRS && TARGET_HARD_FLOAT)
22699 && FLOAT_MODE_P (mode))
22701 rtx cmp, or_result, compare_result2;
22702 machine_mode op_mode = GET_MODE (op0);
22703 bool reverse_p;
22705 if (op_mode == VOIDmode)
22706 op_mode = GET_MODE (op1);
22708 /* First reverse the condition codes that aren't directly supported. */
22709 switch (code)
22711 case NE:
22712 case UNLT:
22713 case UNLE:
22714 case UNGT:
22715 case UNGE:
22716 code = reverse_condition_maybe_unordered (code);
22717 reverse_p = true;
22718 break;
22720 case EQ:
22721 case LT:
22722 case LE:
22723 case GT:
22724 case GE:
22725 reverse_p = false;
22726 break;
22728 default:
22729 gcc_unreachable ();
22732 /* The E500 FP compare instructions toggle the GT bit (CR bit 1) only.
22733 This explains the following mess. */
22735 switch (code)
22737 case EQ:
22738 switch (op_mode)
22740 case SFmode:
22741 cmp = (flag_finite_math_only && !flag_trapping_math)
22742 ? gen_tstsfeq_gpr (compare_result, op0, op1)
22743 : gen_cmpsfeq_gpr (compare_result, op0, op1);
22744 break;
22746 case DFmode:
22747 cmp = (flag_finite_math_only && !flag_trapping_math)
22748 ? gen_tstdfeq_gpr (compare_result, op0, op1)
22749 : gen_cmpdfeq_gpr (compare_result, op0, op1);
22750 break;
22752 case TFmode:
22753 case IFmode:
22754 case KFmode:
22755 cmp = (flag_finite_math_only && !flag_trapping_math)
22756 ? gen_tsttfeq_gpr (compare_result, op0, op1)
22757 : gen_cmptfeq_gpr (compare_result, op0, op1);
22758 break;
22760 default:
22761 gcc_unreachable ();
22763 break;
22765 case GT:
22766 case GE:
22767 switch (op_mode)
22769 case SFmode:
22770 cmp = (flag_finite_math_only && !flag_trapping_math)
22771 ? gen_tstsfgt_gpr (compare_result, op0, op1)
22772 : gen_cmpsfgt_gpr (compare_result, op0, op1);
22773 break;
22775 case DFmode:
22776 cmp = (flag_finite_math_only && !flag_trapping_math)
22777 ? gen_tstdfgt_gpr (compare_result, op0, op1)
22778 : gen_cmpdfgt_gpr (compare_result, op0, op1);
22779 break;
22781 case TFmode:
22782 case IFmode:
22783 case KFmode:
22784 cmp = (flag_finite_math_only && !flag_trapping_math)
22785 ? gen_tsttfgt_gpr (compare_result, op0, op1)
22786 : gen_cmptfgt_gpr (compare_result, op0, op1);
22787 break;
22789 default:
22790 gcc_unreachable ();
22792 break;
22794 case LT:
22795 case LE:
22796 switch (op_mode)
22798 case SFmode:
22799 cmp = (flag_finite_math_only && !flag_trapping_math)
22800 ? gen_tstsflt_gpr (compare_result, op0, op1)
22801 : gen_cmpsflt_gpr (compare_result, op0, op1);
22802 break;
22804 case DFmode:
22805 cmp = (flag_finite_math_only && !flag_trapping_math)
22806 ? gen_tstdflt_gpr (compare_result, op0, op1)
22807 : gen_cmpdflt_gpr (compare_result, op0, op1);
22808 break;
22810 case TFmode:
22811 case IFmode:
22812 case KFmode:
22813 cmp = (flag_finite_math_only && !flag_trapping_math)
22814 ? gen_tsttflt_gpr (compare_result, op0, op1)
22815 : gen_cmptflt_gpr (compare_result, op0, op1);
22816 break;
22818 default:
22819 gcc_unreachable ();
22821 break;
22823 default:
22824 gcc_unreachable ();
22827 /* Synthesize LE and GE from LT/GT || EQ. */
22828 if (code == LE || code == GE)
22830 emit_insn (cmp);
22832 compare_result2 = gen_reg_rtx (CCFPmode);
22834 /* Do the EQ. */
22835 switch (op_mode)
22837 case SFmode:
22838 cmp = (flag_finite_math_only && !flag_trapping_math)
22839 ? gen_tstsfeq_gpr (compare_result2, op0, op1)
22840 : gen_cmpsfeq_gpr (compare_result2, op0, op1);
22841 break;
22843 case DFmode:
22844 cmp = (flag_finite_math_only && !flag_trapping_math)
22845 ? gen_tstdfeq_gpr (compare_result2, op0, op1)
22846 : gen_cmpdfeq_gpr (compare_result2, op0, op1);
22847 break;
22849 case TFmode:
22850 case IFmode:
22851 case KFmode:
22852 cmp = (flag_finite_math_only && !flag_trapping_math)
22853 ? gen_tsttfeq_gpr (compare_result2, op0, op1)
22854 : gen_cmptfeq_gpr (compare_result2, op0, op1);
22855 break;
22857 default:
22858 gcc_unreachable ();
22861 emit_insn (cmp);
22863 /* OR them together. */
22864 or_result = gen_reg_rtx (CCFPmode);
22865 cmp = gen_e500_cr_ior_compare (or_result, compare_result,
22866 compare_result2);
22867 compare_result = or_result;
22870 code = reverse_p ? NE : EQ;
22872 emit_insn (cmp);
22875 /* IEEE 128-bit support in VSX registers when we do not have hardware
22876 support. */
22877 else if (!TARGET_FLOAT128_HW && FLOAT128_VECTOR_P (mode))
22879 rtx libfunc = NULL_RTX;
22880 bool check_nan = false;
22881 rtx dest;
22883 switch (code)
22885 case EQ:
22886 case NE:
22887 libfunc = optab_libfunc (eq_optab, mode);
22888 break;
22890 case GT:
22891 case GE:
22892 libfunc = optab_libfunc (ge_optab, mode);
22893 break;
22895 case LT:
22896 case LE:
22897 libfunc = optab_libfunc (le_optab, mode);
22898 break;
22900 case UNORDERED:
22901 case ORDERED:
22902 libfunc = optab_libfunc (unord_optab, mode);
22903 code = (code == UNORDERED) ? NE : EQ;
22904 break;
22906 case UNGE:
22907 case UNGT:
22908 check_nan = true;
22909 libfunc = optab_libfunc (ge_optab, mode);
22910 code = (code == UNGE) ? GE : GT;
22911 break;
22913 case UNLE:
22914 case UNLT:
22915 check_nan = true;
22916 libfunc = optab_libfunc (le_optab, mode);
22917 code = (code == UNLE) ? LE : LT;
22918 break;
22920 case UNEQ:
22921 case LTGT:
22922 check_nan = true;
22923 libfunc = optab_libfunc (eq_optab, mode);
22924 code = (code == UNEQ) ? EQ : NE;
22925 break;
22927 default:
22928 gcc_unreachable ();
22931 gcc_assert (libfunc);
22933 if (!check_nan)
22934 dest = emit_library_call_value (libfunc, NULL_RTX, LCT_CONST,
22935 SImode, 2, op0, mode, op1, mode);
22937 /* The library signals an exception for signalling NaNs, so we need to
22938 handle isgreater, etc. by first checking isordered. */
22939 else
22941 rtx ne_rtx, normal_dest, unord_dest;
22942 rtx unord_func = optab_libfunc (unord_optab, mode);
22943 rtx join_label = gen_label_rtx ();
22944 rtx join_ref = gen_rtx_LABEL_REF (VOIDmode, join_label);
22945 rtx unord_cmp = gen_reg_rtx (comp_mode);
22948 /* Test for either value being a NaN. */
22949 gcc_assert (unord_func);
22950 unord_dest = emit_library_call_value (unord_func, NULL_RTX, LCT_CONST,
22951 SImode, 2, op0, mode, op1,
22952 mode);
22954 /* Set value (1) if either value is a NaN, and jump to the join
22955 label. */
22956 dest = gen_reg_rtx (SImode);
22957 emit_move_insn (dest, const1_rtx);
22958 emit_insn (gen_rtx_SET (unord_cmp,
22959 gen_rtx_COMPARE (comp_mode, unord_dest,
22960 const0_rtx)));
22962 ne_rtx = gen_rtx_NE (comp_mode, unord_cmp, const0_rtx);
22963 emit_jump_insn (gen_rtx_SET (pc_rtx,
22964 gen_rtx_IF_THEN_ELSE (VOIDmode, ne_rtx,
22965 join_ref,
22966 pc_rtx)));
22968 /* Do the normal comparison, knowing that the values are not
22969 NaNs. */
22970 normal_dest = emit_library_call_value (libfunc, NULL_RTX, LCT_CONST,
22971 SImode, 2, op0, mode, op1,
22972 mode);
22974 emit_insn (gen_cstoresi4 (dest,
22975 gen_rtx_fmt_ee (code, SImode, normal_dest,
22976 const0_rtx),
22977 normal_dest, const0_rtx));
22979 /* Join the NaN and non-NaN paths. Compare dest against 0. */
22980 emit_label (join_label);
22981 code = NE;
22984 emit_insn (gen_rtx_SET (compare_result,
22985 gen_rtx_COMPARE (comp_mode, dest, const0_rtx)));
22988 else
22990 /* Generate XLC-compatible TFmode compare as PARALLEL with extra
22991 CLOBBERs to match cmptf_internal2 pattern. */
22992 if (comp_mode == CCFPmode && TARGET_XL_COMPAT
22993 && FLOAT128_IBM_P (GET_MODE (op0))
22994 && TARGET_HARD_FLOAT && TARGET_FPRS)
22995 emit_insn (gen_rtx_PARALLEL (VOIDmode,
22996 gen_rtvec (10,
22997 gen_rtx_SET (compare_result,
22998 gen_rtx_COMPARE (comp_mode, op0, op1)),
22999 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
23000 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
23001 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
23002 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
23003 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
23004 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
23005 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
23006 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
23007 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (Pmode)))));
23008 else if (GET_CODE (op1) == UNSPEC
23009 && XINT (op1, 1) == UNSPEC_SP_TEST)
23011 rtx op1b = XVECEXP (op1, 0, 0);
23012 comp_mode = CCEQmode;
23013 compare_result = gen_reg_rtx (CCEQmode);
23014 if (TARGET_64BIT)
23015 emit_insn (gen_stack_protect_testdi (compare_result, op0, op1b));
23016 else
23017 emit_insn (gen_stack_protect_testsi (compare_result, op0, op1b));
23019 else
23020 emit_insn (gen_rtx_SET (compare_result,
23021 gen_rtx_COMPARE (comp_mode, op0, op1)));
23024 /* Some kinds of FP comparisons need an OR operation;
23025 under flag_finite_math_only we don't bother. */
23026 if (FLOAT_MODE_P (mode)
23027 && (!FLOAT128_IEEE_P (mode) || TARGET_FLOAT128_HW)
23028 && !flag_finite_math_only
23029 && !(TARGET_HARD_FLOAT && !TARGET_FPRS)
23030 && (code == LE || code == GE
23031 || code == UNEQ || code == LTGT
23032 || code == UNGT || code == UNLT))
23034 enum rtx_code or1, or2;
23035 rtx or1_rtx, or2_rtx, compare2_rtx;
23036 rtx or_result = gen_reg_rtx (CCEQmode);
23038 switch (code)
23040 case LE: or1 = LT; or2 = EQ; break;
23041 case GE: or1 = GT; or2 = EQ; break;
23042 case UNEQ: or1 = UNORDERED; or2 = EQ; break;
23043 case LTGT: or1 = LT; or2 = GT; break;
23044 case UNGT: or1 = UNORDERED; or2 = GT; break;
23045 case UNLT: or1 = UNORDERED; or2 = LT; break;
23046 default: gcc_unreachable ();
23048 validate_condition_mode (or1, comp_mode);
23049 validate_condition_mode (or2, comp_mode);
23050 or1_rtx = gen_rtx_fmt_ee (or1, SImode, compare_result, const0_rtx);
23051 or2_rtx = gen_rtx_fmt_ee (or2, SImode, compare_result, const0_rtx);
23052 compare2_rtx = gen_rtx_COMPARE (CCEQmode,
23053 gen_rtx_IOR (SImode, or1_rtx, or2_rtx),
23054 const_true_rtx);
23055 emit_insn (gen_rtx_SET (or_result, compare2_rtx));
23057 compare_result = or_result;
23058 code = EQ;
23061 validate_condition_mode (code, GET_MODE (compare_result));
23063 return gen_rtx_fmt_ee (code, VOIDmode, compare_result, const0_rtx);
23067 /* Return the diagnostic message string if the binary operation OP is
23068 not permitted on TYPE1 and TYPE2, NULL otherwise. */
23070 static const char*
23071 rs6000_invalid_binary_op (int op ATTRIBUTE_UNUSED,
23072 const_tree type1,
23073 const_tree type2)
23075 enum machine_mode mode1 = TYPE_MODE (type1);
23076 enum machine_mode mode2 = TYPE_MODE (type2);
23078 /* For complex modes, use the inner type. */
23079 if (COMPLEX_MODE_P (mode1))
23080 mode1 = GET_MODE_INNER (mode1);
23082 if (COMPLEX_MODE_P (mode2))
23083 mode2 = GET_MODE_INNER (mode2);
23085 /* Don't allow IEEE 754R 128-bit binary floating point and IBM extended
23086 double to intermix unless -mfloat128-convert. */
23087 if (mode1 == mode2)
23088 return NULL;
23090 if (!TARGET_FLOAT128_CVT)
23092 if ((mode1 == KFmode && mode2 == IFmode)
23093 || (mode1 == IFmode && mode2 == KFmode))
23094 return N_("__float128 and __ibm128 cannot be used in the same "
23095 "expression");
23097 if (TARGET_IEEEQUAD
23098 && ((mode1 == IFmode && mode2 == TFmode)
23099 || (mode1 == TFmode && mode2 == IFmode)))
23100 return N_("__ibm128 and long double cannot be used in the same "
23101 "expression");
23103 if (!TARGET_IEEEQUAD
23104 && ((mode1 == KFmode && mode2 == TFmode)
23105 || (mode1 == TFmode && mode2 == KFmode)))
23106 return N_("__float128 and long double cannot be used in the same "
23107 "expression");
23110 return NULL;
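/* For example, without -mfloat128-convert:

     __float128 a;
     __ibm128 b;
     ... a + b ...

   is rejected with "__float128 and __ibm128 cannot be used in the same
   expression".  */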
23114 /* Expand floating point conversion to/from __float128 and __ibm128. */
23116 void
23117 rs6000_expand_float128_convert (rtx dest, rtx src, bool unsigned_p)
23119 machine_mode dest_mode = GET_MODE (dest);
23120 machine_mode src_mode = GET_MODE (src);
23121 convert_optab cvt = unknown_optab;
23122 bool do_move = false;
23123 rtx libfunc = NULL_RTX;
23124 rtx dest2;
23125 typedef rtx (*rtx_2func_t) (rtx, rtx);
23126 rtx_2func_t hw_convert = (rtx_2func_t)0;
23127 size_t kf_or_tf;
23129 struct hw_conv_t {
23130 rtx_2func_t from_df;
23131 rtx_2func_t from_sf;
23132 rtx_2func_t from_si_sign;
23133 rtx_2func_t from_si_uns;
23134 rtx_2func_t from_di_sign;
23135 rtx_2func_t from_di_uns;
23136 rtx_2func_t to_df;
23137 rtx_2func_t to_sf;
23138 rtx_2func_t to_si_sign;
23139 rtx_2func_t to_si_uns;
23140 rtx_2func_t to_di_sign;
23141 rtx_2func_t to_di_uns;
23142 } hw_conversions[2] = {
23143 /* conversions to/from KFmode */
23145 gen_extenddfkf2_hw, /* KFmode <- DFmode. */
23146 gen_extendsfkf2_hw, /* KFmode <- SFmode. */
23147 gen_float_kfsi2_hw, /* KFmode <- SImode (signed). */
23148 gen_floatuns_kfsi2_hw, /* KFmode <- SImode (unsigned). */
23149 gen_float_kfdi2_hw, /* KFmode <- DImode (signed). */
23150 gen_floatuns_kfdi2_hw, /* KFmode <- DImode (unsigned). */
23151 gen_trunckfdf2_hw, /* DFmode <- KFmode. */
23152 gen_trunckfsf2_hw, /* SFmode <- KFmode. */
23153 gen_fix_kfsi2_hw, /* SImode <- KFmode (signed). */
23154 gen_fixuns_kfsi2_hw, /* SImode <- KFmode (unsigned). */
23155 gen_fix_kfdi2_hw, /* DImode <- KFmode (signed). */
23156 gen_fixuns_kfdi2_hw, /* DImode <- KFmode (unsigned). */
23159 /* conversions to/from TFmode */
23161 gen_extenddftf2_hw, /* TFmode <- DFmode. */
23162 gen_extendsftf2_hw, /* TFmode <- SFmode. */
23163 gen_float_tfsi2_hw, /* TFmode <- SImode (signed). */
23164 gen_floatuns_tfsi2_hw, /* TFmode <- SImode (unsigned). */
23165 gen_float_tfdi2_hw, /* TFmode <- DImode (signed). */
23166 gen_floatuns_tfdi2_hw, /* TFmode <- DImode (unsigned). */
23167 gen_trunctfdf2_hw, /* DFmode <- TFmode. */
23168 gen_trunctfsf2_hw, /* SFmode <- TFmode. */
23169 gen_fix_tfsi2_hw, /* SImode <- TFmode (signed). */
23170 gen_fixuns_tfsi2_hw, /* SImode <- TFmode (unsigned). */
23171 gen_fix_tfdi2_hw, /* DImode <- TFmode (signed). */
23172 gen_fixuns_tfdi2_hw, /* DImode <- TFmode (unsigned). */
23176 if (dest_mode == src_mode)
23177 gcc_unreachable ();
23179 /* Eliminate memory operations. */
23180 if (MEM_P (src))
23181 src = force_reg (src_mode, src);
23183 if (MEM_P (dest))
23185 rtx tmp = gen_reg_rtx (dest_mode);
23186 rs6000_expand_float128_convert (tmp, src, unsigned_p);
23187 rs6000_emit_move (dest, tmp, dest_mode);
23188 return;
23191 /* Convert to IEEE 128-bit floating point. */
23192 if (FLOAT128_IEEE_P (dest_mode))
23194 if (dest_mode == KFmode)
23195 kf_or_tf = 0;
23196 else if (dest_mode == TFmode)
23197 kf_or_tf = 1;
23198 else
23199 gcc_unreachable ();
23201 switch (src_mode)
23203 case DFmode:
23204 cvt = sext_optab;
23205 hw_convert = hw_conversions[kf_or_tf].from_df;
23206 break;
23208 case SFmode:
23209 cvt = sext_optab;
23210 hw_convert = hw_conversions[kf_or_tf].from_sf;
23211 break;
23213 case KFmode:
23214 case IFmode:
23215 case TFmode:
23216 if (FLOAT128_IBM_P (src_mode))
23217 cvt = sext_optab;
23218 else
23219 do_move = true;
23220 break;
23222 case SImode:
23223 if (unsigned_p)
23225 cvt = ufloat_optab;
23226 hw_convert = hw_conversions[kf_or_tf].from_si_uns;
23228 else
23230 cvt = sfloat_optab;
23231 hw_convert = hw_conversions[kf_or_tf].from_si_sign;
23233 break;
23235 case DImode:
23236 if (unsigned_p)
23238 cvt = ufloat_optab;
23239 hw_convert = hw_conversions[kf_or_tf].from_di_uns;
23241 else
23243 cvt = sfloat_optab;
23244 hw_convert = hw_conversions[kf_or_tf].from_di_sign;
23246 break;
23248 default:
23249 gcc_unreachable ();
23253 /* Convert from IEEE 128-bit floating point. */
23254 else if (FLOAT128_IEEE_P (src_mode))
23256 if (src_mode == KFmode)
23257 kf_or_tf = 0;
23258 else if (src_mode == TFmode)
23259 kf_or_tf = 1;
23260 else
23261 gcc_unreachable ();
23263 switch (dest_mode)
23265 case DFmode:
23266 cvt = trunc_optab;
23267 hw_convert = hw_conversions[kf_or_tf].to_df;
23268 break;
23270 case SFmode:
23271 cvt = trunc_optab;
23272 hw_convert = hw_conversions[kf_or_tf].to_sf;
23273 break;
23275 case KFmode:
23276 case IFmode:
23277 case TFmode:
23278 if (FLOAT128_IBM_P (dest_mode))
23279 cvt = trunc_optab;
23280 else
23281 do_move = true;
23282 break;
23284 case SImode:
23285 if (unsigned_p)
23287 cvt = ufix_optab;
23288 hw_convert = hw_conversions[kf_or_tf].to_si_uns;
23290 else
23292 cvt = sfix_optab;
23293 hw_convert = hw_conversions[kf_or_tf].to_si_sign;
23295 break;
23297 case DImode:
23298 if (unsigned_p)
23300 cvt = ufix_optab;
23301 hw_convert = hw_conversions[kf_or_tf].to_di_uns;
23303 else
23305 cvt = sfix_optab;
23306 hw_convert = hw_conversions[kf_or_tf].to_di_sign;
23308 break;
23310 default:
23311 gcc_unreachable ();
23315 /* Both IBM format. */
23316 else if (FLOAT128_IBM_P (dest_mode) && FLOAT128_IBM_P (src_mode))
23317 do_move = true;
23319 else
23320 gcc_unreachable ();
23322 /* Handle conversion between TFmode/KFmode. */
23323 if (do_move)
23324 emit_move_insn (dest, gen_lowpart (dest_mode, src));
23326 /* Handle conversion if we have hardware support. */
23327 else if (TARGET_FLOAT128_HW && hw_convert)
23328 emit_insn ((hw_convert) (dest, src));
23330 /* Call an external function to do the conversion. */
23331 else if (cvt != unknown_optab)
23333 libfunc = convert_optab_libfunc (cvt, dest_mode, src_mode);
23334 gcc_assert (libfunc != NULL_RTX);
23336 dest2 = emit_library_call_value (libfunc, dest, LCT_CONST, dest_mode, 1, src,
23337 src_mode);
23339 gcc_assert (dest2 != NULL_RTX);
23340 if (!rtx_equal_p (dest, dest2))
23341 emit_move_insn (dest, dest2);
23344 else
23345 gcc_unreachable ();
23347 return;
23350 /* Split a conversion from __float128 to an integer type into separate insns.
23351 OPERANDS points to the destination, source, and V2DI temporary
23352 register. CODE is either FIX or UNSIGNED_FIX. */
23354 void
23355 convert_float128_to_int (rtx *operands, enum rtx_code code)
23357 rtx dest = operands[0];
23358 rtx src = operands[1];
23359 rtx tmp = operands[2];
23360 rtx cvt;
23361 rtvec cvt_vec;
23362 rtx cvt_unspec;
23363 rtvec move_vec;
23364 rtx move_unspec;
23366 if (GET_CODE (tmp) == SCRATCH)
23367 tmp = gen_reg_rtx (V2DImode);
23369 if (MEM_P (dest))
23370 dest = rs6000_address_for_fpconvert (dest);
23372 /* Generate the actual convert insn of the form:
23373 (set (tmp) (unspec:V2DI [(fix:SI (reg:KF))] UNSPEC_IEEE128_CONVERT)). */
23374 cvt = gen_rtx_fmt_e (code, GET_MODE (dest), src);
23375 cvt_vec = gen_rtvec (1, cvt);
23376 cvt_unspec = gen_rtx_UNSPEC (V2DImode, cvt_vec, UNSPEC_IEEE128_CONVERT);
23377 emit_insn (gen_rtx_SET (tmp, cvt_unspec));
23379 /* Generate the move insn of the form:
23380 (set (dest:SI) (unspec:SI [(tmp:V2DI)] UNSPEC_IEEE128_MOVE)). */
23381 move_vec = gen_rtvec (1, tmp);
23382 move_unspec = gen_rtx_UNSPEC (GET_MODE (dest), move_vec, UNSPEC_IEEE128_MOVE);
23383 emit_insn (gen_rtx_SET (dest, move_unspec));
23386 /* Split a conversion from an integer type to __float128 into separate insns.
23387 OPERANDS points to the destination, source, and V2DI temporary
23388 register. CODE is either FLOAT or UNSIGNED_FLOAT. */
23390 void
23391 convert_int_to_float128 (rtx *operands, enum rtx_code code)
23393 rtx dest = operands[0];
23394 rtx src = operands[1];
23395 rtx tmp = operands[2];
23396 rtx cvt;
23397 rtvec cvt_vec;
23398 rtx cvt_unspec;
23399 rtvec move_vec;
23400 rtx move_unspec;
23401 rtx unsigned_flag;
23403 if (GET_CODE (tmp) == SCRATCH)
23404 tmp = gen_reg_rtx (V2DImode);
23406 if (MEM_P (src))
23407 src = rs6000_address_for_fpconvert (src);
23409 /* Generate the move of the integer into the Altivec register of the form:
23410 (set (tmp:V2DI) (unspec:V2DI [(src:SI)
23411 (const_int 0)] UNSPEC_IEEE128_MOVE)).
23413 or:
23414 (set (tmp:V2DI) (unspec:V2DI [(src:DI)] UNSPEC_IEEE128_MOVE)). */
23416 if (GET_MODE (src) == SImode)
23418 unsigned_flag = (code == UNSIGNED_FLOAT) ? const1_rtx : const0_rtx;
23419 move_vec = gen_rtvec (2, src, unsigned_flag);
23421 else
23422 move_vec = gen_rtvec (1, src);
23424 move_unspec = gen_rtx_UNSPEC (V2DImode, move_vec, UNSPEC_IEEE128_MOVE);
23425 emit_insn (gen_rtx_SET (tmp, move_unspec));
23427 /* Generate the actual convert insn of the form:
23428 (set (dest:KF) (float:KF (unspec:DI [(tmp:V2DI)]
23429 UNSPEC_IEEE128_CONVERT))). */
23430 cvt_vec = gen_rtvec (1, tmp);
23431 cvt_unspec = gen_rtx_UNSPEC (DImode, cvt_vec, UNSPEC_IEEE128_CONVERT);
23432 cvt = gen_rtx_fmt_e (code, GET_MODE (dest), cvt_unspec);
23433 emit_insn (gen_rtx_SET (dest, cvt));
23437 /* Emit the RTL for an sISEL pattern. */
23439 void
23440 rs6000_emit_sISEL (machine_mode mode ATTRIBUTE_UNUSED, rtx operands[])
23442 rs6000_emit_int_cmove (operands[0], operands[1], const1_rtx, const0_rtx);
23445 /* Emit RTL that sets a register to zero if OP1 and OP2 are equal. SCRATCH
23446 can be used as that dest register. Return the dest register. */
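/* When OP2 is usable as a logical operand it folds into an XOR immediate;
   otherwise OP1 + (-OP2) is used.  Either way the result is zero exactly
   when OP1 == OP2.  */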
23448 rtx
23449 rs6000_emit_eqne (machine_mode mode, rtx op1, rtx op2, rtx scratch)
23451 if (op2 == const0_rtx)
23452 return op1;
23454 if (GET_CODE (scratch) == SCRATCH)
23455 scratch = gen_reg_rtx (mode);
23457 if (logical_operand (op2, mode))
23458 emit_insn (gen_rtx_SET (scratch, gen_rtx_XOR (mode, op1, op2)));
23459 else
23460 emit_insn (gen_rtx_SET (scratch,
23461 gen_rtx_PLUS (mode, op1, negate_rtx (mode, op2))));
23463 return scratch;
23466 void
23467 rs6000_emit_sCOND (machine_mode mode, rtx operands[])
23469 rtx condition_rtx;
23470 machine_mode op_mode;
23471 enum rtx_code cond_code;
23472 rtx result = operands[0];
23474 condition_rtx = rs6000_generate_compare (operands[1], mode);
23475 cond_code = GET_CODE (condition_rtx);
23477 if (FLOAT_MODE_P (mode)
23478 && !TARGET_FPRS && TARGET_HARD_FLOAT)
23480 rtx t;
23482 PUT_MODE (condition_rtx, SImode);
23483 t = XEXP (condition_rtx, 0);
23485 gcc_assert (cond_code == NE || cond_code == EQ);
23487 if (cond_code == NE)
23488 emit_insn (gen_e500_flip_gt_bit (t, t));
23490 emit_insn (gen_move_from_CR_gt_bit (result, t));
23491 return;
23494 if (cond_code == NE
23495 || cond_code == GE || cond_code == LE
23496 || cond_code == GEU || cond_code == LEU
23497 || cond_code == ORDERED || cond_code == UNGE || cond_code == UNLE)
23499 rtx not_result = gen_reg_rtx (CCEQmode);
23500 rtx not_op, rev_cond_rtx;
23501 machine_mode cc_mode;
23503 cc_mode = GET_MODE (XEXP (condition_rtx, 0));
23505 rev_cond_rtx = gen_rtx_fmt_ee (rs6000_reverse_condition (cc_mode, cond_code),
23506 SImode, XEXP (condition_rtx, 0), const0_rtx);
23507 not_op = gen_rtx_COMPARE (CCEQmode, rev_cond_rtx, const0_rtx);
23508 emit_insn (gen_rtx_SET (not_result, not_op));
23509 condition_rtx = gen_rtx_EQ (VOIDmode, not_result, const0_rtx);
23512 op_mode = GET_MODE (XEXP (operands[1], 0));
23513 if (op_mode == VOIDmode)
23514 op_mode = GET_MODE (XEXP (operands[1], 1));
23516 if (TARGET_POWERPC64 && (op_mode == DImode || FLOAT_MODE_P (mode)))
23518 PUT_MODE (condition_rtx, DImode);
23519 convert_move (result, condition_rtx, 0);
23521 else
23523 PUT_MODE (condition_rtx, SImode);
23524 emit_insn (gen_rtx_SET (result, condition_rtx));
23528 /* Emit a conditional branch to the label in operands[3], testing the comparison in operands[0]. */
23530 void
23531 rs6000_emit_cbranch (machine_mode mode, rtx operands[])
23533 rtx condition_rtx, loc_ref;
23535 condition_rtx = rs6000_generate_compare (operands[0], mode);
23536 loc_ref = gen_rtx_LABEL_REF (VOIDmode, operands[3]);
23537 emit_jump_insn (gen_rtx_SET (pc_rtx,
23538 gen_rtx_IF_THEN_ELSE (VOIDmode, condition_rtx,
23539 loc_ref, pc_rtx)));
23542 /* Return the string to output a conditional branch to LABEL, which is
23543 the operand template of the label, or NULL if the branch is really a
23544 conditional return.
23546 OP is the conditional expression. XEXP (OP, 0) is assumed to be a
23547 condition code register and its mode specifies what kind of
23548 comparison we made.
23550 REVERSED is nonzero if we should reverse the sense of the comparison.
23552 INSN is the insn. */
23554 char *
23555 output_cbranch (rtx op, const char *label, int reversed, rtx_insn *insn)
23557 static char string[64];
23558 enum rtx_code code = GET_CODE (op);
23559 rtx cc_reg = XEXP (op, 0);
23560 machine_mode mode = GET_MODE (cc_reg);
23561 int cc_regno = REGNO (cc_reg) - CR0_REGNO;
23562 int need_longbranch = label != NULL && get_attr_length (insn) == 8;
23563 int really_reversed = reversed ^ need_longbranch;
23564 char *s = string;
23565 const char *ccode;
23566 const char *pred;
23567 rtx note;
23569 validate_condition_mode (code, mode);
23571 /* Work out which way this really branches. We could use
23572 reverse_condition_maybe_unordered here always but this
23573 makes the resulting assembler clearer. */
23574 if (really_reversed)
23576 /* Reversing an FP compare needs care -- an ordered compare
23577 becomes an unordered compare and vice versa. */
23578 if (mode == CCFPmode)
23579 code = reverse_condition_maybe_unordered (code);
23580 else
23581 code = reverse_condition (code);
23584 if ((!TARGET_FPRS && TARGET_HARD_FLOAT) && mode == CCFPmode)
23586 /* The efscmp/tst* instructions twiddle bit 2, which maps nicely
23587 to the GT bit. */
23588 switch (code)
23590 case EQ:
23591 /* Opposite of GT. */
23592 code = GT;
23593 break;
23595 case NE:
23596 code = UNLE;
23597 break;
23599 default:
23600 gcc_unreachable ();
23604 switch (code)
23606 /* Not all of these are actually distinct opcodes, but
23607 we distinguish them for clarity of the resulting assembler. */
23608 case NE: case LTGT:
23609 ccode = "ne"; break;
23610 case EQ: case UNEQ:
23611 ccode = "eq"; break;
23612 case GE: case GEU:
23613 ccode = "ge"; break;
23614 case GT: case GTU: case UNGT:
23615 ccode = "gt"; break;
23616 case LE: case LEU:
23617 ccode = "le"; break;
23618 case LT: case LTU: case UNLT:
23619 ccode = "lt"; break;
23620 case UNORDERED: ccode = "un"; break;
23621 case ORDERED: ccode = "nu"; break;
23622 case UNGE: ccode = "nl"; break;
23623 case UNLE: ccode = "ng"; break;
23624 default:
23625 gcc_unreachable ();
23628 /* Maybe we have a guess as to how likely the branch is. */
23629 pred = "";
23630 note = find_reg_note (insn, REG_BR_PROB, NULL_RTX);
23631 if (note != NULL_RTX)
23633 /* PROB is the difference from 50%. */
23634 int prob = XINT (note, 0) - REG_BR_PROB_BASE / 2;
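/* REG_BR_PROB_BASE is 10000, so PROB lies in [-5000, 5000]; e.g. a note
   value of 9000 (90% taken) gives PROB == 4000.  */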
23636 /* Only hint for highly probable/improbable branches on newer cpus when
23637 we have real profile data, as static prediction overrides processor
23638 dynamic prediction. For older cpus we may as well always hint, but
23639 assume not taken for branches that are very close to 50% as a
23640 mispredicted taken branch is more expensive than a
23641 mispredicted not-taken branch. */
23642 if (rs6000_always_hint
23643 || (abs (prob) > REG_BR_PROB_BASE / 100 * 48
23644 && (profile_status_for_fn (cfun) != PROFILE_GUESSED)
23645 && br_prob_note_reliable_p (note)))
23647 if (abs (prob) > REG_BR_PROB_BASE / 20
23648 && ((prob > 0) ^ need_longbranch))
23649 pred = "+";
23650 else
23651 pred = "-";
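/* "+" and "-" are the assembler's static branch-prediction hint
   suffixes: "+" predicts the branch taken, "-" predicts it not taken.  */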
23655 if (label == NULL)
23656 s += sprintf (s, "b%slr%s ", ccode, pred);
23657 else
23658 s += sprintf (s, "b%s%s ", ccode, pred);
23660 /* We need to escape any '%' characters in the reg_names string.
23661 Assume they'd only be the first character.... */
23662 if (reg_names[cc_regno + CR0_REGNO][0] == '%')
23663 *s++ = '%';
23664 s += sprintf (s, "%s", reg_names[cc_regno + CR0_REGNO]);
23666 if (label != NULL)
23668 /* If the branch distance was too far, we may have to use an
23669 unconditional branch to go the distance. */
23670 if (need_longbranch)
23671 s += sprintf (s, ",$+8\n\tb %s", label);
23672 else
23673 s += sprintf (s, ",%s", label);
23676 return string;
23679 /* Return the string to flip the GT bit on a CR. */
23680 char *
23681 output_e500_flip_gt_bit (rtx dst, rtx src)
23683 static char string[64];
23684 int a, b;
23686 gcc_assert (GET_CODE (dst) == REG && CR_REGNO_P (REGNO (dst))
23687 && GET_CODE (src) == REG && CR_REGNO_P (REGNO (src)));
23689 /* GT bit. */
23690 a = 4 * (REGNO (dst) - CR0_REGNO) + 1;
23691 b = 4 * (REGNO (src) - CR0_REGNO) + 1;
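/* crnot D,S (an extended mnemonic for crnor D,S,S) sets CR bit D to the
   complement of CR bit S, flipping the GT bit.  */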
23693 sprintf (string, "crnot %d,%d", a, b);
23694 return string;
23697 /* Return insn for VSX or Altivec comparisons. */
23699 static rtx
23700 rs6000_emit_vector_compare_inner (enum rtx_code code, rtx op0, rtx op1)
23702 rtx mask;
23703 machine_mode mode = GET_MODE (op0);
23705 switch (code)
23707 default:
23708 break;
23710 case GE:
23711 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
23712 return NULL_RTX;
23713 /* FALLTHRU */
23715 case EQ:
23716 case GT:
23717 case GTU:
23718 case ORDERED:
23719 case UNORDERED:
23720 case UNEQ:
23721 case LTGT:
23722 mask = gen_reg_rtx (mode);
23723 emit_insn (gen_rtx_SET (mask, gen_rtx_fmt_ee (code, mode, op0, op1)));
23724 return mask;
23727 return NULL_RTX;
23730 /* Emit vector compare for operands OP0 and OP1 using code RCODE.
23731 DMODE is expected destination mode. This is a recursive function. */
23733 static rtx
23734 rs6000_emit_vector_compare (enum rtx_code rcode,
23735 rtx op0, rtx op1,
23736 machine_mode dmode)
23738 rtx mask;
23739 bool swap_operands = false;
23740 bool try_again = false;
23742 gcc_assert (VECTOR_UNIT_ALTIVEC_OR_VSX_P (dmode));
23743 gcc_assert (GET_MODE (op0) == GET_MODE (op1));
23745 /* See if the comparison works as is. */
23746 mask = rs6000_emit_vector_compare_inner (rcode, op0, op1);
23747 if (mask)
23748 return mask;
23750 switch (rcode)
23752 case LT:
23753 rcode = GT;
23754 swap_operands = true;
23755 try_again = true;
23756 break;
23757 case LTU:
23758 rcode = GTU;
23759 swap_operands = true;
23760 try_again = true;
23761 break;
23762 case NE:
23763 case UNLE:
23764 case UNLT:
23765 case UNGE:
23766 case UNGT:
23767 /* Invert condition and try again.
23768 e.g., A != B becomes ~(A==B). */
23770 enum rtx_code rev_code;
23771 enum insn_code nor_code;
23772 rtx mask2;
23774 rev_code = reverse_condition_maybe_unordered (rcode);
23775 if (rev_code == UNKNOWN)
23776 return NULL_RTX;
23778 nor_code = optab_handler (one_cmpl_optab, dmode);
23779 if (nor_code == CODE_FOR_nothing)
23780 return NULL_RTX;
23782 mask2 = rs6000_emit_vector_compare (rev_code, op0, op1, dmode);
23783 if (!mask2)
23784 return NULL_RTX;
23786 mask = gen_reg_rtx (dmode);
23787 emit_insn (GEN_FCN (nor_code) (mask, mask2));
23788 return mask;
23790 break;
23791 case GE:
23792 case GEU:
23793 case LE:
23794 case LEU:
23795 /* Try GT/GTU/LT/LTU OR EQ */
23797 rtx c_rtx, eq_rtx;
23798 enum insn_code ior_code;
23799 enum rtx_code new_code;
23801 switch (rcode)
23803 case GE:
23804 new_code = GT;
23805 break;
23807 case GEU:
23808 new_code = GTU;
23809 break;
23811 case LE:
23812 new_code = LT;
23813 break;
23815 case LEU:
23816 new_code = LTU;
23817 break;
23819 default:
23820 gcc_unreachable ();
23823 ior_code = optab_handler (ior_optab, dmode);
23824 if (ior_code == CODE_FOR_nothing)
23825 return NULL_RTX;
23827 c_rtx = rs6000_emit_vector_compare (new_code, op0, op1, dmode);
23828 if (!c_rtx)
23829 return NULL_RTX;
23831 eq_rtx = rs6000_emit_vector_compare (EQ, op0, op1, dmode);
23832 if (!eq_rtx)
23833 return NULL_RTX;
23835 mask = gen_reg_rtx (dmode);
23836 emit_insn (GEN_FCN (ior_code) (mask, c_rtx, eq_rtx));
23837 return mask;
23839 break;
23840 default:
23841 return NULL_RTX;
23844 if (try_again)
23846 if (swap_operands)
23847 std::swap (op0, op1);
23849 mask = rs6000_emit_vector_compare_inner (rcode, op0, op1);
23850 if (mask)
23851 return mask;
23854 /* You only get two chances. */
23855 return NULL_RTX;
23858 /* Emit vector conditional expression. DEST is destination. OP_TRUE and
23859 OP_FALSE are two VEC_COND_EXPR operands. CC_OP0 and CC_OP1 are the two
23860 operands for the relation operation COND. */
23862 int
23863 rs6000_emit_vector_cond_expr (rtx dest, rtx op_true, rtx op_false,
23864 rtx cond, rtx cc_op0, rtx cc_op1)
23866 machine_mode dest_mode = GET_MODE (dest);
23867 machine_mode mask_mode = GET_MODE (cc_op0);
23868 enum rtx_code rcode = GET_CODE (cond);
23869 machine_mode cc_mode = CCmode;
23870 rtx mask;
23871 rtx cond2;
23872 rtx tmp;
23873 bool invert_move = false;
23875 if (VECTOR_UNIT_NONE_P (dest_mode))
23876 return 0;
23878 gcc_assert (GET_MODE_SIZE (dest_mode) == GET_MODE_SIZE (mask_mode)
23879 && GET_MODE_NUNITS (dest_mode) == GET_MODE_NUNITS (mask_mode));
23881 switch (rcode)
23883 /* Swap operands if we can, and fall back to doing the operation as
23884 specified, and doing a NOR to invert the test. */
23885 case NE:
23886 case UNLE:
23887 case UNLT:
23888 case UNGE:
23889 case UNGT:
23890 /* Invert condition and try again.
23891 e.g., A = (B != C) ? D : E becomes A = (B == C) ? E : D. */
23892 invert_move = true;
23893 rcode = reverse_condition_maybe_unordered (rcode);
23894 if (rcode == UNKNOWN)
23895 return 0;
23896 break;
23898 /* Mark unsigned tests with CCUNSmode. */
23899 case GTU:
23900 case GEU:
23901 case LTU:
23902 case LEU:
23903 cc_mode = CCUNSmode;
23904 break;
23906 default:
23907 break;
23910 /* Get the vector mask for the given relational operations. */
23911 mask = rs6000_emit_vector_compare (rcode, cc_op0, cc_op1, mask_mode);
23913 if (!mask)
23914 return 0;
23916 if (invert_move)
23918 tmp = op_true;
23919 op_true = op_false;
23920 op_false = tmp;
23923 cond2 = gen_rtx_fmt_ee (NE, cc_mode, gen_lowpart (dest_mode, mask),
23924 CONST0_RTX (dest_mode));
23925 emit_insn (gen_rtx_SET (dest,
23926 gen_rtx_IF_THEN_ELSE (dest_mode,
23927 cond2,
23928 op_true,
23929 op_false)));
23930 return 1;
23933 /* ISA 3.0 (power9) minmax subcase to emit a XSMAXCDP or XSMINCDP instruction
23934 for SF/DF scalars. Move TRUE_COND to DEST if OP of the operands of the last
23935 comparison is nonzero/true, FALSE_COND if it is zero/false. Return 0 if the
23936 hardware has no such operation. */
23938 static int
23939 rs6000_emit_p9_fp_minmax (rtx dest, rtx op, rtx true_cond, rtx false_cond)
23941 enum rtx_code code = GET_CODE (op);
23942 rtx op0 = XEXP (op, 0);
23943 rtx op1 = XEXP (op, 1);
23944 machine_mode compare_mode = GET_MODE (op0);
23945 machine_mode result_mode = GET_MODE (dest);
23946 bool max_p = false;
23948 if (result_mode != compare_mode)
23949 return 0;
23951 if (code == GE || code == GT)
23952 max_p = true;
23953 else if (code == LE || code == LT)
23954 max_p = false;
23955 else
23956 return 0;
23958 if (rtx_equal_p (op0, true_cond) && rtx_equal_p (op1, false_cond))
23961 else if (rtx_equal_p (op1, true_cond) && rtx_equal_p (op0, false_cond))
23962 max_p = !max_p;
23964 else
23965 return 0;
23967 rs6000_emit_minmax (dest, max_p ? SMAX : SMIN, op0, op1);
23968 return 1;
23971 /* ISA 3.0 (power9) conditional move subcase to emit XSCMP{EQ,GE,GT,NE}DP and
23972 XXSEL instructions for SF/DF scalars. Move TRUE_COND to DEST if OP of the
23973 operands of the last comparison is nonzero/true, FALSE_COND if it is
23974 zero/false. Return 0 if the hardware has no such operation. */
23976 static int
23977 rs6000_emit_p9_fp_cmove (rtx dest, rtx op, rtx true_cond, rtx false_cond)
23979 enum rtx_code code = GET_CODE (op);
23980 rtx op0 = XEXP (op, 0);
23981 rtx op1 = XEXP (op, 1);
23982 machine_mode result_mode = GET_MODE (dest);
23983 rtx compare_rtx;
23984 rtx cmove_rtx;
23985 rtx clobber_rtx;
23987 if (!can_create_pseudo_p ())
23988 return 0;
23990 switch (code)
23992 case EQ:
23993 case GE:
23994 case GT:
23995 break;
23997 case NE:
23998 case LT:
23999 case LE:
24000 code = swap_condition (code);
24001 std::swap (op0, op1);
24002 break;
24004 default:
24005 return 0;
24008 /* Generate: [(parallel [(set (dest)
24009 (if_then_else (op (cmp1) (cmp2))
24010 (true)
24011 (false)))
24012 (clobber (scratch))])]. */
24014 compare_rtx = gen_rtx_fmt_ee (code, CCFPmode, op0, op1);
24015 cmove_rtx = gen_rtx_SET (dest,
24016 gen_rtx_IF_THEN_ELSE (result_mode,
24017 compare_rtx,
24018 true_cond,
24019 false_cond));
24021 clobber_rtx = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (V2DImode));
24022 emit_insn (gen_rtx_PARALLEL (VOIDmode,
24023 gen_rtvec (2, cmove_rtx, clobber_rtx)));
24025 return 1;
24028 /* Emit a conditional move: move TRUE_COND to DEST if OP of the
24029 operands of the last comparison is nonzero/true, FALSE_COND if it
24030 is zero/false. Return 0 if the hardware has no such operation. */
24032 int
24033 rs6000_emit_cmove (rtx dest, rtx op, rtx true_cond, rtx false_cond)
24035 enum rtx_code code = GET_CODE (op);
24036 rtx op0 = XEXP (op, 0);
24037 rtx op1 = XEXP (op, 1);
24038 machine_mode compare_mode = GET_MODE (op0);
24039 machine_mode result_mode = GET_MODE (dest);
24040 rtx temp;
24041 bool is_against_zero;
24043 /* These modes should always match. */
24044 if (GET_MODE (op1) != compare_mode
24045 /* In the isel case however, we can use a compare immediate, so
24046 op1 may be a small constant. */
24047 && (!TARGET_ISEL || !short_cint_operand (op1, VOIDmode)))
24048 return 0;
24049 if (GET_MODE (true_cond) != result_mode)
24050 return 0;
24051 if (GET_MODE (false_cond) != result_mode)
24052 return 0;
24054 /* See if we can use the ISA 3.0 (power9) min/max/compare functions. */
24055 if (TARGET_P9_MINMAX
24056 && (compare_mode == SFmode || compare_mode == DFmode)
24057 && (result_mode == SFmode || result_mode == DFmode))
24059 if (rs6000_emit_p9_fp_minmax (dest, op, true_cond, false_cond))
24060 return 1;
24062 if (rs6000_emit_p9_fp_cmove (dest, op, true_cond, false_cond))
24063 return 1;
24066 /* Don't allow using floating point comparisons for integer results for
24067 now. */
24068 if (FLOAT_MODE_P (compare_mode) && !FLOAT_MODE_P (result_mode))
24069 return 0;
24071 /* First, work out if the hardware can do this at all, or
24072 if it's too slow.... */
24073 if (!FLOAT_MODE_P (compare_mode))
24075 if (TARGET_ISEL)
24076 return rs6000_emit_int_cmove (dest, op, true_cond, false_cond);
24077 return 0;
24079 else if (TARGET_HARD_FLOAT && !TARGET_FPRS
24080 && SCALAR_FLOAT_MODE_P (compare_mode))
24081 return 0;
24083 is_against_zero = op1 == CONST0_RTX (compare_mode);
24085 /* A floating-point subtract might overflow, underflow, or produce
24086 an inexact result, thus changing the floating-point flags, so it
24087 can't be generated if we care about that. It's safe if one side
24088 of the construct is zero, since then no subtract will be
24089 generated. */
24090 if (SCALAR_FLOAT_MODE_P (compare_mode)
24091 && flag_trapping_math && ! is_against_zero)
24092 return 0;
24094 /* Eliminate half of the comparisons by switching operands, this
24095 makes the remaining code simpler. */
24096 if (code == UNLT || code == UNGT || code == UNORDERED || code == NE
24097 || code == LTGT || code == LT || code == UNLE)
24099 code = reverse_condition_maybe_unordered (code);
24100 temp = true_cond;
24101 true_cond = false_cond;
24102 false_cond = temp;
24105 /* UNEQ and LTGT take four instructions for a comparison with zero,
24106 so it'll probably be faster to use a branch here too. */
24107 if (code == UNEQ && HONOR_NANS (compare_mode))
24108 return 0;
24110 /* We're going to try to implement comparisons by performing
24111 a subtract, then comparing against zero. Unfortunately,
24112 Inf - Inf is NaN which is not zero, and so if we don't
24113 know that the operand is finite and the comparison
24114 would treat EQ differently from UNORDERED, we can't do it. */
24115 if (HONOR_INFINITIES (compare_mode)
24116 && code != GT && code != UNGE
24117 && (GET_CODE (op1) != CONST_DOUBLE
24118 || real_isinf (CONST_DOUBLE_REAL_VALUE (op1)))
24119 /* Constructs of the form (a OP b ? a : b) are safe. */
24120 && ((! rtx_equal_p (op0, false_cond) && ! rtx_equal_p (op1, false_cond))
24121 || (! rtx_equal_p (op0, true_cond)
24122 && ! rtx_equal_p (op1, true_cond))))
24123 return 0;
24125 /* At this point we know we can use fsel. */
24127 /* Reduce the comparison to a comparison against zero. */
24128 if (! is_against_zero)
24130 temp = gen_reg_rtx (compare_mode);
24131 emit_insn (gen_rtx_SET (temp, gen_rtx_MINUS (compare_mode, op0, op1)));
24132 op0 = temp;
24133 op1 = CONST0_RTX (compare_mode);
24136 /* If we don't care about NaNs we can reduce some of the comparisons
24137 down to faster ones. */
24138 if (! HONOR_NANS (compare_mode))
24139 switch (code)
24141 case GT:
24142 code = LE;
24143 temp = true_cond;
24144 true_cond = false_cond;
24145 false_cond = temp;
24146 break;
24147 case UNGE:
24148 code = GE;
24149 break;
24150 case UNEQ:
24151 code = EQ;
24152 break;
24153 default:
24154 break;
24157 /* Now, reduce everything down to a GE. */
24158 switch (code)
24160 case GE:
24161 break;
24163 case LE:
24164 temp = gen_reg_rtx (compare_mode);
24165 emit_insn (gen_rtx_SET (temp, gen_rtx_NEG (compare_mode, op0)));
24166 op0 = temp;
24167 break;
24169 case ORDERED:
24170 temp = gen_reg_rtx (compare_mode);
24171 emit_insn (gen_rtx_SET (temp, gen_rtx_ABS (compare_mode, op0)));
24172 op0 = temp;
24173 break;
24175 case EQ:
24176 temp = gen_reg_rtx (compare_mode);
24177 emit_insn (gen_rtx_SET (temp,
24178 gen_rtx_NEG (compare_mode,
24179 gen_rtx_ABS (compare_mode, op0))));
24180 op0 = temp;
24181 break;
24183 case UNGE:
24184 /* a UNGE 0 <-> (a GE 0 || -a UNLT 0) */
24185 temp = gen_reg_rtx (result_mode);
24186 emit_insn (gen_rtx_SET (temp,
24187 gen_rtx_IF_THEN_ELSE (result_mode,
24188 gen_rtx_GE (VOIDmode,
24189 op0, op1),
24190 true_cond, false_cond)));
24191 false_cond = true_cond;
24192 true_cond = temp;
24194 temp = gen_reg_rtx (compare_mode);
24195 emit_insn (gen_rtx_SET (temp, gen_rtx_NEG (compare_mode, op0)));
24196 op0 = temp;
24197 break;
24199 case GT:
24200 /* a GT 0 <-> (a GE 0 && -a UNLT 0) */
24201 temp = gen_reg_rtx (result_mode);
24202 emit_insn (gen_rtx_SET (temp,
24203 gen_rtx_IF_THEN_ELSE (result_mode,
24204 gen_rtx_GE (VOIDmode,
24205 op0, op1),
24206 true_cond, false_cond)));
24207 true_cond = false_cond;
24208 false_cond = temp;
24210 temp = gen_reg_rtx (compare_mode);
24211 emit_insn (gen_rtx_SET (temp, gen_rtx_NEG (compare_mode, op0)));
24212 op0 = temp;
24213 break;
24215 default:
24216 gcc_unreachable ();
24219 emit_insn (gen_rtx_SET (dest,
24220 gen_rtx_IF_THEN_ELSE (result_mode,
24221 gen_rtx_GE (VOIDmode,
24222 op0, op1),
24223 true_cond, false_cond)));
24224 return 1;
24227 /* Same as above, but for ints (isel). */
24229 static int
24230 rs6000_emit_int_cmove (rtx dest, rtx op, rtx true_cond, rtx false_cond)
24232 rtx condition_rtx, cr;
24233 machine_mode mode = GET_MODE (dest);
24234 enum rtx_code cond_code;
24235 rtx (*isel_func) (rtx, rtx, rtx, rtx, rtx);
24236 bool signedp;
24238 if (mode != SImode && (!TARGET_POWERPC64 || mode != DImode))
24239 return 0;
24241 /* We still have to do the compare, because isel doesn't do a
24242 compare, it just looks at the CRx bits set by a previous compare
24243 instruction. */
24244 condition_rtx = rs6000_generate_compare (op, mode);
24245 cond_code = GET_CODE (condition_rtx);
24246 cr = XEXP (condition_rtx, 0);
24247 signedp = GET_MODE (cr) == CCmode;
24249 isel_func = (mode == SImode
24250 ? (signedp ? gen_isel_signed_si : gen_isel_unsigned_si)
24251 : (signedp ? gen_isel_signed_di : gen_isel_unsigned_di));
24253 switch (cond_code)
24255 case LT: case GT: case LTU: case GTU: case EQ:
24256 /* isel handles these directly. */
24257 break;
24259 default:
24260 /* We need to swap the sense of the comparison. */
24262 std::swap (false_cond, true_cond);
24263 PUT_CODE (condition_rtx, reverse_condition (cond_code));
24265 break;
24268 false_cond = force_reg (mode, false_cond);
24269 if (true_cond != const0_rtx)
24270 true_cond = force_reg (mode, true_cond);
24272 emit_insn (isel_func (dest, condition_rtx, true_cond, false_cond, cr));
24274 return 1;
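/* Return the assembler template for an isel instruction.  OPERANDS[1] is
   the comparison; isel tests LT/GT/EQ and their unsigned forms directly,
   so for the other codes reverse the condition and swap the two source
   operands.  */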
24277 const char *
24278 output_isel (rtx *operands)
24280 enum rtx_code code;
24282 code = GET_CODE (operands[1]);
24284 if (code == GE || code == GEU || code == LE || code == LEU || code == NE)
24286 gcc_assert (GET_CODE (operands[2]) == REG
24287 && GET_CODE (operands[3]) == REG);
24288 PUT_CODE (operands[1], reverse_condition (code));
24289 return "isel %0,%3,%2,%j1";
24292 return "isel %0,%2,%3,%j1";
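/* Emit insns to set DEST to the CODE (SMIN, SMAX, UMIN or UMAX) of OP0
   and OP1: use a direct min/max insn for the signed cases on
   VSX/Altivec, otherwise fall back to a conditional move.  */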
24295 void
24296 rs6000_emit_minmax (rtx dest, enum rtx_code code, rtx op0, rtx op1)
24298 machine_mode mode = GET_MODE (op0);
24299 enum rtx_code c;
24300 rtx target;
24302 /* VSX/altivec have direct min/max insns. */
24303 if ((code == SMAX || code == SMIN)
24304 && (VECTOR_UNIT_ALTIVEC_OR_VSX_P (mode)
24305 || (mode == SFmode && VECTOR_UNIT_VSX_P (DFmode))))
24307 emit_insn (gen_rtx_SET (dest, gen_rtx_fmt_ee (code, mode, op0, op1)));
24308 return;
24311 if (code == SMAX || code == SMIN)
24312 c = GE;
24313 else
24314 c = GEU;
24316 if (code == SMAX || code == UMAX)
24317 target = emit_conditional_move (dest, c, op0, op1, mode,
24318 op0, op1, mode, 0);
24319 else
24320 target = emit_conditional_move (dest, c, op0, op1, mode,
24321 op1, op0, mode, 0);
24322 gcc_assert (target);
24323 if (target != dest)
24324 emit_move_insn (dest, target);
24327 /* Split a signbit operation on 64-bit machines with direct move. Allow
24328 the value to come from memory or to be already loaded into a GPR. */
24330 void
24331 rs6000_split_signbit (rtx dest, rtx src)
24333 machine_mode d_mode = GET_MODE (dest);
24334 machine_mode s_mode = GET_MODE (src);
24335 rtx dest_di = (d_mode == DImode) ? dest : gen_lowpart (DImode, dest);
24336 rtx shift_reg = dest_di;
24338 gcc_assert (REG_P (dest));
24339 gcc_assert (REG_P (src) || MEM_P (src));
24340 gcc_assert (s_mode == KFmode || s_mode == TFmode);
24342 if (MEM_P (src))
24344 rtx mem = (WORDS_BIG_ENDIAN
24345 ? adjust_address (src, DImode, 0)
24346 : adjust_address (src, DImode, 8));
24347 emit_insn (gen_rtx_SET (dest_di, mem));
24350 else
24352 unsigned int r = REGNO (src);
24354 /* If this is a VSX register, generate the special mfvsrd instruction
24355 to get it in a GPR. Until we support SF and DF modes, that will
24356 always be true. */
24357 gcc_assert (VSX_REGNO_P (r));
24359 if (s_mode == KFmode)
24360 emit_insn (gen_signbitkf2_dm2 (dest_di, src));
24361 else
24362 emit_insn (gen_signbittf2_dm2 (dest_di, src));
24365 emit_insn (gen_lshrdi3 (dest_di, shift_reg, GEN_INT (63)));
24366 return;
24369 /* A subroutine of the atomic operation splitters. Jump to LABEL if
24370 COND is true. Mark the jump as unlikely to be taken. */
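/* The probability note added below is REG_BR_PROB_BASE / 100 - 1, i.e.
   just under 1% taken.  */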
24372 static void
24373 emit_unlikely_jump (rtx cond, rtx label)
24375 int very_unlikely = REG_BR_PROB_BASE / 100 - 1;
24376 rtx x;
24378 x = gen_rtx_IF_THEN_ELSE (VOIDmode, cond, label, pc_rtx);
24379 x = emit_jump_insn (gen_rtx_SET (pc_rtx, x));
24380 add_int_reg_note (x, REG_BR_PROB, very_unlikely);
24383 /* A subroutine of the atomic operation splitters. Emit a load-locked
24384 instruction in MODE. For QI/HImode, possibly use a pattern that includes
24385 the zero_extend operation. */
24387 static void
24388 emit_load_locked (machine_mode mode, rtx reg, rtx mem)
24390 rtx (*fn) (rtx, rtx) = NULL;
24392 switch (mode)
24394 case QImode:
24395 fn = gen_load_lockedqi;
24396 break;
24397 case HImode:
24398 fn = gen_load_lockedhi;
24399 break;
24400 case SImode:
24401 if (GET_MODE (mem) == QImode)
24402 fn = gen_load_lockedqi_si;
24403 else if (GET_MODE (mem) == HImode)
24404 fn = gen_load_lockedhi_si;
24405 else
24406 fn = gen_load_lockedsi;
24407 break;
24408 case DImode:
24409 fn = gen_load_lockeddi;
24410 break;
24411 case TImode:
24412 fn = gen_load_lockedti;
24413 break;
24414 default:
24415 gcc_unreachable ();
24417 emit_insn (fn (reg, mem));
24420 /* A subroutine of the atomic operation splitters. Emit a store-conditional
24421 instruction in MODE. */
24423 static void
24424 emit_store_conditional (machine_mode mode, rtx res, rtx mem, rtx val)
24426 rtx (*fn) (rtx, rtx, rtx) = NULL;
24428 switch (mode)
24430 case QImode:
24431 fn = gen_store_conditionalqi;
24432 break;
24433 case HImode:
24434 fn = gen_store_conditionalhi;
24435 break;
24436 case SImode:
24437 fn = gen_store_conditionalsi;
24438 break;
24439 case DImode:
24440 fn = gen_store_conditionaldi;
24441 break;
24442 case TImode:
24443 fn = gen_store_conditionalti;
24444 break;
24445 default:
24446 gcc_unreachable ();
24449 /* Emit sync before stwcx. to address PPC405 Erratum. */
24450 if (PPC405_ERRATUM77)
24451 emit_insn (gen_hwsync ());
24453 emit_insn (fn (res, mem, val));
24456 /* Expand barriers before and after a load_locked/store_cond sequence. */
24458 static rtx
24459 rs6000_pre_atomic_barrier (rtx mem, enum memmodel model)
24461 rtx addr = XEXP (mem, 0);
24462 int strict_p = (reload_in_progress || reload_completed);
24464 if (!legitimate_indirect_address_p (addr, strict_p)
24465 && !legitimate_indexed_address_p (addr, strict_p))
24467 addr = force_reg (Pmode, addr);
24468 mem = replace_equiv_address_nv (mem, addr);
24471 switch (model)
24473 case MEMMODEL_RELAXED:
24474 case MEMMODEL_CONSUME:
24475 case MEMMODEL_ACQUIRE:
24476 break;
24477 case MEMMODEL_RELEASE:
24478 case MEMMODEL_ACQ_REL:
24479 emit_insn (gen_lwsync ());
24480 break;
24481 case MEMMODEL_SEQ_CST:
24482 emit_insn (gen_hwsync ());
24483 break;
24484 default:
24485 gcc_unreachable ();
24487 return mem;
24490 static void
24491 rs6000_post_atomic_barrier (enum memmodel model)
24493 switch (model)
24495 case MEMMODEL_RELAXED:
24496 case MEMMODEL_CONSUME:
24497 case MEMMODEL_RELEASE:
24498 break;
24499 case MEMMODEL_ACQUIRE:
24500 case MEMMODEL_ACQ_REL:
24501 case MEMMODEL_SEQ_CST:
24502 emit_insn (gen_isync ());
24503 break;
24504 default:
24505 gcc_unreachable ();
24509 /* A subroutine of the various atomic expanders. For sub-word operations,
24510 we must adjust things to operate on SImode. Given the original MEM,
24511 return a new aligned memory. Also build and return the quantities by
24512 which to shift and mask. */
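/* For example, a QImode access at byte offset 1 within its word on a
   little-endian target yields SHIFT == 8 and MASK == 0xff << 8; on a
   big-endian target the shift is complemented (8 XOR 0x18 == 16).  */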
24514 static rtx
24515 rs6000_adjust_atomic_subword (rtx orig_mem, rtx *pshift, rtx *pmask)
24517 rtx addr, align, shift, mask, mem;
24518 HOST_WIDE_INT shift_mask;
24519 machine_mode mode = GET_MODE (orig_mem);
24521 /* For smaller modes, we have to implement this via SImode. */
24522 shift_mask = (mode == QImode ? 0x18 : 0x10);
24524 addr = XEXP (orig_mem, 0);
24525 addr = force_reg (GET_MODE (addr), addr);
24527 /* Aligned memory containing subword. Generate a new memory. We
24528 do not want any of the existing MEM_ATTR data, as we're now
24529 accessing memory outside the original object. */
24530 align = expand_simple_binop (Pmode, AND, addr, GEN_INT (-4),
24531 NULL_RTX, 1, OPTAB_LIB_WIDEN);
24532 mem = gen_rtx_MEM (SImode, align);
24533 MEM_VOLATILE_P (mem) = MEM_VOLATILE_P (orig_mem);
24534 if (MEM_ALIAS_SET (orig_mem) == ALIAS_SET_MEMORY_BARRIER)
24535 set_mem_alias_set (mem, ALIAS_SET_MEMORY_BARRIER);
24537 /* Shift amount for subword relative to aligned word. */
24538 shift = gen_reg_rtx (SImode);
24539 addr = gen_lowpart (SImode, addr);
24540 rtx tmp = gen_reg_rtx (SImode);
24541 emit_insn (gen_ashlsi3 (tmp, addr, GEN_INT (3)));
24542 emit_insn (gen_andsi3 (shift, tmp, GEN_INT (shift_mask)));
24543 if (BYTES_BIG_ENDIAN)
24544 shift = expand_simple_binop (SImode, XOR, shift, GEN_INT (shift_mask),
24545 shift, 1, OPTAB_LIB_WIDEN);
24546 *pshift = shift;
24548 /* Mask for insertion. */
24549 mask = expand_simple_binop (SImode, ASHIFT, GEN_INT (GET_MODE_MASK (mode)),
24550 shift, NULL_RTX, 1, OPTAB_LIB_WIDEN);
24551 *pmask = mask;
24553 return mem;
24556 /* A subroutine of the various atomic expanders. For sub-word operands,
24557 combine OLDVAL and NEWVAL via MASK. Returns a new pseudo. */
24559 static rtx
24560 rs6000_mask_atomic_subword (rtx oldval, rtx newval, rtx mask)
24562 rtx x;
24564 x = gen_reg_rtx (SImode);
24565 emit_insn (gen_rtx_SET (x, gen_rtx_AND (SImode,
24566 gen_rtx_NOT (SImode, mask),
24567 oldval)));
24569 x = expand_simple_binop (SImode, IOR, newval, x, x, 1, OPTAB_LIB_WIDEN);
24571 return x;
24574 /* A subroutine of the various atomic expanders. For sub-word operands,
24575 extract WIDE to NARROW via SHIFT. */
24577 static void
24578 rs6000_finish_atomic_subword (rtx narrow, rtx wide, rtx shift)
24580 wide = expand_simple_binop (SImode, LSHIFTRT, wide, shift,
24581 wide, 1, OPTAB_LIB_WIDEN);
24582 emit_move_insn (narrow, gen_lowpart (GET_MODE (narrow), wide));
24585 /* Expand an atomic compare and swap operation. */
24587 void
24588 rs6000_expand_atomic_compare_and_swap (rtx operands[])
24590 rtx boolval, retval, mem, oldval, newval, cond;
24591 rtx label1, label2, x, mask, shift;
24592 machine_mode mode, orig_mode;
24593 enum memmodel mod_s, mod_f;
24594 bool is_weak;
24596 boolval = operands[0];
24597 retval = operands[1];
24598 mem = operands[2];
24599 oldval = operands[3];
24600 newval = operands[4];
24601 is_weak = (INTVAL (operands[5]) != 0);
24602 mod_s = memmodel_base (INTVAL (operands[6]));
24603 mod_f = memmodel_base (INTVAL (operands[7]));
24604 orig_mode = mode = GET_MODE (mem);
24606 mask = shift = NULL_RTX;
24607 if (mode == QImode || mode == HImode)
24609 /* Before power8, we didn't have access to lbarx/lharx, so generate a
24610 lwarx and shift/mask operations. With power8, we need to do the
24611 comparison in SImode, but the store is still done in QI/HImode. */
24612 oldval = convert_modes (SImode, mode, oldval, 1);
24614 if (!TARGET_SYNC_HI_QI)
24616 mem = rs6000_adjust_atomic_subword (mem, &shift, &mask);
24618 /* Shift and mask OLDVAL into position within the word. */
24619 oldval = expand_simple_binop (SImode, ASHIFT, oldval, shift,
24620 NULL_RTX, 1, OPTAB_LIB_WIDEN);
24622 /* Shift and mask NEWVAL into position within the word. */
24623 newval = convert_modes (SImode, mode, newval, 1);
24624 newval = expand_simple_binop (SImode, ASHIFT, newval, shift,
24625 NULL_RTX, 1, OPTAB_LIB_WIDEN);
24628 /* Prepare to adjust the return value. */
24629 retval = gen_reg_rtx (SImode);
24630 mode = SImode;
24632 else if (reg_overlap_mentioned_p (retval, oldval))
24633 oldval = copy_to_reg (oldval);
24635 if (mode != TImode && !reg_or_short_operand (oldval, mode))
24636 oldval = copy_to_mode_reg (mode, oldval);
24638 if (reg_overlap_mentioned_p (retval, newval))
24639 newval = copy_to_reg (newval);
24641 mem = rs6000_pre_atomic_barrier (mem, mod_s);
24643 label1 = NULL_RTX;
24644 if (!is_weak)
24646 label1 = gen_rtx_LABEL_REF (VOIDmode, gen_label_rtx ());
24647 emit_label (XEXP (label1, 0));
24649 label2 = gen_rtx_LABEL_REF (VOIDmode, gen_label_rtx ());
24651 emit_load_locked (mode, retval, mem);
24653 x = retval;
24654 if (mask)
24655 x = expand_simple_binop (SImode, AND, retval, mask,
24656 NULL_RTX, 1, OPTAB_LIB_WIDEN);
24658 cond = gen_reg_rtx (CCmode);
24659 /* If we have TImode, synthesize a comparison. */
24660 if (mode != TImode)
24661 x = gen_rtx_COMPARE (CCmode, x, oldval);
24662 else
24664 rtx xor1_result = gen_reg_rtx (DImode);
24665 rtx xor2_result = gen_reg_rtx (DImode);
24666 rtx or_result = gen_reg_rtx (DImode);
24667 rtx new_word0 = simplify_gen_subreg (DImode, x, TImode, 0);
24668 rtx new_word1 = simplify_gen_subreg (DImode, x, TImode, 8);
24669 rtx old_word0 = simplify_gen_subreg (DImode, oldval, TImode, 0);
24670 rtx old_word1 = simplify_gen_subreg (DImode, oldval, TImode, 8);
24672 emit_insn (gen_xordi3 (xor1_result, new_word0, old_word0));
24673 emit_insn (gen_xordi3 (xor2_result, new_word1, old_word1));
24674 emit_insn (gen_iordi3 (or_result, xor1_result, xor2_result));
24675 x = gen_rtx_COMPARE (CCmode, or_result, const0_rtx);
24678 emit_insn (gen_rtx_SET (cond, x));
24680 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
24681 emit_unlikely_jump (x, label2);
24683 x = newval;
24684 if (mask)
24685 x = rs6000_mask_atomic_subword (retval, newval, mask);
24687 emit_store_conditional (orig_mode, cond, mem, x);
24689 if (!is_weak)
24691 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
24692 emit_unlikely_jump (x, label1);
24695 if (!is_mm_relaxed (mod_f))
24696 emit_label (XEXP (label2, 0));
24698 rs6000_post_atomic_barrier (mod_s);
24700 if (is_mm_relaxed (mod_f))
24701 emit_label (XEXP (label2, 0));
24703 if (shift)
24704 rs6000_finish_atomic_subword (operands[1], retval, shift);
24705 else if (mode != GET_MODE (operands[1]))
24706 convert_move (operands[1], retval, 1);
24708 /* In all cases, CR0 contains EQ on success, and NE on failure. */
24709 x = gen_rtx_EQ (SImode, cond, const0_rtx);
24710 emit_insn (gen_rtx_SET (boolval, x));
24713 /* Expand an atomic exchange operation. */
24715 void
24716 rs6000_expand_atomic_exchange (rtx operands[])
24718 rtx retval, mem, val, cond;
24719 machine_mode mode;
24720 enum memmodel model;
24721 rtx label, x, mask, shift;
24723 retval = operands[0];
24724 mem = operands[1];
24725 val = operands[2];
24726 model = memmodel_base (INTVAL (operands[3]));
24727 mode = GET_MODE (mem);
24729 mask = shift = NULL_RTX;
24730 if (!TARGET_SYNC_HI_QI && (mode == QImode || mode == HImode))
24732 mem = rs6000_adjust_atomic_subword (mem, &shift, &mask);
24734 /* Shift and mask VAL into position within the word. */
24735 val = convert_modes (SImode, mode, val, 1);
24736 val = expand_simple_binop (SImode, ASHIFT, val, shift,
24737 NULL_RTX, 1, OPTAB_LIB_WIDEN);
24739 /* Prepare to adjust the return value. */
24740 retval = gen_reg_rtx (SImode);
24741 mode = SImode;
24744 mem = rs6000_pre_atomic_barrier (mem, model);
24746 label = gen_rtx_LABEL_REF (VOIDmode, gen_label_rtx ());
24747 emit_label (XEXP (label, 0));
24749 emit_load_locked (mode, retval, mem);
24751 x = val;
24752 if (mask)
24753 x = rs6000_mask_atomic_subword (retval, val, mask);
24755 cond = gen_reg_rtx (CCmode);
24756 emit_store_conditional (mode, cond, mem, x);
24758 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
24759 emit_unlikely_jump (x, label);
24761 rs6000_post_atomic_barrier (model);
24763 if (shift)
24764 rs6000_finish_atomic_subword (operands[0], retval, shift);
24767 /* Expand an atomic fetch-and-operate pattern. CODE is the binary operation
24768 to perform. MEM is the memory on which to operate. VAL is the second
24769 operand of the binary operator. BEFORE and AFTER are optional locations to
24770 return the value of MEM either before or after the operation. MODEL_RTX
24771 is a CONST_INT containing the memory model to use. */
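/* The expansion is a load-locked/store-conditional loop: load-lock MEM,
   apply CODE with VAL, store-conditional the result, and branch back on
   failure; BEFORE and AFTER are captured inside the loop.  */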
24773 void
24774 rs6000_expand_atomic_op (enum rtx_code code, rtx mem, rtx val,
24775 rtx orig_before, rtx orig_after, rtx model_rtx)
24777 enum memmodel model = memmodel_base (INTVAL (model_rtx));
24778 machine_mode mode = GET_MODE (mem);
24779 machine_mode store_mode = mode;
24780 rtx label, x, cond, mask, shift;
24781 rtx before = orig_before, after = orig_after;
24783 mask = shift = NULL_RTX;
24784 /* On power8, we want to use SImode for the operation. On previous systems,
24785 use the operation in a subword and shift/mask to get the proper byte or
24786 halfword. */
24787 if (mode == QImode || mode == HImode)
24789 if (TARGET_SYNC_HI_QI)
24791 val = convert_modes (SImode, mode, val, 1);
24793 /* Prepare to adjust the return value. */
24794 before = gen_reg_rtx (SImode);
24795 if (after)
24796 after = gen_reg_rtx (SImode);
24797 mode = SImode;
24799 else
24801 mem = rs6000_adjust_atomic_subword (mem, &shift, &mask);
24803 /* Shift and mask VAL into position within the word. */
24804 val = convert_modes (SImode, mode, val, 1);
24805 val = expand_simple_binop (SImode, ASHIFT, val, shift,
24806 NULL_RTX, 1, OPTAB_LIB_WIDEN);
24808 switch (code)
24810 case IOR:
24811 case XOR:
24812 /* We've already zero-extended VAL. That is sufficient to
24813 make certain that it does not affect other bits. */
24814 mask = NULL;
24815 break;
24817 case AND:
24818 /* If we make certain that all of the other bits in VAL are
24819 set, that will be sufficient to not affect other bits. */
24820 x = gen_rtx_NOT (SImode, mask);
24821 x = gen_rtx_IOR (SImode, x, val);
24822 emit_insn (gen_rtx_SET (val, x));
24823 mask = NULL;
24824 break;
24826 case NOT:
24827 case PLUS:
24828 case MINUS:
24829 /* These will all affect bits outside the field and need
24830 adjustment via MASK within the loop. */
24831 break;
24833 default:
24834 gcc_unreachable ();
24837 /* Prepare to adjust the return value. */
24838 before = gen_reg_rtx (SImode);
24839 if (after)
24840 after = gen_reg_rtx (SImode);
24841 store_mode = mode = SImode;
24845 mem = rs6000_pre_atomic_barrier (mem, model);
24847 label = gen_label_rtx ();
24848 emit_label (label);
24849 label = gen_rtx_LABEL_REF (VOIDmode, label);
24851 if (before == NULL_RTX)
24852 before = gen_reg_rtx (mode);
24854 emit_load_locked (mode, before, mem);
24856 if (code == NOT)
24858 x = expand_simple_binop (mode, AND, before, val,
24859 NULL_RTX, 1, OPTAB_LIB_WIDEN);
24860 after = expand_simple_unop (mode, NOT, x, after, 1);
24862 else
24864 after = expand_simple_binop (mode, code, before, val,
24865 after, 1, OPTAB_LIB_WIDEN);
24868 x = after;
24869 if (mask)
24871 x = expand_simple_binop (SImode, AND, after, mask,
24872 NULL_RTX, 1, OPTAB_LIB_WIDEN);
24873 x = rs6000_mask_atomic_subword (before, x, mask);
24875 else if (store_mode != mode)
24876 x = convert_modes (store_mode, mode, x, 1);
24878 cond = gen_reg_rtx (CCmode);
24879 emit_store_conditional (store_mode, cond, mem, x);
24881 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
24882 emit_unlikely_jump (x, label);
24884 rs6000_post_atomic_barrier (model);
24886 if (shift)
24888 /* QImode/HImode on machines without lbarx/lharx where we do a lwarx and
24889 then do the calculations in a SImode register. */
24890 if (orig_before)
24891 rs6000_finish_atomic_subword (orig_before, before, shift);
24892 if (orig_after)
24893 rs6000_finish_atomic_subword (orig_after, after, shift);
24895 else if (store_mode != mode)
24897 /* QImode/HImode on machines with lbarx/lharx where we do the native
24898 operation and then do the calculations in a SImode register. */
24899 if (orig_before)
24900 convert_move (orig_before, before, 1);
24901 if (orig_after)
24902 convert_move (orig_after, after, 1);
24904 else if (orig_after && after != orig_after)
24905 emit_move_insn (orig_after, after);
24908 /* Emit instructions to move SRC to DST. Called by splitters for
24909 multi-register moves. It will emit at most one instruction for
24910 each register that is accessed; that is, it won't emit li/lis pairs
24911 (or equivalent for 64-bit code). One of SRC or DST must be a hard
24912 register. */
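/* For example, a TImode move in 64-bit mode becomes two DImode moves,
   ordered so that a destination or base register that overlaps the
   source is written last.  */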
24914 void
24915 rs6000_split_multireg_move (rtx dst, rtx src)
24917 /* The register number of the first register being moved. */
24918 int reg;
24919 /* The mode that is to be moved. */
24920 machine_mode mode;
24921 /* The mode that the move is being done in, and its size. */
24922 machine_mode reg_mode;
24923 int reg_mode_size;
24924 /* The number of registers that will be moved. */
24925 int nregs;
24927 reg = REG_P (dst) ? REGNO (dst) : REGNO (src);
24928 mode = GET_MODE (dst);
24929 nregs = hard_regno_nregs[reg][mode];
24930 if (FP_REGNO_P (reg))
24931 reg_mode = DECIMAL_FLOAT_MODE_P (mode) ? DDmode :
24932 ((TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT) ? DFmode : SFmode);
24933 else if (ALTIVEC_REGNO_P (reg))
24934 reg_mode = V16QImode;
24935 else if (TARGET_E500_DOUBLE && FLOAT128_2REG_P (mode))
24936 reg_mode = DFmode;
24937 else
24938 reg_mode = word_mode;
24939 reg_mode_size = GET_MODE_SIZE (reg_mode);
24941 gcc_assert (reg_mode_size * nregs == GET_MODE_SIZE (mode));
24943 /* TDmode residing in FP registers is special, since the ISA requires that
24944 the lower-numbered word of a register pair is always the most significant
24945 word, even in little-endian mode. This does not match the usual subreg
24946 semantics, so we cannot use simplify_gen_subreg in those cases. Access
24947 the appropriate constituent registers "by hand" in little-endian mode.
24949 Note we do not need to check for destructive overlap here since TDmode
24950 can only reside in even/odd register pairs. */
24951 if (FP_REGNO_P (reg) && DECIMAL_FLOAT_MODE_P (mode) && !BYTES_BIG_ENDIAN)
24953 rtx p_src, p_dst;
24954 int i;
24956 for (i = 0; i < nregs; i++)
24958 if (REG_P (src) && FP_REGNO_P (REGNO (src)))
24959 p_src = gen_rtx_REG (reg_mode, REGNO (src) + nregs - 1 - i);
24960 else
24961 p_src = simplify_gen_subreg (reg_mode, src, mode,
24962 i * reg_mode_size);
24964 if (REG_P (dst) && FP_REGNO_P (REGNO (dst)))
24965 p_dst = gen_rtx_REG (reg_mode, REGNO (dst) + nregs - 1 - i);
24966 else
24967 p_dst = simplify_gen_subreg (reg_mode, dst, mode,
24968 i * reg_mode_size);
24970 emit_insn (gen_rtx_SET (p_dst, p_src));
24973 return;
24976 if (REG_P (src) && REG_P (dst) && (REGNO (src) < REGNO (dst)))
24978 /* Move register range backwards, if we might have destructive
24979 overlap. */
24980 int i;
24981 for (i = nregs - 1; i >= 0; i--)
24982 emit_insn (gen_rtx_SET (simplify_gen_subreg (reg_mode, dst, mode,
24983 i * reg_mode_size),
24984 simplify_gen_subreg (reg_mode, src, mode,
24985 i * reg_mode_size)));
24987 else
24989 int i;
24990 int j = -1;
24991 bool used_update = false;
24992 rtx restore_basereg = NULL_RTX;
24994 if (MEM_P (src) && INT_REGNO_P (reg))
24996 rtx breg;
24998 if (GET_CODE (XEXP (src, 0)) == PRE_INC
24999 || GET_CODE (XEXP (src, 0)) == PRE_DEC)
25001 rtx delta_rtx;
25002 breg = XEXP (XEXP (src, 0), 0);
25003 delta_rtx = (GET_CODE (XEXP (src, 0)) == PRE_INC
25004 ? GEN_INT (GET_MODE_SIZE (GET_MODE (src)))
25005 : GEN_INT (-GET_MODE_SIZE (GET_MODE (src))));
25006 emit_insn (gen_add3_insn (breg, breg, delta_rtx));
25007 src = replace_equiv_address (src, breg);
25009 else if (! rs6000_offsettable_memref_p (src, reg_mode))
25011 if (GET_CODE (XEXP (src, 0)) == PRE_MODIFY)
25013 rtx basereg = XEXP (XEXP (src, 0), 0);
25014 if (TARGET_UPDATE)
25016 rtx ndst = simplify_gen_subreg (reg_mode, dst, mode, 0);
25017 emit_insn (gen_rtx_SET (ndst,
25018 gen_rtx_MEM (reg_mode,
25019 XEXP (src, 0))));
25020 used_update = true;
25022 else
25023 emit_insn (gen_rtx_SET (basereg,
25024 XEXP (XEXP (src, 0), 1)));
25025 src = replace_equiv_address (src, basereg);
25027 else
25029 rtx basereg = gen_rtx_REG (Pmode, reg);
25030 emit_insn (gen_rtx_SET (basereg, XEXP (src, 0)));
25031 src = replace_equiv_address (src, basereg);
25035 breg = XEXP (src, 0);
25036 if (GET_CODE (breg) == PLUS || GET_CODE (breg) == LO_SUM)
25037 breg = XEXP (breg, 0);
25039 /* If the base register we are using to address memory is
25040 also a destination reg, then change that register last. */
25041 if (REG_P (breg)
25042 && REGNO (breg) >= REGNO (dst)
25043 && REGNO (breg) < REGNO (dst) + nregs)
25044 j = REGNO (breg) - REGNO (dst);
25046 else if (MEM_P (dst) && INT_REGNO_P (reg))
25048 rtx breg;
25050 if (GET_CODE (XEXP (dst, 0)) == PRE_INC
25051 || GET_CODE (XEXP (dst, 0)) == PRE_DEC)
25053 rtx delta_rtx;
25054 breg = XEXP (XEXP (dst, 0), 0);
25055 delta_rtx = (GET_CODE (XEXP (dst, 0)) == PRE_INC
25056 ? GEN_INT (GET_MODE_SIZE (GET_MODE (dst)))
25057 : GEN_INT (-GET_MODE_SIZE (GET_MODE (dst))));
25059 /* We have to update the breg before doing the store.
25060 Use store with update, if available. */
25062 if (TARGET_UPDATE)
25064 rtx nsrc = simplify_gen_subreg (reg_mode, src, mode, 0);
25065 emit_insn (TARGET_32BIT
25066 ? (TARGET_POWERPC64
25067 ? gen_movdi_si_update (breg, breg, delta_rtx, nsrc)
25068 : gen_movsi_update (breg, breg, delta_rtx, nsrc))
25069 : gen_movdi_di_update (breg, breg, delta_rtx, nsrc));
25070 used_update = true;
25072 else
25073 emit_insn (gen_add3_insn (breg, breg, delta_rtx));
25074 dst = replace_equiv_address (dst, breg);
25076 else if (!rs6000_offsettable_memref_p (dst, reg_mode)
25077 && GET_CODE (XEXP (dst, 0)) != LO_SUM)
25079 if (GET_CODE (XEXP (dst, 0)) == PRE_MODIFY)
25081 rtx basereg = XEXP (XEXP (dst, 0), 0);
25082 if (TARGET_UPDATE)
25084 rtx nsrc = simplify_gen_subreg (reg_mode, src, mode, 0);
25085 emit_insn (gen_rtx_SET (gen_rtx_MEM (reg_mode,
25086 XEXP (dst, 0)),
25087 nsrc));
25088 used_update = true;
25090 else
25091 emit_insn (gen_rtx_SET (basereg,
25092 XEXP (XEXP (dst, 0), 1)));
25093 dst = replace_equiv_address (dst, basereg);
25095 else
25097 rtx basereg = XEXP (XEXP (dst, 0), 0);
25098 rtx offsetreg = XEXP (XEXP (dst, 0), 1);
25099 gcc_assert (GET_CODE (XEXP (dst, 0)) == PLUS
25100 && REG_P (basereg)
25101 && REG_P (offsetreg)
25102 && REGNO (basereg) != REGNO (offsetreg));
25103 if (REGNO (basereg) == 0)
25105 rtx tmp = offsetreg;
25106 offsetreg = basereg;
25107 basereg = tmp;
25109 emit_insn (gen_add3_insn (basereg, basereg, offsetreg));
25110 restore_basereg = gen_sub3_insn (basereg, basereg, offsetreg);
25111 dst = replace_equiv_address (dst, basereg);
25114 else if (GET_CODE (XEXP (dst, 0)) != LO_SUM)
25115 gcc_assert (rs6000_offsettable_memref_p (dst, reg_mode));
25118 for (i = 0; i < nregs; i++)
25120 /* Calculate index to next subword. */
25121 ++j;
25122 if (j == nregs)
25123 j = 0;
25125 /* If the compiler already emitted the move of the first word by
25126 store with update, there is no need to do anything. */
25127 if (j == 0 && used_update)
25128 continue;
25130 emit_insn (gen_rtx_SET (simplify_gen_subreg (reg_mode, dst, mode,
25131 j * reg_mode_size),
25132 simplify_gen_subreg (reg_mode, src, mode,
25133 j * reg_mode_size)));
25135 if (restore_basereg != NULL_RTX)
25136 emit_insn (restore_basereg);
25141 /* This page contains routines that are used to determine what the
25142 function prologue and epilogue code will do and write them out. */
25144 static inline bool
25145 save_reg_p (int r)
25147 return !call_used_regs[r] && df_regs_ever_live_p (r);
25150 /* Determine whether the gp REG is really used. */
25152 static bool
25153 rs6000_reg_live_or_pic_offset_p (int reg)
25155 /* We need to mark the PIC offset register live for the same conditions
25156 as it is set up, or otherwise it won't be saved before we clobber it. */
25158 if (reg == RS6000_PIC_OFFSET_TABLE_REGNUM && !TARGET_SINGLE_PIC_BASE)
25160 if (TARGET_TOC && TARGET_MINIMAL_TOC
25161 && (crtl->calls_eh_return
25162 || df_regs_ever_live_p (reg)
25163 || get_pool_size ()))
25164 return true;
25166 if ((DEFAULT_ABI == ABI_V4 || DEFAULT_ABI == ABI_DARWIN)
25167 && flag_pic)
25168 return true;
25171 /* If the function calls eh_return, claim used all the registers that would
25172 be checked for liveness otherwise. */
25174 return ((crtl->calls_eh_return || df_regs_ever_live_p (reg))
25175 && !call_used_regs[reg]);
25178 /* Return the first fixed-point register that is required to be
25179 saved. 32 if none. */
25182 first_reg_to_save (void)
25184 int first_reg;
25186 /* Find lowest numbered live register. */
25187 for (first_reg = 13; first_reg <= 31; first_reg++)
25188 if (save_reg_p (first_reg))
25189 break;
25191 if (first_reg > RS6000_PIC_OFFSET_TABLE_REGNUM
25192 && ((DEFAULT_ABI == ABI_V4 && flag_pic != 0)
25193 || (DEFAULT_ABI == ABI_DARWIN && flag_pic)
25194 || (TARGET_TOC && TARGET_MINIMAL_TOC))
25195 && rs6000_reg_live_or_pic_offset_p (RS6000_PIC_OFFSET_TABLE_REGNUM))
25196 first_reg = RS6000_PIC_OFFSET_TABLE_REGNUM;
25198 #if TARGET_MACHO
25199 if (flag_pic
25200 && crtl->uses_pic_offset_table
25201 && first_reg > RS6000_PIC_OFFSET_TABLE_REGNUM)
25202 return RS6000_PIC_OFFSET_TABLE_REGNUM;
25203 #endif
25205 return first_reg;
25208 /* Similar, for FP regs. */
25211 first_fp_reg_to_save (void)
25213 int first_reg;
25215 /* Find lowest numbered live register. */
25216 for (first_reg = 14 + 32; first_reg <= 63; first_reg++)
25217 if (save_reg_p (first_reg))
25218 break;
25220 return first_reg;
25223 /* Similar, for AltiVec regs. */
25225 static int
25226 first_altivec_reg_to_save (void)
25228 int i;
25230 /* Stack frame remains as is unless we are in AltiVec ABI. */
25231 if (! TARGET_ALTIVEC_ABI)
25232 return LAST_ALTIVEC_REGNO + 1;
25234 /* On Darwin, the unwind routines are compiled without
25235 TARGET_ALTIVEC, and use save_world to save/restore the
25236 altivec registers when necessary. */
25237 if (DEFAULT_ABI == ABI_DARWIN && crtl->calls_eh_return
25238 && ! TARGET_ALTIVEC)
25239 return FIRST_ALTIVEC_REGNO + 20;
25241 /* Find lowest numbered live register. */
25242 for (i = FIRST_ALTIVEC_REGNO + 20; i <= LAST_ALTIVEC_REGNO; ++i)
25243 if (save_reg_p (i))
25244 break;
25246 return i;
25249 /* Return a 32-bit mask of the AltiVec registers we need to set in
25250 VRSAVE. Bit n of the return value is 1 if Vn is live. The MSB in
25251 the 32-bit word is 0. */
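/* Given that numbering, if only V20 is live the result has only bit 20
   (counting from the MSB) set, i.e. 1 << (31 - 20) == 0x800.  */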
25253 static unsigned int
25254 compute_vrsave_mask (void)
25256 unsigned int i, mask = 0;
25258 /* On Darwin, the unwind routines are compiled without
25259 TARGET_ALTIVEC, and use save_world to save/restore the
25260 call-saved altivec registers when necessary. */
25261 if (DEFAULT_ABI == ABI_DARWIN && crtl->calls_eh_return
25262 && ! TARGET_ALTIVEC)
25263 mask |= 0xFFF;
25265 /* First, find out if we use _any_ altivec registers. */
25266 for (i = FIRST_ALTIVEC_REGNO; i <= LAST_ALTIVEC_REGNO; ++i)
25267 if (df_regs_ever_live_p (i))
25268 mask |= ALTIVEC_REG_BIT (i);
25270 if (mask == 0)
25271 return mask;
25273 /* Next, remove the argument registers from the set. These must
25274 be in the VRSAVE mask set by the caller, so we don't need to add
25275 them in again. More importantly, the mask we compute here is
25276 used to generate CLOBBERs in the set_vrsave insn, and we do not
25277 wish the argument registers to die. */
25278 for (i = ALTIVEC_ARG_MIN_REG; i < (unsigned) crtl->args.info.vregno; i++)
25279 mask &= ~ALTIVEC_REG_BIT (i);
25281 /* Similarly, remove the return value from the set. */
25283 bool yes = false;
25284 diddle_return_value (is_altivec_return_reg, &yes);
25285 if (yes)
25286 mask &= ~ALTIVEC_REG_BIT (ALTIVEC_ARG_RETURN);
25289 return mask;
25292 /* For a very restricted set of circumstances, we can cut down the
25293 size of prologues/epilogues by calling our own save/restore-the-world
25294 routines. */
25296 static void
25297 compute_save_world_info (rs6000_stack_t *info)
25299 info->world_save_p = 1;
25300 info->world_save_p
25301 = (WORLD_SAVE_P (info)
25302 && DEFAULT_ABI == ABI_DARWIN
25303 && !cfun->has_nonlocal_label
25304 && info->first_fp_reg_save == FIRST_SAVED_FP_REGNO
25305 && info->first_gp_reg_save == FIRST_SAVED_GP_REGNO
25306 && info->first_altivec_reg_save == FIRST_SAVED_ALTIVEC_REGNO
25307 && info->cr_save_p);
25309 /* This will not work in conjunction with sibcalls. Make sure there
25310 are none. (This check is expensive, but seldom executed.) */
25311 if (WORLD_SAVE_P (info))
25313 rtx_insn *insn;
25314 for (insn = get_last_insn_anywhere (); insn; insn = PREV_INSN (insn))
25315 if (CALL_P (insn) && SIBLING_CALL_P (insn))
25317 info->world_save_p = 0;
25318 break;
25322 if (WORLD_SAVE_P (info))
25324 /* Even if we're not touching VRsave, make sure there's room on the
25325 stack for it, if it looks like we're calling SAVE_WORLD, which
25326 will attempt to save it. */
25327 info->vrsave_size = 4;
25329 /* If we are going to save the world, we need to save the link register too. */
25330 info->lr_save_p = 1;
25332 /* "Save" the VRsave register too if we're saving the world. */
25333 if (info->vrsave_mask == 0)
25334 info->vrsave_mask = compute_vrsave_mask ();
25336 /* Because the Darwin register save/restore routines only handle
25337 F14 .. F31 and V20 .. V31 as per the ABI, perform a consistency
25338 check. */
25339 gcc_assert (info->first_fp_reg_save >= FIRST_SAVED_FP_REGNO
25340 && (info->first_altivec_reg_save
25341 >= FIRST_SAVED_ALTIVEC_REGNO));
25344 return;
25348 static void
25349 is_altivec_return_reg (rtx reg, void *xyes)
25351 bool *yes = (bool *) xyes;
25352 if (REGNO (reg) == ALTIVEC_ARG_RETURN)
25353 *yes = true;
25357 /* Return whether REG is a global user reg or has been specifed by
25358 -ffixed-REG. We should not restore these, and so cannot use
25359 lmw or out-of-line restore functions if there are any. We also
25360 can't save them (well, emit frame notes for them), because frame
25361 unwinding during exception handling will restore saved registers. */
25363 static bool
25364 fixed_reg_p (int reg)
25366 /* Ignore fixed_regs[RS6000_PIC_OFFSET_TABLE_REGNUM] when the
25367 backend sets it, overriding anything the user might have given. */
25368 if (reg == RS6000_PIC_OFFSET_TABLE_REGNUM
25369 && ((DEFAULT_ABI == ABI_V4 && flag_pic)
25370 || (DEFAULT_ABI == ABI_DARWIN && flag_pic)
25371 || (TARGET_TOC && TARGET_MINIMAL_TOC)))
25372 return false;
25374 return fixed_regs[reg];
25377 /* Determine the strategy for savings/restoring registers. */
25379 enum {
25380 SAVE_MULTIPLE = 0x1,
25381 SAVE_INLINE_GPRS = 0x2,
25382 SAVE_INLINE_FPRS = 0x4,
25383 SAVE_NOINLINE_GPRS_SAVES_LR = 0x8,
25384 SAVE_NOINLINE_FPRS_SAVES_LR = 0x10,
25385 SAVE_INLINE_VRS = 0x20,
25386 REST_MULTIPLE = 0x100,
25387 REST_INLINE_GPRS = 0x200,
25388 REST_INLINE_FPRS = 0x400,
25389 REST_NOINLINE_FPRS_DOESNT_RESTORE_LR = 0x800,
25390 REST_INLINE_VRS = 0x1000
25391 };
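/* The flags above form a bitmask; rs6000_savres_strategy returns the
   union of the SAVE_* and REST_* choices made for the current
   function.  */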
25393 static int
25394 rs6000_savres_strategy (rs6000_stack_t *info,
25395 bool using_static_chain_p)
25397 int strategy = 0;
25399 /* Select between in-line and out-of-line save and restore of regs.
25400 First, all the obvious cases where we don't use out-of-line. */
25401 if (crtl->calls_eh_return
25402 || cfun->machine->ra_need_lr)
25403 strategy |= (SAVE_INLINE_FPRS | REST_INLINE_FPRS
25404 | SAVE_INLINE_GPRS | REST_INLINE_GPRS
25405 | SAVE_INLINE_VRS | REST_INLINE_VRS);
25407 if (info->first_gp_reg_save == 32)
25408 strategy |= SAVE_INLINE_GPRS | REST_INLINE_GPRS;
25410 if (info->first_fp_reg_save == 64
25411 /* The out-of-line FP routines use double-precision stores;
25412 we can't use those routines if we don't have such stores. */
25413 || (TARGET_HARD_FLOAT && !TARGET_DOUBLE_FLOAT))
25414 strategy |= SAVE_INLINE_FPRS | REST_INLINE_FPRS;
25416 if (info->first_altivec_reg_save == LAST_ALTIVEC_REGNO + 1)
25417 strategy |= SAVE_INLINE_VRS | REST_INLINE_VRS;
25419 /* Define cutoff for using out-of-line functions to save registers. */
25420 if (DEFAULT_ABI == ABI_V4 || TARGET_ELF)
25422 if (!optimize_size)
25424 strategy |= SAVE_INLINE_FPRS | REST_INLINE_FPRS;
25425 strategy |= SAVE_INLINE_GPRS | REST_INLINE_GPRS;
25426 strategy |= SAVE_INLINE_VRS | REST_INLINE_VRS;
25428 else
25430 /* Prefer out-of-line restore if it will exit. */
25431 if (info->first_fp_reg_save > 61)
25432 strategy |= SAVE_INLINE_FPRS;
25433 if (info->first_gp_reg_save > 29)
25435 if (info->first_fp_reg_save == 64)
25436 strategy |= SAVE_INLINE_GPRS;
25437 else
25438 strategy |= SAVE_INLINE_GPRS | REST_INLINE_GPRS;
25440 if (info->first_altivec_reg_save == LAST_ALTIVEC_REGNO)
25441 strategy |= SAVE_INLINE_VRS | REST_INLINE_VRS;
25444 else if (DEFAULT_ABI == ABI_DARWIN)
25446 if (info->first_fp_reg_save > 60)
25447 strategy |= SAVE_INLINE_FPRS | REST_INLINE_FPRS;
25448 if (info->first_gp_reg_save > 29)
25449 strategy |= SAVE_INLINE_GPRS | REST_INLINE_GPRS;
25450 strategy |= SAVE_INLINE_VRS | REST_INLINE_VRS;
25452 else
25454 gcc_checking_assert (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2);
25455 if ((flag_shrink_wrap_separate && optimize_function_for_speed_p (cfun))
25456 || info->first_fp_reg_save > 61)
25457 strategy |= SAVE_INLINE_FPRS | REST_INLINE_FPRS;
25458 strategy |= SAVE_INLINE_GPRS | REST_INLINE_GPRS;
25459 strategy |= SAVE_INLINE_VRS | REST_INLINE_VRS;
25462 /* Don't bother to try to save things out-of-line if r11 is occupied
25463 by the static chain. It would require too much fiddling and the
25464 static chain is rarely used anyway. FPRs are saved w.r.t. the stack
25465 pointer on Darwin, and AIX uses r1 or r12. */
25466 if (using_static_chain_p
25467 && (DEFAULT_ABI == ABI_V4 || DEFAULT_ABI == ABI_DARWIN))
25468 strategy |= ((DEFAULT_ABI == ABI_DARWIN ? 0 : SAVE_INLINE_FPRS)
25469 | SAVE_INLINE_GPRS
25470 | SAVE_INLINE_VRS);
25472 /* Saving CR interferes with the exit routines used on the SPE, so
25473 just punt here. */
25474 if (TARGET_SPE_ABI
25475 && info->spe_64bit_regs_used
25476 && info->cr_save_p)
25477 strategy |= REST_INLINE_GPRS;
25479 /* We can only use the out-of-line routines to restore fprs if we've
25480 saved all the registers from first_fp_reg_save in the prologue.
25481 Otherwise, we risk loading garbage. Of course, if we have saved
25482 out-of-line then we know we haven't skipped any fprs. */
25483 if ((strategy & SAVE_INLINE_FPRS)
25484 && !(strategy & REST_INLINE_FPRS))
25486 int i;
25488 for (i = info->first_fp_reg_save; i < 64; i++)
25489 if (fixed_regs[i] || !save_reg_p (i))
25491 strategy |= REST_INLINE_FPRS;
25492 break;
25496 /* Similarly, for altivec regs. */
25497 if ((strategy & SAVE_INLINE_VRS)
25498 && !(strategy & REST_INLINE_VRS))
25500 int i;
25502 for (i = info->first_altivec_reg_save; i < LAST_ALTIVEC_REGNO + 1; i++)
25503 if (fixed_regs[i] || !save_reg_p (i))
25505 strategy |= REST_INLINE_VRS;
25506 break;
25510 /* info->lr_save_p isn't yet set if the only reason lr needs to be
25511 saved is an out-of-line save or restore. Set up the value for
25512 the next test (excluding out-of-line gprs). */
25513 bool lr_save_p = (info->lr_save_p
25514 || !(strategy & SAVE_INLINE_FPRS)
25515 || !(strategy & SAVE_INLINE_VRS)
25516 || !(strategy & REST_INLINE_FPRS)
25517 || !(strategy & REST_INLINE_VRS));
25519 if (TARGET_MULTIPLE
25520 && !TARGET_POWERPC64
25521 && !(TARGET_SPE_ABI && info->spe_64bit_regs_used)
25522 && info->first_gp_reg_save < 31
25523 && !(flag_shrink_wrap
25524 && flag_shrink_wrap_separate
25525 && optimize_function_for_speed_p (cfun)))
25527 /* Prefer store multiple for saves over out-of-line routines,
25528 since the store-multiple instruction will always be smaller. */
25529 strategy |= SAVE_INLINE_GPRS | SAVE_MULTIPLE;
25531 /* The situation is more complicated with load multiple. We'd
25532 prefer to use the out-of-line routines for restores, since the
25533 "exit" out-of-line routines can handle the restore of LR and the
25534 frame teardown. However, it doesn't make sense to use the
25535 out-of-line routine if that is the only reason we'd need to save
25536 LR, and we can't use the "exit" out-of-line gpr restore if we
25537 have saved some fprs; in those cases it is advantageous to use
25538 load multiple when available. */
25539 if (info->first_fp_reg_save != 64 || !lr_save_p)
25540 strategy |= REST_INLINE_GPRS | REST_MULTIPLE;
25543 /* Using the "exit" out-of-line routine does not improve code size
25544 if using it would require lr to be saved and if only saving one
25545 or two gprs. */
25546 else if (!lr_save_p && info->first_gp_reg_save > 29)
25547 strategy |= SAVE_INLINE_GPRS | REST_INLINE_GPRS;
25549 /* We can only use load multiple or the out-of-line routines to
25550 restore gprs if we've saved all the registers from
25551 first_gp_reg_save. Otherwise, we risk loading garbage.
25552 Of course, if we have saved out-of-line or used stmw then we know
25553 we haven't skipped any gprs. */
25554 if ((strategy & (SAVE_INLINE_GPRS | SAVE_MULTIPLE)) == SAVE_INLINE_GPRS
25555 && (strategy & (REST_INLINE_GPRS | REST_MULTIPLE)) != REST_INLINE_GPRS)
25557 int i;
25559 for (i = info->first_gp_reg_save; i < 32; i++)
25560 if (fixed_reg_p (i) || !save_reg_p (i))
25562 strategy |= REST_INLINE_GPRS;
25563 strategy &= ~REST_MULTIPLE;
25564 break;
25568 if (TARGET_ELF && TARGET_64BIT)
25570 if (!(strategy & SAVE_INLINE_FPRS))
25571 strategy |= SAVE_NOINLINE_FPRS_SAVES_LR;
25572 else if (!(strategy & SAVE_INLINE_GPRS)
25573 && info->first_fp_reg_save == 64)
25574 strategy |= SAVE_NOINLINE_GPRS_SAVES_LR;
25576 else if (TARGET_AIX && !(strategy & REST_INLINE_FPRS))
25577 strategy |= REST_NOINLINE_FPRS_DOESNT_RESTORE_LR;
25579 if (TARGET_MACHO && !(strategy & SAVE_INLINE_FPRS))
25580 strategy |= SAVE_NOINLINE_FPRS_SAVES_LR;
25582 return strategy;
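/* Worked example (illustrative): for a function that calls
   __builtin_eh_return, the very first test above sets all six
   *_INLINE_* bits, so the returned strategy forces every register
   class to be saved and restored inline, regardless of ABI, -Os,
   or TARGET_MULTIPLE.  */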
25585 /* Calculate the stack information for the current function. This is
25586 complicated by having two separate calling sequences, the AIX calling
25587 sequence and the V.4 calling sequence.
25589 AIX (and Darwin/Mac OS X) stack frames look like:
25590 32-bit 64-bit
25591 SP----> +---------------------------------------+
25592 | back chain to caller | 0 0
25593 +---------------------------------------+
25594 | saved CR | 4 8 (8-11)
25595 +---------------------------------------+
25596 | saved LR | 8 16
25597 +---------------------------------------+
25598 | reserved for compilers | 12 24
25599 +---------------------------------------+
25600 | reserved for binders | 16 32
25601 +---------------------------------------+
25602 | saved TOC pointer | 20 40
25603 +---------------------------------------+
25604 | Parameter save area (P) | 24 48
25605 +---------------------------------------+
25606 | Alloca space (A) | 24+P etc.
25607 +---------------------------------------+
25608 | Local variable space (L) | 24+P+A
25609 +---------------------------------------+
25610 | Float/int conversion temporary (X) | 24+P+A+L
25611 +---------------------------------------+
25612 | Save area for AltiVec registers (W) | 24+P+A+L+X
25613 +---------------------------------------+
25614 | AltiVec alignment padding (Y) | 24+P+A+L+X+W
25615 +---------------------------------------+
25616 | Save area for VRSAVE register (Z) | 24+P+A+L+X+W+Y
25617 +---------------------------------------+
25618 | Save area for GP registers (G) | 24+P+A+L+X+W+Y+Z
25619 +---------------------------------------+
25620 | Save area for FP registers (F) | 24+P+A+L+X+W+Y+Z+G
25621 +---------------------------------------+
25622 old SP->| back chain to caller's caller |
25623 +---------------------------------------+
25625 The required alignment for AIX configurations is two words (i.e., 8
25626 or 16 bytes).
25628 The ELFv2 ABI is a variant of the AIX ABI. Stack frames look like:
25630 SP----> +---------------------------------------+
25631 | Back chain to caller | 0
25632 +---------------------------------------+
25633 | Save area for CR | 8
25634 +---------------------------------------+
25635 | Saved LR | 16
25636 +---------------------------------------+
25637 | Saved TOC pointer | 24
25638 +---------------------------------------+
25639 | Parameter save area (P) | 32
25640 +---------------------------------------+
25641 | Alloca space (A) | 32+P
25642 +---------------------------------------+
25643 | Local variable space (L) | 32+P+A
25644 +---------------------------------------+
25645 | Save area for AltiVec registers (W) | 32+P+A+L
25646 +---------------------------------------+
25647 | AltiVec alignment padding (Y) | 32+P+A+L+W
25648 +---------------------------------------+
25649 | Save area for GP registers (G) | 32+P+A+L+W+Y
25650 +---------------------------------------+
25651 | Save area for FP registers (F) | 32+P+A+L+W+Y+G
25652 +---------------------------------------+
25653 old SP->| back chain to caller's caller | 32+P+A+L+W+Y+G+F
25654 +---------------------------------------+
25657 V.4 stack frames look like:
25659 SP----> +---------------------------------------+
25660 | back chain to caller | 0
25661 +---------------------------------------+
25662 | caller's saved LR | 4
25663 +---------------------------------------+
25664 | Parameter save area (P) | 8
25665 +---------------------------------------+
25666 | Alloca space (A) | 8+P
25667 +---------------------------------------+
25668 | Varargs save area (V) | 8+P+A
25669 +---------------------------------------+
25670 | Local variable space (L) | 8+P+A+V
25671 +---------------------------------------+
25672 | Float/int conversion temporary (X) | 8+P+A+V+L
25673 +---------------------------------------+
25674 | Save area for AltiVec registers (W) | 8+P+A+V+L+X
25675 +---------------------------------------+
25676 | AltiVec alignment padding (Y) | 8+P+A+V+L+X+W
25677 +---------------------------------------+
25678 | Save area for VRSAVE register (Z) | 8+P+A+V+L+X+W+Y
25679 +---------------------------------------+
25680 | SPE: area for 64-bit GP registers |
25681 +---------------------------------------+
25682 | SPE alignment padding |
25683 +---------------------------------------+
25684 | saved CR (C) | 8+P+A+V+L+X+W+Y+Z
25685 +---------------------------------------+
25686 | Save area for GP registers (G) | 8+P+A+V+L+X+W+Y+Z+C
25687 +---------------------------------------+
25688 | Save area for FP registers (F) | 8+P+A+V+L+X+W+Y+Z+C+G
25689 +---------------------------------------+
25690 old SP->| back chain to caller's caller |
25691 +---------------------------------------+
25693 The required alignment for V.4 is 16 bytes, or 8 bytes if -meabi is
25694 given. (But note below and in sysv4.h that we require only 8 and
25695 may round up the size of our stack frame anyway. The historical
25696 reason is early versions of powerpc-linux which didn't properly
25697 align the stack at program startup. A happy side-effect is that
25698 -mno-eabi libraries can be used with -meabi programs.)
25700 The EABI configuration defaults to the V.4 layout. However,
25701 the stack alignment requirements may differ. If -mno-eabi is not
25702 given, the required stack alignment is 8 bytes; if -mno-eabi is
25703 given, the required alignment is 16 bytes. (But see V.4 comment
25704 above.) */
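/* A small worked example of the ELFv2 diagram above (hypothetical
   numbers, for illustration only): with no parameter save area
   (P = 0), no alloca (A = 0), 16 bytes of locals (L = 16) and no
   vector state (W = Y = 0), the GP save area G starts at offset
   32 + 0 + 0 + 16 + 0 + 0 = 48 from the incoming SP, and a function
   saving r30/r31 needs 2 * 8 = 16 bytes there.  */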
25706 #ifndef ABI_STACK_BOUNDARY
25707 #define ABI_STACK_BOUNDARY STACK_BOUNDARY
25708 #endif
25710 static rs6000_stack_t *
25711 rs6000_stack_info (void)
25713 /* We should never be called for thunks; we are not set up for that. */
25714 gcc_assert (!cfun->is_thunk);
25716 rs6000_stack_t *info = &stack_info;
25717 int reg_size = TARGET_32BIT ? 4 : 8;
25718 int ehrd_size;
25719 int ehcr_size;
25720 int save_align;
25721 int first_gp;
25722 HOST_WIDE_INT non_fixed_size;
25723 bool using_static_chain_p;
25725 if (reload_completed && info->reload_completed)
25726 return info;
25728 memset (info, 0, sizeof (*info));
25729 info->reload_completed = reload_completed;
25731 if (TARGET_SPE)
25733 /* Cache value so we don't rescan instruction chain over and over. */
25734 if (cfun->machine->spe_insn_chain_scanned_p == 0)
25735 cfun->machine->spe_insn_chain_scanned_p
25736 = spe_func_has_64bit_regs_p () + 1;
25737 info->spe_64bit_regs_used = cfun->machine->spe_insn_chain_scanned_p - 1;
25740 /* Select which calling sequence. */
25741 info->abi = DEFAULT_ABI;
25743 /* Calculate which registers need to be saved & save area size. */
25744 info->first_gp_reg_save = first_reg_to_save ();
25745 /* Assume that we will have to save RS6000_PIC_OFFSET_TABLE_REGNUM,
25746 even if it currently looks like we won't. Reload may need it to
25747 get at a constant; if so, it will have already created a constant
25748 pool entry for it. */
25749 if (((TARGET_TOC && TARGET_MINIMAL_TOC)
25750 || (flag_pic == 1 && DEFAULT_ABI == ABI_V4)
25751 || (flag_pic && DEFAULT_ABI == ABI_DARWIN))
25752 && crtl->uses_const_pool
25753 && info->first_gp_reg_save > RS6000_PIC_OFFSET_TABLE_REGNUM)
25754 first_gp = RS6000_PIC_OFFSET_TABLE_REGNUM;
25755 else
25756 first_gp = info->first_gp_reg_save;
25758 info->gp_size = reg_size * (32 - first_gp);
25760 /* For the SPE, we have an additional upper 32-bits on each GPR.
25761 Ideally we should save the entire 64-bits only when the upper
25762 half is used in SIMD instructions. Since we only record
25763 registers live (not the size they are used in), this proves
25764 difficult because we'd have to traverse the instruction chain at
25765 the right time, taking reload into account. This is a real pain,
25766 so we opt to always save the GPRs in 64-bits if even one register
25767 is used in 64-bits. Otherwise, all the registers in the frame
25768 get saved in 32-bits.
25770 So, when we save all GPRs (except the SP) in 64-bits, the
25771 traditional GP save area will be empty. */
25772 if (TARGET_SPE_ABI && info->spe_64bit_regs_used != 0)
25773 info->gp_size = 0;
25775 info->first_fp_reg_save = first_fp_reg_to_save ();
25776 info->fp_size = 8 * (64 - info->first_fp_reg_save);
25778 info->first_altivec_reg_save = first_altivec_reg_to_save ();
25779 info->altivec_size = 16 * (LAST_ALTIVEC_REGNO + 1
25780 - info->first_altivec_reg_save);
25782 /* Does this function call anything? */
25783 info->calls_p = (!crtl->is_leaf || cfun->machine->ra_needs_full_frame);
25785 /* Determine if we need to save the condition code registers. */
25786 if (save_reg_p (CR2_REGNO)
25787 || save_reg_p (CR3_REGNO)
25788 || save_reg_p (CR4_REGNO))
25790 info->cr_save_p = 1;
25791 if (DEFAULT_ABI == ABI_V4)
25792 info->cr_size = reg_size;
25795 /* If the current function calls __builtin_eh_return, then we need
25796 to allocate stack space for registers that will hold data for
25797 the exception handler. */
25798 if (crtl->calls_eh_return)
25800 unsigned int i;
25801 for (i = 0; EH_RETURN_DATA_REGNO (i) != INVALID_REGNUM; ++i)
25802 continue;
25804 /* SPE saves EH registers in 64-bits. */
25805 ehrd_size = i * (TARGET_SPE_ABI && info->spe_64bit_regs_used != 0
25806 ? UNITS_PER_SPE_WORD : UNITS_PER_WORD);
25808 else
25809 ehrd_size = 0;
25811 /* In the ELFv2 ABI, we also need to allocate space for separate
25812 CR field save areas if the function calls __builtin_eh_return. */
25813 if (DEFAULT_ABI == ABI_ELFv2 && crtl->calls_eh_return)
25815 /* This hard-codes that we have three call-saved CR fields. */
25816 ehcr_size = 3 * reg_size;
25817 /* We do *not* use the regular CR save mechanism. */
25818 info->cr_save_p = 0;
25820 else
25821 ehcr_size = 0;
25823 /* Determine various sizes. */
25824 info->reg_size = reg_size;
25825 info->fixed_size = RS6000_SAVE_AREA;
25826 info->vars_size = RS6000_ALIGN (get_frame_size (), 8);
25827 info->parm_size = RS6000_ALIGN (crtl->outgoing_args_size,
25828 TARGET_ALTIVEC ? 16 : 8);
25829 if (FRAME_GROWS_DOWNWARD)
25830 info->vars_size
25831 += RS6000_ALIGN (info->fixed_size + info->vars_size + info->parm_size,
25832 ABI_STACK_BOUNDARY / BITS_PER_UNIT)
25833 - (info->fixed_size + info->vars_size + info->parm_size);
25835 if (TARGET_SPE_ABI && info->spe_64bit_regs_used != 0)
25836 info->spe_gp_size = 8 * (32 - first_gp);
25838 if (TARGET_ALTIVEC_ABI)
25839 info->vrsave_mask = compute_vrsave_mask ();
25841 if (TARGET_ALTIVEC_VRSAVE && info->vrsave_mask)
25842 info->vrsave_size = 4;
25844 compute_save_world_info (info);
25846 /* Calculate the offsets. */
25847 switch (DEFAULT_ABI)
25849 case ABI_NONE:
25850 default:
25851 gcc_unreachable ();
25853 case ABI_AIX:
25854 case ABI_ELFv2:
25855 case ABI_DARWIN:
25856 info->fp_save_offset = -info->fp_size;
25857 info->gp_save_offset = info->fp_save_offset - info->gp_size;
25859 if (TARGET_ALTIVEC_ABI)
25861 info->vrsave_save_offset = info->gp_save_offset - info->vrsave_size;
25863 /* Align stack so vector save area is on a quadword boundary.
25864 The padding goes above the vectors. */
25865 if (info->altivec_size != 0)
25866 info->altivec_padding_size = info->vrsave_save_offset & 0xF;
25868 info->altivec_save_offset = info->vrsave_save_offset
25869 - info->altivec_padding_size
25870 - info->altivec_size;
25871 gcc_assert (info->altivec_size == 0
25872 || info->altivec_save_offset % 16 == 0);
25874 /* Adjust for AltiVec case. */
25875 info->ehrd_offset = info->altivec_save_offset - ehrd_size;
25877 else
25878 info->ehrd_offset = info->gp_save_offset - ehrd_size;
25880 info->ehcr_offset = info->ehrd_offset - ehcr_size;
25881 info->cr_save_offset = reg_size; /* first word when 64-bit. */
25882 info->lr_save_offset = 2*reg_size;
25883 break;
25885 case ABI_V4:
25886 info->fp_save_offset = -info->fp_size;
25887 info->gp_save_offset = info->fp_save_offset - info->gp_size;
25888 info->cr_save_offset = info->gp_save_offset - info->cr_size;
25890 if (TARGET_SPE_ABI && info->spe_64bit_regs_used != 0)
25892 /* Align stack so SPE GPR save area is aligned on a
25893 double-word boundary. */
25894 if (info->spe_gp_size != 0 && info->cr_save_offset != 0)
25895 info->spe_padding_size = 8 - (-info->cr_save_offset % 8);
25896 else
25897 info->spe_padding_size = 0;
25899 info->spe_gp_save_offset = info->cr_save_offset
25900 - info->spe_padding_size
25901 - info->spe_gp_size;
25903 /* Adjust for SPE case. */
25904 info->ehrd_offset = info->spe_gp_save_offset;
25906 else if (TARGET_ALTIVEC_ABI)
25908 info->vrsave_save_offset = info->cr_save_offset - info->vrsave_size;
25910 /* Align stack so vector save area is on a quadword boundary. */
25911 if (info->altivec_size != 0)
25912 info->altivec_padding_size = 16 - (-info->vrsave_save_offset % 16);
25914 info->altivec_save_offset = info->vrsave_save_offset
25915 - info->altivec_padding_size
25916 - info->altivec_size;
25918 /* Adjust for AltiVec case. */
25919 info->ehrd_offset = info->altivec_save_offset;
25921 else
25922 info->ehrd_offset = info->cr_save_offset;
25924 info->ehrd_offset -= ehrd_size;
25925 info->lr_save_offset = reg_size;
25928 save_align = (TARGET_ALTIVEC_ABI || DEFAULT_ABI == ABI_DARWIN) ? 16 : 8;
25929 info->save_size = RS6000_ALIGN (info->fp_size
25930 + info->gp_size
25931 + info->altivec_size
25932 + info->altivec_padding_size
25933 + info->spe_gp_size
25934 + info->spe_padding_size
25935 + ehrd_size
25936 + ehcr_size
25937 + info->cr_size
25938 + info->vrsave_size,
25939 save_align);
25941 non_fixed_size = info->vars_size + info->parm_size + info->save_size;
25943 info->total_size = RS6000_ALIGN (non_fixed_size + info->fixed_size,
25944 ABI_STACK_BOUNDARY / BITS_PER_UNIT);
25946 /* Determine if we need to save the link register. */
25947 if (info->calls_p
25948 || ((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
25949 && crtl->profile
25950 && !TARGET_PROFILE_KERNEL)
25951 || (DEFAULT_ABI == ABI_V4 && cfun->calls_alloca)
25952 #ifdef TARGET_RELOCATABLE
25953 || (DEFAULT_ABI == ABI_V4
25954 && (TARGET_RELOCATABLE || flag_pic > 1)
25955 && get_pool_size () != 0)
25956 #endif
25957 || rs6000_ra_ever_killed ())
25958 info->lr_save_p = 1;
25960 using_static_chain_p = (cfun->static_chain_decl != NULL_TREE
25961 && df_regs_ever_live_p (STATIC_CHAIN_REGNUM)
25962 && call_used_regs[STATIC_CHAIN_REGNUM]);
25963 info->savres_strategy = rs6000_savres_strategy (info, using_static_chain_p);
25965 if (!(info->savres_strategy & SAVE_INLINE_GPRS)
25966 || !(info->savres_strategy & SAVE_INLINE_FPRS)
25967 || !(info->savres_strategy & SAVE_INLINE_VRS)
25968 || !(info->savres_strategy & REST_INLINE_GPRS)
25969 || !(info->savres_strategy & REST_INLINE_FPRS)
25970 || !(info->savres_strategy & REST_INLINE_VRS))
25971 info->lr_save_p = 1;
25973 if (info->lr_save_p)
25974 df_set_regs_ever_live (LR_REGNO, true);
25976 /* Determine if we need to allocate any stack frame:
25978 For AIX we need to push the stack if a frame pointer is needed
25979 (because the stack might be dynamically adjusted), if we are
25980 debugging, if we make calls, or if the sum of fp_save, gp_save,
25981 and local variables is more than the space needed to save all
25982 non-volatile registers: 32-bit: 18*8 + 19*4 = 220 or 64-bit: 18*8
25983 + 18*8 = 288 (GPR13 reserved).
25985 For V.4 we don't have the stack cushion that AIX uses, but assume
25986 that the debugger can handle stackless frames. */
25988 if (info->calls_p)
25989 info->push_p = 1;
25991 else if (DEFAULT_ABI == ABI_V4)
25992 info->push_p = non_fixed_size != 0;
25994 else if (frame_pointer_needed)
25995 info->push_p = 1;
25997 else if (TARGET_XCOFF && write_symbols != NO_DEBUG)
25998 info->push_p = 1;
26000 else
26001 info->push_p = non_fixed_size > (TARGET_32BIT ? 220 : 288);
26003 return info;
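/* Typical use elsewhere in this file (a sketch of the existing
   pattern, not a new API): the result is cached per function once
   reload has completed, so callers simply ask again whenever they
   need frame data:

     rs6000_stack_t *info = rs6000_stack_info ();
     if (info->push_p)
       /* ...allocate info->total_size bytes of stack...  */;
*/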
26006 /* Return true if the current function uses any GPRs in 64-bit SIMD
26007 mode. */
26009 static bool
26010 spe_func_has_64bit_regs_p (void)
26012 rtx_insn *insns, *insn;
26014 /* Functions that save and restore all the call-saved registers will
26015 need to save/restore the registers in 64-bits. */
26016 if (crtl->calls_eh_return
26017 || cfun->calls_setjmp
26018 || crtl->has_nonlocal_goto)
26019 return true;
26021 insns = get_insns ();
26023 for (insn = NEXT_INSN (insns); insn != NULL_RTX; insn = NEXT_INSN (insn))
26025 if (INSN_P (insn))
26027 rtx i;
26029 /* FIXME: This should be implemented with attributes...
26031 (set_attr "spe64" "true") ... then
26032 if (get_spe64(insn)) return true;
26034 It's the only reliable way to do the stuff below. */
26036 i = PATTERN (insn);
26037 if (GET_CODE (i) == SET)
26039 machine_mode mode = GET_MODE (SET_SRC (i));
26041 if (SPE_VECTOR_MODE (mode))
26042 return true;
26043 if (TARGET_E500_DOUBLE
26044 && (mode == DFmode || FLOAT128_2REG_P (mode)))
26045 return true;
26050 return false;
26053 static void
26054 debug_stack_info (rs6000_stack_t *info)
26056 const char *abi_string;
26058 if (! info)
26059 info = rs6000_stack_info ();
26061 fprintf (stderr, "\nStack information for function %s:\n",
26062 ((current_function_decl && DECL_NAME (current_function_decl))
26063 ? IDENTIFIER_POINTER (DECL_NAME (current_function_decl))
26064 : "<unknown>"));
26066 switch (info->abi)
26068 default: abi_string = "Unknown"; break;
26069 case ABI_NONE: abi_string = "NONE"; break;
26070 case ABI_AIX: abi_string = "AIX"; break;
26071 case ABI_ELFv2: abi_string = "ELFv2"; break;
26072 case ABI_DARWIN: abi_string = "Darwin"; break;
26073 case ABI_V4: abi_string = "V.4"; break;
26076 fprintf (stderr, "\tABI = %5s\n", abi_string);
26078 if (TARGET_ALTIVEC_ABI)
26079 fprintf (stderr, "\tALTIVEC ABI extensions enabled.\n");
26081 if (TARGET_SPE_ABI)
26082 fprintf (stderr, "\tSPE ABI extensions enabled.\n");
26084 if (info->first_gp_reg_save != 32)
26085 fprintf (stderr, "\tfirst_gp_reg_save = %5d\n", info->first_gp_reg_save);
26087 if (info->first_fp_reg_save != 64)
26088 fprintf (stderr, "\tfirst_fp_reg_save = %5d\n", info->first_fp_reg_save);
26090 if (info->first_altivec_reg_save <= LAST_ALTIVEC_REGNO)
26091 fprintf (stderr, "\tfirst_altivec_reg_save = %5d\n",
26092 info->first_altivec_reg_save);
26094 if (info->lr_save_p)
26095 fprintf (stderr, "\tlr_save_p = %5d\n", info->lr_save_p);
26097 if (info->cr_save_p)
26098 fprintf (stderr, "\tcr_save_p = %5d\n", info->cr_save_p);
26100 if (info->vrsave_mask)
26101 fprintf (stderr, "\tvrsave_mask = 0x%x\n", info->vrsave_mask);
26103 if (info->push_p)
26104 fprintf (stderr, "\tpush_p = %5d\n", info->push_p);
26106 if (info->calls_p)
26107 fprintf (stderr, "\tcalls_p = %5d\n", info->calls_p);
26109 if (info->gp_size)
26110 fprintf (stderr, "\tgp_save_offset = %5d\n", info->gp_save_offset);
26112 if (info->fp_size)
26113 fprintf (stderr, "\tfp_save_offset = %5d\n", info->fp_save_offset);
26115 if (info->altivec_size)
26116 fprintf (stderr, "\taltivec_save_offset = %5d\n",
26117 info->altivec_save_offset);
26119 if (info->spe_gp_size)
26120 fprintf (stderr, "\tspe_gp_save_offset = %5d\n",
26121 info->spe_gp_save_offset);
26123 if (info->vrsave_size)
26124 fprintf (stderr, "\tvrsave_save_offset = %5d\n",
26125 info->vrsave_save_offset);
26127 if (info->lr_save_p)
26128 fprintf (stderr, "\tlr_save_offset = %5d\n", info->lr_save_offset);
26130 if (info->cr_save_p)
26131 fprintf (stderr, "\tcr_save_offset = %5d\n", info->cr_save_offset);
26133 if (info->varargs_save_offset)
26134 fprintf (stderr, "\tvarargs_save_offset = %5d\n", info->varargs_save_offset);
26136 if (info->total_size)
26137 fprintf (stderr, "\ttotal_size = " HOST_WIDE_INT_PRINT_DEC"\n",
26138 info->total_size);
26140 if (info->vars_size)
26141 fprintf (stderr, "\tvars_size = " HOST_WIDE_INT_PRINT_DEC"\n",
26142 info->vars_size);
26144 if (info->parm_size)
26145 fprintf (stderr, "\tparm_size = %5d\n", info->parm_size);
26147 if (info->fixed_size)
26148 fprintf (stderr, "\tfixed_size = %5d\n", info->fixed_size);
26150 if (info->gp_size)
26151 fprintf (stderr, "\tgp_size = %5d\n", info->gp_size);
26153 if (info->spe_gp_size)
26154 fprintf (stderr, "\tspe_gp_size = %5d\n", info->spe_gp_size);
26156 if (info->fp_size)
26157 fprintf (stderr, "\tfp_size = %5d\n", info->fp_size);
26159 if (info->altivec_size)
26160 fprintf (stderr, "\taltivec_size = %5d\n", info->altivec_size);
26162 if (info->vrsave_size)
26163 fprintf (stderr, "\tvrsave_size = %5d\n", info->vrsave_size);
26165 if (info->altivec_padding_size)
26166 fprintf (stderr, "\taltivec_padding_size= %5d\n",
26167 info->altivec_padding_size);
26169 if (info->spe_padding_size)
26170 fprintf (stderr, "\tspe_padding_size = %5d\n",
26171 info->spe_padding_size);
26173 if (info->cr_size)
26174 fprintf (stderr, "\tcr_size = %5d\n", info->cr_size);
26176 if (info->save_size)
26177 fprintf (stderr, "\tsave_size = %5d\n", info->save_size);
26179 if (info->reg_size != 4)
26180 fprintf (stderr, "\treg_size = %5d\n", info->reg_size);
26182 fprintf (stderr, "\tsave-strategy = %04x\n", info->savres_strategy);
26184 fprintf (stderr, "\n");
26188 rs6000_return_addr (int count, rtx frame)
26190 /* Currently we don't optimize very well between prologue and body
26191 code, and for PIC code the result can be quite bad, so
26192 don't try to be too clever here. */
26193 if (count != 0
26194 || ((DEFAULT_ABI == ABI_V4 || DEFAULT_ABI == ABI_DARWIN) && flag_pic))
26196 cfun->machine->ra_needs_full_frame = 1;
26198 return
26199 gen_rtx_MEM
26200 (Pmode,
26201 memory_address
26202 (Pmode,
26203 plus_constant (Pmode,
26204 copy_to_reg
26205 (gen_rtx_MEM (Pmode,
26206 memory_address (Pmode, frame))),
26207 RETURN_ADDRESS_OFFSET)));
26210 cfun->machine->ra_need_lr = 1;
26211 return get_hard_reg_initial_val (Pmode, LR_REGNO);
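/* Illustration: __builtin_return_address (0) in a non-PIC function
   takes the cheap path above (the incoming value of LR), while
   __builtin_return_address (1) has COUNT != 0 and therefore walks
   the stack: load the caller's back chain word from FRAME, then
   load the LR save word at RETURN_ADDRESS_OFFSET from it.  */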
26214 /* Say whether a function is a candidate for sibcall handling or not. */
26216 static bool
26217 rs6000_function_ok_for_sibcall (tree decl, tree exp)
26219 tree fntype;
26221 if (decl)
26222 fntype = TREE_TYPE (decl);
26223 else
26224 fntype = TREE_TYPE (TREE_TYPE (CALL_EXPR_FN (exp)));
26226 /* We can't do it if the called function has more vector parameters
26227 than the current function; there's nowhere to put the VRsave code. */
26228 if (TARGET_ALTIVEC_ABI
26229 && TARGET_ALTIVEC_VRSAVE
26230 && !(decl && decl == current_function_decl))
26232 function_args_iterator args_iter;
26233 tree type;
26234 int nvreg = 0;
26236 /* Functions with vector parameters are required to have a
26237 prototype, so the argument type info must be available
26238 here. */
26239 FOREACH_FUNCTION_ARGS(fntype, type, args_iter)
26240 if (TREE_CODE (type) == VECTOR_TYPE
26241 && ALTIVEC_OR_VSX_VECTOR_MODE (TYPE_MODE (type)))
26242 nvreg++;
26244 FOREACH_FUNCTION_ARGS(TREE_TYPE (current_function_decl), type, args_iter)
26245 if (TREE_CODE (type) == VECTOR_TYPE
26246 && ALTIVEC_OR_VSX_VECTOR_MODE (TYPE_MODE (type)))
26247 nvreg--;
26249 if (nvreg > 0)
26250 return false;
26253 /* Under the AIX or ELFv2 ABIs we can't allow calls to non-local
26254 functions, because the callee may have a different TOC pointer from
26255 the caller and there's no way to ensure we restore the TOC when
26256 we return. With the secure-plt SYSV ABI we can't make non-local
26257 calls when -fpic/PIC because the plt call stubs use r30. */
26258 if (DEFAULT_ABI == ABI_DARWIN
26259 || ((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
26260 && decl
26261 && !DECL_EXTERNAL (decl)
26262 && !DECL_WEAK (decl)
26263 && (*targetm.binds_local_p) (decl))
26264 || (DEFAULT_ABI == ABI_V4
26265 && (!TARGET_SECURE_PLT
26266 || !flag_pic
26267 || (decl
26268 && (*targetm.binds_local_p) (decl)))))
26270 tree attr_list = TYPE_ATTRIBUTES (fntype);
26272 if (!lookup_attribute ("longcall", attr_list)
26273 || lookup_attribute ("shortcall", attr_list))
26274 return true;
26277 return false;
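/* Hypothetical example of a call this predicate accepts under the
   AIX/ELFv2 rules: a tail call to a local function, which is known
   to share the caller's TOC pointer:

     static int callee (int);
     int caller (int x) { return callee (x + 1); }

   An external or weak callee, or one marked "longcall", fails the
   checks above and is compiled as an ordinary call.  */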
26280 static int
26281 rs6000_ra_ever_killed (void)
26283 rtx_insn *top;
26284 rtx reg;
26285 rtx_insn *insn;
26287 if (cfun->is_thunk)
26288 return 0;
26290 if (cfun->machine->lr_save_state)
26291 return cfun->machine->lr_save_state - 1;
26293 /* regs_ever_live has LR marked as used if any sibcalls are present,
26294 but this should not force saving and restoring in the
26295 pro/epilogue. Likewise, reg_set_between_p thinks a sibcall
26296 clobbers LR, so that is inappropriate. */
26298 /* Also, the prologue can generate a store into LR that
26299 doesn't really count, like this:
26301 move LR->R0
26302 bcl to set PIC register
26303 move LR->R31
26304 move R0->LR
26306 When we're called from the epilogue, we need to avoid counting
26307 this as a store. */
26309 push_topmost_sequence ();
26310 top = get_insns ();
26311 pop_topmost_sequence ();
26312 reg = gen_rtx_REG (Pmode, LR_REGNO);
26314 for (insn = NEXT_INSN (top); insn != NULL_RTX; insn = NEXT_INSN (insn))
26316 if (INSN_P (insn))
26318 if (CALL_P (insn))
26320 if (!SIBLING_CALL_P (insn))
26321 return 1;
26323 else if (find_regno_note (insn, REG_INC, LR_REGNO))
26324 return 1;
26325 else if (set_of (reg, insn) != NULL_RTX
26326 && !prologue_epilogue_contains (insn))
26327 return 1;
26330 return 0;
26333 /* Emit instructions needed to load the TOC register.
26334 This is only needed when TARGET_TOC, TARGET_MINIMAL_TOC, and there is
26335 a constant pool; or for SVR4 -fpic. */
26337 void
26338 rs6000_emit_load_toc_table (int fromprolog)
26340 rtx dest;
26341 dest = gen_rtx_REG (Pmode, RS6000_PIC_OFFSET_TABLE_REGNUM);
26343 if (TARGET_ELF && TARGET_SECURE_PLT && DEFAULT_ABI == ABI_V4 && flag_pic)
26345 char buf[30];
26346 rtx lab, tmp1, tmp2, got;
26348 lab = gen_label_rtx ();
26349 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (lab));
26350 lab = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (buf));
26351 if (flag_pic == 2)
26353 got = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (toc_label_name));
26354 need_toc_init = 1;
26356 else
26357 got = rs6000_got_sym ();
26358 tmp1 = tmp2 = dest;
26359 if (!fromprolog)
26361 tmp1 = gen_reg_rtx (Pmode);
26362 tmp2 = gen_reg_rtx (Pmode);
26364 emit_insn (gen_load_toc_v4_PIC_1 (lab));
26365 emit_move_insn (tmp1, gen_rtx_REG (Pmode, LR_REGNO));
26366 emit_insn (gen_load_toc_v4_PIC_3b (tmp2, tmp1, got, lab));
26367 emit_insn (gen_load_toc_v4_PIC_3c (dest, tmp2, got, lab));
26369 else if (TARGET_ELF && DEFAULT_ABI == ABI_V4 && flag_pic == 1)
26371 emit_insn (gen_load_toc_v4_pic_si ());
26372 emit_move_insn (dest, gen_rtx_REG (Pmode, LR_REGNO));
26374 else if (TARGET_ELF && DEFAULT_ABI == ABI_V4 && flag_pic == 2)
26376 char buf[30];
26377 rtx temp0 = (fromprolog
26378 ? gen_rtx_REG (Pmode, 0)
26379 : gen_reg_rtx (Pmode));
26381 if (fromprolog)
26383 rtx symF, symL;
26385 ASM_GENERATE_INTERNAL_LABEL (buf, "LCF", rs6000_pic_labelno);
26386 symF = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (buf));
26388 ASM_GENERATE_INTERNAL_LABEL (buf, "LCL", rs6000_pic_labelno);
26389 symL = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (buf));
26391 emit_insn (gen_load_toc_v4_PIC_1 (symF));
26392 emit_move_insn (dest, gen_rtx_REG (Pmode, LR_REGNO));
26393 emit_insn (gen_load_toc_v4_PIC_2 (temp0, dest, symL, symF));
26395 else
26397 rtx tocsym, lab;
26399 tocsym = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (toc_label_name));
26400 need_toc_init = 1;
26401 lab = gen_label_rtx ();
26402 emit_insn (gen_load_toc_v4_PIC_1b (tocsym, lab));
26403 emit_move_insn (dest, gen_rtx_REG (Pmode, LR_REGNO));
26404 if (TARGET_LINK_STACK)
26405 emit_insn (gen_addsi3 (dest, dest, GEN_INT (4)));
26406 emit_move_insn (temp0, gen_rtx_MEM (Pmode, dest));
26408 emit_insn (gen_addsi3 (dest, temp0, dest));
26410 else if (TARGET_ELF && !TARGET_AIX && flag_pic == 0 && TARGET_MINIMAL_TOC)
26412 /* This is for AIX code running in non-PIC ELF32. */
26413 rtx realsym = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (toc_label_name));
26415 need_toc_init = 1;
26416 emit_insn (gen_elf_high (dest, realsym));
26417 emit_insn (gen_elf_low (dest, dest, realsym));
26419 else
26421 gcc_assert (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2);
26423 if (TARGET_32BIT)
26424 emit_insn (gen_load_toc_aix_si (dest));
26425 else
26426 emit_insn (gen_load_toc_aix_di (dest));
26430 /* Emit instructions to restore the link register after determining where
26431 its value has been stored. */
26433 void
26434 rs6000_emit_eh_reg_restore (rtx source, rtx scratch)
26436 rs6000_stack_t *info = rs6000_stack_info ();
26437 rtx operands[2];
26439 operands[0] = source;
26440 operands[1] = scratch;
26442 if (info->lr_save_p)
26444 rtx frame_rtx = stack_pointer_rtx;
26445 HOST_WIDE_INT sp_offset = 0;
26446 rtx tmp;
26448 if (frame_pointer_needed
26449 || cfun->calls_alloca
26450 || info->total_size > 32767)
26452 tmp = gen_frame_mem (Pmode, frame_rtx);
26453 emit_move_insn (operands[1], tmp);
26454 frame_rtx = operands[1];
26456 else if (info->push_p)
26457 sp_offset = info->total_size;
26459 tmp = plus_constant (Pmode, frame_rtx,
26460 info->lr_save_offset + sp_offset);
26461 tmp = gen_frame_mem (Pmode, tmp);
26462 emit_move_insn (tmp, operands[0]);
26464 else
26465 emit_move_insn (gen_rtx_REG (Pmode, LR_REGNO), operands[0]);
26467 /* Freeze lr_save_p. We've just emitted rtl that depends on the
26468 state of lr_save_p so any change from here on would be a bug. In
26469 particular, stop rs6000_ra_ever_killed from considering the SET
26470 of lr we may have added just above. */
26471 cfun->machine->lr_save_state = info->lr_save_p + 1;
26474 static GTY(()) alias_set_type set = -1;
26476 alias_set_type
26477 get_TOC_alias_set (void)
26479 if (set == -1)
26480 set = new_alias_set ();
26481 return set;
26484 /* This returns nonzero if the current function uses the TOC. This is
26485 determined by the presence of (use (unspec ... UNSPEC_TOC)), which
26486 is generated by the ABI_V4 load_toc_* patterns. */
26487 #if TARGET_ELF
26488 static int
26489 uses_TOC (void)
26491 rtx_insn *insn;
26493 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
26494 if (INSN_P (insn))
26496 rtx pat = PATTERN (insn);
26497 int i;
26499 if (GET_CODE (pat) == PARALLEL)
26500 for (i = 0; i < XVECLEN (pat, 0); i++)
26502 rtx sub = XVECEXP (pat, 0, i);
26503 if (GET_CODE (sub) == USE)
26505 sub = XEXP (sub, 0);
26506 if (GET_CODE (sub) == UNSPEC
26507 && XINT (sub, 1) == UNSPEC_TOC)
26508 return 1;
26512 return 0;
26514 #endif
26517 create_TOC_reference (rtx symbol, rtx largetoc_reg)
26519 rtx tocrel, tocreg, hi;
26521 if (TARGET_DEBUG_ADDR)
26523 if (GET_CODE (symbol) == SYMBOL_REF)
26524 fprintf (stderr, "\ncreate_TOC_reference, (symbol_ref %s)\n",
26525 XSTR (symbol, 0));
26526 else
26528 fprintf (stderr, "\ncreate_TOC_reference, code %s:\n",
26529 GET_RTX_NAME (GET_CODE (symbol)));
26530 debug_rtx (symbol);
26534 if (!can_create_pseudo_p ())
26535 df_set_regs_ever_live (TOC_REGISTER, true);
26537 tocreg = gen_rtx_REG (Pmode, TOC_REGISTER);
26538 tocrel = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, symbol, tocreg), UNSPEC_TOCREL);
26539 if (TARGET_CMODEL == CMODEL_SMALL || can_create_pseudo_p ())
26540 return tocrel;
26542 hi = gen_rtx_HIGH (Pmode, copy_rtx (tocrel));
26543 if (largetoc_reg != NULL)
26545 emit_move_insn (largetoc_reg, hi);
26546 hi = largetoc_reg;
26548 return gen_rtx_LO_SUM (Pmode, hi, tocrel);
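/* Sketch of the resulting code (illustrative asm, 64-bit): with a
   small code model the UNSPEC_TOCREL collapses into a single
   TOC-relative load such as

     ld 9,var@toc(2)

   while the HIGH/LO_SUM pair built above for medium/large code
   models becomes

     addis 9,2,var@toc@ha
     ld 9,var@toc@l(9)
*/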
26551 /* Issue assembly directives that create a reference to the given DWARF
26552 FRAME_TABLE_LABEL from the current function section. */
26553 void
26554 rs6000_aix_asm_output_dwarf_table_ref (char * frame_table_label)
26556 fprintf (asm_out_file, "\t.ref %s\n",
26557 (* targetm.strip_name_encoding) (frame_table_label));
26560 /* This ties together stack memory (MEM with an alias set of frame_alias_set)
26561 and the change to the stack pointer. */
26563 static void
26564 rs6000_emit_stack_tie (rtx fp, bool hard_frame_needed)
26566 rtvec p;
26567 int i;
26568 rtx regs[3];
26570 i = 0;
26571 regs[i++] = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
26572 if (hard_frame_needed)
26573 regs[i++] = gen_rtx_REG (Pmode, HARD_FRAME_POINTER_REGNUM);
26574 if (!(REGNO (fp) == STACK_POINTER_REGNUM
26575 || (hard_frame_needed
26576 && REGNO (fp) == HARD_FRAME_POINTER_REGNUM)))
26577 regs[i++] = fp;
26579 p = rtvec_alloc (i);
26580 while (--i >= 0)
26582 rtx mem = gen_frame_mem (BLKmode, regs[i]);
26583 RTVEC_ELT (p, i) = gen_rtx_SET (mem, const0_rtx);
26586 emit_insn (gen_stack_tie (gen_rtx_PARALLEL (VOIDmode, p)));
26589 /* Emit the correct code for allocating stack space, as insns.
26590 If COPY_REG, leave a copy of the old stack pointer (plus COPY_OFF) in it.
26591 The generated code may use hard register 0 as a temporary. */
26593 static rtx_insn *
26594 rs6000_emit_allocate_stack (HOST_WIDE_INT size, rtx copy_reg, int copy_off)
26596 rtx_insn *insn;
26597 rtx stack_reg = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
26598 rtx tmp_reg = gen_rtx_REG (Pmode, 0);
26599 rtx todec = gen_int_mode (-size, Pmode);
26600 rtx par, set, mem;
26602 if (INTVAL (todec) != -size)
26604 warning (0, "stack frame too large");
26605 emit_insn (gen_trap ());
26606 return 0;
26609 if (crtl->limit_stack)
26611 if (REG_P (stack_limit_rtx)
26612 && REGNO (stack_limit_rtx) > 1
26613 && REGNO (stack_limit_rtx) <= 31)
26615 emit_insn (gen_add3_insn (tmp_reg, stack_limit_rtx, GEN_INT (size)));
26616 emit_insn (gen_cond_trap (LTU, stack_reg, tmp_reg,
26617 const0_rtx));
26619 else if (GET_CODE (stack_limit_rtx) == SYMBOL_REF
26620 && TARGET_32BIT
26621 && DEFAULT_ABI == ABI_V4)
26623 rtx toload = gen_rtx_CONST (VOIDmode,
26624 gen_rtx_PLUS (Pmode,
26625 stack_limit_rtx,
26626 GEN_INT (size)));
26628 emit_insn (gen_elf_high (tmp_reg, toload));
26629 emit_insn (gen_elf_low (tmp_reg, tmp_reg, toload));
26630 emit_insn (gen_cond_trap (LTU, stack_reg, tmp_reg,
26631 const0_rtx));
26633 else
26634 warning (0, "stack limit expression is not supported");
26637 if (copy_reg)
26639 if (copy_off != 0)
26640 emit_insn (gen_add3_insn (copy_reg, stack_reg, GEN_INT (copy_off)));
26641 else
26642 emit_move_insn (copy_reg, stack_reg);
26645 if (size > 32767)
26647 /* Need a note here so that try_split doesn't get confused. */
26648 if (get_last_insn () == NULL_RTX)
26649 emit_note (NOTE_INSN_DELETED);
26650 insn = emit_move_insn (tmp_reg, todec);
26651 try_split (PATTERN (insn), insn, 0);
26652 todec = tmp_reg;
26655 insn = emit_insn (TARGET_32BIT
26656 ? gen_movsi_update_stack (stack_reg, stack_reg,
26657 todec, stack_reg)
26658 : gen_movdi_di_update_stack (stack_reg, stack_reg,
26659 todec, stack_reg));
26660 /* Since we didn't use gen_frame_mem to generate the MEM, grab
26661 it now and set the alias set/attributes. The above gen_*_update
26662 calls will generate a PARALLEL with the MEM set being the first
26663 operation. */
26664 par = PATTERN (insn);
26665 gcc_assert (GET_CODE (par) == PARALLEL);
26666 set = XVECEXP (par, 0, 0);
26667 gcc_assert (GET_CODE (set) == SET);
26668 mem = SET_DEST (set);
26669 gcc_assert (MEM_P (mem));
26670 MEM_NOTRAP_P (mem) = 1;
26671 set_mem_alias_set (mem, get_frame_alias_set ());
26673 RTX_FRAME_RELATED_P (insn) = 1;
26674 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
26675 gen_rtx_SET (stack_reg, gen_rtx_PLUS (Pmode, stack_reg,
26676 GEN_INT (-size))));
26677 return insn;
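/* Rough shape of the emitted code (an illustration, not a promise):
   for a small frame the update is a single store-with-update,

     stwu 1,-64(1)        # 32-bit; stdu on 64-bit

   and for SIZE > 32767 the constant is first materialized in r0 and
   an indexed store-with-update is used instead, e.g.

     lis 0,-2
     ori 0,0,25536        # r0 = -105536, for example
     stwux 1,1,0
*/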
26680 #define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP)
26682 #if PROBE_INTERVAL > 32768
26683 #error Cannot use indexed addressing mode for stack probing
26684 #endif
26686 /* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE,
26687 inclusive. These are offsets from the current stack pointer. */
26689 static void
26690 rs6000_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size)
26692 /* See if we have a constant small number of probes to generate. If so,
26693 that's the easy case. */
26694 if (first + size <= 32768)
26696 HOST_WIDE_INT i;
26698 /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 1 until
26699 it exceeds SIZE. If only one probe is needed, this will not
26700 generate any code. Then probe at FIRST + SIZE. */
26701 for (i = PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
26702 emit_stack_probe (plus_constant (Pmode, stack_pointer_rtx,
26703 -(first + i)));
26705 emit_stack_probe (plus_constant (Pmode, stack_pointer_rtx,
26706 -(first + size)));
26709 /* Otherwise, do the same as above, but in a loop. Note that we must be
26710 extra careful with variables wrapping around because we might be at
26711 the very top (or the very bottom) of the address space and we have
26712 to be able to handle this case properly; in particular, we use an
26713 equality test for the loop condition. */
26714 else
26716 HOST_WIDE_INT rounded_size;
26717 rtx r12 = gen_rtx_REG (Pmode, 12);
26718 rtx r0 = gen_rtx_REG (Pmode, 0);
26720 /* Sanity check for the addressing mode we're going to use. */
26721 gcc_assert (first <= 32768);
26723 /* Step 1: round SIZE to the previous multiple of the interval. */
26725 rounded_size = ROUND_DOWN (size, PROBE_INTERVAL);
26728 /* Step 2: compute initial and final value of the loop counter. */
26730 /* TEST_ADDR = SP + FIRST. */
26731 emit_insn (gen_rtx_SET (r12, plus_constant (Pmode, stack_pointer_rtx,
26732 -first)));
26734 /* LAST_ADDR = SP + FIRST + ROUNDED_SIZE. */
26735 if (rounded_size > 32768)
26737 emit_move_insn (r0, GEN_INT (-rounded_size));
26738 emit_insn (gen_rtx_SET (r0, gen_rtx_PLUS (Pmode, r12, r0)));
26740 else
26741 emit_insn (gen_rtx_SET (r0, plus_constant (Pmode, r12,
26742 -rounded_size)));
26745 /* Step 3: the loop
26749 TEST_ADDR = TEST_ADDR + PROBE_INTERVAL
26750 probe at TEST_ADDR
26752 while (TEST_ADDR != LAST_ADDR)
26754 probes at FIRST + N * PROBE_INTERVAL for values of N from 1
26755 until it is equal to ROUNDED_SIZE. */
26757 if (TARGET_64BIT)
26758 emit_insn (gen_probe_stack_rangedi (r12, r12, r0));
26759 else
26760 emit_insn (gen_probe_stack_rangesi (r12, r12, r0));
26763 /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
26764 that SIZE is equal to ROUNDED_SIZE. */
26766 if (size != rounded_size)
26767 emit_stack_probe (plus_constant (Pmode, r12, rounded_size - size));
26771 /* Probe a range of stack addresses from REG1 to REG2 inclusive. These are
26772 absolute addresses. */
26774 const char *
26775 output_probe_stack_range (rtx reg1, rtx reg2)
26777 static int labelno = 0;
26778 char loop_lab[32];
26779 rtx xops[2];
26781 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno++);
26783 /* Loop. */
26784 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
26786 /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL. */
26787 xops[0] = reg1;
26788 xops[1] = GEN_INT (-PROBE_INTERVAL);
26789 output_asm_insn ("addi %0,%0,%1", xops);
26791 /* Probe at TEST_ADDR. */
26792 xops[1] = gen_rtx_REG (Pmode, 0);
26793 output_asm_insn ("stw %1,0(%0)", xops);
26795 /* Test if TEST_ADDR == LAST_ADDR. */
26796 xops[1] = reg2;
26797 if (TARGET_64BIT)
26798 output_asm_insn ("cmpd 0,%0,%1", xops);
26799 else
26800 output_asm_insn ("cmpw 0,%0,%1", xops);
26802 /* Branch. */
26803 fputs ("\tbne 0,", asm_out_file);
26804 assemble_name_raw (asm_out_file, loop_lab);
26805 fputc ('\n', asm_out_file);
26807 return "";
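/* The emitted loop therefore looks something like this (assuming the
   default 4096-byte probe interval and 32-bit code, with TEST_ADDR
   in r12 and LAST_ADDR in r0):

   .LPSRL0:
     addi 12,12,-4096     # TEST_ADDR -= PROBE_INTERVAL
     stw 0,0(12)          # probe the word at TEST_ADDR
     cmpw 0,12,0          # reached LAST_ADDR?
     bne 0,.LPSRL0
*/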
26810 /* Add to 'insn' a note which is PATTERN (INSN) but with REG replaced
26811 with (plus:P (reg 1) VAL), and with REG2 replaced with REPL2 if REG2
26812 is not NULL. It would be nice if dwarf2out_frame_debug_expr could
26813 deduce these equivalences by itself so it wasn't necessary to hold
26814 its hand so much. Don't be tempted to always supply d2_f_d_e with
26815 the actual cfa register, i.e. r31 when we are using a hard frame
26816 pointer. That fails when saving regs off r1, and sched moves the
26817 r31 setup past the reg saves. */
26819 static rtx
26820 rs6000_frame_related (rtx insn, rtx reg, HOST_WIDE_INT val,
26821 rtx reg2, rtx repl2)
26823 rtx repl;
26825 if (REGNO (reg) == STACK_POINTER_REGNUM)
26827 gcc_checking_assert (val == 0);
26828 repl = NULL_RTX;
26830 else
26831 repl = gen_rtx_PLUS (Pmode, gen_rtx_REG (Pmode, STACK_POINTER_REGNUM),
26832 GEN_INT (val));
26834 rtx pat = PATTERN (insn);
26835 if (!repl && !reg2)
26837 /* No need for any replacement. Just set RTX_FRAME_RELATED_P. */
26838 if (GET_CODE (pat) == PARALLEL)
26839 for (int i = 0; i < XVECLEN (pat, 0); i++)
26840 if (GET_CODE (XVECEXP (pat, 0, i)) == SET)
26842 rtx set = XVECEXP (pat, 0, i);
26844 /* If this PARALLEL has been emitted for out-of-line
26845 register save functions, or store multiple, then omit
26846 eh_frame info for any user-defined global regs. If
26847 eh_frame info is supplied, frame unwinding will
26848 restore a user reg. */
26849 if (!REG_P (SET_SRC (set))
26850 || !fixed_reg_p (REGNO (SET_SRC (set))))
26851 RTX_FRAME_RELATED_P (set) = 1;
26853 RTX_FRAME_RELATED_P (insn) = 1;
26854 return insn;
26857 /* We expect that 'pat' is either a SET or a PARALLEL containing
26858 SETs (and possibly other stuff). In a PARALLEL, all the SETs
26859 are important so they all have to be marked RTX_FRAME_RELATED_P.
26860 Call simplify_replace_rtx on the SETs rather than the whole insn
26861 so as to leave the other stuff alone (for example USE of r12). */
26863 if (GET_CODE (pat) == SET)
26865 if (repl)
26866 pat = simplify_replace_rtx (pat, reg, repl);
26867 if (reg2)
26868 pat = simplify_replace_rtx (pat, reg2, repl2);
26870 else if (GET_CODE (pat) == PARALLEL)
26872 pat = shallow_copy_rtx (pat);
26873 XVEC (pat, 0) = shallow_copy_rtvec (XVEC (pat, 0));
26875 for (int i = 0; i < XVECLEN (pat, 0); i++)
26876 if (GET_CODE (XVECEXP (pat, 0, i)) == SET)
26878 rtx set = XVECEXP (pat, 0, i);
26880 if (repl)
26881 set = simplify_replace_rtx (set, reg, repl);
26882 if (reg2)
26883 set = simplify_replace_rtx (set, reg2, repl2);
26884 XVECEXP (pat, 0, i) = set;
26886 /* Omit eh_frame info for any user-defined global regs. */
26887 if (!REG_P (SET_SRC (set))
26888 || !fixed_reg_p (REGNO (SET_SRC (set))))
26889 RTX_FRAME_RELATED_P (set) = 1;
26892 else
26893 gcc_unreachable ();
26895 RTX_FRAME_RELATED_P (insn) = 1;
26896 if (repl || reg2)
26897 add_reg_note (insn, REG_FRAME_RELATED_EXPR, pat);
26899 return insn;
26902 /* Returns an insn that has a vrsave set operation with the
26903 appropriate CLOBBERs. */
26905 static rtx
26906 generate_set_vrsave (rtx reg, rs6000_stack_t *info, int epiloguep)
26908 int nclobs, i;
26909 rtx insn, clobs[TOTAL_ALTIVEC_REGS + 1];
26910 rtx vrsave = gen_rtx_REG (SImode, VRSAVE_REGNO);
26912 clobs[0]
26913 = gen_rtx_SET (vrsave,
26914 gen_rtx_UNSPEC_VOLATILE (SImode,
26915 gen_rtvec (2, reg, vrsave),
26916 UNSPECV_SET_VRSAVE));
26918 nclobs = 1;
26920 /* We need to clobber the registers in the mask so the scheduler
26921 does not move sets to VRSAVE before sets of AltiVec registers.
26923 However, if the function receives nonlocal gotos, reload will set
26924 all call saved registers live. We will end up with:
26926 (set (reg 999) (mem))
26927 (parallel [ (set (reg vrsave) (unspec blah))
26928 (clobber (reg 999))])
26930 The clobber will cause the store into reg 999 to be dead, and
26931 flow will attempt to delete an epilogue insn. In this case, we
26932 need an unspec use/set of the register. */
26934 for (i = FIRST_ALTIVEC_REGNO; i <= LAST_ALTIVEC_REGNO; ++i)
26935 if (info->vrsave_mask & ALTIVEC_REG_BIT (i))
26937 if (!epiloguep || call_used_regs [i])
26938 clobs[nclobs++] = gen_rtx_CLOBBER (VOIDmode,
26939 gen_rtx_REG (V4SImode, i));
26940 else
26942 rtx reg = gen_rtx_REG (V4SImode, i);
26944 clobs[nclobs++]
26945 = gen_rtx_SET (reg,
26946 gen_rtx_UNSPEC (V4SImode,
26947 gen_rtvec (1, reg), 27));
26951 insn = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (nclobs));
26953 for (i = 0; i < nclobs; ++i)
26954 XVECEXP (insn, 0, i) = clobs[i];
26956 return insn;
26959 static rtx
26960 gen_frame_set (rtx reg, rtx frame_reg, int offset, bool store)
26962 rtx addr, mem;
26964 addr = gen_rtx_PLUS (Pmode, frame_reg, GEN_INT (offset));
26965 mem = gen_frame_mem (GET_MODE (reg), addr);
26966 return gen_rtx_SET (store ? mem : reg, store ? reg : mem);
26969 static rtx
26970 gen_frame_load (rtx reg, rtx frame_reg, int offset)
26972 return gen_frame_set (reg, frame_reg, offset, false);
26975 static rtx
26976 gen_frame_store (rtx reg, rtx frame_reg, int offset)
26978 return gen_frame_set (reg, frame_reg, offset, true);
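/* For illustration, gen_frame_store (gen_rtx_REG (SImode, 30),
   stack_pointer_rtx, 8) builds RTL along the lines of

     (set (mem:SI (plus:SI (reg:SI 1) (const_int 8))) (reg:SI 30))

   with the MEM carrying the frame alias set, and gen_frame_load
   builds the mirror-image SET.  */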
26981 /* Save a register into the frame, and emit RTX_FRAME_RELATED_P notes.
26982 Save REGNO into [FRAME_REG + OFFSET] in mode MODE. */
26984 static rtx
26985 emit_frame_save (rtx frame_reg, machine_mode mode,
26986 unsigned int regno, int offset, HOST_WIDE_INT frame_reg_to_sp)
26988 rtx reg, insn;
26990 /* Some cases that need register indexed addressing. */
26991 gcc_checking_assert (!((TARGET_ALTIVEC_ABI && ALTIVEC_VECTOR_MODE (mode))
26992 || (TARGET_VSX && ALTIVEC_OR_VSX_VECTOR_MODE (mode))
26993 || (TARGET_E500_DOUBLE && mode == DFmode)
26994 || (TARGET_SPE_ABI
26995 && SPE_VECTOR_MODE (mode)
26996 && !SPE_CONST_OFFSET_OK (offset))));
26998 reg = gen_rtx_REG (mode, regno);
26999 insn = emit_insn (gen_frame_store (reg, frame_reg, offset));
27000 return rs6000_frame_related (insn, frame_reg, frame_reg_to_sp,
27001 NULL_RTX, NULL_RTX);
27004 /* Emit an offset memory reference suitable for a frame store, while
27005 converting to a valid addressing mode. */
27007 static rtx
27008 gen_frame_mem_offset (machine_mode mode, rtx reg, int offset)
27010 rtx int_rtx, offset_rtx;
27012 int_rtx = GEN_INT (offset);
27014 if ((TARGET_SPE_ABI && SPE_VECTOR_MODE (mode) && !SPE_CONST_OFFSET_OK (offset))
27015 || (TARGET_E500_DOUBLE && mode == DFmode))
27017 offset_rtx = gen_rtx_REG (Pmode, FIXED_SCRATCH);
27018 emit_move_insn (offset_rtx, int_rtx);
27020 else
27021 offset_rtx = int_rtx;
27023 return gen_frame_mem (mode, gen_rtx_PLUS (Pmode, reg, offset_rtx));
27026 #ifndef TARGET_FIX_AND_CONTINUE
27027 #define TARGET_FIX_AND_CONTINUE 0
27028 #endif
27030 /* It's really GPR 13 or 14, FPR 14 and VR 20. We need the smallest. */
27031 #define FIRST_SAVRES_REGISTER FIRST_SAVED_GP_REGNO
27032 #define LAST_SAVRES_REGISTER 31
27033 #define N_SAVRES_REGISTERS (LAST_SAVRES_REGISTER - FIRST_SAVRES_REGISTER + 1)
27035 enum {
27036 SAVRES_LR = 0x1,
27037 SAVRES_SAVE = 0x2,
27038 SAVRES_REG = 0x0c,
27039 SAVRES_GPR = 0,
27040 SAVRES_FPR = 4,
27041 SAVRES_VR = 8
27044 static GTY(()) rtx savres_routine_syms[N_SAVRES_REGISTERS][12];
27046 /* Temporary holding space for an out-of-line register save/restore
27047 routine name. */
27048 static char savres_routine_name[30];
27050 /* Return the name for an out-of-line register save/restore routine.
27051 SEL encodes the register class and whether we save or restore. */
27053 static char *
27054 rs6000_savres_routine_name (rs6000_stack_t *info, int regno, int sel)
27056 const char *prefix = "";
27057 const char *suffix = "";
27059 /* Different targets are supposed to define
27060 {SAVE,RESTORE}_FP_{PREFIX,SUFFIX} with the idea that the needed
27061 routine name could be defined with:
27063 sprintf (name, "%s%d%s", SAVE_FP_PREFIX, regno, SAVE_FP_SUFFIX)
27065 This is a nice idea in theory, but in reality, things are
27066 complicated in several ways:
27068 - ELF targets have save/restore routines for GPRs.
27070 - SPE targets use different prefixes for 32/64-bit registers, and
27071 neither of them fit neatly in the FOO_{PREFIX,SUFFIX} regimen.
27073 - PPC64 ELF targets have routines for save/restore of GPRs that
27074 differ in what they do with the link register, so having a set
27075 prefix doesn't work. (We only use one of the save routines at
27076 the moment, though.)
27078 - PPC32 ELF targets have "exit" versions of the restore routines
27079 that restore the link register and can save some extra space.
27080 These require an extra suffix. (There are also "tail" versions
27081 of the restore routines and "GOT" versions of the save routines,
27082 but we don't generate those at present. Same problems apply,
27083 though.)
27085 We deal with all this by synthesizing our own prefix/suffix and
27086 using that for the simple sprintf call shown above. */
27087 if (TARGET_SPE)
27089 /* No floating point saves on the SPE. */
27090 gcc_assert ((sel & SAVRES_REG) == SAVRES_GPR);
27092 if ((sel & SAVRES_SAVE))
27093 prefix = info->spe_64bit_regs_used ? "_save64gpr_" : "_save32gpr_";
27094 else
27095 prefix = info->spe_64bit_regs_used ? "_rest64gpr_" : "_rest32gpr_";
27097 if ((sel & SAVRES_LR))
27098 suffix = "_x";
27100 else if (DEFAULT_ABI == ABI_V4)
27102 if (TARGET_64BIT)
27103 goto aix_names;
27105 if ((sel & SAVRES_REG) == SAVRES_GPR)
27106 prefix = (sel & SAVRES_SAVE) ? "_savegpr_" : "_restgpr_";
27107 else if ((sel & SAVRES_REG) == SAVRES_FPR)
27108 prefix = (sel & SAVRES_SAVE) ? "_savefpr_" : "_restfpr_";
27109 else if ((sel & SAVRES_REG) == SAVRES_VR)
27110 prefix = (sel & SAVRES_SAVE) ? "_savevr_" : "_restvr_";
27111 else
27112 abort ();
27114 if ((sel & SAVRES_LR))
27115 suffix = "_x";
27117 else if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
27119 #if !defined (POWERPC_LINUX) && !defined (POWERPC_FREEBSD)
27120 /* No out-of-line save/restore routines for GPRs on AIX. */
27121 gcc_assert (!TARGET_AIX || (sel & SAVRES_REG) != SAVRES_GPR);
27122 #endif
27124 aix_names:
27125 if ((sel & SAVRES_REG) == SAVRES_GPR)
27126 prefix = ((sel & SAVRES_SAVE)
27127 ? ((sel & SAVRES_LR) ? "_savegpr0_" : "_savegpr1_")
27128 : ((sel & SAVRES_LR) ? "_restgpr0_" : "_restgpr1_"));
27129 else if ((sel & SAVRES_REG) == SAVRES_FPR)
27131 #if defined (POWERPC_LINUX) || defined (POWERPC_FREEBSD)
27132 if ((sel & SAVRES_LR))
27133 prefix = ((sel & SAVRES_SAVE) ? "_savefpr_" : "_restfpr_");
27134 else
27135 #endif
27137 prefix = (sel & SAVRES_SAVE) ? SAVE_FP_PREFIX : RESTORE_FP_PREFIX;
27138 suffix = (sel & SAVRES_SAVE) ? SAVE_FP_SUFFIX : RESTORE_FP_SUFFIX;
27141 else if ((sel & SAVRES_REG) == SAVRES_VR)
27142 prefix = (sel & SAVRES_SAVE) ? "_savevr_" : "_restvr_";
27143 else
27144 abort ();
27147 if (DEFAULT_ABI == ABI_DARWIN)
27149 /* The Darwin approach is (slightly) different, in order to be
27150 compatible with code generated by the system toolchain. There is a
27151 single symbol for the start of save sequence, and the code here
27152 embeds an offset into that code on the basis of the first register
27153 to be saved. */
27154 prefix = (sel & SAVRES_SAVE) ? "save" : "rest" ;
27155 if ((sel & SAVRES_REG) == SAVRES_GPR)
27156 sprintf (savres_routine_name, "*%sGPR%s%s%.0d ; %s r%d-r31", prefix,
27157 ((sel & SAVRES_LR) ? "x" : ""), (regno == 13 ? "" : "+"),
27158 (regno - 13) * 4, prefix, regno);
27159 else if ((sel & SAVRES_REG) == SAVRES_FPR)
27160 sprintf (savres_routine_name, "*%sFP%s%.0d ; %s f%d-f31", prefix,
27161 (regno == 14 ? "" : "+"), (regno - 14) * 4, prefix, regno);
27162 else if ((sel & SAVRES_REG) == SAVRES_VR)
27163 sprintf (savres_routine_name, "*%sVEC%s%.0d ; %s v%d-v31", prefix,
27164 (regno == 20 ? "" : "+"), (regno - 20) * 8, prefix, regno);
27165 else
27166 abort ();
27168 else
27169 sprintf (savres_routine_name, "%s%d%s", prefix, regno, suffix);
27171 return savres_routine_name;
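/* Example names this produces (illustrative, assuming
   first_gp_reg_save == 28): 32-bit SVR4 uses "_savegpr_28" and the
   LR-restoring exit variant "_restgpr_28_x"; 64-bit ELF uses
   "_savegpr0_28" when the SAVRES_LR variant is selected and
   "_savegpr1_28" when it is not.  */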
27174 /* Return an RTL SYMBOL_REF for an out-of-line register save/restore routine.
27175 SEL encodes the register class and whether we save or restore. */
27177 static rtx
27178 rs6000_savres_routine_sym (rs6000_stack_t *info, int sel)
27180 int regno = ((sel & SAVRES_REG) == SAVRES_GPR
27181 ? info->first_gp_reg_save
27182 : (sel & SAVRES_REG) == SAVRES_FPR
27183 ? info->first_fp_reg_save - 32
27184 : (sel & SAVRES_REG) == SAVRES_VR
27185 ? info->first_altivec_reg_save - FIRST_ALTIVEC_REGNO
27186 : -1);
27187 rtx sym;
27188 int select = sel;
27190 /* On the SPE, we never have any FPRs, but we do have 32/64-bit
27191 versions of the gpr routines. */
27192 if (TARGET_SPE_ABI && (sel & SAVRES_REG) == SAVRES_GPR
27193 && info->spe_64bit_regs_used)
27194 select ^= SAVRES_FPR ^ SAVRES_GPR;
27196 /* Don't generate bogus routine names. */
27197 gcc_assert (FIRST_SAVRES_REGISTER <= regno
27198 && regno <= LAST_SAVRES_REGISTER
27199 && select >= 0 && select <= 12);
27201 sym = savres_routine_syms[regno-FIRST_SAVRES_REGISTER][select];
27203 if (sym == NULL)
27205 char *name;
27207 name = rs6000_savres_routine_name (info, regno, sel);
27209 sym = savres_routine_syms[regno-FIRST_SAVRES_REGISTER][select]
27210 = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
27211 SYMBOL_REF_FLAGS (sym) |= SYMBOL_FLAG_FUNCTION;
27214 return sym;
27217 /* Emit a sequence of insns, including a stack tie if needed, for
27218 resetting the stack pointer. If UPDT_REGNO is not 1, then don't
27219 reset the stack pointer, but move the base of the frame into
27220 reg UPDT_REGNO for use by out-of-line register restore routines. */
27222 static rtx
27223 rs6000_emit_stack_reset (rs6000_stack_t *info,
27224 rtx frame_reg_rtx, HOST_WIDE_INT frame_off,
27225 unsigned updt_regno)
27227 rtx updt_reg_rtx;
27229 /* This blockage is needed so that sched doesn't decide to move
27230 the sp change before the register restores. */
27231 if (DEFAULT_ABI == ABI_V4
27232 || (TARGET_SPE_ABI
27233 && info->spe_64bit_regs_used != 0
27234 && info->first_gp_reg_save != 32))
27235 rs6000_emit_stack_tie (frame_reg_rtx, frame_pointer_needed);
27237 /* If we are restoring registers out-of-line, we will be using the
27238 "exit" variants of the restore routines, which will reset the
27239 stack for us. But we do need to point updt_reg into the
27240 right place for those routines. */
27241 updt_reg_rtx = gen_rtx_REG (Pmode, updt_regno);
27243 if (frame_off != 0)
27244 return emit_insn (gen_add3_insn (updt_reg_rtx,
27245 frame_reg_rtx, GEN_INT (frame_off)));
27246 else if (REGNO (frame_reg_rtx) != updt_regno)
27247 return emit_move_insn (updt_reg_rtx, frame_reg_rtx);
27249 return NULL_RTX;
27252 /* Return the register number used as a pointer by out-of-line
27253 save/restore functions. */
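/* In summary: AIX/ELFv2 use r1 for the FPR routines and for any
routine that touches LR, and r12 otherwise; Darwin uses r1 for
the FPR routines and r11 otherwise; all other ABIs use r11. */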
27255 static inline unsigned
27256 ptr_regno_for_savres (int sel)
27258 if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
27259 return (sel & SAVRES_REG) == SAVRES_FPR || (sel & SAVRES_LR) ? 1 : 12;
27260 return DEFAULT_ABI == ABI_DARWIN && (sel & SAVRES_REG) == SAVRES_FPR ? 1 : 11;
27263 /* Construct a parallel rtx describing the effect of a call to an
27264 out-of-line register save/restore routine, and emit the insn
27265 or jump_insn as appropriate. */
27267 static rtx
27268 rs6000_emit_savres_rtx (rs6000_stack_t *info,
27269 rtx frame_reg_rtx, int save_area_offset, int lr_offset,
27270 machine_mode reg_mode, int sel)
27272 int i;
27273 int offset, start_reg, end_reg, n_regs, use_reg;
27274 int reg_size = GET_MODE_SIZE (reg_mode);
27275 rtx sym;
27276 rtvec p;
27277 rtx par, insn;
27279 offset = 0;
27280 start_reg = ((sel & SAVRES_REG) == SAVRES_GPR
27281 ? info->first_gp_reg_save
27282 : (sel & SAVRES_REG) == SAVRES_FPR
27283 ? info->first_fp_reg_save
27284 : (sel & SAVRES_REG) == SAVRES_VR
27285 ? info->first_altivec_reg_save
27286 : -1);
27287 end_reg = ((sel & SAVRES_REG) == SAVRES_GPR
27288 ? 32
27289 : (sel & SAVRES_REG) == SAVRES_FPR
27290 ? 64
27291 : (sel & SAVRES_REG) == SAVRES_VR
27292 ? LAST_ALTIVEC_REGNO + 1
27293 : -1);
27294 n_regs = end_reg - start_reg;
27295 p = rtvec_alloc (3 + ((sel & SAVRES_LR) ? 1 : 0)
27296 + ((sel & SAVRES_REG) == SAVRES_VR ? 1 : 0)
27297 + n_regs);
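/* The PARALLEL assembled below holds, in order: an optional (return)
for the exit-variant restore routines, a clobber of LR, a use of
the routine's symbol, the pointer register (vector routines also
clobber it and use r0), one set per saved or restored register,
and, for the LR-saving routines, a store of r0 into the LR slot. */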
27299 if (!(sel & SAVRES_SAVE) && (sel & SAVRES_LR))
27300 RTVEC_ELT (p, offset++) = ret_rtx;
27302 RTVEC_ELT (p, offset++)
27303 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (Pmode, LR_REGNO));
27305 sym = rs6000_savres_routine_sym (info, sel);
27306 RTVEC_ELT (p, offset++) = gen_rtx_USE (VOIDmode, sym);
27308 use_reg = ptr_regno_for_savres (sel);
27309 if ((sel & SAVRES_REG) == SAVRES_VR)
27311 /* Vector regs are saved/restored using [reg+reg] addressing. */
27312 RTVEC_ELT (p, offset++)
27313 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (Pmode, use_reg));
27314 RTVEC_ELT (p, offset++)
27315 = gen_rtx_USE (VOIDmode, gen_rtx_REG (Pmode, 0));
27317 else
27318 RTVEC_ELT (p, offset++)
27319 = gen_rtx_USE (VOIDmode, gen_rtx_REG (Pmode, use_reg));
27321 for (i = 0; i < end_reg - start_reg; i++)
27322 RTVEC_ELT (p, i + offset)
27323 = gen_frame_set (gen_rtx_REG (reg_mode, start_reg + i),
27324 frame_reg_rtx, save_area_offset + reg_size * i,
27325 (sel & SAVRES_SAVE) != 0);
27327 if ((sel & SAVRES_SAVE) && (sel & SAVRES_LR))
27328 RTVEC_ELT (p, i + offset)
27329 = gen_frame_store (gen_rtx_REG (Pmode, 0), frame_reg_rtx, lr_offset);
27331 par = gen_rtx_PARALLEL (VOIDmode, p);
27333 if (!(sel & SAVRES_SAVE) && (sel & SAVRES_LR))
27335 insn = emit_jump_insn (par);
27336 JUMP_LABEL (insn) = ret_rtx;
27338 else
27339 insn = emit_insn (par);
27340 return insn;
27343 /* Emit code to store CR fields that need to be saved into REG. */
27345 static void
27346 rs6000_emit_move_from_cr (rtx reg)
27348 /* Only the ELFv2 ABI allows storing only selected fields. */
27349 if (DEFAULT_ABI == ABI_ELFv2 && TARGET_MFCRF)
27351 int i, cr_reg[8], count = 0;
27353 /* Collect CR fields that must be saved. */
27354 for (i = 0; i < 8; i++)
27355 if (save_reg_p (CR0_REGNO + i))
27356 cr_reg[count++] = i;
27358 /* If it's just a single one, use mfcrf. */
27359 if (count == 1)
27361 rtvec p = rtvec_alloc (1);
27362 rtvec r = rtvec_alloc (2);
27363 RTVEC_ELT (r, 0) = gen_rtx_REG (CCmode, CR0_REGNO + cr_reg[0]);
27364 RTVEC_ELT (r, 1) = GEN_INT (1 << (7 - cr_reg[0]));
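/* In the FXM mask of mfocrf, CR fields are numbered from the
most-significant bit, so field N corresponds to bit 7 - N. */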
27365 RTVEC_ELT (p, 0)
27366 = gen_rtx_SET (reg,
27367 gen_rtx_UNSPEC (SImode, r, UNSPEC_MOVESI_FROM_CR));
27369 emit_insn (gen_rtx_PARALLEL (VOIDmode, p));
27370 return;
27373 /* ??? It might be better to handle count == 2 / 3 cases here
27374 as well, using logical operations to combine the values. */
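/* Otherwise a single mfcr copies all eight CR fields at once. */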
27377 emit_insn (gen_movesi_from_cr (reg));
27380 /* Return whether the split-stack arg pointer (r12) is used. */
27382 static bool
27383 split_stack_arg_pointer_used_p (void)
27385 /* If the pseudo holding the arg pointer is no longer a pseudo,
27386 then the arg pointer is used. */
27387 if (cfun->machine->split_stack_arg_pointer != NULL_RTX
27388 && (!REG_P (cfun->machine->split_stack_arg_pointer)
27389 || (REGNO (cfun->machine->split_stack_arg_pointer)
27390 < FIRST_PSEUDO_REGISTER)))
27391 return true;
27393 /* Unfortunately we also need to do some code scanning, since
27394 r12 may have been substituted for the pseudo. */
27395 rtx_insn *insn;
27396 basic_block bb = ENTRY_BLOCK_PTR_FOR_FN (cfun)->next_bb;
27397 FOR_BB_INSNS (bb, insn)
27398 if (NONDEBUG_INSN_P (insn))
27400 /* A call destroys r12. */
27401 if (CALL_P (insn))
27402 return false;
27404 df_ref use;
27405 FOR_EACH_INSN_USE (use, insn)
27407 rtx x = DF_REF_REG (use);
27408 if (REG_P (x) && REGNO (x) == 12)
27409 return true;
27411 df_ref def;
27412 FOR_EACH_INSN_DEF (def, insn)
27414 rtx x = DF_REF_REG (def);
27415 if (REG_P (x) && REGNO (x) == 12)
27416 return false;
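/* If the scan falls through without seeing a use or def of r12,
fall back to whether r12 is live on exit from the block. */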
27419 return bitmap_bit_p (DF_LR_OUT (bb), 12);
27422 /* Return whether we need to emit an ELFv2 global entry point prologue. */
27424 static bool
27425 rs6000_global_entry_point_needed_p (void)
27427 /* Only needed for the ELFv2 ABI. */
27428 if (DEFAULT_ABI != ABI_ELFv2)
27429 return false;
27431 /* With -msingle-pic-base, we assume the whole program shares the same
27432 TOC, so no global entry point prologues are needed anywhere. */
27433 if (TARGET_SINGLE_PIC_BASE)
27434 return false;
27436 /* Ensure we have a global entry point for thunks. ??? We could
27437 avoid that if the target routine doesn't need a global entry point,
27438 but we do not know whether this is the case at this point. */
27439 if (cfun->is_thunk)
27440 return true;
27442 /* For regular functions, rs6000_emit_prologue sets this flag if the
27443 routine ever uses the TOC pointer. */
27444 return cfun->machine->r2_setup_needed;
27447 /* Implement TARGET_SHRINK_WRAP_GET_SEPARATE_COMPONENTS. */
27448 static sbitmap
27449 rs6000_get_separate_components (void)
27451 rs6000_stack_t *info = rs6000_stack_info ();
27453 if (WORLD_SAVE_P (info))
27454 return NULL;
27456 sbitmap components = sbitmap_alloc (32);
27457 bitmap_clear (components);
27459 gcc_assert (!(info->savres_strategy & SAVE_MULTIPLE)
27460 && !(info->savres_strategy & REST_MULTIPLE));
27462 /* The GPRs we need saved to the frame. */
27463 if ((info->savres_strategy & SAVE_INLINE_GPRS)
27464 && (info->savres_strategy & REST_INLINE_GPRS))
27466 int reg_size = TARGET_32BIT ? 4 : 8;
27467 int offset = info->gp_save_offset;
27468 if (info->push_p)
27469 offset += info->total_size;
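/* A register can be shrink-wrapped separately only if its save slot
is reachable with a signed 16-bit displacement, since the component
prologue and epilogue use single D-form stores and loads. */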
27471 for (unsigned regno = info->first_gp_reg_save; regno < 32; regno++)
27473 if (IN_RANGE (offset, -0x8000, 0x7fff)
27474 && rs6000_reg_live_or_pic_offset_p (regno))
27475 bitmap_set_bit (components, regno);
27477 offset += reg_size;
27481 /* Don't mess with the hard frame pointer. */
27482 if (frame_pointer_needed)
27483 bitmap_clear_bit (components, HARD_FRAME_POINTER_REGNUM);
27485 /* Don't mess with the fixed TOC register. */
27486 if ((TARGET_TOC && TARGET_MINIMAL_TOC)
27487 || (flag_pic == 1 && DEFAULT_ABI == ABI_V4)
27488 || (flag_pic && DEFAULT_ABI == ABI_DARWIN))
27489 bitmap_clear_bit (components, RS6000_PIC_OFFSET_TABLE_REGNUM);
27491 /* Optimize LR save and restore if we can. This is component 0. Any
27492 out-of-line register save/restore routines need LR. */
27493 if (info->lr_save_p
27494 && !(flag_pic && (DEFAULT_ABI == ABI_V4 || DEFAULT_ABI == ABI_DARWIN))
27495 && (info->savres_strategy & SAVE_INLINE_GPRS)
27496 && (info->savres_strategy & REST_INLINE_GPRS)
27497 && (info->savres_strategy & SAVE_INLINE_FPRS)
27498 && (info->savres_strategy & REST_INLINE_FPRS)
27499 && (info->savres_strategy & SAVE_INLINE_VRS)
27500 && (info->savres_strategy & REST_INLINE_VRS))
27502 int offset = info->lr_save_offset;
27503 if (info->push_p)
27504 offset += info->total_size;
27505 if (IN_RANGE (offset, -0x8000, 0x7fff))
27506 bitmap_set_bit (components, 0);
27509 return components;
27512 /* Implement TARGET_SHRINK_WRAP_COMPONENTS_FOR_BB. */
27513 static sbitmap
27514 rs6000_components_for_bb (basic_block bb)
27516 rs6000_stack_t *info = rs6000_stack_info ();
27518 bitmap in = DF_LIVE_IN (bb);
27519 bitmap gen = &DF_LIVE_BB_INFO (bb)->gen;
27520 bitmap kill = &DF_LIVE_BB_INFO (bb)->kill;
27522 sbitmap components = sbitmap_alloc (32);
27523 bitmap_clear (components);
27525 /* GPRs are used in a bb if they are in the IN, GEN, or KILL sets. */
27526 for (unsigned regno = info->first_gp_reg_save; regno < 32; regno++)
27527 if (bitmap_bit_p (in, regno)
27528 || bitmap_bit_p (gen, regno)
27529 || bitmap_bit_p (kill, regno))
27530 bitmap_set_bit (components, regno);
27532 /* LR needs to be saved around a bb if it is killed in that bb. */
27533 if (bitmap_bit_p (gen, LR_REGNO)
27534 || bitmap_bit_p (kill, LR_REGNO))
27535 bitmap_set_bit (components, 0);
27537 return components;
27540 /* Implement TARGET_SHRINK_WRAP_DISQUALIFY_COMPONENTS. */
27541 static void
27542 rs6000_disqualify_components (sbitmap components, edge e,
27543 sbitmap edge_components, bool /*is_prologue*/)
27545 /* Our LR pro/epilogue code moves LR via R0, so R0 had better not be
27546 live where we want to place that code. */
27547 if (bitmap_bit_p (edge_components, 0)
27548 && bitmap_bit_p (DF_LIVE_IN (e->dest), 0))
27550 if (dump_file)
27551 fprintf (dump_file, "Disqualifying LR because GPR0 is live "
27552 "on entry to bb %d\n", e->dest->index);
27553 bitmap_clear_bit (components, 0);
27557 /* Implement TARGET_SHRINK_WRAP_EMIT_PROLOGUE_COMPONENTS. */
27558 static void
27559 rs6000_emit_prologue_components (sbitmap components)
27561 rs6000_stack_t *info = rs6000_stack_info ();
27562 rtx ptr_reg = gen_rtx_REG (Pmode, frame_pointer_needed
27563 ? HARD_FRAME_POINTER_REGNUM
27564 : STACK_POINTER_REGNUM);
27565 int reg_size = TARGET_32BIT ? 4 : 8;
27567 /* Prologue for LR. */
27568 if (bitmap_bit_p (components, 0))
27570 rtx reg = gen_rtx_REG (Pmode, 0);
27571 rtx_insn *insn = emit_move_insn (reg, gen_rtx_REG (Pmode, LR_REGNO));
27572 RTX_FRAME_RELATED_P (insn) = 1;
27573 add_reg_note (insn, REG_CFA_REGISTER, NULL);
27575 int offset = info->lr_save_offset;
27576 if (info->push_p)
27577 offset += info->total_size;
27579 insn = emit_insn (gen_frame_store (reg, ptr_reg, offset));
27580 RTX_FRAME_RELATED_P (insn) = 1;
27581 rtx lr = gen_rtx_REG (Pmode, LR_REGNO);
27582 rtx mem = copy_rtx (SET_DEST (single_set (insn)));
27583 add_reg_note (insn, REG_CFA_OFFSET, gen_rtx_SET (mem, lr));
27586 /* Prologue for the GPRs. */
27587 int offset = info->gp_save_offset;
27588 if (info->push_p)
27589 offset += info->total_size;
27591 for (int i = info->first_gp_reg_save; i < 32; i++)
27593 if (bitmap_bit_p (components, i))
27595 rtx reg = gen_rtx_REG (Pmode, i);
27596 rtx_insn *insn = emit_insn (gen_frame_store (reg, ptr_reg, offset));
27597 RTX_FRAME_RELATED_P (insn) = 1;
27598 rtx set = copy_rtx (single_set (insn));
27599 add_reg_note (insn, REG_CFA_OFFSET, set);
27602 offset += reg_size;
27606 /* Implement TARGET_SHRINK_WRAP_EMIT_EPILOGUE_COMPONENTS. */
27607 static void
27608 rs6000_emit_epilogue_components (sbitmap components)
27610 rs6000_stack_t *info = rs6000_stack_info ();
27611 rtx ptr_reg = gen_rtx_REG (Pmode, frame_pointer_needed
27612 ? HARD_FRAME_POINTER_REGNUM
27613 : STACK_POINTER_REGNUM);
27614 int reg_size = TARGET_32BIT ? 4 : 8;
27616 /* Epilogue for the GPRs. */
27617 int offset = info->gp_save_offset;
27618 if (info->push_p)
27619 offset += info->total_size;
27621 for (int i = info->first_gp_reg_save; i < 32; i++)
27623 if (bitmap_bit_p (components, i))
27625 rtx reg = gen_rtx_REG (Pmode, i);
27626 rtx_insn *insn = emit_insn (gen_frame_load (reg, ptr_reg, offset));
27627 RTX_FRAME_RELATED_P (insn) = 1;
27628 add_reg_note (insn, REG_CFA_RESTORE, reg);
27631 offset += reg_size;
27634 /* Epilogue for LR. */
27635 if (bitmap_bit_p (components, 0))
27637 int offset = info->lr_save_offset;
27638 if (info->push_p)
27639 offset += info->total_size;
27641 rtx reg = gen_rtx_REG (Pmode, 0);
27642 rtx_insn *insn = emit_insn (gen_frame_load (reg, ptr_reg, offset));
27644 rtx lr = gen_rtx_REG (Pmode, LR_REGNO);
27645 insn = emit_move_insn (lr, reg);
27646 RTX_FRAME_RELATED_P (insn) = 1;
27647 add_reg_note (insn, REG_CFA_RESTORE, lr);
27651 /* Implement TARGET_SHRINK_WRAP_SET_HANDLED_COMPONENTS. */
27652 static void
27653 rs6000_set_handled_components (sbitmap components)
27655 rs6000_stack_t *info = rs6000_stack_info ();
27657 for (int i = info->first_gp_reg_save; i < 32; i++)
27658 if (bitmap_bit_p (components, i))
27659 cfun->machine->gpr_is_wrapped_separately[i] = true;
27661 if (bitmap_bit_p (components, 0))
27662 cfun->machine->lr_is_wrapped_separately = true;
27665 /* Emit function prologue as insns. */
27667 void
27668 rs6000_emit_prologue (void)
27670 rs6000_stack_t *info = rs6000_stack_info ();
27671 machine_mode reg_mode = Pmode;
27672 int reg_size = TARGET_32BIT ? 4 : 8;
27673 rtx sp_reg_rtx = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
27674 rtx frame_reg_rtx = sp_reg_rtx;
27675 unsigned int cr_save_regno;
27676 rtx cr_save_rtx = NULL_RTX;
27677 rtx insn;
27678 int strategy;
27679 int using_static_chain_p = (cfun->static_chain_decl != NULL_TREE
27680 && df_regs_ever_live_p (STATIC_CHAIN_REGNUM)
27681 && call_used_regs[STATIC_CHAIN_REGNUM]);
27682 int using_split_stack = (flag_split_stack
27683 && (lookup_attribute ("no_split_stack",
27684 DECL_ATTRIBUTES (cfun->decl))
27685 == NULL));
27687 /* Offset to top of frame for frame_reg and sp respectively. */
27688 HOST_WIDE_INT frame_off = 0;
27689 HOST_WIDE_INT sp_off = 0;
27690 /* sp_adjust is the stack adjusting instruction, tracked so that the
27691 insn setting up the split-stack arg pointer can be emitted just
27692 prior to it, when r12 is not used here for other purposes. */
27693 rtx_insn *sp_adjust = 0;
27695 #if CHECKING_P
27696 /* Track and check usage of r0, r11, r12. */
27697 int reg_inuse = using_static_chain_p ? 1 << 11 : 0;
27698 #define START_USE(R) do \
27700 gcc_assert ((reg_inuse & (1 << (R))) == 0); \
27701 reg_inuse |= 1 << (R); \
27702 } while (0)
27703 #define END_USE(R) do \
27705 gcc_assert ((reg_inuse & (1 << (R))) != 0); \
27706 reg_inuse &= ~(1 << (R)); \
27707 } while (0)
27708 #define NOT_INUSE(R) do \
27710 gcc_assert ((reg_inuse & (1 << (R))) == 0); \
27711 } while (0)
27712 #else
27713 #define START_USE(R) do {} while (0)
27714 #define END_USE(R) do {} while (0)
27715 #define NOT_INUSE(R) do {} while (0)
27716 #endif
27718 if (DEFAULT_ABI == ABI_ELFv2
27719 && !TARGET_SINGLE_PIC_BASE)
27721 cfun->machine->r2_setup_needed = df_regs_ever_live_p (TOC_REGNUM);
27723 /* With -mminimal-toc we may generate an extra use of r2 below. */
27724 if (TARGET_TOC && TARGET_MINIMAL_TOC && get_pool_size () != 0)
27725 cfun->machine->r2_setup_needed = true;
27729 if (flag_stack_usage_info)
27730 current_function_static_stack_size = info->total_size;
27732 if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
27734 HOST_WIDE_INT size = info->total_size;
27736 if (crtl->is_leaf && !cfun->calls_alloca)
27738 if (size > PROBE_INTERVAL && size > STACK_CHECK_PROTECT)
27739 rs6000_emit_probe_stack_range (STACK_CHECK_PROTECT,
27740 size - STACK_CHECK_PROTECT);
27742 else if (size > 0)
27743 rs6000_emit_probe_stack_range (STACK_CHECK_PROTECT, size);
27746 if (TARGET_FIX_AND_CONTINUE)
27748 /* gdb on darwin arranges to forward a function from the old
27749 address by modifying the first 5 instructions of the function
27750 to branch to the overriding function. This is necessary to
27751 permit function pointers that point to the old function to
27752 actually forward to the new function. */
27753 emit_insn (gen_nop ());
27754 emit_insn (gen_nop ());
27755 emit_insn (gen_nop ());
27756 emit_insn (gen_nop ());
27757 emit_insn (gen_nop ());
27760 if (TARGET_SPE_ABI && info->spe_64bit_regs_used != 0)
27762 reg_mode = V2SImode;
27763 reg_size = 8;
27766 /* Handle world saves specially here. */
27767 if (WORLD_SAVE_P (info))
27769 int i, j, sz;
27770 rtx treg;
27771 rtvec p;
27772 rtx reg0;
27774 /* save_world expects lr in r0. */
27775 reg0 = gen_rtx_REG (Pmode, 0);
27776 if (info->lr_save_p)
27778 insn = emit_move_insn (reg0,
27779 gen_rtx_REG (Pmode, LR_REGNO));
27780 RTX_FRAME_RELATED_P (insn) = 1;
27783 /* The SAVE_WORLD and RESTORE_WORLD routines make a number of
27784 assumptions about the offsets of various bits of the stack
27785 frame. */
27786 gcc_assert (info->gp_save_offset == -220
27787 && info->fp_save_offset == -144
27788 && info->lr_save_offset == 8
27789 && info->cr_save_offset == 4
27790 && info->push_p
27791 && info->lr_save_p
27792 && (!crtl->calls_eh_return
27793 || info->ehrd_offset == -432)
27794 && info->vrsave_save_offset == -224
27795 && info->altivec_save_offset == -416);
27797 treg = gen_rtx_REG (SImode, 11);
27798 emit_move_insn (treg, GEN_INT (-info->total_size));
27800 /* SAVE_WORLD takes the caller's LR in R0 and the frame size
27801 in R11. It also clobbers R12, so beware! */
27803 /* Preserve CR2 for save_world prologues. */
27804 sz = 5;
27805 sz += 32 - info->first_gp_reg_save;
27806 sz += 64 - info->first_fp_reg_save;
27807 sz += LAST_ALTIVEC_REGNO - info->first_altivec_reg_save + 1;
27808 p = rtvec_alloc (sz);
27809 j = 0;
27810 RTVEC_ELT (p, j++) = gen_rtx_CLOBBER (VOIDmode,
27811 gen_rtx_REG (SImode,
27812 LR_REGNO));
27813 RTVEC_ELT (p, j++) = gen_rtx_USE (VOIDmode,
27814 gen_rtx_SYMBOL_REF (Pmode,
27815 "*save_world"));
27816 /* We do floats first so that the instruction pattern matches
27817 properly. */
27818 for (i = 0; i < 64 - info->first_fp_reg_save; i++)
27819 RTVEC_ELT (p, j++)
27820 = gen_frame_store (gen_rtx_REG (TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT
27821 ? DFmode : SFmode,
27822 info->first_fp_reg_save + i),
27823 frame_reg_rtx,
27824 info->fp_save_offset + frame_off + 8 * i);
27825 for (i = 0; info->first_altivec_reg_save + i <= LAST_ALTIVEC_REGNO; i++)
27826 RTVEC_ELT (p, j++)
27827 = gen_frame_store (gen_rtx_REG (V4SImode,
27828 info->first_altivec_reg_save + i),
27829 frame_reg_rtx,
27830 info->altivec_save_offset + frame_off + 16 * i);
27831 for (i = 0; i < 32 - info->first_gp_reg_save; i++)
27832 RTVEC_ELT (p, j++)
27833 = gen_frame_store (gen_rtx_REG (reg_mode, info->first_gp_reg_save + i),
27834 frame_reg_rtx,
27835 info->gp_save_offset + frame_off + reg_size * i);
27837 /* CR register traditionally saved as CR2. */
27838 RTVEC_ELT (p, j++)
27839 = gen_frame_store (gen_rtx_REG (SImode, CR2_REGNO),
27840 frame_reg_rtx, info->cr_save_offset + frame_off);
27841 /* Explain the use of R0. */
27842 if (info->lr_save_p)
27843 RTVEC_ELT (p, j++)
27844 = gen_frame_store (reg0,
27845 frame_reg_rtx, info->lr_save_offset + frame_off);
27846 /* Explain what happens to the stack pointer. */
27848 rtx newval = gen_rtx_PLUS (Pmode, sp_reg_rtx, treg);
27849 RTVEC_ELT (p, j++) = gen_rtx_SET (sp_reg_rtx, newval);
27852 insn = emit_insn (gen_rtx_PARALLEL (VOIDmode, p));
27853 rs6000_frame_related (insn, frame_reg_rtx, sp_off - frame_off,
27854 treg, GEN_INT (-info->total_size));
27855 sp_off = frame_off = info->total_size;
27858 strategy = info->savres_strategy;
27860 /* For V.4, update stack before we do any saving and set back pointer. */
27861 if (! WORLD_SAVE_P (info)
27862 && info->push_p
27863 && (DEFAULT_ABI == ABI_V4
27864 || crtl->calls_eh_return))
27866 bool need_r11 = (TARGET_SPE
27867 ? (!(strategy & SAVE_INLINE_GPRS)
27868 && info->spe_64bit_regs_used == 0)
27869 : (!(strategy & SAVE_INLINE_FPRS)
27870 || !(strategy & SAVE_INLINE_GPRS)
27871 || !(strategy & SAVE_INLINE_VRS)));
27872 int ptr_regno = -1;
27873 rtx ptr_reg = NULL_RTX;
27874 int ptr_off = 0;
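/* Decide how the prologue will address the save area: a frame
smaller than 32767 bytes can be reached from the updated stack
pointer with 16-bit displacements; otherwise pick r11 or r12 as a
temporary frame register, anchored where the first out-of-line
save routine expects its pointer. */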
27876 if (info->total_size < 32767)
27877 frame_off = info->total_size;
27878 else if (need_r11)
27879 ptr_regno = 11;
27880 else if (info->cr_save_p
27881 || info->lr_save_p
27882 || info->first_fp_reg_save < 64
27883 || info->first_gp_reg_save < 32
27884 || info->altivec_size != 0
27885 || info->vrsave_size != 0
27886 || crtl->calls_eh_return)
27887 ptr_regno = 12;
27888 else
27890 /* The prologue won't be saving any regs so there is no need
27891 to set up a frame register to access any frame save area.
27892 We also won't be using frame_off anywhere below, but set
27893 the correct value anyway to protect against future
27894 changes to this function. */
27895 frame_off = info->total_size;
27897 if (ptr_regno != -1)
27899 /* Set up the frame offset to that needed by the first
27900 out-of-line save function. */
27901 START_USE (ptr_regno);
27902 ptr_reg = gen_rtx_REG (Pmode, ptr_regno);
27903 frame_reg_rtx = ptr_reg;
27904 if (!(strategy & SAVE_INLINE_FPRS) && info->fp_size != 0)
27905 gcc_checking_assert (info->fp_save_offset + info->fp_size == 0);
27906 else if (!(strategy & SAVE_INLINE_GPRS) && info->first_gp_reg_save < 32)
27907 ptr_off = info->gp_save_offset + info->gp_size;
27908 else if (!(strategy & SAVE_INLINE_VRS) && info->altivec_size != 0)
27909 ptr_off = info->altivec_save_offset + info->altivec_size;
27910 frame_off = -ptr_off;
27912 sp_adjust = rs6000_emit_allocate_stack (info->total_size,
27913 ptr_reg, ptr_off);
27914 if (REGNO (frame_reg_rtx) == 12)
27915 sp_adjust = 0;
27916 sp_off = info->total_size;
27917 if (frame_reg_rtx != sp_reg_rtx)
27918 rs6000_emit_stack_tie (frame_reg_rtx, false);
27921 /* If we use the link register, get it into r0. */
27922 if (!WORLD_SAVE_P (info) && info->lr_save_p
27923 && !cfun->machine->lr_is_wrapped_separately)
27925 rtx addr, reg, mem;
27927 reg = gen_rtx_REG (Pmode, 0);
27928 START_USE (0);
27929 insn = emit_move_insn (reg, gen_rtx_REG (Pmode, LR_REGNO));
27930 RTX_FRAME_RELATED_P (insn) = 1;
27932 if (!(strategy & (SAVE_NOINLINE_GPRS_SAVES_LR
27933 | SAVE_NOINLINE_FPRS_SAVES_LR)))
27935 addr = gen_rtx_PLUS (Pmode, frame_reg_rtx,
27936 GEN_INT (info->lr_save_offset + frame_off));
27937 mem = gen_rtx_MEM (Pmode, addr);
27938 /* This mem must not use rs6000_sr_alias_set, because
27939 __builtin_return_address can also access this slot. */
27941 insn = emit_move_insn (mem, reg);
27942 rs6000_frame_related (insn, frame_reg_rtx, sp_off - frame_off,
27943 NULL_RTX, NULL_RTX);
27944 END_USE (0);
27948 /* If we need to save CR, put it into r12 or r11. Choose r12 except when
27949 r12 will be needed by out-of-line gpr restore. */
27950 cr_save_regno = ((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
27951 && !(strategy & (SAVE_INLINE_GPRS
27952 | SAVE_NOINLINE_GPRS_SAVES_LR))
27953 ? 11 : 12);
27954 if (!WORLD_SAVE_P (info)
27955 && info->cr_save_p
27956 && REGNO (frame_reg_rtx) != cr_save_regno
27957 && !(using_static_chain_p && cr_save_regno == 11)
27958 && !(using_split_stack && cr_save_regno == 12 && sp_adjust))
27960 cr_save_rtx = gen_rtx_REG (SImode, cr_save_regno);
27961 START_USE (cr_save_regno);
27962 rs6000_emit_move_from_cr (cr_save_rtx);
27965 /* Do any required saving of fpr's. If only one or two to save, do
27966 it ourselves. Otherwise, call function. */
27967 if (!WORLD_SAVE_P (info) && (strategy & SAVE_INLINE_FPRS))
27969 int i;
27970 for (i = 0; i < 64 - info->first_fp_reg_save; i++)
27971 if (save_reg_p (info->first_fp_reg_save + i))
27972 emit_frame_save (frame_reg_rtx,
27973 (TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT
27974 ? DFmode : SFmode),
27975 info->first_fp_reg_save + i,
27976 info->fp_save_offset + frame_off + 8 * i,
27977 sp_off - frame_off);
27979 else if (!WORLD_SAVE_P (info) && info->first_fp_reg_save != 64)
27981 bool lr = (strategy & SAVE_NOINLINE_FPRS_SAVES_LR) != 0;
27982 int sel = SAVRES_SAVE | SAVRES_FPR | (lr ? SAVRES_LR : 0);
27983 unsigned ptr_regno = ptr_regno_for_savres (sel);
27984 rtx ptr_reg = frame_reg_rtx;
27986 if (REGNO (frame_reg_rtx) == ptr_regno)
27987 gcc_checking_assert (frame_off == 0);
27988 else
27990 ptr_reg = gen_rtx_REG (Pmode, ptr_regno);
27991 NOT_INUSE (ptr_regno);
27992 emit_insn (gen_add3_insn (ptr_reg,
27993 frame_reg_rtx, GEN_INT (frame_off)));
27995 insn = rs6000_emit_savres_rtx (info, ptr_reg,
27996 info->fp_save_offset,
27997 info->lr_save_offset,
27998 DFmode, sel);
27999 rs6000_frame_related (insn, ptr_reg, sp_off,
28000 NULL_RTX, NULL_RTX);
28001 if (lr)
28002 END_USE (0);
28005 /* Save GPRs. This is done as a PARALLEL if we are using
28006 the store-multiple instructions. */
28007 if (!WORLD_SAVE_P (info)
28008 && TARGET_SPE_ABI
28009 && info->spe_64bit_regs_used != 0
28010 && info->first_gp_reg_save != 32)
28012 int i;
28013 rtx spe_save_area_ptr;
28014 HOST_WIDE_INT save_off;
28015 int ool_adjust = 0;
28017 /* Determine whether we can address all of the registers that need
28018 to be saved with an offset from frame_reg_rtx that fits in
28019 the small const field for SPE memory instructions. */
28020 int spe_regs_addressable
28021 = (SPE_CONST_OFFSET_OK (info->spe_gp_save_offset + frame_off
28022 + reg_size * (32 - info->first_gp_reg_save - 1))
28023 && (strategy & SAVE_INLINE_GPRS));
28025 if (spe_regs_addressable)
28027 spe_save_area_ptr = frame_reg_rtx;
28028 save_off = frame_off;
28030 else
28032 /* Make r11 point to the start of the SPE save area. We need
28033 to be careful here if r11 is holding the static chain. If
28034 it is, then temporarily save it in r0. */
28035 HOST_WIDE_INT offset;
28037 if (!(strategy & SAVE_INLINE_GPRS))
28038 ool_adjust = 8 * (info->first_gp_reg_save - FIRST_SAVED_GP_REGNO);
28039 offset = info->spe_gp_save_offset + frame_off - ool_adjust;
28040 spe_save_area_ptr = gen_rtx_REG (Pmode, 11);
28041 save_off = frame_off - offset;
28043 if (using_static_chain_p)
28045 rtx r0 = gen_rtx_REG (Pmode, 0);
28047 START_USE (0);
28048 gcc_assert (info->first_gp_reg_save > 11);
28050 emit_move_insn (r0, spe_save_area_ptr);
28052 else if (REGNO (frame_reg_rtx) != 11)
28053 START_USE (11);
28055 emit_insn (gen_addsi3 (spe_save_area_ptr,
28056 frame_reg_rtx, GEN_INT (offset)));
28057 if (!using_static_chain_p && REGNO (frame_reg_rtx) == 11)
28058 frame_off = -info->spe_gp_save_offset + ool_adjust;
28061 if ((strategy & SAVE_INLINE_GPRS))
28063 for (i = 0; i < 32 - info->first_gp_reg_save; i++)
28064 if (rs6000_reg_live_or_pic_offset_p (info->first_gp_reg_save + i))
28065 emit_frame_save (spe_save_area_ptr, reg_mode,
28066 info->first_gp_reg_save + i,
28067 (info->spe_gp_save_offset + save_off
28068 + reg_size * i),
28069 sp_off - save_off);
28071 else
28073 insn = rs6000_emit_savres_rtx (info, spe_save_area_ptr,
28074 info->spe_gp_save_offset + save_off,
28075 0, reg_mode,
28076 SAVRES_SAVE | SAVRES_GPR);
28078 rs6000_frame_related (insn, spe_save_area_ptr, sp_off - save_off,
28079 NULL_RTX, NULL_RTX);
28082 /* Move the static chain pointer back. */
28083 if (!spe_regs_addressable)
28085 if (using_static_chain_p)
28087 emit_move_insn (spe_save_area_ptr, gen_rtx_REG (Pmode, 0));
28088 END_USE (0);
28090 else if (REGNO (frame_reg_rtx) != 11)
28091 END_USE (11);
28094 else if (!WORLD_SAVE_P (info) && !(strategy & SAVE_INLINE_GPRS))
28096 bool lr = (strategy & SAVE_NOINLINE_GPRS_SAVES_LR) != 0;
28097 int sel = SAVRES_SAVE | SAVRES_GPR | (lr ? SAVRES_LR : 0);
28098 unsigned ptr_regno = ptr_regno_for_savres (sel);
28099 rtx ptr_reg = frame_reg_rtx;
28100 bool ptr_set_up = REGNO (ptr_reg) == ptr_regno;
28101 int end_save = info->gp_save_offset + info->gp_size;
28102 int ptr_off;
28104 if (ptr_regno == 12)
28105 sp_adjust = 0;
28106 if (!ptr_set_up)
28107 ptr_reg = gen_rtx_REG (Pmode, ptr_regno);
28109 /* Need to adjust r11 (r12) if we saved any FPRs. */
28110 if (end_save + frame_off != 0)
28112 rtx offset = GEN_INT (end_save + frame_off);
28114 if (ptr_set_up)
28115 frame_off = -end_save;
28116 else
28117 NOT_INUSE (ptr_regno);
28118 emit_insn (gen_add3_insn (ptr_reg, frame_reg_rtx, offset));
28120 else if (!ptr_set_up)
28122 NOT_INUSE (ptr_regno);
28123 emit_move_insn (ptr_reg, frame_reg_rtx);
28125 ptr_off = -end_save;
28126 insn = rs6000_emit_savres_rtx (info, ptr_reg,
28127 info->gp_save_offset + ptr_off,
28128 info->lr_save_offset + ptr_off,
28129 reg_mode, sel);
28130 rs6000_frame_related (insn, ptr_reg, sp_off - ptr_off,
28131 NULL_RTX, NULL_RTX);
28132 if (lr)
28133 END_USE (0);
28135 else if (!WORLD_SAVE_P (info) && (strategy & SAVE_MULTIPLE))
28137 rtvec p;
28138 int i;
28139 p = rtvec_alloc (32 - info->first_gp_reg_save);
28140 for (i = 0; i < 32 - info->first_gp_reg_save; i++)
28141 RTVEC_ELT (p, i)
28142 = gen_frame_store (gen_rtx_REG (reg_mode, info->first_gp_reg_save + i),
28143 frame_reg_rtx,
28144 info->gp_save_offset + frame_off + reg_size * i);
28145 insn = emit_insn (gen_rtx_PARALLEL (VOIDmode, p));
28146 rs6000_frame_related (insn, frame_reg_rtx, sp_off - frame_off,
28147 NULL_RTX, NULL_RTX);
28149 else if (!WORLD_SAVE_P (info))
28151 int offset = info->gp_save_offset + frame_off;
28152 for (int i = info->first_gp_reg_save; i < 32; i++)
28154 if (rs6000_reg_live_or_pic_offset_p (i)
28155 && !cfun->machine->gpr_is_wrapped_separately[i])
28156 emit_frame_save (frame_reg_rtx, reg_mode, i, offset,
28157 sp_off - frame_off);
28159 offset += reg_size;
28163 if (crtl->calls_eh_return)
28165 unsigned int i;
28166 rtvec p;
28168 for (i = 0; ; ++i)
28170 unsigned int regno = EH_RETURN_DATA_REGNO (i);
28171 if (regno == INVALID_REGNUM)
28172 break;
28175 p = rtvec_alloc (i);
28177 for (i = 0; ; ++i)
28179 unsigned int regno = EH_RETURN_DATA_REGNO (i);
28180 if (regno == INVALID_REGNUM)
28181 break;
28183 insn
28184 = gen_frame_store (gen_rtx_REG (reg_mode, regno),
28185 sp_reg_rtx,
28186 info->ehrd_offset + sp_off + reg_size * (int) i);
28187 RTVEC_ELT (p, i) = insn;
28188 RTX_FRAME_RELATED_P (insn) = 1;
28191 insn = emit_insn (gen_blockage ());
28192 RTX_FRAME_RELATED_P (insn) = 1;
28193 add_reg_note (insn, REG_FRAME_RELATED_EXPR, gen_rtx_PARALLEL (VOIDmode, p));
28196 /* In AIX ABI we need to make sure r2 is really saved. */
28197 if (TARGET_AIX && crtl->calls_eh_return)
28199 rtx tmp_reg, tmp_reg_si, hi, lo, compare_result, toc_save_done, jump;
28200 rtx save_insn, join_insn, note;
28201 long toc_restore_insn;
28203 tmp_reg = gen_rtx_REG (Pmode, 11);
28204 tmp_reg_si = gen_rtx_REG (SImode, 11);
28205 if (using_static_chain_p)
28207 START_USE (0);
28208 emit_move_insn (gen_rtx_REG (Pmode, 0), tmp_reg);
28210 else
28211 START_USE (11);
28212 emit_move_insn (tmp_reg, gen_rtx_REG (Pmode, LR_REGNO));
28213 /* Peek at instruction to which this function returns. If it's
28214 restoring r2, then we know we've already saved r2. We can't
28215 unconditionally save r2 because the value we have will already
28216 be updated if we arrived at this function via a plt call or
28217 toc adjusting stub. */
28218 emit_move_insn (tmp_reg_si, gen_rtx_MEM (SImode, tmp_reg));
28219 toc_restore_insn = ((TARGET_32BIT ? 0x80410000 : 0xE8410000)
28220 + RS6000_TOC_SAVE_SLOT);
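/* 0x80410000 is the encoding of "lwz r2,0(r1)" and 0xE8410000 that
of "ld r2,0(r1)"; adding RS6000_TOC_SAVE_SLOT fills in the
displacement field of the expected TOC-restore instruction. */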
28221 hi = gen_int_mode (toc_restore_insn & ~0xffff, SImode);
28222 emit_insn (gen_xorsi3 (tmp_reg_si, tmp_reg_si, hi));
28223 compare_result = gen_rtx_REG (CCUNSmode, CR0_REGNO);
28224 validate_condition_mode (EQ, CCUNSmode);
28225 lo = gen_int_mode (toc_restore_insn & 0xffff, SImode);
28226 emit_insn (gen_rtx_SET (compare_result,
28227 gen_rtx_COMPARE (CCUNSmode, tmp_reg_si, lo)));
28228 toc_save_done = gen_label_rtx ();
28229 jump = gen_rtx_IF_THEN_ELSE (VOIDmode,
28230 gen_rtx_EQ (VOIDmode, compare_result,
28231 const0_rtx),
28232 gen_rtx_LABEL_REF (VOIDmode, toc_save_done),
28233 pc_rtx);
28234 jump = emit_jump_insn (gen_rtx_SET (pc_rtx, jump));
28235 JUMP_LABEL (jump) = toc_save_done;
28236 LABEL_NUSES (toc_save_done) += 1;
28238 save_insn = emit_frame_save (frame_reg_rtx, reg_mode,
28239 TOC_REGNUM, frame_off + RS6000_TOC_SAVE_SLOT,
28240 sp_off - frame_off);
28242 emit_label (toc_save_done);
28244 /* ??? If we leave SAVE_INSN as marked as saving R2, then we'll
28245 have a CFG that has different saves along different paths.
28246 Move the note to a dummy blockage insn, which describes that
28247 R2 is unconditionally saved after the label. */
28248 /* ??? An alternate representation might be a special insn pattern
28249 containing both the branch and the store. That might give the
28250 code that minimizes the number of DW_CFA_advance opcodes more
28251 freedom in placing the annotations. */
28252 note = find_reg_note (save_insn, REG_FRAME_RELATED_EXPR, NULL);
28253 if (note)
28254 remove_note (save_insn, note);
28255 else
28256 note = alloc_reg_note (REG_FRAME_RELATED_EXPR,
28257 copy_rtx (PATTERN (save_insn)), NULL_RTX);
28258 RTX_FRAME_RELATED_P (save_insn) = 0;
28260 join_insn = emit_insn (gen_blockage ());
28261 REG_NOTES (join_insn) = note;
28262 RTX_FRAME_RELATED_P (join_insn) = 1;
28264 if (using_static_chain_p)
28266 emit_move_insn (tmp_reg, gen_rtx_REG (Pmode, 0));
28267 END_USE (0);
28269 else
28270 END_USE (11);
28273 /* Save CR if we use any that must be preserved. */
28274 if (!WORLD_SAVE_P (info) && info->cr_save_p)
28276 rtx addr = gen_rtx_PLUS (Pmode, frame_reg_rtx,
28277 GEN_INT (info->cr_save_offset + frame_off));
28278 rtx mem = gen_frame_mem (SImode, addr);
28280 /* If we didn't copy cr before, do so now using r0. */
28281 if (cr_save_rtx == NULL_RTX)
28283 START_USE (0);
28284 cr_save_rtx = gen_rtx_REG (SImode, 0);
28285 rs6000_emit_move_from_cr (cr_save_rtx);
28288 /* Saving CR requires a two-instruction sequence: one instruction
28289 to move the CR to a general-purpose register, and a second
28290 instruction that stores the GPR to memory.
28292 We do not emit any DWARF CFI records for the first of these,
28293 because we cannot properly represent the fact that CR is saved in
28294 a register. One reason is that we cannot express that multiple
28295 CR fields are saved; another reason is that on 64-bit, the size
28296 of the CR register in DWARF (4 bytes) differs from the size of
28297 a general-purpose register.
28299 This means if any intervening instruction were to clobber one of
28300 the call-saved CR fields, we'd have incorrect CFI. To prevent
28301 this from happening, we mark the store to memory as a use of
28302 those CR fields, which prevents any such instruction from being
28303 scheduled in between the two instructions. */
28304 rtx crsave_v[9];
28305 int n_crsave = 0;
28306 int i;
28308 crsave_v[n_crsave++] = gen_rtx_SET (mem, cr_save_rtx);
28309 for (i = 0; i < 8; i++)
28310 if (save_reg_p (CR0_REGNO + i))
28311 crsave_v[n_crsave++]
28312 = gen_rtx_USE (VOIDmode, gen_rtx_REG (CCmode, CR0_REGNO + i));
28314 insn = emit_insn (gen_rtx_PARALLEL (VOIDmode,
28315 gen_rtvec_v (n_crsave, crsave_v)));
28316 END_USE (REGNO (cr_save_rtx));
28318 /* Now, there's no way that dwarf2out_frame_debug_expr is going to
28319 understand '(unspec:SI [(reg:CC 68) ...] UNSPEC_MOVESI_FROM_CR)',
28320 so we need to construct a frame expression manually. */
28321 RTX_FRAME_RELATED_P (insn) = 1;
28323 /* Update address to be stack-pointer relative, like
28324 rs6000_frame_related would do. */
28325 addr = gen_rtx_PLUS (Pmode, gen_rtx_REG (Pmode, STACK_POINTER_REGNUM),
28326 GEN_INT (info->cr_save_offset + sp_off));
28327 mem = gen_frame_mem (SImode, addr);
28329 if (DEFAULT_ABI == ABI_ELFv2)
28331 /* In the ELFv2 ABI we generate separate CFI records for each
28332 CR field that was actually saved. They all point to the
28333 same 32-bit stack slot. */
28334 rtx crframe[8];
28335 int n_crframe = 0;
28337 for (i = 0; i < 8; i++)
28338 if (save_reg_p (CR0_REGNO + i))
28340 crframe[n_crframe]
28341 = gen_rtx_SET (mem, gen_rtx_REG (SImode, CR0_REGNO + i));
28343 RTX_FRAME_RELATED_P (crframe[n_crframe]) = 1;
28344 n_crframe++;
28347 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
28348 gen_rtx_PARALLEL (VOIDmode,
28349 gen_rtvec_v (n_crframe, crframe)));
28351 else
28353 /* In other ABIs, by convention, we use a single CR regnum to
28354 represent the fact that all call-saved CR fields are saved.
28355 We use CR2_REGNO to be compatible with gcc-2.95 on Linux. */
28356 rtx set = gen_rtx_SET (mem, gen_rtx_REG (SImode, CR2_REGNO));
28357 add_reg_note (insn, REG_FRAME_RELATED_EXPR, set);
28361 /* In the ELFv2 ABI we need to save all call-saved CR fields into
28362 *separate* slots if the routine calls __builtin_eh_return, so
28363 that they can be independently restored by the unwinder. */
28364 if (DEFAULT_ABI == ABI_ELFv2 && crtl->calls_eh_return)
28366 int i, cr_off = info->ehcr_offset;
28367 rtx crsave;
28369 /* ??? We might get better performance by using multiple mfocrf
28370 instructions. */
28371 crsave = gen_rtx_REG (SImode, 0);
28372 emit_insn (gen_movesi_from_cr (crsave));
28374 for (i = 0; i < 8; i++)
28375 if (!call_used_regs[CR0_REGNO + i])
28377 rtvec p = rtvec_alloc (2);
28378 RTVEC_ELT (p, 0)
28379 = gen_frame_store (crsave, frame_reg_rtx, cr_off + frame_off);
28380 RTVEC_ELT (p, 1)
28381 = gen_rtx_USE (VOIDmode, gen_rtx_REG (CCmode, CR0_REGNO + i));
28383 insn = emit_insn (gen_rtx_PARALLEL (VOIDmode, p));
28385 RTX_FRAME_RELATED_P (insn) = 1;
28386 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
28387 gen_frame_store (gen_rtx_REG (SImode, CR0_REGNO + i),
28388 sp_reg_rtx, cr_off + sp_off));
28390 cr_off += reg_size;
28394 /* Update stack and set back pointer unless this is V.4,
28395 for which it was done previously. */
28396 if (!WORLD_SAVE_P (info) && info->push_p
28397 && !(DEFAULT_ABI == ABI_V4 || crtl->calls_eh_return))
28399 rtx ptr_reg = NULL;
28400 int ptr_off = 0;
28402 /* If saving altivec regs we need to be able to address all save
28403 locations using a 16-bit offset. */
28404 if ((strategy & SAVE_INLINE_VRS) == 0
28405 || (info->altivec_size != 0
28406 && (info->altivec_save_offset + info->altivec_size - 16
28407 + info->total_size - frame_off) > 32767)
28408 || (info->vrsave_size != 0
28409 && (info->vrsave_save_offset
28410 + info->total_size - frame_off) > 32767))
28412 int sel = SAVRES_SAVE | SAVRES_VR;
28413 unsigned ptr_regno = ptr_regno_for_savres (sel);
28415 if (using_static_chain_p
28416 && ptr_regno == STATIC_CHAIN_REGNUM)
28417 ptr_regno = 12;
28418 if (REGNO (frame_reg_rtx) != ptr_regno)
28419 START_USE (ptr_regno);
28420 ptr_reg = gen_rtx_REG (Pmode, ptr_regno);
28421 frame_reg_rtx = ptr_reg;
28422 ptr_off = info->altivec_save_offset + info->altivec_size;
28423 frame_off = -ptr_off;
28425 else if (REGNO (frame_reg_rtx) == 1)
28426 frame_off = info->total_size;
28427 sp_adjust = rs6000_emit_allocate_stack (info->total_size,
28428 ptr_reg, ptr_off);
28429 if (REGNO (frame_reg_rtx) == 12)
28430 sp_adjust = 0;
28431 sp_off = info->total_size;
28432 if (frame_reg_rtx != sp_reg_rtx)
28433 rs6000_emit_stack_tie (frame_reg_rtx, false);
28436 /* Set frame pointer, if needed. */
28437 if (frame_pointer_needed)
28439 insn = emit_move_insn (gen_rtx_REG (Pmode, HARD_FRAME_POINTER_REGNUM),
28440 sp_reg_rtx);
28441 RTX_FRAME_RELATED_P (insn) = 1;
28444 /* Save AltiVec registers if needed. Save here because the red zone does
28445 not always include AltiVec registers. */
28446 if (!WORLD_SAVE_P (info)
28447 && info->altivec_size != 0 && (strategy & SAVE_INLINE_VRS) == 0)
28449 int end_save = info->altivec_save_offset + info->altivec_size;
28450 int ptr_off;
28451 /* Oddly, the vector save/restore functions point r0 at the end
28452 of the save area, then use r11 or r12 to load offsets for
28453 [reg+reg] addressing. */
28454 rtx ptr_reg = gen_rtx_REG (Pmode, 0);
28455 int scratch_regno = ptr_regno_for_savres (SAVRES_SAVE | SAVRES_VR);
28456 rtx scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
28458 gcc_checking_assert (scratch_regno == 11 || scratch_regno == 12);
28459 NOT_INUSE (0);
28460 if (scratch_regno == 12)
28461 sp_adjust = 0;
28462 if (end_save + frame_off != 0)
28464 rtx offset = GEN_INT (end_save + frame_off);
28466 emit_insn (gen_add3_insn (ptr_reg, frame_reg_rtx, offset));
28468 else
28469 emit_move_insn (ptr_reg, frame_reg_rtx);
28471 ptr_off = -end_save;
28472 insn = rs6000_emit_savres_rtx (info, scratch_reg,
28473 info->altivec_save_offset + ptr_off,
28474 0, V4SImode, SAVRES_SAVE | SAVRES_VR);
28475 rs6000_frame_related (insn, scratch_reg, sp_off - ptr_off,
28476 NULL_RTX, NULL_RTX);
28477 if (REGNO (frame_reg_rtx) == REGNO (scratch_reg))
28479 /* The oddity mentioned above clobbered our frame reg. */
28480 emit_move_insn (frame_reg_rtx, ptr_reg);
28481 frame_off = ptr_off;
28484 else if (!WORLD_SAVE_P (info)
28485 && info->altivec_size != 0)
28487 int i;
28489 for (i = info->first_altivec_reg_save; i <= LAST_ALTIVEC_REGNO; ++i)
28490 if (info->vrsave_mask & ALTIVEC_REG_BIT (i))
28492 rtx areg, savereg, mem;
28493 HOST_WIDE_INT offset;
28495 offset = (info->altivec_save_offset + frame_off
28496 + 16 * (i - info->first_altivec_reg_save));
28498 savereg = gen_rtx_REG (V4SImode, i);
28500 if (TARGET_P9_DFORM_VECTOR && quad_address_offset_p (offset))
28502 mem = gen_frame_mem (V4SImode,
28503 gen_rtx_PLUS (Pmode, frame_reg_rtx,
28504 GEN_INT (offset)));
28505 insn = emit_insn (gen_rtx_SET (mem, savereg));
28506 areg = NULL_RTX;
28508 else
28510 NOT_INUSE (0);
28511 areg = gen_rtx_REG (Pmode, 0);
28512 emit_move_insn (areg, GEN_INT (offset));
28514 /* AltiVec addressing mode is [reg+reg]. */
28515 mem = gen_frame_mem (V4SImode,
28516 gen_rtx_PLUS (Pmode, frame_reg_rtx, areg));
28518 /* Rather than emitting a generic move, force use of the stvx
28519 instruction, which we always want on ISA 2.07 (power8) systems.
28520 In particular we don't want xxpermdi/stxvd2x for little
28521 endian. */
28522 insn = emit_insn (gen_altivec_stvx_v4si_internal (mem, savereg));
28525 rs6000_frame_related (insn, frame_reg_rtx, sp_off - frame_off,
28526 areg, GEN_INT (offset));
28530 /* VRSAVE is a bit vector representing which AltiVec registers
28531 are used. The OS uses this to determine which vector
28532 registers to save on a context switch. We need to save
28533 VRSAVE on the stack frame, add whatever AltiVec registers we
28534 used in this function, and do the corresponding magic in the
28535 epilogue. */
28537 if (!WORLD_SAVE_P (info)
28538 && info->vrsave_size != 0)
28540 rtx reg, vrsave;
28541 int offset;
28542 int save_regno;
28544 /* Get VRSAVE onto a GPR. Note that ABI_V4 and ABI_DARWIN might
28545 be using r12 as frame_reg_rtx and r11 as the static chain
28546 pointer for nested functions. */
28547 save_regno = 12;
28548 if ((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
28549 && !using_static_chain_p)
28550 save_regno = 11;
28551 else if (using_split_stack || REGNO (frame_reg_rtx) == 12)
28553 save_regno = 11;
28554 if (using_static_chain_p)
28555 save_regno = 0;
28558 NOT_INUSE (save_regno);
28559 reg = gen_rtx_REG (SImode, save_regno);
28560 vrsave = gen_rtx_REG (SImode, VRSAVE_REGNO);
28561 if (TARGET_MACHO)
28562 emit_insn (gen_get_vrsave_internal (reg));
28563 else
28564 emit_insn (gen_rtx_SET (reg, vrsave));
28566 /* Save VRSAVE. */
28567 offset = info->vrsave_save_offset + frame_off;
28568 insn = emit_insn (gen_frame_store (reg, frame_reg_rtx, offset));
28570 /* Include the registers in the mask. */
28571 emit_insn (gen_iorsi3 (reg, reg, GEN_INT ((int) info->vrsave_mask)));
28573 insn = emit_insn (generate_set_vrsave (reg, info, 0));
28576 /* If we are using RS6000_PIC_OFFSET_TABLE_REGNUM, we need to set it up. */
28577 if (!TARGET_SINGLE_PIC_BASE
28578 && ((TARGET_TOC && TARGET_MINIMAL_TOC && get_pool_size () != 0)
28579 || (DEFAULT_ABI == ABI_V4
28580 && (flag_pic == 1 || (flag_pic && TARGET_SECURE_PLT))
28581 && df_regs_ever_live_p (RS6000_PIC_OFFSET_TABLE_REGNUM))))
28583 /* If emit_load_toc_table will use the link register, we need to save
28584 it. We use R12 for this purpose because emit_load_toc_table
28585 can use register 0. This allows us to use a plain 'blr' to return
28586 from the procedure more often. */
28587 int save_LR_around_toc_setup = (TARGET_ELF
28588 && DEFAULT_ABI == ABI_V4
28589 && flag_pic
28590 && ! info->lr_save_p
28591 && EDGE_COUNT (EXIT_BLOCK_PTR_FOR_FN (cfun)->preds) > 0);
28592 if (save_LR_around_toc_setup)
28594 rtx lr = gen_rtx_REG (Pmode, LR_REGNO);
28595 rtx tmp = gen_rtx_REG (Pmode, 12);
28597 sp_adjust = 0;
28598 insn = emit_move_insn (tmp, lr);
28599 RTX_FRAME_RELATED_P (insn) = 1;
28601 rs6000_emit_load_toc_table (TRUE);
28603 insn = emit_move_insn (lr, tmp);
28604 add_reg_note (insn, REG_CFA_RESTORE, lr);
28605 RTX_FRAME_RELATED_P (insn) = 1;
28607 else
28608 rs6000_emit_load_toc_table (TRUE);
28611 #if TARGET_MACHO
28612 if (!TARGET_SINGLE_PIC_BASE
28613 && DEFAULT_ABI == ABI_DARWIN
28614 && flag_pic && crtl->uses_pic_offset_table)
28616 rtx lr = gen_rtx_REG (Pmode, LR_REGNO);
28617 rtx src = gen_rtx_SYMBOL_REF (Pmode, MACHOPIC_FUNCTION_BASE_NAME);
28619 /* Save and restore LR locally around this call (in R0). */
28620 if (!info->lr_save_p)
28621 emit_move_insn (gen_rtx_REG (Pmode, 0), lr);
28623 emit_insn (gen_load_macho_picbase (src));
28625 emit_move_insn (gen_rtx_REG (Pmode,
28626 RS6000_PIC_OFFSET_TABLE_REGNUM),
28627 lr);
28629 if (!info->lr_save_p)
28630 emit_move_insn (lr, gen_rtx_REG (Pmode, 0));
28632 #endif
28634 /* If we need to, save the TOC register after doing the stack setup.
28635 Do not emit eh frame info for this save. The unwinder wants info,
28636 conceptually attached to instructions in this function, about
28637 register values in the caller of this function. This R2 may have
28638 already been changed from the value in the caller.
28639 We don't attempt to write accurate DWARF EH frame info for R2
28640 because code emitted by gcc for a (non-pointer) function call
28641 doesn't save and restore R2. Instead, R2 is managed out-of-line
28642 by a linker generated plt call stub when the function resides in
28643 a shared library. This behavior is costly to describe in DWARF,
28644 both in terms of the size of DWARF info and the time taken in the
28645 unwinder to interpret it. R2 changes, apart from the
28646 calls_eh_return case earlier in this function, are handled by
28647 linux-unwind.h frob_update_context. */
28648 if (rs6000_save_toc_in_prologue_p ())
28650 rtx reg = gen_rtx_REG (reg_mode, TOC_REGNUM);
28651 emit_insn (gen_frame_store (reg, sp_reg_rtx, RS6000_TOC_SAVE_SLOT));
28654 if (using_split_stack && split_stack_arg_pointer_used_p ())
28656 /* Set up the arg pointer (r12) for -fsplit-stack code. If
28657 __morestack was called, it left the arg pointer to the old
28658 stack in r29. Otherwise, the arg pointer is the top of the
28659 current frame. */
28660 cfun->machine->split_stack_argp_used = true;
28661 if (sp_adjust)
28663 rtx r12 = gen_rtx_REG (Pmode, 12);
28664 rtx set_r12 = gen_rtx_SET (r12, sp_reg_rtx);
28665 emit_insn_before (set_r12, sp_adjust);
28667 else if (frame_off != 0 || REGNO (frame_reg_rtx) != 12)
28669 rtx r12 = gen_rtx_REG (Pmode, 12);
28670 if (frame_off == 0)
28671 emit_move_insn (r12, frame_reg_rtx);
28672 else
28673 emit_insn (gen_add3_insn (r12, frame_reg_rtx, GEN_INT (frame_off)));
28675 if (info->push_p)
28677 rtx r12 = gen_rtx_REG (Pmode, 12);
28678 rtx r29 = gen_rtx_REG (Pmode, 29);
28679 rtx cr7 = gen_rtx_REG (CCUNSmode, CR7_REGNO);
28680 rtx not_more = gen_label_rtx ();
28681 rtx jump;
28683 jump = gen_rtx_IF_THEN_ELSE (VOIDmode,
28684 gen_rtx_GEU (VOIDmode, cr7, const0_rtx),
28685 gen_rtx_LABEL_REF (VOIDmode, not_more),
28686 pc_rtx);
28687 jump = emit_jump_insn (gen_rtx_SET (pc_rtx, jump));
28688 JUMP_LABEL (jump) = not_more;
28689 LABEL_NUSES (not_more) += 1;
28690 emit_move_insn (r12, r29);
28691 emit_label (not_more);
28696 /* Output .extern statements for the save/restore routines we use. */
28698 static void
28699 rs6000_output_savres_externs (FILE *file)
28701 rs6000_stack_t *info = rs6000_stack_info ();
28703 if (TARGET_DEBUG_STACK)
28704 debug_stack_info (info);
28706 /* Write .extern for any function we will call to save and restore
28707 fp values. */
28708 if (info->first_fp_reg_save < 64
28709 && !TARGET_MACHO
28710 && !TARGET_ELF)
28712 char *name;
28713 int regno = info->first_fp_reg_save - 32;
28715 if ((info->savres_strategy & SAVE_INLINE_FPRS) == 0)
28717 bool lr = (info->savres_strategy & SAVE_NOINLINE_FPRS_SAVES_LR) != 0;
28718 int sel = SAVRES_SAVE | SAVRES_FPR | (lr ? SAVRES_LR : 0);
28719 name = rs6000_savres_routine_name (info, regno, sel);
28720 fprintf (file, "\t.extern %s\n", name);
28722 if ((info->savres_strategy & REST_INLINE_FPRS) == 0)
28724 bool lr = (info->savres_strategy
28725 & REST_NOINLINE_FPRS_DOESNT_RESTORE_LR) == 0;
28726 int sel = SAVRES_FPR | (lr ? SAVRES_LR : 0);
28727 name = rs6000_savres_routine_name (info, regno, sel);
28728 fprintf (file, "\t.extern %s\n", name);
28733 /* Write function prologue. */
28735 static void
28736 rs6000_output_function_prologue (FILE *file,
28737 HOST_WIDE_INT size ATTRIBUTE_UNUSED)
28739 if (!cfun->is_thunk)
28740 rs6000_output_savres_externs (file);
28742 /* ELFv2 ABI r2 setup code and local entry point. This must follow
28743 immediately after the global entry point label. */
28744 if (rs6000_global_entry_point_needed_p ())
28746 const char *name = XSTR (XEXP (DECL_RTL (current_function_decl), 0), 0);
28748 (*targetm.asm_out.internal_label) (file, "LCF", rs6000_pic_labelno);
28750 if (TARGET_CMODEL != CMODEL_LARGE)
28752 /* In the small and medium code models, we assume the TOC is less
28753 than 2 GB away from the text section, so it can be computed via the
28754 following two-instruction sequence. */
28755 char buf[256];
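/* The two instructions emitted below look like, e.g. (assuming the
internal label renders as .LCF0):
0:	addis 2,12,.TOC.-.LCF0@ha
	addi 2,2,.TOC.-.LCF0@l  */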
28757 ASM_GENERATE_INTERNAL_LABEL (buf, "LCF", rs6000_pic_labelno);
28758 fprintf (file, "0:\taddis 2,12,.TOC.-");
28759 assemble_name (file, buf);
28760 fprintf (file, "@ha\n");
28761 fprintf (file, "\taddi 2,2,.TOC.-");
28762 assemble_name (file, buf);
28763 fprintf (file, "@l\n");
28765 else
28767 /* In the large code model, we allow arbitrary offsets between the
28768 TOC and the text section, so we have to load the offset from
28769 memory. The data field is emitted directly before the global
28770 entry point in rs6000_elf_declare_function_name. */
28771 char buf[256];
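/* Ignoring the optional R_PPC64_ENTRY marker, the sequence emitted
below looks like, e.g. (assuming the internal labels render as
.LCL0 and .LCF0):
	ld 2,.LCL0-.LCF0(12)
	add 2,2,12  */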
28773 #ifdef HAVE_AS_ENTRY_MARKERS
28774 /* If supported by the linker, emit a marker relocation. If the
28775 total code size of the final executable or shared library
28776 happens to fit into 2 GB after all, the linker will replace
28777 this code sequence with the sequence for the small or medium
28778 code model. */
28779 fprintf (file, "\t.reloc .,R_PPC64_ENTRY\n");
28780 #endif
28781 fprintf (file, "\tld 2,");
28782 ASM_GENERATE_INTERNAL_LABEL (buf, "LCL", rs6000_pic_labelno);
28783 assemble_name (file, buf);
28784 fprintf (file, "-");
28785 ASM_GENERATE_INTERNAL_LABEL (buf, "LCF", rs6000_pic_labelno);
28786 assemble_name (file, buf);
28787 fprintf (file, "(12)\n");
28788 fprintf (file, "\tadd 2,2,12\n");
28791 fputs ("\t.localentry\t", file);
28792 assemble_name (file, name);
28793 fputs (",.-", file);
28794 assemble_name (file, name);
28795 fputs ("\n", file);
28798 /* Output -mprofile-kernel code. This needs to be done here instead of
28799 in output_function_profile since it must go after the ELFv2 ABI
28800 local entry point. */
28801 if (TARGET_PROFILE_KERNEL && crtl->profile)
28803 gcc_assert (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2);
28804 gcc_assert (!TARGET_32BIT);
28806 asm_fprintf (file, "\tmflr %s\n", reg_names[0]);
28808 /* In the ELFv2 ABI we have no compiler stack word. It must be
28809 the responsibility of _mcount to preserve the static chain
28810 register if required. */
28811 if (DEFAULT_ABI != ABI_ELFv2
28812 && cfun->static_chain_decl != NULL)
28814 asm_fprintf (file, "\tstd %s,24(%s)\n",
28815 reg_names[STATIC_CHAIN_REGNUM], reg_names[1]);
28816 fprintf (file, "\tbl %s\n", RS6000_MCOUNT);
28817 asm_fprintf (file, "\tld %s,24(%s)\n",
28818 reg_names[STATIC_CHAIN_REGNUM], reg_names[1]);
28820 else
28821 fprintf (file, "\tbl %s\n", RS6000_MCOUNT);
28824 rs6000_pic_labelno++;
28827 /* -mprofile-kernel code calls mcount before the function prologue,
28828 so a profiled leaf function should stay a leaf function. */
28829 static bool
28830 rs6000_keep_leaf_when_profiled ()
28832 return TARGET_PROFILE_KERNEL;
28835 /* Non-zero if vmx regs are restored before the frame pop, zero if
28836 we restore after the pop when possible. */
28837 #define ALWAYS_RESTORE_ALTIVEC_BEFORE_POP 0
28839 /* Restoring cr is a two-step process: loading a reg from the frame
28840 save, then moving the reg to cr. For ABI_V4 we must let the
28841 unwinder know that the stack location is no longer valid at or
28842 before the stack deallocation, but we can't emit a cfa_restore for
28843 cr at the stack deallocation like we do for other registers.
28844 The trouble is that it is possible for the move to cr to be
28845 scheduled after the stack deallocation. So say exactly where cr
28846 is located on each of the two insns. */
28848 static rtx
28849 load_cr_save (int regno, rtx frame_reg_rtx, int offset, bool exit_func)
28851 rtx mem = gen_frame_mem_offset (SImode, frame_reg_rtx, offset);
28852 rtx reg = gen_rtx_REG (SImode, regno);
28853 rtx_insn *insn = emit_move_insn (reg, mem);
28855 if (!exit_func && DEFAULT_ABI == ABI_V4)
28857 rtx cr = gen_rtx_REG (SImode, CR2_REGNO);
28858 rtx set = gen_rtx_SET (reg, cr);
28860 add_reg_note (insn, REG_CFA_REGISTER, set);
28861 RTX_FRAME_RELATED_P (insn) = 1;
28863 return reg;
28866 /* Reload CR from REG. */
28868 static void
28869 restore_saved_cr (rtx reg, int using_mfcr_multiple, bool exit_func)
28871 int count = 0;
28872 int i;
28874 if (using_mfcr_multiple)
28876 for (i = 0; i < 8; i++)
28877 if (save_reg_p (CR0_REGNO + i))
28878 count++;
28879 gcc_assert (count);
28882 if (using_mfcr_multiple && count > 1)
28884 rtx_insn *insn;
28885 rtvec p;
28886 int ndx;
28888 p = rtvec_alloc (count);
28890 ndx = 0;
28891 for (i = 0; i < 8; i++)
28892 if (save_reg_p (CR0_REGNO + i))
28894 rtvec r = rtvec_alloc (2);
28895 RTVEC_ELT (r, 0) = reg;
28896 RTVEC_ELT (r, 1) = GEN_INT (1 << (7-i));
28897 RTVEC_ELT (p, ndx) =
28898 gen_rtx_SET (gen_rtx_REG (CCmode, CR0_REGNO + i),
28899 gen_rtx_UNSPEC (CCmode, r, UNSPEC_MOVESI_TO_CR));
28900 ndx++;
28902 insn = emit_insn (gen_rtx_PARALLEL (VOIDmode, p));
28903 gcc_assert (ndx == count);
28905 /* For the ELFv2 ABI we generate a CFA_RESTORE for each
28906 CR field separately. */
28907 if (!exit_func && DEFAULT_ABI == ABI_ELFv2 && flag_shrink_wrap)
28909 for (i = 0; i < 8; i++)
28910 if (save_reg_p (CR0_REGNO + i))
28911 add_reg_note (insn, REG_CFA_RESTORE,
28912 gen_rtx_REG (SImode, CR0_REGNO + i));
28914 RTX_FRAME_RELATED_P (insn) = 1;
28917 else
28918 for (i = 0; i < 8; i++)
28919 if (save_reg_p (CR0_REGNO + i))
28921 rtx insn = emit_insn (gen_movsi_to_cr_one
28922 (gen_rtx_REG (CCmode, CR0_REGNO + i), reg));
28924 /* For the ELFv2 ABI we generate a CFA_RESTORE for each
28925 CR field separately, attached to the insn that in fact
28926 restores this particular CR field. */
28927 if (!exit_func && DEFAULT_ABI == ABI_ELFv2 && flag_shrink_wrap)
28929 add_reg_note (insn, REG_CFA_RESTORE,
28930 gen_rtx_REG (SImode, CR0_REGNO + i));
28932 RTX_FRAME_RELATED_P (insn) = 1;
28936 /* For other ABIs, we just generate a single CFA_RESTORE for CR2. */
28937 if (!exit_func && DEFAULT_ABI != ABI_ELFv2
28938 && (DEFAULT_ABI == ABI_V4 || flag_shrink_wrap))
28940 rtx_insn *insn = get_last_insn ();
28941 rtx cr = gen_rtx_REG (SImode, CR2_REGNO);
28943 add_reg_note (insn, REG_CFA_RESTORE, cr);
28944 RTX_FRAME_RELATED_P (insn) = 1;
28948 /* Like cr, the move to lr instruction can be scheduled after the
28949 stack deallocation, but unlike cr, its stack frame save is still
28950 valid. So we only need to emit the cfa_restore on the correct
28951 instruction. */
28953 static void
28954 load_lr_save (int regno, rtx frame_reg_rtx, int offset)
28956 rtx mem = gen_frame_mem_offset (Pmode, frame_reg_rtx, offset);
28957 rtx reg = gen_rtx_REG (Pmode, regno);
28959 emit_move_insn (reg, mem);
28962 static void
28963 restore_saved_lr (int regno, bool exit_func)
28965 rtx reg = gen_rtx_REG (Pmode, regno);
28966 rtx lr = gen_rtx_REG (Pmode, LR_REGNO);
28967 rtx_insn *insn = emit_move_insn (lr, reg);
28969 if (!exit_func && flag_shrink_wrap)
28971 add_reg_note (insn, REG_CFA_RESTORE, lr);
28972 RTX_FRAME_RELATED_P (insn) = 1;
28976 static rtx
28977 add_crlr_cfa_restore (const rs6000_stack_t *info, rtx cfa_restores)
28979 if (DEFAULT_ABI == ABI_ELFv2)
28981 int i;
28982 for (i = 0; i < 8; i++)
28983 if (save_reg_p (CR0_REGNO + i))
28985 rtx cr = gen_rtx_REG (SImode, CR0_REGNO + i);
28986 cfa_restores = alloc_reg_note (REG_CFA_RESTORE, cr,
28987 cfa_restores);
28990 else if (info->cr_save_p)
28991 cfa_restores = alloc_reg_note (REG_CFA_RESTORE,
28992 gen_rtx_REG (SImode, CR2_REGNO),
28993 cfa_restores);
28995 if (info->lr_save_p)
28996 cfa_restores = alloc_reg_note (REG_CFA_RESTORE,
28997 gen_rtx_REG (Pmode, LR_REGNO),
28998 cfa_restores);
28999 return cfa_restores;
29002 /* Return true if OFFSET from stack pointer can be clobbered by signals.
29003 V.4 doesn't have any stack cushion; the AIX ABIs have 220 or 288 bytes
29004 below the stack pointer not clobbered by signals. */
29006 static inline bool
29007 offset_below_red_zone_p (HOST_WIDE_INT offset)
29009 return offset < (DEFAULT_ABI == ABI_V4
29010 ? 0
29011 : TARGET_32BIT ? -220 : -288);
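/* Example (editorial): with the 64-bit AIX/ELFv2 red zone of 288 bytes,
   offset_below_red_zone_p (-288) is false (that byte is still
   protected) while offset_below_red_zone_p (-289) is true; on V.4,
   which has no red zone, any negative offset qualifies.  */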
29014 /* Append CFA_RESTORES to any existing REG_NOTES on the last insn. */
29016 static void
29017 emit_cfa_restores (rtx cfa_restores)
29019 rtx_insn *insn = get_last_insn ();
29020 rtx *loc = &REG_NOTES (insn);
29022 while (*loc)
29023 loc = &XEXP (*loc, 1);
29024 *loc = cfa_restores;
29025 RTX_FRAME_RELATED_P (insn) = 1;
29028 /* Emit function epilogue as insns. */
29030 void
29031 rs6000_emit_epilogue (int sibcall)
29033 rs6000_stack_t *info;
29034 int restoring_GPRs_inline;
29035 int restoring_FPRs_inline;
29036 int using_load_multiple;
29037 int using_mtcr_multiple;
29038 int use_backchain_to_restore_sp;
29039 int restore_lr;
29040 int strategy;
29041 HOST_WIDE_INT frame_off = 0;
29042 rtx sp_reg_rtx = gen_rtx_REG (Pmode, 1);
29043 rtx frame_reg_rtx = sp_reg_rtx;
29044 rtx cfa_restores = NULL_RTX;
29045 rtx insn;
29046 rtx cr_save_reg = NULL_RTX;
29047 machine_mode reg_mode = Pmode;
29048 int reg_size = TARGET_32BIT ? 4 : 8;
29049 int i;
29050 bool exit_func;
29051 unsigned ptr_regno;
29053 info = rs6000_stack_info ();
29055 if (TARGET_SPE_ABI && info->spe_64bit_regs_used != 0)
29057 reg_mode = V2SImode;
29058 reg_size = 8;
29061 strategy = info->savres_strategy;
29062 using_load_multiple = strategy & REST_MULTIPLE;
29063 restoring_FPRs_inline = sibcall || (strategy & REST_INLINE_FPRS);
29064 restoring_GPRs_inline = sibcall || (strategy & REST_INLINE_GPRS);
29065 using_mtcr_multiple = (rs6000_cpu == PROCESSOR_PPC601
29066 || rs6000_cpu == PROCESSOR_PPC603
29067 || rs6000_cpu == PROCESSOR_PPC750
29068 || optimize_size);
29069 /* Restore via the backchain when we have a large frame, since this
29070 is more efficient than an addis, addi pair. The second condition
29071 here will not trigger at the moment; we don't actually need a
29072 frame pointer for alloca, but the generic parts of the compiler
29073 give us one anyway. */
29074 use_backchain_to_restore_sp = (info->total_size + (info->lr_save_p
29075 ? info->lr_save_offset
29076 : 0) > 32767
29077 || (cfun->calls_alloca
29078 && !frame_pointer_needed));
29079 restore_lr = (info->lr_save_p
29080 && (restoring_FPRs_inline
29081 || (strategy & REST_NOINLINE_FPRS_DOESNT_RESTORE_LR))
29082 && (restoring_GPRs_inline
29083 || info->first_fp_reg_save < 64)
29084 && !cfun->machine->lr_is_wrapped_separately);
29087 if (WORLD_SAVE_P (info))
29089 int i, j;
29090 char rname[30];
29091 const char *alloc_rname;
29092 rtvec p;
29094 /* eh_rest_world_r10 will return to the location saved in the LR
29095 stack slot (which is not likely to be our caller).
29096 Input: R10 -- stack adjustment. Clobbers R0, R11, R12, R7, R8.
29097 rest_world is similar, except any R10 parameter is ignored.
29098 The exception-handling stuff that was here in 2.95 is no
29099 longer necessary. */
29101 p = rtvec_alloc (9
29102 + 32 - info->first_gp_reg_save
29103 + LAST_ALTIVEC_REGNO + 1 - info->first_altivec_reg_save
29104 + 63 + 1 - info->first_fp_reg_save);
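/* Editorial tally of the 9 fixed elements counted above: the return,
   the USE of the routine symbol, the CLOBBERs of r11, r0, r12, r7
   and r8, the CR2 load, and the final USE of r10.  */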
29106 strcpy (rname, ((crtl->calls_eh_return) ?
29107 "*eh_rest_world_r10" : "*rest_world"));
29108 alloc_rname = ggc_strdup (rname);
29110 j = 0;
29111 RTVEC_ELT (p, j++) = ret_rtx;
29112 RTVEC_ELT (p, j++)
29113 = gen_rtx_USE (VOIDmode, gen_rtx_SYMBOL_REF (Pmode, alloc_rname));
29114 /* The instruction pattern requires a clobber here;
29115 it is shared with the restVEC helper. */
29116 RTVEC_ELT (p, j++)
29117 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (Pmode, 11));
29120 /* CR register traditionally saved as CR2. */
29121 rtx reg = gen_rtx_REG (SImode, CR2_REGNO);
29122 RTVEC_ELT (p, j++)
29123 = gen_frame_load (reg, frame_reg_rtx, info->cr_save_offset);
29124 if (flag_shrink_wrap)
29126 cfa_restores = alloc_reg_note (REG_CFA_RESTORE,
29127 gen_rtx_REG (Pmode, LR_REGNO),
29128 cfa_restores);
29129 cfa_restores = alloc_reg_note (REG_CFA_RESTORE, reg, cfa_restores);
29133 for (i = 0; i < 32 - info->first_gp_reg_save; i++)
29135 rtx reg = gen_rtx_REG (reg_mode, info->first_gp_reg_save + i);
29136 RTVEC_ELT (p, j++)
29137 = gen_frame_load (reg,
29138 frame_reg_rtx, info->gp_save_offset + reg_size * i);
29139 if (flag_shrink_wrap)
29140 cfa_restores = alloc_reg_note (REG_CFA_RESTORE, reg, cfa_restores);
29142 for (i = 0; info->first_altivec_reg_save + i <= LAST_ALTIVEC_REGNO; i++)
29144 rtx reg = gen_rtx_REG (V4SImode, info->first_altivec_reg_save + i);
29145 RTVEC_ELT (p, j++)
29146 = gen_frame_load (reg,
29147 frame_reg_rtx, info->altivec_save_offset + 16 * i);
29148 if (flag_shrink_wrap)
29149 cfa_restores = alloc_reg_note (REG_CFA_RESTORE, reg, cfa_restores);
29151 for (i = 0; info->first_fp_reg_save + i <= 63; i++)
29153 rtx reg = gen_rtx_REG ((TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT
29154 ? DFmode : SFmode),
29155 info->first_fp_reg_save + i);
29156 RTVEC_ELT (p, j++)
29157 = gen_frame_load (reg, frame_reg_rtx, info->fp_save_offset + 8 * i);
29158 if (flag_shrink_wrap)
29159 cfa_restores = alloc_reg_note (REG_CFA_RESTORE, reg, cfa_restores);
29161 RTVEC_ELT (p, j++)
29162 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (Pmode, 0));
29163 RTVEC_ELT (p, j++)
29164 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (SImode, 12));
29165 RTVEC_ELT (p, j++)
29166 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (SImode, 7));
29167 RTVEC_ELT (p, j++)
29168 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (SImode, 8));
29169 RTVEC_ELT (p, j++)
29170 = gen_rtx_USE (VOIDmode, gen_rtx_REG (SImode, 10));
29171 insn = emit_jump_insn (gen_rtx_PARALLEL (VOIDmode, p));
29173 if (flag_shrink_wrap)
29175 REG_NOTES (insn) = cfa_restores;
29176 add_reg_note (insn, REG_CFA_DEF_CFA, sp_reg_rtx);
29177 RTX_FRAME_RELATED_P (insn) = 1;
29179 return;
29182 /* frame_reg_rtx + frame_off points to the top of this stack frame. */
29183 if (info->push_p)
29184 frame_off = info->total_size;
29186 /* Restore AltiVec registers if we must do so before adjusting the
29187 stack. */
29188 if (info->altivec_size != 0
29189 && (ALWAYS_RESTORE_ALTIVEC_BEFORE_POP
29190 || (DEFAULT_ABI != ABI_V4
29191 && offset_below_red_zone_p (info->altivec_save_offset))))
29193 int i;
29194 int scratch_regno = ptr_regno_for_savres (SAVRES_VR);
29196 gcc_checking_assert (scratch_regno == 11 || scratch_regno == 12);
29197 if (use_backchain_to_restore_sp)
29199 int frame_regno = 11;
29201 if ((strategy & REST_INLINE_VRS) == 0)
29203 /* Of r11 and r12, select the one not clobbered by an
29204 out-of-line restore function for the frame register. */
29205 frame_regno = 11 + 12 - scratch_regno;
29207 frame_reg_rtx = gen_rtx_REG (Pmode, frame_regno);
29208 emit_move_insn (frame_reg_rtx,
29209 gen_rtx_MEM (Pmode, sp_reg_rtx));
29210 frame_off = 0;
29212 else if (frame_pointer_needed)
29213 frame_reg_rtx = hard_frame_pointer_rtx;
29215 if ((strategy & REST_INLINE_VRS) == 0)
29217 int end_save = info->altivec_save_offset + info->altivec_size;
29218 int ptr_off;
29219 rtx ptr_reg = gen_rtx_REG (Pmode, 0);
29220 rtx scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
29222 if (end_save + frame_off != 0)
29224 rtx offset = GEN_INT (end_save + frame_off);
29226 emit_insn (gen_add3_insn (ptr_reg, frame_reg_rtx, offset));
29228 else
29229 emit_move_insn (ptr_reg, frame_reg_rtx);
29231 ptr_off = -end_save;
29232 insn = rs6000_emit_savres_rtx (info, scratch_reg,
29233 info->altivec_save_offset + ptr_off,
29234 0, V4SImode, SAVRES_VR);
29236 else
29238 for (i = info->first_altivec_reg_save; i <= LAST_ALTIVEC_REGNO; ++i)
29239 if (info->vrsave_mask & ALTIVEC_REG_BIT (i))
29241 rtx addr, areg, mem, insn;
29242 rtx reg = gen_rtx_REG (V4SImode, i);
29243 HOST_WIDE_INT offset
29244 = (info->altivec_save_offset + frame_off
29245 + 16 * (i - info->first_altivec_reg_save));
29247 if (TARGET_P9_DFORM_VECTOR && quad_address_offset_p (offset))
29249 mem = gen_frame_mem (V4SImode,
29250 gen_rtx_PLUS (Pmode, frame_reg_rtx,
29251 GEN_INT (offset)));
29252 insn = gen_rtx_SET (reg, mem);
29254 else
29256 areg = gen_rtx_REG (Pmode, 0);
29257 emit_move_insn (areg, GEN_INT (offset));
29259 /* AltiVec addressing mode is [reg+reg]. */
29260 addr = gen_rtx_PLUS (Pmode, frame_reg_rtx, areg);
29261 mem = gen_frame_mem (V4SImode, addr);
29263 /* Rather than emitting a generic move, force use of the
29264 lvx instruction, which we always want. In particular we
29265 don't want lxvd2x/xxpermdi for little endian. */
29266 insn = gen_altivec_lvx_v4si_internal (reg, mem);
29269 (void) emit_insn (insn);
29273 for (i = info->first_altivec_reg_save; i <= LAST_ALTIVEC_REGNO; ++i)
29274 if (((strategy & REST_INLINE_VRS) == 0
29275 || (info->vrsave_mask & ALTIVEC_REG_BIT (i)) != 0)
29276 && (flag_shrink_wrap
29277 || (offset_below_red_zone_p
29278 (info->altivec_save_offset
29279 + 16 * (i - info->first_altivec_reg_save)))))
29281 rtx reg = gen_rtx_REG (V4SImode, i);
29282 cfa_restores = alloc_reg_note (REG_CFA_RESTORE, reg, cfa_restores);
29286 /* Restore VRSAVE if we must do so before adjusting the stack. */
29287 if (info->vrsave_size != 0
29288 && (ALWAYS_RESTORE_ALTIVEC_BEFORE_POP
29289 || (DEFAULT_ABI != ABI_V4
29290 && offset_below_red_zone_p (info->vrsave_save_offset))))
29292 rtx reg;
29294 if (frame_reg_rtx == sp_reg_rtx)
29296 if (use_backchain_to_restore_sp)
29298 frame_reg_rtx = gen_rtx_REG (Pmode, 11);
29299 emit_move_insn (frame_reg_rtx,
29300 gen_rtx_MEM (Pmode, sp_reg_rtx));
29301 frame_off = 0;
29303 else if (frame_pointer_needed)
29304 frame_reg_rtx = hard_frame_pointer_rtx;
29307 reg = gen_rtx_REG (SImode, 12);
29308 emit_insn (gen_frame_load (reg, frame_reg_rtx,
29309 info->vrsave_save_offset + frame_off));
29311 emit_insn (generate_set_vrsave (reg, info, 1));
29314 insn = NULL_RTX;
29315 /* If we have a large stack frame, restore the old stack pointer
29316 using the backchain. */
29317 if (use_backchain_to_restore_sp)
29319 if (frame_reg_rtx == sp_reg_rtx)
29321 /* Under V.4, don't reset the stack pointer until after we're done
29322 loading the saved registers. */
29323 if (DEFAULT_ABI == ABI_V4)
29324 frame_reg_rtx = gen_rtx_REG (Pmode, 11);
29326 insn = emit_move_insn (frame_reg_rtx,
29327 gen_rtx_MEM (Pmode, sp_reg_rtx));
29328 frame_off = 0;
29330 else if (ALWAYS_RESTORE_ALTIVEC_BEFORE_POP
29331 && DEFAULT_ABI == ABI_V4)
29332 /* frame_reg_rtx has been set up by the altivec restore. */
29333 ;
29334 else
29336 insn = emit_move_insn (sp_reg_rtx, frame_reg_rtx);
29337 frame_reg_rtx = sp_reg_rtx;
29340 /* If we have a frame pointer, we can restore the old stack pointer
29341 from it. */
29342 else if (frame_pointer_needed)
29344 frame_reg_rtx = sp_reg_rtx;
29345 if (DEFAULT_ABI == ABI_V4)
29346 frame_reg_rtx = gen_rtx_REG (Pmode, 11);
29347 /* Prevent reordering memory accesses against stack pointer restore. */
29348 else if (cfun->calls_alloca
29349 || offset_below_red_zone_p (-info->total_size))
29350 rs6000_emit_stack_tie (frame_reg_rtx, true);
29352 insn = emit_insn (gen_add3_insn (frame_reg_rtx, hard_frame_pointer_rtx,
29353 GEN_INT (info->total_size)));
29354 frame_off = 0;
29356 else if (info->push_p
29357 && DEFAULT_ABI != ABI_V4
29358 && !crtl->calls_eh_return)
29360 /* Prevent reordering memory accesses against stack pointer restore. */
29361 if (cfun->calls_alloca
29362 || offset_below_red_zone_p (-info->total_size))
29363 rs6000_emit_stack_tie (frame_reg_rtx, false);
29364 insn = emit_insn (gen_add3_insn (sp_reg_rtx, sp_reg_rtx,
29365 GEN_INT (info->total_size)));
29366 frame_off = 0;
29368 if (insn && frame_reg_rtx == sp_reg_rtx)
29370 if (cfa_restores)
29372 REG_NOTES (insn) = cfa_restores;
29373 cfa_restores = NULL_RTX;
29375 add_reg_note (insn, REG_CFA_DEF_CFA, sp_reg_rtx);
29376 RTX_FRAME_RELATED_P (insn) = 1;
29379 /* Restore AltiVec registers if we have not done so already. */
29380 if (!ALWAYS_RESTORE_ALTIVEC_BEFORE_POP
29381 && info->altivec_size != 0
29382 && (DEFAULT_ABI == ABI_V4
29383 || !offset_below_red_zone_p (info->altivec_save_offset)))
29385 int i;
29387 if ((strategy & REST_INLINE_VRS) == 0)
29389 int end_save = info->altivec_save_offset + info->altivec_size;
29390 int ptr_off;
29391 rtx ptr_reg = gen_rtx_REG (Pmode, 0);
29392 int scratch_regno = ptr_regno_for_savres (SAVRES_VR);
29393 rtx scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
29395 if (end_save + frame_off != 0)
29397 rtx offset = GEN_INT (end_save + frame_off);
29399 emit_insn (gen_add3_insn (ptr_reg, frame_reg_rtx, offset));
29401 else
29402 emit_move_insn (ptr_reg, frame_reg_rtx);
29404 ptr_off = -end_save;
29405 insn = rs6000_emit_savres_rtx (info, scratch_reg,
29406 info->altivec_save_offset + ptr_off,
29407 0, V4SImode, SAVRES_VR);
29408 if (REGNO (frame_reg_rtx) == REGNO (scratch_reg))
29410 /* Frame reg was clobbered by out-of-line save. Restore it
29411 from ptr_reg, and if we are calling out-of-line gpr or
29412 fpr restore set up the correct pointer and offset. */
29413 unsigned newptr_regno = 1;
29414 if (!restoring_GPRs_inline)
29416 bool lr = info->gp_save_offset + info->gp_size == 0;
29417 int sel = SAVRES_GPR | (lr ? SAVRES_LR : 0);
29418 newptr_regno = ptr_regno_for_savres (sel);
29419 end_save = info->gp_save_offset + info->gp_size;
29421 else if (!restoring_FPRs_inline)
29423 bool lr = !(strategy & REST_NOINLINE_FPRS_DOESNT_RESTORE_LR);
29424 int sel = SAVRES_FPR | (lr ? SAVRES_LR : 0);
29425 newptr_regno = ptr_regno_for_savres (sel);
29426 end_save = info->fp_save_offset + info->fp_size;
29429 if (newptr_regno != 1 && REGNO (frame_reg_rtx) != newptr_regno)
29430 frame_reg_rtx = gen_rtx_REG (Pmode, newptr_regno);
29432 if (end_save + ptr_off != 0)
29434 rtx offset = GEN_INT (end_save + ptr_off);
29436 frame_off = -end_save;
29437 if (TARGET_32BIT)
29438 emit_insn (gen_addsi3_carry (frame_reg_rtx,
29439 ptr_reg, offset));
29440 else
29441 emit_insn (gen_adddi3_carry (frame_reg_rtx,
29442 ptr_reg, offset));
29444 else
29446 frame_off = ptr_off;
29447 emit_move_insn (frame_reg_rtx, ptr_reg);
29451 else
29453 for (i = info->first_altivec_reg_save; i <= LAST_ALTIVEC_REGNO; ++i)
29454 if (info->vrsave_mask & ALTIVEC_REG_BIT (i))
29456 rtx addr, areg, mem, insn;
29457 rtx reg = gen_rtx_REG (V4SImode, i);
29458 HOST_WIDE_INT offset
29459 = (info->altivec_save_offset + frame_off
29460 + 16 * (i - info->first_altivec_reg_save));
29462 if (TARGET_P9_DFORM_VECTOR && quad_address_offset_p (offset))
29464 mem = gen_frame_mem (V4SImode,
29465 gen_rtx_PLUS (Pmode, frame_reg_rtx,
29466 GEN_INT (offset)));
29467 insn = gen_rtx_SET (reg, mem);
29469 else
29471 areg = gen_rtx_REG (Pmode, 0);
29472 emit_move_insn (areg, GEN_INT (offset));
29474 /* AltiVec addressing mode is [reg+reg]. */
29475 addr = gen_rtx_PLUS (Pmode, frame_reg_rtx, areg);
29476 mem = gen_frame_mem (V4SImode, addr);
29478 /* Rather than emitting a generic move, force use of the
29479 lvx instruction, which we always want. In particular we
29480 don't want lxvd2x/xxpermdi for little endian. */
29481 insn = gen_altivec_lvx_v4si_internal (reg, mem);
29484 (void) emit_insn (insn);
29488 for (i = info->first_altivec_reg_save; i <= LAST_ALTIVEC_REGNO; ++i)
29489 if (((strategy & REST_INLINE_VRS) == 0
29490 || (info->vrsave_mask & ALTIVEC_REG_BIT (i)) != 0)
29491 && (DEFAULT_ABI == ABI_V4 || flag_shrink_wrap))
29493 rtx reg = gen_rtx_REG (V4SImode, i);
29494 cfa_restores = alloc_reg_note (REG_CFA_RESTORE, reg, cfa_restores);
29498 /* Restore VRSAVE if we have not done so already. */
29499 if (!ALWAYS_RESTORE_ALTIVEC_BEFORE_POP
29500 && info->vrsave_size != 0
29501 && (DEFAULT_ABI == ABI_V4
29502 || !offset_below_red_zone_p (info->vrsave_save_offset)))
29504 rtx reg;
29506 reg = gen_rtx_REG (SImode, 12);
29507 emit_insn (gen_frame_load (reg, frame_reg_rtx,
29508 info->vrsave_save_offset + frame_off));
29510 emit_insn (generate_set_vrsave (reg, info, 1));
29513 /* If we exit by an out-of-line restore function on ABI_V4 then that
29514 function will deallocate the stack, so we don't need to worry
29515 about the unwinder restoring cr from an invalid stack frame
29516 location. */
29517 exit_func = (!restoring_FPRs_inline
29518 || (!restoring_GPRs_inline
29519 && info->first_fp_reg_save == 64));
29521 /* In the ELFv2 ABI we need to restore all call-saved CR fields from
29522 *separate* slots if the routine calls __builtin_eh_return, so
29523 that they can be independently restored by the unwinder. */
29524 if (DEFAULT_ABI == ABI_ELFv2 && crtl->calls_eh_return)
29526 int i, cr_off = info->ehcr_offset;
29528 for (i = 0; i < 8; i++)
29529 if (!call_used_regs[CR0_REGNO + i])
29531 rtx reg = gen_rtx_REG (SImode, 0);
29532 emit_insn (gen_frame_load (reg, frame_reg_rtx,
29533 cr_off + frame_off));
29535 insn = emit_insn (gen_movsi_to_cr_one
29536 (gen_rtx_REG (CCmode, CR0_REGNO + i), reg));
29538 if (!exit_func && flag_shrink_wrap)
29540 add_reg_note (insn, REG_CFA_RESTORE,
29541 gen_rtx_REG (SImode, CR0_REGNO + i));
29543 RTX_FRAME_RELATED_P (insn) = 1;
29546 cr_off += reg_size;
29550 /* Get the old lr if we saved it. If we are restoring registers
29551 out-of-line, then the out-of-line routines can do this for us. */
29552 if (restore_lr && restoring_GPRs_inline)
29553 load_lr_save (0, frame_reg_rtx, info->lr_save_offset + frame_off);
29555 /* Get the old cr if we saved it. */
29556 if (info->cr_save_p)
29558 unsigned cr_save_regno = 12;
29560 if (!restoring_GPRs_inline)
29562 /* Ensure we don't use the register used by the out-of-line
29563 gpr register restore below. */
29564 bool lr = info->gp_save_offset + info->gp_size == 0;
29565 int sel = SAVRES_GPR | (lr ? SAVRES_LR : 0);
29566 int gpr_ptr_regno = ptr_regno_for_savres (sel);
29568 if (gpr_ptr_regno == 12)
29569 cr_save_regno = 11;
29570 gcc_checking_assert (REGNO (frame_reg_rtx) != cr_save_regno);
29572 else if (REGNO (frame_reg_rtx) == 12)
29573 cr_save_regno = 11;
29575 cr_save_reg = load_cr_save (cr_save_regno, frame_reg_rtx,
29576 info->cr_save_offset + frame_off,
29577 exit_func);
29580 /* Set LR here to try to overlap restores below. */
29581 if (restore_lr && restoring_GPRs_inline)
29582 restore_saved_lr (0, exit_func);
29584 /* Load exception handler data registers, if needed. */
29585 if (crtl->calls_eh_return)
29587 unsigned int i, regno;
29589 if (TARGET_AIX)
29591 rtx reg = gen_rtx_REG (reg_mode, 2);
29592 emit_insn (gen_frame_load (reg, frame_reg_rtx,
29593 frame_off + RS6000_TOC_SAVE_SLOT));
29596 for (i = 0; ; ++i)
29598 rtx mem;
29600 regno = EH_RETURN_DATA_REGNO (i);
29601 if (regno == INVALID_REGNUM)
29602 break;
29604 /* Note: possible use of r0 here to address SPE regs. */
29605 mem = gen_frame_mem_offset (reg_mode, frame_reg_rtx,
29606 info->ehrd_offset + frame_off
29607 + reg_size * (int) i);
29609 emit_move_insn (gen_rtx_REG (reg_mode, regno), mem);
29613 /* Restore GPRs. This is done as a PARALLEL if we are using
29614 the load-multiple instructions. */
29615 if (TARGET_SPE_ABI
29616 && info->spe_64bit_regs_used
29617 && info->first_gp_reg_save != 32)
29619 /* Determine whether we can address all of the registers that need
29620 to be saved with an offset from frame_reg_rtx that fits in
29621 the small const field for SPE memory instructions. */
29622 int spe_regs_addressable
29623 = (SPE_CONST_OFFSET_OK (info->spe_gp_save_offset + frame_off
29624 + reg_size * (32 - info->first_gp_reg_save - 1))
29625 && restoring_GPRs_inline);
29627 if (!spe_regs_addressable)
29629 int ool_adjust = 0;
29630 rtx old_frame_reg_rtx = frame_reg_rtx;
29631 /* Make r11 point to the start of the SPE save area. We worried about
29632 not clobbering it when we were saving registers in the prologue.
29633 There's no need to worry here because the static chain is passed
29634 anew to every function. */
29636 if (!restoring_GPRs_inline)
29637 ool_adjust = 8 * (info->first_gp_reg_save - FIRST_SAVED_GP_REGNO);
29638 frame_reg_rtx = gen_rtx_REG (Pmode, 11);
29639 emit_insn (gen_addsi3 (frame_reg_rtx, old_frame_reg_rtx,
29640 GEN_INT (info->spe_gp_save_offset
29641 + frame_off
29642 - ool_adjust)));
29643 /* Keep the invariant that frame_reg_rtx + frame_off points
29644 at the top of the stack frame. */
29645 frame_off = -info->spe_gp_save_offset + ool_adjust;
29648 if (restoring_GPRs_inline)
29650 HOST_WIDE_INT spe_offset = info->spe_gp_save_offset + frame_off;
29652 for (i = 0; i < 32 - info->first_gp_reg_save; i++)
29653 if (rs6000_reg_live_or_pic_offset_p (info->first_gp_reg_save + i))
29655 rtx offset, addr, mem, reg;
29657 /* We're doing all this to ensure that the immediate offset
29658 fits into the immediate field of 'evldd'. */
29659 gcc_assert (SPE_CONST_OFFSET_OK (spe_offset + reg_size * i));
29661 offset = GEN_INT (spe_offset + reg_size * i);
29662 addr = gen_rtx_PLUS (Pmode, frame_reg_rtx, offset);
29663 mem = gen_rtx_MEM (V2SImode, addr);
29664 reg = gen_rtx_REG (reg_mode, info->first_gp_reg_save + i);
29666 emit_move_insn (reg, mem);
29669 else
29670 rs6000_emit_savres_rtx (info, frame_reg_rtx,
29671 info->spe_gp_save_offset + frame_off,
29672 info->lr_save_offset + frame_off,
29673 reg_mode,
29674 SAVRES_GPR | SAVRES_LR);
29676 else if (!restoring_GPRs_inline)
29678 /* We are jumping to an out-of-line function. */
29679 rtx ptr_reg;
29680 int end_save = info->gp_save_offset + info->gp_size;
29681 bool can_use_exit = end_save == 0;
29682 int sel = SAVRES_GPR | (can_use_exit ? SAVRES_LR : 0);
29683 int ptr_off;
29685 /* Emit stack reset code if we need it. */
29686 ptr_regno = ptr_regno_for_savres (sel);
29687 ptr_reg = gen_rtx_REG (Pmode, ptr_regno);
29688 if (can_use_exit)
29689 rs6000_emit_stack_reset (info, frame_reg_rtx, frame_off, ptr_regno);
29690 else if (end_save + frame_off != 0)
29691 emit_insn (gen_add3_insn (ptr_reg, frame_reg_rtx,
29692 GEN_INT (end_save + frame_off)));
29693 else if (REGNO (frame_reg_rtx) != ptr_regno)
29694 emit_move_insn (ptr_reg, frame_reg_rtx);
29695 if (REGNO (frame_reg_rtx) == ptr_regno)
29696 frame_off = -end_save;
29698 if (can_use_exit && info->cr_save_p)
29699 restore_saved_cr (cr_save_reg, using_mtcr_multiple, true);
29701 ptr_off = -end_save;
29702 rs6000_emit_savres_rtx (info, ptr_reg,
29703 info->gp_save_offset + ptr_off,
29704 info->lr_save_offset + ptr_off,
29705 reg_mode, sel);
29707 else if (using_load_multiple)
29709 rtvec p;
29710 p = rtvec_alloc (32 - info->first_gp_reg_save);
29711 for (i = 0; i < 32 - info->first_gp_reg_save; i++)
29712 RTVEC_ELT (p, i)
29713 = gen_frame_load (gen_rtx_REG (reg_mode, info->first_gp_reg_save + i),
29714 frame_reg_rtx,
29715 info->gp_save_offset + frame_off + reg_size * i);
29716 emit_insn (gen_rtx_PARALLEL (VOIDmode, p));
29718 else
29720 int offset = info->gp_save_offset + frame_off;
29721 for (i = info->first_gp_reg_save; i < 32; i++)
29723 if (rs6000_reg_live_or_pic_offset_p (i)
29724 && !cfun->machine->gpr_is_wrapped_separately[i])
29726 rtx reg = gen_rtx_REG (reg_mode, i);
29727 emit_insn (gen_frame_load (reg, frame_reg_rtx, offset));
29730 offset += reg_size;
29734 if (DEFAULT_ABI == ABI_V4 || flag_shrink_wrap)
29736 /* If the frame pointer was used then we can't delay emitting
29737 a REG_CFA_DEF_CFA note. This must happen on the insn that
29738 restores the frame pointer, r31. We may have already emitted
29739 a REG_CFA_DEF_CFA note, but that's OK; a duplicate is
29740 discarded by dwarf2cfi.c/dwarf2out.c, and in any case would
29741 be harmless if emitted. */
29742 if (frame_pointer_needed)
29744 insn = get_last_insn ();
29745 add_reg_note (insn, REG_CFA_DEF_CFA,
29746 plus_constant (Pmode, frame_reg_rtx, frame_off));
29747 RTX_FRAME_RELATED_P (insn) = 1;
29750 /* Set up cfa_restores. We always need these when
29751 shrink-wrapping. If not shrink-wrapping then we only need
29752 the cfa_restore when the stack location is no longer valid.
29753 The cfa_restores must be emitted on or before the insn that
29754 invalidates the stack, and of course must not be emitted
29755 before the insn that actually does the restore. The latter
29756 is why it is a bad idea to emit the cfa_restores as a group
29757 on the last instruction here that actually does a restore:
29758 That insn may be reordered with respect to others doing
29759 restores. */
29760 if (flag_shrink_wrap
29761 && !restoring_GPRs_inline
29762 && info->first_fp_reg_save == 64)
29763 cfa_restores = add_crlr_cfa_restore (info, cfa_restores);
29765 for (i = info->first_gp_reg_save; i < 32; i++)
29766 if (!restoring_GPRs_inline
29767 || using_load_multiple
29768 || rs6000_reg_live_or_pic_offset_p (i))
29770 if (cfun->machine->gpr_is_wrapped_separately[i])
29771 continue;
29773 rtx reg = gen_rtx_REG (reg_mode, i);
29774 cfa_restores = alloc_reg_note (REG_CFA_RESTORE, reg, cfa_restores);
29778 if (!restoring_GPRs_inline
29779 && info->first_fp_reg_save == 64)
29781 /* We are jumping to an out-of-line function. */
29782 if (cfa_restores)
29783 emit_cfa_restores (cfa_restores);
29784 return;
29787 if (restore_lr && !restoring_GPRs_inline)
29789 load_lr_save (0, frame_reg_rtx, info->lr_save_offset + frame_off);
29790 restore_saved_lr (0, exit_func);
29793 /* Restore fpr's if we need to do it without calling a function. */
29794 if (restoring_FPRs_inline)
29795 for (i = 0; i < 64 - info->first_fp_reg_save; i++)
29796 if (save_reg_p (info->first_fp_reg_save + i))
29798 rtx reg = gen_rtx_REG ((TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT
29799 ? DFmode : SFmode),
29800 info->first_fp_reg_save + i);
29801 emit_insn (gen_frame_load (reg, frame_reg_rtx,
29802 info->fp_save_offset + frame_off + 8 * i));
29803 if (DEFAULT_ABI == ABI_V4 || flag_shrink_wrap)
29804 cfa_restores = alloc_reg_note (REG_CFA_RESTORE, reg, cfa_restores);
29807 /* If we saved cr, restore it here. Just those that were used. */
29808 if (info->cr_save_p)
29809 restore_saved_cr (cr_save_reg, using_mtcr_multiple, exit_func);
29811 /* If this is V.4, unwind the stack pointer after all of the loads
29812 have been done, or set up r11 if we are restoring fp out of line. */
29813 ptr_regno = 1;
29814 if (!restoring_FPRs_inline)
29816 bool lr = (strategy & REST_NOINLINE_FPRS_DOESNT_RESTORE_LR) == 0;
29817 int sel = SAVRES_FPR | (lr ? SAVRES_LR : 0);
29818 ptr_regno = ptr_regno_for_savres (sel);
29821 insn = rs6000_emit_stack_reset (info, frame_reg_rtx, frame_off, ptr_regno);
29822 if (REGNO (frame_reg_rtx) == ptr_regno)
29823 frame_off = 0;
29825 if (insn && restoring_FPRs_inline)
29827 if (cfa_restores)
29829 REG_NOTES (insn) = cfa_restores;
29830 cfa_restores = NULL_RTX;
29832 add_reg_note (insn, REG_CFA_DEF_CFA, sp_reg_rtx);
29833 RTX_FRAME_RELATED_P (insn) = 1;
29836 if (crtl->calls_eh_return)
29838 rtx sa = EH_RETURN_STACKADJ_RTX;
29839 emit_insn (gen_add3_insn (sp_reg_rtx, sp_reg_rtx, sa));
29842 if (!sibcall && restoring_FPRs_inline)
29844 if (cfa_restores)
29846 /* We can't hang the cfa_restores off a simple return,
29847 since the shrink-wrap code sometimes uses an existing
29848 return. This means there might be a path from
29849 pre-prologue code to this return, and dwarf2cfi code
29850 wants the eh_frame unwinder state to be the same on
29851 all paths to any point. So we need to emit the
29852 cfa_restores before the return. For -m64 we really
29853 don't need epilogue cfa_restores at all, except for
29854 this irritating dwarf2cfi-with-shrink-wrap
29855 requirement; the stack red-zone means eh_frame info
29856 from the prologue telling the unwinder to restore
29857 from the stack is perfectly good right to the end of
29858 the function. */
29859 emit_insn (gen_blockage ());
29860 emit_cfa_restores (cfa_restores);
29861 cfa_restores = NULL_RTX;
29864 emit_jump_insn (targetm.gen_simple_return ());
29867 if (!sibcall && !restoring_FPRs_inline)
29869 bool lr = (strategy & REST_NOINLINE_FPRS_DOESNT_RESTORE_LR) == 0;
29870 rtvec p = rtvec_alloc (3 + !!lr + 64 - info->first_fp_reg_save);
29871 int elt = 0;
29872 RTVEC_ELT (p, elt++) = ret_rtx;
29873 if (lr)
29874 RTVEC_ELT (p, elt++)
29875 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (Pmode, LR_REGNO));
29877 /* We have to restore more than two FP registers, so branch to the
29878 restore function. It will return to our caller. */
29879 int i;
29880 int reg;
29881 rtx sym;
29883 if (flag_shrink_wrap)
29884 cfa_restores = add_crlr_cfa_restore (info, cfa_restores);
29886 sym = rs6000_savres_routine_sym (info, SAVRES_FPR | (lr ? SAVRES_LR : 0));
29887 RTVEC_ELT (p, elt++) = gen_rtx_USE (VOIDmode, sym);
29888 reg = (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)? 1 : 11;
29889 RTVEC_ELT (p, elt++) = gen_rtx_USE (VOIDmode, gen_rtx_REG (Pmode, reg));
29891 for (i = 0; i < 64 - info->first_fp_reg_save; i++)
29893 rtx reg = gen_rtx_REG (DFmode, info->first_fp_reg_save + i);
29895 RTVEC_ELT (p, elt++)
29896 = gen_frame_load (reg, sp_reg_rtx, info->fp_save_offset + 8 * i);
29897 if (flag_shrink_wrap)
29898 cfa_restores = alloc_reg_note (REG_CFA_RESTORE, reg, cfa_restores);
29901 emit_jump_insn (gen_rtx_PARALLEL (VOIDmode, p));
29904 if (cfa_restores)
29906 if (sibcall)
29907 /* Ensure the cfa_restores are hung off an insn that won't
29908 be reordered above other restores. */
29909 emit_insn (gen_blockage ());
29911 emit_cfa_restores (cfa_restores);
29915 /* Write function epilogue. */
29917 static void
29918 rs6000_output_function_epilogue (FILE *file,
29919 HOST_WIDE_INT size ATTRIBUTE_UNUSED)
29921 #if TARGET_MACHO
29922 macho_branch_islands ();
29923 /* Mach-O doesn't support labels at the end of objects, so if
29924 it looks like we might want one, insert a NOP. */
29926 rtx_insn *insn = get_last_insn ();
29927 rtx_insn *deleted_debug_label = NULL;
29928 while (insn
29929 && NOTE_P (insn)
29930 && NOTE_KIND (insn) != NOTE_INSN_DELETED_LABEL)
29932 /* Don't insert a nop for NOTE_INSN_DELETED_DEBUG_LABEL
29933 notes; instead set their CODE_LABEL_NUMBER to -1.
29934 Otherwise there would be code generation differences
29935 between -g and -g0. */
29936 if (NOTE_P (insn) && NOTE_KIND (insn) == NOTE_INSN_DELETED_DEBUG_LABEL)
29937 deleted_debug_label = insn;
29938 insn = PREV_INSN (insn);
29940 if (insn
29941 && (LABEL_P (insn)
29942 || (NOTE_P (insn)
29943 && NOTE_KIND (insn) == NOTE_INSN_DELETED_LABEL)))
29944 fputs ("\tnop\n", file);
29945 else if (deleted_debug_label)
29946 for (insn = deleted_debug_label; insn; insn = NEXT_INSN (insn))
29947 if (NOTE_KIND (insn) == NOTE_INSN_DELETED_DEBUG_LABEL)
29948 CODE_LABEL_NUMBER (insn) = -1;
29950 #endif
29952 /* Output a traceback table here. See /usr/include/sys/debug.h for info
29953 on its format.
29955 We don't output a traceback table if -finhibit-size-directive was
29956 used. The documentation for -finhibit-size-directive reads
29957 ``don't output a @code{.size} assembler directive, or anything
29958 else that would cause trouble if the function is split in the
29959 middle, and the two halves are placed at locations far apart in
29960 memory.'' The traceback table has this property, since it
29961 includes the offset from the start of the function to the
29962 traceback table itself.
29964 System V.4 PowerPC (and the embedded ABI derived from it) uses a
29965 different traceback table. */
29966 if ((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
29967 && ! flag_inhibit_size_directive
29968 && rs6000_traceback != traceback_none && !cfun->is_thunk)
29970 const char *fname = NULL;
29971 const char *language_string = lang_hooks.name;
29972 int fixed_parms = 0, float_parms = 0, parm_info = 0;
29973 int i;
29974 int optional_tbtab;
29975 rs6000_stack_t *info = rs6000_stack_info ();
29977 if (rs6000_traceback == traceback_full)
29978 optional_tbtab = 1;
29979 else if (rs6000_traceback == traceback_part)
29980 optional_tbtab = 0;
29981 else
29982 optional_tbtab = !optimize_size && !TARGET_ELF;
29984 if (optional_tbtab)
29986 fname = XSTR (XEXP (DECL_RTL (current_function_decl), 0), 0);
29987 while (*fname == '.') /* V.4 encodes . in the name */
29988 fname++;
29990 /* Need label immediately before tbtab, so we can compute
29991 its offset from the function start. */
29992 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "LT");
29993 ASM_OUTPUT_LABEL (file, fname);
29996 /* The .tbtab pseudo-op can only be used for the first eight
29997 expressions, since it can't handle the possibly variable
29998 length fields that follow. However, if you omit the optional
29999 fields, the assembler outputs zeros for all optional fields
30000 anyway, giving each variable length field its minimum length
30001 (as defined in sys/debug.h). Thus we cannot use the .tbtab
30002 pseudo-op at all. */
30004 /* An all-zero word flags the start of the tbtab, for debuggers
30005 that have to find it by searching forward from the entry
30006 point or from the current pc. */
30007 fputs ("\t.long 0\n", file);
30009 /* Tbtab format type. Use format type 0. */
30010 fputs ("\t.byte 0,", file);
30012 /* Language type. Unfortunately, there does not seem to be any
30013 official way to discover the language being compiled, so we
30014 use language_string.
30015 C is 0. Fortran is 1. Pascal is 2. Ada is 3. C++ is 9.
30016 Java is 13. Objective-C is 14. Objective-C++ isn't assigned
30017 a number, so for now use 9. LTO, Go and JIT aren't assigned numbers
30018 either, so for now use 0. */
30019 if (lang_GNU_C ()
30020 || ! strcmp (language_string, "GNU GIMPLE")
30021 || ! strcmp (language_string, "GNU Go")
30022 || ! strcmp (language_string, "libgccjit"))
30023 i = 0;
30024 else if (! strcmp (language_string, "GNU F77")
30025 || lang_GNU_Fortran ())
30026 i = 1;
30027 else if (! strcmp (language_string, "GNU Pascal"))
30028 i = 2;
30029 else if (! strcmp (language_string, "GNU Ada"))
30030 i = 3;
30031 else if (lang_GNU_CXX ()
30032 || ! strcmp (language_string, "GNU Objective-C++"))
30033 i = 9;
30034 else if (! strcmp (language_string, "GNU Java"))
30035 i = 13;
30036 else if (! strcmp (language_string, "GNU Objective-C"))
30037 i = 14;
30038 else
30039 gcc_unreachable ();
30040 fprintf (file, "%d,", i);
30042 /* 8 single bit fields: global linkage (not set for C extern linkage,
30043 apparently a PL/I convention?), out-of-line epilogue/prologue, offset
30044 from start of procedure stored in tbtab, internal function, function
30045 has controlled storage, function has no toc, function uses fp,
30046 function logs/aborts fp operations. */
30047 /* Assume that fp operations are used if any fp reg must be saved. */
30048 fprintf (file, "%d,",
30049 (optional_tbtab << 5) | ((info->first_fp_reg_save != 64) << 1));
30051 /* 6 bitfields: function is interrupt handler, name present in
30052 proc table, function calls alloca, on condition directives
30053 (controls stack walks, 3 bits), saves condition reg, saves
30054 link reg. */
30055 /* The `function calls alloca' bit seems to be set whenever reg 31 is
30056 set up as a frame pointer, even when there is no alloca call. */
30057 fprintf (file, "%d,",
30058 ((optional_tbtab << 6)
30059 | ((optional_tbtab & frame_pointer_needed) << 5)
30060 | (info->cr_save_p << 1)
30061 | (info->lr_save_p)));
30063 /* 3 bitfields: saves backchain, fixup code, number of fpr saved
30064 (6 bits). */
30065 fprintf (file, "%d,",
30066 (info->push_p << 7) | (64 - info->first_fp_reg_save));
30068 /* 2 bitfields: spare bits (2 bits), number of gpr saved (6 bits). */
30069 fprintf (file, "%d,", (32 - first_reg_to_save ()));
30071 if (optional_tbtab)
30073 /* Compute the parameter info from the function decl argument
30074 list. */
30075 tree decl;
30076 int next_parm_info_bit = 31;
30078 for (decl = DECL_ARGUMENTS (current_function_decl);
30079 decl; decl = DECL_CHAIN (decl))
30081 rtx parameter = DECL_INCOMING_RTL (decl);
30082 machine_mode mode = GET_MODE (parameter);
30084 if (GET_CODE (parameter) == REG)
30086 if (SCALAR_FLOAT_MODE_P (mode))
30088 int bits;
30090 float_parms++;
30092 switch (mode)
30094 case SFmode:
30095 case SDmode:
30096 bits = 0x2;
30097 break;
30099 case DFmode:
30100 case DDmode:
30101 case TFmode:
30102 case TDmode:
30103 case IFmode:
30104 case KFmode:
30105 bits = 0x3;
30106 break;
30108 default:
30109 gcc_unreachable ();
30112 /* If only one bit will fit, don't or in this entry. */
30113 if (next_parm_info_bit > 0)
30114 parm_info |= (bits << (next_parm_info_bit - 1));
30115 next_parm_info_bit -= 2;
30117 else
30119 fixed_parms += ((GET_MODE_SIZE (mode)
30120 + (UNITS_PER_WORD - 1))
30121 / UNITS_PER_WORD);
30122 next_parm_info_bit -= 1;
30128 /* Number of fixed point parameters. */
30129 /* This is actually the number of words of fixed point parameters; thus
30130 an 8-byte struct counts as 2, and thus the maximum value is 8. */
30131 fprintf (file, "%d,", fixed_parms);
30133 /* 2 bitfields: number of floating point parameters (7 bits), parameters
30134 all on stack. */
30135 /* This is actually the number of fp registers that hold parameters;
30136 and thus the maximum value is 13. */
30137 /* Set parameters on stack bit if parameters are not in their original
30138 registers, regardless of whether they are on the stack? Xlc
30139 seems to set the bit when not optimizing. */
30140 fprintf (file, "%d\n", ((float_parms << 1) | (! optimize)));
30142 if (! optional_tbtab)
30143 return;
30145 /* Optional fields follow. Some are variable length. */
30147 /* Parameter types, left adjusted bit fields: 0 fixed, 10 single float,
30148 11 double float. */
30149 /* There is an entry for each parameter in a register, in the order that
30150 they occur in the parameter list. Any intervening arguments on the
30151 stack are ignored. If the list overflows a long (max possible length
30152 34 bits) then completely leave off all elements that don't fit. */
30153 /* Only emit this long if there was at least one parameter. */
30154 if (fixed_parms || float_parms)
30155 fprintf (file, "\t.long %d\n", parm_info);
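/* Worked example (editorial): for f (double d, int i) the double
   puts 0b11 at bits 31-30 of parm_info and the int then occupies
   bit 29 as a fixed word, so parm_info == 0xc0000000 with
   fixed_parms == 1 and float_parms == 1.  */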
30157 /* Offset from start of code to tb table. */
30158 fputs ("\t.long ", file);
30159 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "LT");
30160 RS6000_OUTPUT_BASENAME (file, fname);
30161 putc ('-', file);
30162 rs6000_output_function_entry (file, fname);
30163 putc ('\n', file);
30165 /* Interrupt handler mask. */
30166 /* Omit this long, since we never set the interrupt handler bit
30167 above. */
30169 /* Number of CTL (controlled storage) anchors. */
30170 /* Omit this long, since the has_ctl bit is never set above. */
30172 /* Displacement into stack of each CTL anchor. */
30173 /* Omit this list of longs, because there are no CTL anchors. */
30175 /* Length of function name. */
30176 if (*fname == '*')
30177 ++fname;
30178 fprintf (file, "\t.short %d\n", (int) strlen (fname));
30180 /* Function name. */
30181 assemble_string (fname, strlen (fname));
30183 /* Register for alloca automatic storage; this is always reg 31.
30184 Only emit this if the alloca bit was set above. */
30185 if (frame_pointer_needed)
30186 fputs ("\t.byte 31\n", file);
30188 fputs ("\t.align 2\n", file);
30191 /* Arrange to define .LCTOC1 label, if not already done. */
30192 if (need_toc_init)
30194 need_toc_init = 0;
30195 if (!toc_initialized)
30197 switch_to_section (toc_section);
30198 switch_to_section (current_function_section ());
30203 /* -fsplit-stack support. */
30205 /* A SYMBOL_REF for __morestack. */
30206 static GTY(()) rtx morestack_ref;
30208 static rtx
30209 gen_add3_const (rtx rt, rtx ra, long c)
30211 if (TARGET_64BIT)
30212 return gen_adddi3 (rt, ra, GEN_INT (c));
30213 else
30214 return gen_addsi3 (rt, ra, GEN_INT (c));
30217 /* Emit -fsplit-stack prologue, which goes before the regular function
30218 prologue (at local entry point in the case of ELFv2). */
30220 void
30221 rs6000_expand_split_stack_prologue (void)
30223 rs6000_stack_t *info = rs6000_stack_info ();
30224 unsigned HOST_WIDE_INT allocate;
30225 long alloc_hi, alloc_lo;
30226 rtx r0, r1, r12, lr, ok_label, compare, jump, call_fusage;
30227 rtx_insn *insn;
30229 gcc_assert (flag_split_stack && reload_completed);
30231 if (!info->push_p)
30232 return;
30234 if (global_regs[29])
30236 error ("-fsplit-stack uses register r29");
30237 inform (DECL_SOURCE_LOCATION (global_regs_decl[29]),
30238 "conflicts with %qD", global_regs_decl[29]);
30241 allocate = info->total_size;
30242 if (allocate > (unsigned HOST_WIDE_INT) 1 << 31)
30244 sorry ("Stack frame larger than 2G is not supported for -fsplit-stack");
30245 return;
30247 if (morestack_ref == NULL_RTX)
30249 morestack_ref = gen_rtx_SYMBOL_REF (Pmode, "__morestack");
30250 SYMBOL_REF_FLAGS (morestack_ref) |= (SYMBOL_FLAG_LOCAL
30251 | SYMBOL_FLAG_FUNCTION);
30254 r0 = gen_rtx_REG (Pmode, 0);
30255 r1 = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
30256 r12 = gen_rtx_REG (Pmode, 12);
30257 emit_insn (gen_load_split_stack_limit (r0));
30258 /* Always emit two insns here to calculate the requested stack,
30259 so that the linker can edit them when adjusting size for calling
30260 non-split-stack code. */
30261 alloc_hi = (-allocate + 0x8000) & ~0xffffL;
30262 alloc_lo = -allocate - alloc_hi;
30263 if (alloc_hi != 0)
30265 emit_insn (gen_add3_const (r12, r1, alloc_hi));
30266 if (alloc_lo != 0)
30267 emit_insn (gen_add3_const (r12, r12, alloc_lo));
30268 else
30269 emit_insn (gen_nop ());
30271 else
30273 emit_insn (gen_add3_const (r12, r1, alloc_lo));
30274 emit_insn (gen_nop ());
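/* Worked example (editorial): for allocate == 0x12345, -allocate is
   -74565, so alloc_hi = (-74565 + 0x8000) & ~0xffff = -0x10000 and
   alloc_lo = -74565 - (-0x10000) = -0x2345; the two insns emitted
   above are then effectively "addis r12,r1,-1; addi r12,r12,-0x2345".  */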
30277 compare = gen_rtx_REG (CCUNSmode, CR7_REGNO);
30278 emit_insn (gen_rtx_SET (compare, gen_rtx_COMPARE (CCUNSmode, r12, r0)));
30279 ok_label = gen_label_rtx ();
30280 jump = gen_rtx_IF_THEN_ELSE (VOIDmode,
30281 gen_rtx_GEU (VOIDmode, compare, const0_rtx),
30282 gen_rtx_LABEL_REF (VOIDmode, ok_label),
30283 pc_rtx);
30284 jump = emit_jump_insn (gen_rtx_SET (pc_rtx, jump));
30285 JUMP_LABEL (jump) = ok_label;
30286 /* Mark the jump as very likely to be taken. */
30287 add_int_reg_note (jump, REG_BR_PROB,
30288 REG_BR_PROB_BASE - REG_BR_PROB_BASE / 100);
30290 lr = gen_rtx_REG (Pmode, LR_REGNO);
30291 insn = emit_move_insn (r0, lr);
30292 RTX_FRAME_RELATED_P (insn) = 1;
30293 insn = emit_insn (gen_frame_store (r0, r1, info->lr_save_offset));
30294 RTX_FRAME_RELATED_P (insn) = 1;
30296 insn = emit_call_insn (gen_call (gen_rtx_MEM (SImode, morestack_ref),
30297 const0_rtx, const0_rtx));
30298 call_fusage = NULL_RTX;
30299 use_reg (&call_fusage, r12);
30300 /* Say the call uses r0, even though it doesn't, to stop regrename
30301 from twiddling with the insns saving lr, trashing args for cfun.
30302 The insns restoring lr are similarly protected by making
30303 split_stack_return use r0. */
30304 use_reg (&call_fusage, r0);
30305 add_function_usage_to (insn, call_fusage);
30306 emit_insn (gen_frame_load (r0, r1, info->lr_save_offset));
30307 insn = emit_move_insn (lr, r0);
30308 add_reg_note (insn, REG_CFA_RESTORE, lr);
30309 RTX_FRAME_RELATED_P (insn) = 1;
30310 emit_insn (gen_split_stack_return ());
30312 emit_label (ok_label);
30313 LABEL_NUSES (ok_label) = 1;
30316 /* Return the internal arg pointer used for function incoming
30317 arguments. When -fsplit-stack, the arg pointer is r12 so we need
30318 to copy it to a pseudo in order for it to be preserved over calls
30319 and suchlike. We'd really like to use a pseudo here for the
30320 internal arg pointer but data-flow analysis is not prepared to
30321 accept pseudos as live at the beginning of a function. */
30323 static rtx
30324 rs6000_internal_arg_pointer (void)
30326 if (flag_split_stack
30327 && (lookup_attribute ("no_split_stack", DECL_ATTRIBUTES (cfun->decl))
30328 == NULL))
30331 if (cfun->machine->split_stack_arg_pointer == NULL_RTX)
30333 rtx pat;
30335 cfun->machine->split_stack_arg_pointer = gen_reg_rtx (Pmode);
30336 REG_POINTER (cfun->machine->split_stack_arg_pointer) = 1;
30338 /* Put the pseudo initialization right after the note at the
30339 beginning of the function. */
30340 pat = gen_rtx_SET (cfun->machine->split_stack_arg_pointer,
30341 gen_rtx_REG (Pmode, 12));
30342 push_topmost_sequence ();
30343 emit_insn_after (pat, get_insns ());
30344 pop_topmost_sequence ();
30346 return plus_constant (Pmode, cfun->machine->split_stack_arg_pointer,
30347 FIRST_PARM_OFFSET (current_function_decl));
30349 return virtual_incoming_args_rtx;
30352 /* We may have to tell the dataflow pass that the split stack prologue
30353 is initializing a register. */
30355 static void
30356 rs6000_live_on_entry (bitmap regs)
30358 if (flag_split_stack)
30359 bitmap_set_bit (regs, 12);
30362 /* Emit -fsplit-stack dynamic stack allocation space check. */
30364 void
30365 rs6000_split_stack_space_check (rtx size, rtx label)
30367 rtx sp = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
30368 rtx limit = gen_reg_rtx (Pmode);
30369 rtx requested = gen_reg_rtx (Pmode);
30370 rtx cmp = gen_reg_rtx (CCUNSmode);
30371 rtx jump;
30373 emit_insn (gen_load_split_stack_limit (limit));
30374 if (CONST_INT_P (size))
30375 emit_insn (gen_add3_insn (requested, sp, GEN_INT (-INTVAL (size))));
30376 else
30378 size = force_reg (Pmode, size);
30379 emit_move_insn (requested, gen_rtx_MINUS (Pmode, sp, size));
30381 emit_insn (gen_rtx_SET (cmp, gen_rtx_COMPARE (CCUNSmode, requested, limit)));
30382 jump = gen_rtx_IF_THEN_ELSE (VOIDmode,
30383 gen_rtx_GEU (VOIDmode, cmp, const0_rtx),
30384 gen_rtx_LABEL_REF (VOIDmode, label),
30385 pc_rtx);
30386 jump = emit_jump_insn (gen_rtx_SET (pc_rtx, jump));
30387 JUMP_LABEL (jump) = label;
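/* Editorial note: REQUESTED is sp - SIZE, so the unsigned GEU branch
   to LABEL is the in-limit fast path; execution falls through only
   when the allocation would cross the split-stack limit.  */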
30390 /* A C compound statement that outputs the assembler code for a thunk
30391 function, used to implement C++ virtual function calls with
30392 multiple inheritance. The thunk acts as a wrapper around a virtual
30393 function, adjusting the implicit object parameter before handing
30394 control off to the real function.
30396 First, emit code to add the integer DELTA to the location that
30397 contains the incoming first argument. Assume that this argument
30398 contains a pointer, and is the one used to pass the `this' pointer
30399 in C++. This is the incoming argument *before* the function
30400 prologue, e.g. `%o0' on a sparc. The addition must preserve the
30401 values of all other incoming arguments.
30403 After the addition, emit code to jump to FUNCTION, which is a
30404 `FUNCTION_DECL'. This is a direct pure jump, not a call, and does
30405 not touch the return address. Hence returning from FUNCTION will
30406 return to whoever called the current `thunk'.
30408 The effect must be as if FUNCTION had been called directly with the
30409 adjusted first argument. This macro is responsible for emitting
30410 all of the code for a thunk function; output_function_prologue()
30411 and output_function_epilogue() are not invoked.
30413 The THUNK_FNDECL is redundant. (DELTA and FUNCTION have already
30414 been extracted from it.) It might possibly be useful on some
30415 targets, but probably not.
30417 If you do not define this macro, the target-independent code in the
30418 C++ frontend will generate a less efficient heavyweight thunk that
30419 calls FUNCTION instead of jumping to it. The generic approach does
30420 not support varargs. */
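/* Editorial sketch: for DELTA == 8 and VCALL_OFFSET == 0 the thunk
   body below reduces to, in effect, "addi r3,r3,8" followed by a
   sibcall branch to FUNCTION, with no prologue and LR untouched.  */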
30422 static void
30423 rs6000_output_mi_thunk (FILE *file, tree thunk_fndecl ATTRIBUTE_UNUSED,
30424 HOST_WIDE_INT delta, HOST_WIDE_INT vcall_offset,
30425 tree function)
30427 rtx this_rtx, funexp;
30428 rtx_insn *insn;
30430 reload_completed = 1;
30431 epilogue_completed = 1;
30433 /* Mark the end of the (empty) prologue. */
30434 emit_note (NOTE_INSN_PROLOGUE_END);
30436 /* Find the "this" pointer. If the function returns a structure,
30437 the structure return pointer is in r3. */
30438 if (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function))
30439 this_rtx = gen_rtx_REG (Pmode, 4);
30440 else
30441 this_rtx = gen_rtx_REG (Pmode, 3);
30443 /* Apply the constant offset, if required. */
30444 if (delta)
30445 emit_insn (gen_add3_insn (this_rtx, this_rtx, GEN_INT (delta)));
30447 /* Apply the offset from the vtable, if required. */
30448 if (vcall_offset)
30450 rtx vcall_offset_rtx = GEN_INT (vcall_offset);
30451 rtx tmp = gen_rtx_REG (Pmode, 12);
30453 emit_move_insn (tmp, gen_rtx_MEM (Pmode, this_rtx));
30454 if (((unsigned HOST_WIDE_INT) vcall_offset) + 0x8000 >= 0x10000)
30456 emit_insn (gen_add3_insn (tmp, tmp, vcall_offset_rtx));
30457 emit_move_insn (tmp, gen_rtx_MEM (Pmode, tmp));
30459 else
30461 rtx loc = gen_rtx_PLUS (Pmode, tmp, vcall_offset_rtx);
30463 emit_move_insn (tmp, gen_rtx_MEM (Pmode, loc));
30465 emit_insn (gen_add3_insn (this_rtx, this_rtx, tmp));
30468 /* Generate a tail call to the target function. */
30469 if (!TREE_USED (function))
30471 assemble_external (function);
30472 TREE_USED (function) = 1;
30474 funexp = XEXP (DECL_RTL (function), 0);
30475 funexp = gen_rtx_MEM (FUNCTION_MODE, funexp);
30477 #if TARGET_MACHO
30478 if (MACHOPIC_INDIRECT)
30479 funexp = machopic_indirect_call_target (funexp);
30480 #endif
30482 /* gen_sibcall expects reload to convert scratch pseudo to LR so we must
30483 generate sibcall RTL explicitly. */
30484 insn = emit_call_insn (
30485 gen_rtx_PARALLEL (VOIDmode,
30486 gen_rtvec (3,
30487 gen_rtx_CALL (VOIDmode,
30488 funexp, const0_rtx),
30489 gen_rtx_USE (VOIDmode, const0_rtx),
30490 simple_return_rtx)));
30491 SIBLING_CALL_P (insn) = 1;
30492 emit_barrier ();
30494 /* Run just enough of rest_of_compilation to get the insns emitted.
30495 There's not really enough bulk here to make other passes such as
30496 instruction scheduling worthwhile. Note that use_thunk calls
30497 assemble_start_function and assemble_end_function. */
30498 insn = get_insns ();
30499 shorten_branches (insn);
30500 final_start_function (insn, file, 1);
30501 final (insn, file, 1);
30502 final_end_function ();
30504 reload_completed = 0;
30505 epilogue_completed = 0;
30508 /* A quick summary of the various types of 'constant-pool tables'
30509 under PowerPC:
30511    Target      Flags              Name             One table per
30512    AIX         (none)             AIX TOC          object file
30513    AIX         -mfull-toc         AIX TOC          object file
30514    AIX         -mminimal-toc      AIX minimal TOC  translation unit
30515    SVR4/EABI   (none)             SVR4 SDATA       object file
30516    SVR4/EABI   -fpic              SVR4 pic         object file
30517    SVR4/EABI   -fPIC              SVR4 PIC         translation unit
30518    SVR4/EABI   -mrelocatable      EABI TOC         function
30519    SVR4/EABI   -maix              AIX TOC          object file
30520    SVR4/EABI   -maix -mminimal-toc
30521                                   AIX minimal TOC  translation unit
30523    Name             Reg.  reg set by  entries   entries contain:
30524                                       made by   addrs?   fp?      sum?
30526    AIX TOC          2     crt0        as        Y        option   option
30527    AIX minimal TOC  30    prolog      gcc       Y        Y        option
30528    SVR4 SDATA       13    crt0        gcc       N        Y        N
30529    SVR4 pic         30    prolog      ld        Y        not yet  N
30530    SVR4 PIC         30    prolog      gcc       Y        option   option
30531    EABI TOC         30    prolog      gcc       Y        option   option
30535 /* Hash functions for the hash table. */
30537 static unsigned
30538 rs6000_hash_constant (rtx k)
30540 enum rtx_code code = GET_CODE (k);
30541 machine_mode mode = GET_MODE (k);
30542 unsigned result = (code << 3) ^ mode;
30543 const char *format;
30544 int flen, fidx;
30546 format = GET_RTX_FORMAT (code);
30547 flen = strlen (format);
30548 fidx = 0;
30550 switch (code)
30552 case LABEL_REF:
30553 return result * 1231 + (unsigned) INSN_UID (XEXP (k, 0));
30555 case CONST_WIDE_INT:
30557 int i;
30558 flen = CONST_WIDE_INT_NUNITS (k);
30559 for (i = 0; i < flen; i++)
30560 result = result * 613 + CONST_WIDE_INT_ELT (k, i);
30561 return result;
30564 case CONST_DOUBLE:
30565 if (mode != VOIDmode)
30566 return real_hash (CONST_DOUBLE_REAL_VALUE (k)) * result;
30567 flen = 2;
30568 break;
30570 case CODE_LABEL:
30571 fidx = 3;
30572 break;
30574 default:
30575 break;
30578 for (; fidx < flen; fidx++)
30579 switch (format[fidx])
30581 case 's':
30583 unsigned i, len;
30584 const char *str = XSTR (k, fidx);
30585 len = strlen (str);
30586 result = result * 613 + len;
30587 for (i = 0; i < len; i++)
30588 result = result * 613 + (unsigned) str[i];
30589 break;
30591 case 'u':
30592 case 'e':
30593 result = result * 1231 + rs6000_hash_constant (XEXP (k, fidx));
30594 break;
30595 case 'i':
30596 case 'n':
30597 result = result * 613 + (unsigned) XINT (k, fidx);
30598 break;
30599 case 'w':
30600 if (sizeof (unsigned) >= sizeof (HOST_WIDE_INT))
30601 result = result * 613 + (unsigned) XWINT (k, fidx);
30602 else
30604 size_t i;
30605 for (i = 0; i < sizeof (HOST_WIDE_INT) / sizeof (unsigned); i++)
30606 result = result * 613 + (unsigned) (XWINT (k, fidx)
30607 >> CHAR_BIT * i);
30609 break;
30610 case '0':
30611 break;
30612 default:
30613 gcc_unreachable ();
30616 return result;
30619 hashval_t
30620 toc_hasher::hash (toc_hash_struct *thc)
30622 return rs6000_hash_constant (thc->key) ^ thc->key_mode;
30625 /* Compare H1 and H2 for equivalence. */
30627 bool
30628 toc_hasher::equal (toc_hash_struct *h1, toc_hash_struct *h2)
30630 rtx r1 = h1->key;
30631 rtx r2 = h2->key;
30633 if (h1->key_mode != h2->key_mode)
30634 return 0;
30636 return rtx_equal_p (r1, r2);
30639 /* These are the names given by the C++ front-end to vtables and
30640 vtable-like objects. Ideally, this logic should not be here;
30641 instead, there should be some programmatic way of inquiring as
30642 to whether or not an object is a vtable. */
30644 #define VTABLE_NAME_P(NAME) \
30645 (strncmp ("_vt.", name, strlen ("_vt.")) == 0 \
30646 || strncmp ("_ZTV", name, strlen ("_ZTV")) == 0 \
30647 || strncmp ("_ZTT", name, strlen ("_ZTT")) == 0 \
30648 || strncmp ("_ZTI", name, strlen ("_ZTI")) == 0 \
30649 || strncmp ("_ZTC", name, strlen ("_ZTC")) == 0)
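/* Editorial note: "_ZTV", "_ZTT", "_ZTI" and "_ZTC" are the Itanium
   C++ ABI manglings for vtables, VTTs, typeinfo objects and
   construction vtables respectively; "_vt." is the old GNU v2
   vtable prefix.  */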
30651 #ifdef NO_DOLLAR_IN_LABEL
30652 /* Return a GGC-allocated character string translating dollar signs in
30653 input NAME to underscores. Used by XCOFF ASM_OUTPUT_LABELREF. */
30655 const char *
30656 rs6000_xcoff_strip_dollar (const char *name)
30658 char *strip, *p;
30659 const char *q;
30660 size_t len;
30662 q = (const char *) strchr (name, '$');
30664 if (q == 0 || q == name)
30665 return name;
30667 len = strlen (name);
30668 strip = XALLOCAVEC (char, len + 1);
30669 strcpy (strip, name);
30670 p = strip + (q - name);
30671 while (p)
30673 *p = '_';
30674 p = strchr (p + 1, '$');
30677 return ggc_alloc_string (strip, len);
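/* Example (editorial): rs6000_xcoff_strip_dollar ("foo$bar$baz")
   yields "foo_bar_baz"; names with no '$', or with a leading '$'
   (q == name), are returned unchanged.  */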
30679 #endif
30681 void
30682 rs6000_output_symbol_ref (FILE *file, rtx x)
30684 const char *name = XSTR (x, 0);
30686 /* Currently C++ TOC references to vtables can be emitted before it
30687 is decided whether the vtable is public or private. If this is
30688 the case, then the linker will eventually complain that there is
30689 a reference to an unknown section. Thus, for vtables only,
30690 we emit the TOC reference to reference the identifier and not the
30691 symbol. */
30692 if (VTABLE_NAME_P (name))
30694 RS6000_OUTPUT_BASENAME (file, name);
30696 else
30697 assemble_name (file, name);
30700 /* Output a TOC entry. We derive the entry name from what is being
30701 written. */
30703 void
30704 output_toc (FILE *file, rtx x, int labelno, machine_mode mode)
30706 char buf[256];
30707 const char *name = buf;
30708 rtx base = x;
30709 HOST_WIDE_INT offset = 0;
30711 gcc_assert (!TARGET_NO_TOC);
30713 /* When the linker won't eliminate them, don't output duplicate
30714 TOC entries (this happens on AIX if there is any kind of TOC,
30715 and on SVR4 under -fPIC or -mrelocatable). Don't do this for
30716 CODE_LABELs. */
30717 if (TARGET_TOC && GET_CODE (x) != LABEL_REF)
30719 struct toc_hash_struct *h;
30721 /* Create toc_hash_table. This can't be done at TARGET_OPTION_OVERRIDE
30722 time because GGC is not initialized at that point. */
30723 if (toc_hash_table == NULL)
30724 toc_hash_table = hash_table<toc_hasher>::create_ggc (1021);
30726 h = ggc_alloc<toc_hash_struct> ();
30727 h->key = x;
30728 h->key_mode = mode;
30729 h->labelno = labelno;
30731 toc_hash_struct **found = toc_hash_table->find_slot (h, INSERT);
30732 if (*found == NULL)
30733 *found = h;
30734 else /* This is indeed a duplicate.
30735 Set this label equal to that label. */
30737 fputs ("\t.set ", file);
30738 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "LC");
30739 fprintf (file, "%d,", labelno);
30740 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "LC");
30741 fprintf (file, "%d\n", ((*found)->labelno));
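/* Editorial example: a duplicate first emitted under label 1 and met
   again as label 3 produces ".set LC3,LC1" (modulo the configured
   local label prefix), aliasing both names to one TOC entry.  */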
30743 #ifdef HAVE_AS_TLS
30744 if (TARGET_XCOFF && GET_CODE (x) == SYMBOL_REF
30745 && (SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_GLOBAL_DYNAMIC
30746 || SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC))
30748 fputs ("\t.set ", file);
30749 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "LCM");
30750 fprintf (file, "%d,", labelno);
30751 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "LCM");
30752 fprintf (file, "%d\n", ((*found)->labelno));
30754 #endif
30755 return;
30759 /* If we're going to put a double constant in the TOC, make sure it's
30760 aligned properly when strict alignment is on. */
30761 if ((CONST_DOUBLE_P (x) || CONST_WIDE_INT_P (x))
30762 && STRICT_ALIGNMENT
30763 && GET_MODE_BITSIZE (mode) >= 64
30764 && ! (TARGET_NO_FP_IN_TOC && ! TARGET_MINIMAL_TOC)) {
30765 ASM_OUTPUT_ALIGN (file, 3);
30766 }
30768 (*targetm.asm_out.internal_label) (file, "LC", labelno);
30770 /* Handle FP constants specially. Note that if we have a minimal
30771 TOC, things we put here aren't actually in the TOC, so we can allow
30772 FP constants. */
30773 if (GET_CODE (x) == CONST_DOUBLE
30774 && (GET_MODE (x) == TFmode || GET_MODE (x) == TDmode
30775 || GET_MODE (x) == IFmode || GET_MODE (x) == KFmode))
30777 long k[4];
30779 if (DECIMAL_FLOAT_MODE_P (GET_MODE (x)))
30780 REAL_VALUE_TO_TARGET_DECIMAL128 (*CONST_DOUBLE_REAL_VALUE (x), k);
30781 else
30782 REAL_VALUE_TO_TARGET_LONG_DOUBLE (*CONST_DOUBLE_REAL_VALUE (x), k);
30784 if (TARGET_64BIT)
30786 if (TARGET_ELF || TARGET_MINIMAL_TOC)
30787 fputs (DOUBLE_INT_ASM_OP, file);
30788 else
30789 fprintf (file, "\t.tc FT_%lx_%lx_%lx_%lx[TC],",
30790 k[0] & 0xffffffff, k[1] & 0xffffffff,
30791 k[2] & 0xffffffff, k[3] & 0xffffffff);
30792 fprintf (file, "0x%lx%08lx,0x%lx%08lx\n",
30793 k[WORDS_BIG_ENDIAN ? 0 : 1] & 0xffffffff,
30794 k[WORDS_BIG_ENDIAN ? 1 : 0] & 0xffffffff,
30795 k[WORDS_BIG_ENDIAN ? 2 : 3] & 0xffffffff,
30796 k[WORDS_BIG_ENDIAN ? 3 : 2] & 0xffffffff);
30797 return;
30799 else
30801 if (TARGET_ELF || TARGET_MINIMAL_TOC)
30802 fputs ("\t.long ", file);
30803 else
30804 fprintf (file, "\t.tc FT_%lx_%lx_%lx_%lx[TC],",
30805 k[0] & 0xffffffff, k[1] & 0xffffffff,
30806 k[2] & 0xffffffff, k[3] & 0xffffffff);
30807 fprintf (file, "0x%lx,0x%lx,0x%lx,0x%lx\n",
30808 k[0] & 0xffffffff, k[1] & 0xffffffff,
30809 k[2] & 0xffffffff, k[3] & 0xffffffff);
30810 return;
30813 else if (GET_CODE (x) == CONST_DOUBLE
30814 && (GET_MODE (x) == DFmode || GET_MODE (x) == DDmode))
30816 long k[2];
30818 if (DECIMAL_FLOAT_MODE_P (GET_MODE (x)))
30819 REAL_VALUE_TO_TARGET_DECIMAL64 (*CONST_DOUBLE_REAL_VALUE (x), k);
30820 else
30821 REAL_VALUE_TO_TARGET_DOUBLE (*CONST_DOUBLE_REAL_VALUE (x), k);
30823 if (TARGET_64BIT)
30825 if (TARGET_ELF || TARGET_MINIMAL_TOC)
30826 fputs (DOUBLE_INT_ASM_OP, file);
30827 else
30828 fprintf (file, "\t.tc FD_%lx_%lx[TC],",
30829 k[0] & 0xffffffff, k[1] & 0xffffffff);
30830 fprintf (file, "0x%lx%08lx\n",
30831 k[WORDS_BIG_ENDIAN ? 0 : 1] & 0xffffffff,
30832 k[WORDS_BIG_ENDIAN ? 1 : 0] & 0xffffffff);
30833 return;
30835 else
30837 if (TARGET_ELF || TARGET_MINIMAL_TOC)
30838 fputs ("\t.long ", file);
30839 else
30840 fprintf (file, "\t.tc FD_%lx_%lx[TC],",
30841 k[0] & 0xffffffff, k[1] & 0xffffffff);
30842 fprintf (file, "0x%lx,0x%lx\n",
30843 k[0] & 0xffffffff, k[1] & 0xffffffff);
30844 return;
30847 else if (GET_CODE (x) == CONST_DOUBLE
30848 && (GET_MODE (x) == SFmode || GET_MODE (x) == SDmode))
30850 long l;
30852 if (DECIMAL_FLOAT_MODE_P (GET_MODE (x)))
30853 REAL_VALUE_TO_TARGET_DECIMAL32 (*CONST_DOUBLE_REAL_VALUE (x), l);
30854 else
30855 REAL_VALUE_TO_TARGET_SINGLE (*CONST_DOUBLE_REAL_VALUE (x), l);
30857 if (TARGET_64BIT)
30859 if (TARGET_ELF || TARGET_MINIMAL_TOC)
30860 fputs (DOUBLE_INT_ASM_OP, file);
30861 else
30862 fprintf (file, "\t.tc FS_%lx[TC],", l & 0xffffffff);
30863 if (WORDS_BIG_ENDIAN)
30864 fprintf (file, "0x%lx00000000\n", l & 0xffffffff);
30865 else
30866 fprintf (file, "0x%lx\n", l & 0xffffffff);
30867 return;
30869 else
30871 if (TARGET_ELF || TARGET_MINIMAL_TOC)
30872 fputs ("\t.long ", file);
30873 else
30874 fprintf (file, "\t.tc FS_%lx[TC],", l & 0xffffffff);
30875 fprintf (file, "0x%lx\n", l & 0xffffffff);
30876 return;
30879 else if (GET_MODE (x) == VOIDmode && GET_CODE (x) == CONST_INT)
30881 unsigned HOST_WIDE_INT low;
30882 HOST_WIDE_INT high;
30884 low = INTVAL (x) & 0xffffffff;
30885 high = (HOST_WIDE_INT) INTVAL (x) >> 32;
30887 /* TOC entries are always Pmode-sized, so when big-endian
30888 smaller integer constants in the TOC need to be padded.
30889 (This is still a win over putting the constants in
30890 a separate constant pool, because then we'd have
30891 to have both a TOC entry _and_ the actual constant.)
30893 For a 32-bit target, CONST_INT values are loaded and shifted
30894 entirely within `low' and can be stored in one TOC entry. */
30896 /* It would be easy to make this work, but it doesn't now. */
30897 gcc_assert (!TARGET_64BIT || POINTER_SIZE >= GET_MODE_BITSIZE (mode));
30899 if (WORDS_BIG_ENDIAN && POINTER_SIZE > GET_MODE_BITSIZE (mode))
30901 low |= high << 32;
30902 low <<= POINTER_SIZE - GET_MODE_BITSIZE (mode);
30903 high = (HOST_WIDE_INT) low >> 32;
30904 low &= 0xffffffff;
30907 if (TARGET_64BIT)
30909 if (TARGET_ELF || TARGET_MINIMAL_TOC)
30910 fputs (DOUBLE_INT_ASM_OP, file);
30911 else
30912 fprintf (file, "\t.tc ID_%lx_%lx[TC],",
30913 (long) high & 0xffffffff, (long) low & 0xffffffff);
30914 fprintf (file, "0x%lx%08lx\n",
30915 (long) high & 0xffffffff, (long) low & 0xffffffff);
30916 return;
30918 else
30920 if (POINTER_SIZE < GET_MODE_BITSIZE (mode))
30922 if (TARGET_ELF || TARGET_MINIMAL_TOC)
30923 fputs ("\t.long ", file);
30924 else
30925 fprintf (file, "\t.tc ID_%lx_%lx[TC],",
30926 (long) high & 0xffffffff, (long) low & 0xffffffff);
30927 fprintf (file, "0x%lx,0x%lx\n",
30928 (long) high & 0xffffffff, (long) low & 0xffffffff);
30930 else
30932 if (TARGET_ELF || TARGET_MINIMAL_TOC)
30933 fputs ("\t.long ", file);
30934 else
30935 fprintf (file, "\t.tc IS_%lx[TC],", (long) low & 0xffffffff);
30936 fprintf (file, "0x%lx\n", (long) low & 0xffffffff);
30938 return;
30942 if (GET_CODE (x) == CONST)
30944 gcc_assert (GET_CODE (XEXP (x, 0)) == PLUS
30945 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT);
30947 base = XEXP (XEXP (x, 0), 0);
30948 offset = INTVAL (XEXP (XEXP (x, 0), 1));
30951 switch (GET_CODE (base))
30953 case SYMBOL_REF:
30954 name = XSTR (base, 0);
30955 break;
30957 case LABEL_REF:
30958 ASM_GENERATE_INTERNAL_LABEL (buf, "L",
30959 CODE_LABEL_NUMBER (XEXP (base, 0)));
30960 break;
30962 case CODE_LABEL:
30963 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (base));
30964 break;
30966 default:
30967 gcc_unreachable ();
30970 if (TARGET_ELF || TARGET_MINIMAL_TOC)
30971 fputs (TARGET_32BIT ? "\t.long " : DOUBLE_INT_ASM_OP, file);
30972 else
30974 fputs ("\t.tc ", file);
30975 RS6000_OUTPUT_BASENAME (file, name);
30977 if (offset < 0)
30978 fprintf (file, ".N" HOST_WIDE_INT_PRINT_UNSIGNED, - offset);
30979 else if (offset)
30980 fprintf (file, ".P" HOST_WIDE_INT_PRINT_UNSIGNED, offset);
30982 /* Mark large TOC symbols on AIX with [TE] so they are mapped
30983 after other TOC symbols, reducing overflow of small TOC access
30984 to [TC] symbols. */
30985 fputs (TARGET_XCOFF && TARGET_CMODEL != CMODEL_SMALL
30986 ? "[TE]," : "[TC],", file);
30989 /* Currently C++ toc references to vtables can be emitted before it
30990 is decided whether the vtable is public or private. If this is
30991 the case, then the linker will eventually complain that there is
30992 a TOC reference to an unknown section. Thus, for vtables only,
30993 we emit the TOC reference to reference the symbol and not the
30994 section. */
30995 if (VTABLE_NAME_P (name))
30997 RS6000_OUTPUT_BASENAME (file, name);
30998 if (offset < 0)
30999 fprintf (file, HOST_WIDE_INT_PRINT_DEC, offset);
31000 else if (offset > 0)
31001 fprintf (file, "+" HOST_WIDE_INT_PRINT_DEC, offset);
31003 else
31004 output_addr_const (file, x);
31006 #if HAVE_AS_TLS
31007 if (TARGET_XCOFF && GET_CODE (base) == SYMBOL_REF)
31009 switch (SYMBOL_REF_TLS_MODEL (base))
31011 case 0:
31012 break;
31013 case TLS_MODEL_LOCAL_EXEC:
31014 fputs ("@le", file);
31015 break;
31016 case TLS_MODEL_INITIAL_EXEC:
31017 fputs ("@ie", file);
31018 break;
31019 /* Use global-dynamic for local-dynamic. */
31020 case TLS_MODEL_GLOBAL_DYNAMIC:
31021 case TLS_MODEL_LOCAL_DYNAMIC:
31022 putc ('\n', file);
31023 (*targetm.asm_out.internal_label) (file, "LCM", labelno);
31024 fputs ("\t.tc .", file);
31025 RS6000_OUTPUT_BASENAME (file, name);
31026 fputs ("[TC],", file);
31027 output_addr_const (file, x);
31028 fputs ("@m", file);
31029 break;
31030 default:
31031 gcc_unreachable ();
31034 #endif
31036 putc ('\n', file);
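/* For instance, on 64-bit AIX with the small code model, a TOC entry
   for symbol "foo" at label 7 might come out roughly as

       LC..7:
               .tc foo[TC],foo

   while the ELF/minimal-TOC paths above emit a bare .long/.quad value
   instead (a sketch of typical output; the exact internal-label syntax
   is whatever ASM_OUTPUT_INTERNAL_LABEL produces for the target).  */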
31039 /* Output an assembler pseudo-op to write an ASCII string of N characters
31040 starting at P to FILE.
31042 On the RS/6000, we have to do this using the .byte operation and
31043 write out special characters outside the quoted string.
31044 Also, the assembler is broken; very long strings are truncated,
31045 so we must artificially break them up early. */
31047 void
31048 output_ascii (FILE *file, const char *p, int n)
31050 char c;
31051 int i, count_string;
31052 const char *for_string = "\t.byte \"";
31053 const char *for_decimal = "\t.byte ";
31054 const char *to_close = NULL;
31056 count_string = 0;
31057 for (i = 0; i < n; i++)
31059 c = *p++;
31060 if (c >= ' ' && c < 0177)
31062 if (for_string)
31063 fputs (for_string, file);
31064 putc (c, file);
31066 /* Write two quotes to get one. */
31067 if (c == '"')
31069 putc (c, file);
31070 ++count_string;
31073 for_string = NULL;
31074 for_decimal = "\"\n\t.byte ";
31075 to_close = "\"\n";
31076 ++count_string;
31078 if (count_string >= 512)
31080 fputs (to_close, file);
31082 for_string = "\t.byte \"";
31083 for_decimal = "\t.byte ";
31084 to_close = NULL;
31085 count_string = 0;
31088 else
31090 if (for_decimal)
31091 fputs (for_decimal, file);
31092 fprintf (file, "%d", c);
31094 for_string = "\n\t.byte \"";
31095 for_decimal = ", ";
31096 to_close = "\n";
31097 count_string = 0;
31101 /* Now close the string if we have written one. Then end the line. */
31102 if (to_close)
31103 fputs (to_close, file);
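/* As a hypothetical trace of the loop above, output_ascii (file,
   "Hi\"\n", 4) emits

       .byte "Hi"""
       .byte 10

   printable characters are batched into one quoted .byte string (with
   '"' doubled), and each non-printable byte becomes a separate decimal
   .byte value.  */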
31106 /* Generate a unique section name for FILENAME for a section type
31107 represented by SECTION_DESC. Output goes into BUF.
31109 SECTION_DESC can be any string, as long as it is different for each
31110 possible section type.
31112 We name the section in the same manner as xlc. The name begins with an
31113 underscore followed by the filename (after stripping any leading directory
31114 names) with the last period replaced by the string SECTION_DESC. If
31115 FILENAME does not contain a period, SECTION_DESC is appended to the end of
31116 the name. */
31118 void
31119 rs6000_gen_section_name (char **buf, const char *filename,
31120 const char *section_desc)
31122 const char *q, *after_last_slash, *last_period = 0;
31123 char *p;
31124 int len;
31126 after_last_slash = filename;
31127 for (q = filename; *q; q++)
31129 if (*q == '/')
31130 after_last_slash = q + 1;
31131 else if (*q == '.')
31132 last_period = q;
31135 len = strlen (after_last_slash) + strlen (section_desc) + 2;
31136 *buf = (char *) xmalloc (len);
31138 p = *buf;
31139 *p++ = '_';
31141 for (q = after_last_slash; *q; q++)
31143 if (q == last_period)
31145 strcpy (p, section_desc);
31146 p += strlen (section_desc);
31147 break;
31150 else if (ISALNUM (*q))
31151 *p++ = *q;
31154 if (last_period == 0)
31155 strcpy (p, section_desc);
31156 else
31157 *p = '\0';
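/* For example, with a hypothetical SECTION_DESC of "_bss_",
   rs6000_gen_section_name (&buf, "src/my-file.c", "_bss_") sets BUF to
   "_myfile_bss_": the directory prefix and non-alphanumeric characters
   are dropped, and the last period is replaced by SECTION_DESC.  */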
31160 /* Emit profile function. */
31162 void
31163 output_profile_hook (int labelno ATTRIBUTE_UNUSED)
31165 /* Non-standard profiling for kernels, which just saves LR then calls
31166 _mcount without worrying about arg saves. The idea is to change
31167 the function prologue as little as possible as it isn't easy to
31168 account for arg save/restore code added just for _mcount. */
31169 if (TARGET_PROFILE_KERNEL)
31170 return;
31172 if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
31174 #ifndef NO_PROFILE_COUNTERS
31175 # define NO_PROFILE_COUNTERS 0
31176 #endif
31177 if (NO_PROFILE_COUNTERS)
31178 emit_library_call (init_one_libfunc (RS6000_MCOUNT),
31179 LCT_NORMAL, VOIDmode, 0);
31180 else
31182 char buf[30];
31183 const char *label_name;
31184 rtx fun;
31186 ASM_GENERATE_INTERNAL_LABEL (buf, "LP", labelno);
31187 label_name = ggc_strdup ((*targetm.strip_name_encoding) (buf));
31188 fun = gen_rtx_SYMBOL_REF (Pmode, label_name);
31190 emit_library_call (init_one_libfunc (RS6000_MCOUNT),
31191 LCT_NORMAL, VOIDmode, 1, fun, Pmode);
31194 else if (DEFAULT_ABI == ABI_DARWIN)
31196 const char *mcount_name = RS6000_MCOUNT;
31197 int caller_addr_regno = LR_REGNO;
31199 /* Be conservative and always set this, at least for now. */
31200 crtl->uses_pic_offset_table = 1;
31202 #if TARGET_MACHO
31203 /* For PIC code, set up a stub and collect the caller's address
31204 from r0, which is where the prologue puts it. */
31205 if (MACHOPIC_INDIRECT
31206 && crtl->uses_pic_offset_table)
31207 caller_addr_regno = 0;
31208 #endif
31209 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, mcount_name),
31210 LCT_NORMAL, VOIDmode, 1,
31211 gen_rtx_REG (Pmode, caller_addr_regno), Pmode);
31215 /* Write function profiler code. */
31217 void
31218 output_function_profiler (FILE *file, int labelno)
31220 char buf[100];
31222 switch (DEFAULT_ABI)
31224 default:
31225 gcc_unreachable ();
31227 case ABI_V4:
31228 if (!TARGET_32BIT)
31230 warning (0, "no profiling of 64-bit code for this ABI");
31231 return;
31233 ASM_GENERATE_INTERNAL_LABEL (buf, "LP", labelno);
31234 fprintf (file, "\tmflr %s\n", reg_names[0]);
31235 if (NO_PROFILE_COUNTERS)
31237 asm_fprintf (file, "\tstw %s,4(%s)\n",
31238 reg_names[0], reg_names[1]);
31240 else if (TARGET_SECURE_PLT && flag_pic)
31242 if (TARGET_LINK_STACK)
31244 char name[32];
31245 get_ppc476_thunk_name (name);
31246 asm_fprintf (file, "\tbl %s\n", name);
31248 else
31249 asm_fprintf (file, "\tbcl 20,31,1f\n1:\n");
31250 asm_fprintf (file, "\tstw %s,4(%s)\n",
31251 reg_names[0], reg_names[1]);
31252 asm_fprintf (file, "\tmflr %s\n", reg_names[12]);
31253 asm_fprintf (file, "\taddis %s,%s,",
31254 reg_names[12], reg_names[12]);
31255 assemble_name (file, buf);
31256 asm_fprintf (file, "-1b@ha\n\tla %s,", reg_names[0]);
31257 assemble_name (file, buf);
31258 asm_fprintf (file, "-1b@l(%s)\n", reg_names[12]);
31260 else if (flag_pic == 1)
31262 fputs ("\tbl _GLOBAL_OFFSET_TABLE_@local-4\n", file);
31263 asm_fprintf (file, "\tstw %s,4(%s)\n",
31264 reg_names[0], reg_names[1]);
31265 asm_fprintf (file, "\tmflr %s\n", reg_names[12]);
31266 asm_fprintf (file, "\tlwz %s,", reg_names[0]);
31267 assemble_name (file, buf);
31268 asm_fprintf (file, "@got(%s)\n", reg_names[12]);
31270 else if (flag_pic > 1)
31272 asm_fprintf (file, "\tstw %s,4(%s)\n",
31273 reg_names[0], reg_names[1]);
31274 /* Now, we need to get the address of the label. */
31275 if (TARGET_LINK_STACK)
31277 char name[32];
31278 get_ppc476_thunk_name (name);
31279 asm_fprintf (file, "\tbl %s\n\tb 1f\n\t.long ", name);
31280 assemble_name (file, buf);
31281 fputs ("-.\n1:", file);
31282 asm_fprintf (file, "\tmflr %s\n", reg_names[11]);
31283 asm_fprintf (file, "\taddi %s,%s,4\n",
31284 reg_names[11], reg_names[11]);
31286 else
31288 fputs ("\tbcl 20,31,1f\n\t.long ", file);
31289 assemble_name (file, buf);
31290 fputs ("-.\n1:", file);
31291 asm_fprintf (file, "\tmflr %s\n", reg_names[11]);
31293 asm_fprintf (file, "\tlwz %s,0(%s)\n",
31294 reg_names[0], reg_names[11]);
31295 asm_fprintf (file, "\tadd %s,%s,%s\n",
31296 reg_names[0], reg_names[0], reg_names[11]);
31298 else
31300 asm_fprintf (file, "\tlis %s,", reg_names[12]);
31301 assemble_name (file, buf);
31302 fputs ("@ha\n", file);
31303 asm_fprintf (file, "\tstw %s,4(%s)\n",
31304 reg_names[0], reg_names[1]);
31305 asm_fprintf (file, "\tla %s,", reg_names[0]);
31306 assemble_name (file, buf);
31307 asm_fprintf (file, "@l(%s)\n", reg_names[12]);
31310 /* ABI_V4 saves the static chain reg with ASM_OUTPUT_REG_PUSH. */
31311 fprintf (file, "\tbl %s%s\n",
31312 RS6000_MCOUNT, flag_pic ? "@plt" : "");
31313 break;
31315 case ABI_AIX:
31316 case ABI_ELFv2:
31317 case ABI_DARWIN:
31318 /* Don't do anything, done in output_profile_hook (). */
31319 break;
31325 /* The following variable value is the last issued insn. */
31327 static rtx_insn *last_scheduled_insn;
31329 /* The following variable helps to balance issuing of load and
31330 store instructions */
31332 static int load_store_pendulum;
31334 /* The following variable helps pair divide insns during scheduling. */
31335 static int divide_cnt;
31336 /* The following variable helps pair and alternate vector and vector load
31337 insns during scheduling. */
31338 static int vec_load_pendulum;
31341 /* Power4 load update and store update instructions are cracked into a
31342 load or store and an integer insn which are executed in the same cycle.
31343 Branches have their own dispatch slot which does not count against the
31344 GCC issue rate, but it changes the program flow so there are no other
31345 instructions to issue in this cycle. */
31347 static int
31348 rs6000_variable_issue_1 (rtx_insn *insn, int more)
31350 last_scheduled_insn = insn;
31351 if (GET_CODE (PATTERN (insn)) == USE
31352 || GET_CODE (PATTERN (insn)) == CLOBBER)
31354 cached_can_issue_more = more;
31355 return cached_can_issue_more;
31358 if (insn_terminates_group_p (insn, current_group))
31360 cached_can_issue_more = 0;
31361 return cached_can_issue_more;
31364 /* If the insn is not recognized it has no reservation; leave MORE unchanged. */
31365 if (recog_memoized (insn) < 0)
31366 return more;
31368 if (rs6000_sched_groups)
31370 if (is_microcoded_insn (insn))
31371 cached_can_issue_more = 0;
31372 else if (is_cracked_insn (insn))
31373 cached_can_issue_more = more > 2 ? more - 2 : 0;
31374 else
31375 cached_can_issue_more = more - 1;
31377 return cached_can_issue_more;
31380 if (rs6000_cpu_attr == CPU_CELL && is_nonpipeline_insn (insn))
31381 return 0;
31383 cached_can_issue_more = more - 1;
31384 return cached_can_issue_more;
31387 static int
31388 rs6000_variable_issue (FILE *stream, int verbose, rtx_insn *insn, int more)
31390 int r = rs6000_variable_issue_1 (insn, more);
31391 if (verbose)
31392 fprintf (stream, "// rs6000_variable_issue (more = %d) = %d\n", more, r);
31393 return r;
31396 /* Adjust the cost of a scheduling dependency. Return the new cost of
31397 a dependency of type DEP_TYPE of INSN on DEP_INSN. COST is the current cost. */
31399 static int
31400 rs6000_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep_insn, int cost,
31401 unsigned int)
31403 enum attr_type attr_type;
31405 if (recog_memoized (insn) < 0 || recog_memoized (dep_insn) < 0)
31406 return cost;
31408 switch (dep_type)
31410 case REG_DEP_TRUE:
31412 /* Data dependency; DEP_INSN writes a register that INSN reads
31413 some cycles later. */
31415 /* Separate a load from a narrower, dependent store. */
31416 if ((rs6000_sched_groups || rs6000_cpu_attr == CPU_POWER9)
31417 && GET_CODE (PATTERN (insn)) == SET
31418 && GET_CODE (PATTERN (dep_insn)) == SET
31419 && GET_CODE (XEXP (PATTERN (insn), 1)) == MEM
31420 && GET_CODE (XEXP (PATTERN (dep_insn), 0)) == MEM
31421 && (GET_MODE_SIZE (GET_MODE (XEXP (PATTERN (insn), 1)))
31422 > GET_MODE_SIZE (GET_MODE (XEXP (PATTERN (dep_insn), 0)))))
31423 return cost + 14;
31425 attr_type = get_attr_type (insn);
31427 switch (attr_type)
31429 case TYPE_JMPREG:
31430 /* Tell the first scheduling pass about the latency between
31431 a mtctr and bctr (and mtlr and br/blr). The first
31432 scheduling pass will not know about this latency since
31433 the mtctr instruction, which has the latency associated
31434 to it, will be generated by reload. */
31435 return 4;
31436 case TYPE_BRANCH:
31437 /* Leave some extra cycles between a compare and its
31438 dependent branch, to inhibit expensive mispredicts. */
31439 if ((rs6000_cpu_attr == CPU_PPC603
31440 || rs6000_cpu_attr == CPU_PPC604
31441 || rs6000_cpu_attr == CPU_PPC604E
31442 || rs6000_cpu_attr == CPU_PPC620
31443 || rs6000_cpu_attr == CPU_PPC630
31444 || rs6000_cpu_attr == CPU_PPC750
31445 || rs6000_cpu_attr == CPU_PPC7400
31446 || rs6000_cpu_attr == CPU_PPC7450
31447 || rs6000_cpu_attr == CPU_PPCE5500
31448 || rs6000_cpu_attr == CPU_PPCE6500
31449 || rs6000_cpu_attr == CPU_POWER4
31450 || rs6000_cpu_attr == CPU_POWER5
31451 || rs6000_cpu_attr == CPU_POWER7
31452 || rs6000_cpu_attr == CPU_POWER8
31453 || rs6000_cpu_attr == CPU_POWER9
31454 || rs6000_cpu_attr == CPU_CELL)
31455 && recog_memoized (dep_insn)
31456 && (INSN_CODE (dep_insn) >= 0))
31458 switch (get_attr_type (dep_insn))
31460 case TYPE_CMP:
31461 case TYPE_FPCOMPARE:
31462 case TYPE_CR_LOGICAL:
31463 case TYPE_DELAYED_CR:
31464 return cost + 2;
31465 case TYPE_EXTS:
31466 case TYPE_MUL:
31467 if (get_attr_dot (dep_insn) == DOT_YES)
31468 return cost + 2;
31469 else
31470 break;
31471 case TYPE_SHIFT:
31472 if (get_attr_dot (dep_insn) == DOT_YES
31473 && get_attr_var_shift (dep_insn) == VAR_SHIFT_NO)
31474 return cost + 2;
31475 else
31476 break;
31477 default:
31478 break;
31480 break;
31482 case TYPE_STORE:
31483 case TYPE_FPSTORE:
31484 if ((rs6000_cpu == PROCESSOR_POWER6)
31485 && recog_memoized (dep_insn)
31486 && (INSN_CODE (dep_insn) >= 0))
31489 if (GET_CODE (PATTERN (insn)) != SET)
31490 /* If this happens, we have to extend this to schedule
31491 optimally. Return default for now. */
31492 return cost;
31494 /* Adjust the cost for the case where the value written
31495 by a fixed point operation is used as the address
31496 gen value on a store. */
31497 switch (get_attr_type (dep_insn))
31499 case TYPE_LOAD:
31500 case TYPE_CNTLZ:
31502 if (! store_data_bypass_p (dep_insn, insn))
31503 return get_attr_sign_extend (dep_insn)
31504 == SIGN_EXTEND_YES ? 6 : 4;
31505 break;
31507 case TYPE_SHIFT:
31509 if (! store_data_bypass_p (dep_insn, insn))
31510 return get_attr_var_shift (dep_insn) == VAR_SHIFT_YES ?
31511 6 : 3;
31512 break;
31514 case TYPE_INTEGER:
31515 case TYPE_ADD:
31516 case TYPE_LOGICAL:
31517 case TYPE_EXTS:
31518 case TYPE_INSERT:
31520 if (! store_data_bypass_p (dep_insn, insn))
31521 return 3;
31522 break;
31524 case TYPE_STORE:
31525 case TYPE_FPLOAD:
31526 case TYPE_FPSTORE:
31528 if (get_attr_update (dep_insn) == UPDATE_YES
31529 && ! store_data_bypass_p (dep_insn, insn))
31530 return 3;
31531 break;
31533 case TYPE_MUL:
31535 if (! store_data_bypass_p (dep_insn, insn))
31536 return 17;
31537 break;
31539 case TYPE_DIV:
31541 if (! store_data_bypass_p (dep_insn, insn))
31542 return get_attr_size (dep_insn) == SIZE_32 ? 45 : 57;
31543 break;
31545 default:
31546 break;
31549 break;
31551 case TYPE_LOAD:
31552 if ((rs6000_cpu == PROCESSOR_POWER6)
31553 && recog_memoized (dep_insn)
31554 && (INSN_CODE (dep_insn) >= 0))
31557 /* Adjust the cost for the case where the value written
31558 by a fixed point instruction is used within the address
31559 gen portion of a subsequent load(u)(x). */
31560 switch (get_attr_type (dep_insn))
31562 case TYPE_LOAD:
31563 case TYPE_CNTLZ:
31565 if (set_to_load_agen (dep_insn, insn))
31566 return get_attr_sign_extend (dep_insn)
31567 == SIGN_EXTEND_YES ? 6 : 4;
31568 break;
31570 case TYPE_SHIFT:
31572 if (set_to_load_agen (dep_insn, insn))
31573 return get_attr_var_shift (dep_insn) == VAR_SHIFT_YES ?
31574 6 : 3;
31575 break;
31577 case TYPE_INTEGER:
31578 case TYPE_ADD:
31579 case TYPE_LOGICAL:
31580 case TYPE_EXTS:
31581 case TYPE_INSERT:
31583 if (set_to_load_agen (dep_insn, insn))
31584 return 3;
31585 break;
31587 case TYPE_STORE:
31588 case TYPE_FPLOAD:
31589 case TYPE_FPSTORE:
31591 if (get_attr_update (dep_insn) == UPDATE_YES
31592 && set_to_load_agen (dep_insn, insn))
31593 return 3;
31594 break;
31596 case TYPE_MUL:
31598 if (set_to_load_agen (dep_insn, insn))
31599 return 17;
31600 break;
31602 case TYPE_DIV:
31604 if (set_to_load_agen (dep_insn, insn))
31605 return get_attr_size (dep_insn) == SIZE_32 ? 45 : 57;
31606 break;
31608 default:
31609 break;
31612 break;
31614 case TYPE_FPLOAD:
31615 if ((rs6000_cpu == PROCESSOR_POWER6)
31616 && get_attr_update (insn) == UPDATE_NO
31617 && recog_memoized (dep_insn)
31618 && (INSN_CODE (dep_insn) >= 0)
31619 && (get_attr_type (dep_insn) == TYPE_MFFGPR))
31620 return 2;
31622 default:
31623 break;
31626 /* Fall out to return default cost. */
31628 break;
31630 case REG_DEP_OUTPUT:
31631 /* Output dependency; DEP_INSN writes a register that INSN writes some
31632 cycles later. */
31633 if ((rs6000_cpu == PROCESSOR_POWER6)
31634 && recog_memoized (dep_insn)
31635 && (INSN_CODE (dep_insn) >= 0))
31637 attr_type = get_attr_type (insn);
31639 switch (attr_type)
31641 case TYPE_FP:
31642 case TYPE_FPSIMPLE:
31643 if (get_attr_type (dep_insn) == TYPE_FP
31644 || get_attr_type (dep_insn) == TYPE_FPSIMPLE)
31645 return 1;
31646 break;
31647 case TYPE_FPLOAD:
31648 if (get_attr_update (insn) == UPDATE_NO
31649 && get_attr_type (dep_insn) == TYPE_MFFGPR)
31650 return 2;
31651 break;
31652 default:
31653 break;
31656 /* Fall through, no cost for output dependency. */
31657 /* FALLTHRU */
31659 case REG_DEP_ANTI:
31660 /* Anti dependency; DEP_INSN reads a register that INSN writes some
31661 cycles later. */
31662 return 0;
31664 default:
31665 gcc_unreachable ();
31668 return cost;
31671 /* Debug version of rs6000_adjust_cost. */
31673 static int
31674 rs6000_debug_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep_insn,
31675 int cost, unsigned int dw)
31677 int ret = rs6000_adjust_cost (insn, dep_type, dep_insn, cost, dw);
31679 if (ret != cost)
31681 const char *dep;
31683 switch (dep_type)
31685 default: dep = "unknown dependency"; break;
31686 case REG_DEP_TRUE: dep = "data dependency"; break;
31687 case REG_DEP_OUTPUT: dep = "output dependency"; break;
31688 case REG_DEP_ANTI: dep = "anti dependency"; break;
31691 fprintf (stderr,
31692 "\nrs6000_adjust_cost, final cost = %d, orig cost = %d, "
31693 "%s, insn:\n", ret, cost, dep);
31695 debug_rtx (insn);
31698 return ret;
31701 /* The function returns true if INSN is microcoded.
31702 Return false otherwise. */
31704 static bool
31705 is_microcoded_insn (rtx_insn *insn)
31707 if (!insn || !NONDEBUG_INSN_P (insn)
31708 || GET_CODE (PATTERN (insn)) == USE
31709 || GET_CODE (PATTERN (insn)) == CLOBBER)
31710 return false;
31712 if (rs6000_cpu_attr == CPU_CELL)
31713 return get_attr_cell_micro (insn) == CELL_MICRO_ALWAYS;
31715 if (rs6000_sched_groups
31716 && (rs6000_cpu == PROCESSOR_POWER4 || rs6000_cpu == PROCESSOR_POWER5))
31718 enum attr_type type = get_attr_type (insn);
31719 if ((type == TYPE_LOAD
31720 && get_attr_update (insn) == UPDATE_YES
31721 && get_attr_sign_extend (insn) == SIGN_EXTEND_YES)
31722 || ((type == TYPE_LOAD || type == TYPE_STORE)
31723 && get_attr_update (insn) == UPDATE_YES
31724 && get_attr_indexed (insn) == INDEXED_YES)
31725 || type == TYPE_MFCR)
31726 return true;
31729 return false;
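/* For example, on POWER4/POWER5 a sign-extending load with update such
   as "lhau", or an update-and-indexed form such as "lwzux", is treated
   as microcoded here, as is any mfcr.  */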
31732 /* The function returns true if INSN is cracked into 2 instructions
31733 by the processor (and therefore occupies 2 issue slots). */
31735 static bool
31736 is_cracked_insn (rtx_insn *insn)
31738 if (!insn || !NONDEBUG_INSN_P (insn)
31739 || GET_CODE (PATTERN (insn)) == USE
31740 || GET_CODE (PATTERN (insn)) == CLOBBER)
31741 return false;
31743 if (rs6000_sched_groups
31744 && (rs6000_cpu == PROCESSOR_POWER4 || rs6000_cpu == PROCESSOR_POWER5))
31746 enum attr_type type = get_attr_type (insn);
31747 if ((type == TYPE_LOAD
31748 && get_attr_sign_extend (insn) == SIGN_EXTEND_YES
31749 && get_attr_update (insn) == UPDATE_NO)
31750 || (type == TYPE_LOAD
31751 && get_attr_sign_extend (insn) == SIGN_EXTEND_NO
31752 && get_attr_update (insn) == UPDATE_YES
31753 && get_attr_indexed (insn) == INDEXED_NO)
31754 || (type == TYPE_STORE
31755 && get_attr_update (insn) == UPDATE_YES
31756 && get_attr_indexed (insn) == INDEXED_NO)
31757 || ((type == TYPE_FPLOAD || type == TYPE_FPSTORE)
31758 && get_attr_update (insn) == UPDATE_YES)
31759 || type == TYPE_DELAYED_CR
31760 || (type == TYPE_EXTS
31761 && get_attr_dot (insn) == DOT_YES)
31762 || (type == TYPE_SHIFT
31763 && get_attr_dot (insn) == DOT_YES
31764 && get_attr_var_shift (insn) == VAR_SHIFT_NO)
31765 || (type == TYPE_MUL
31766 && get_attr_dot (insn) == DOT_YES)
31767 || type == TYPE_DIV
31768 || (type == TYPE_INSERT
31769 && get_attr_size (insn) == SIZE_32))
31770 return true;
31773 return false;
31776 /* The function returns true if INSN can be issued only from
31777 the branch slot. */
31779 static bool
31780 is_branch_slot_insn (rtx_insn *insn)
31782 if (!insn || !NONDEBUG_INSN_P (insn)
31783 || GET_CODE (PATTERN (insn)) == USE
31784 || GET_CODE (PATTERN (insn)) == CLOBBER)
31785 return false;
31787 if (rs6000_sched_groups)
31789 enum attr_type type = get_attr_type (insn);
31790 if (type == TYPE_BRANCH || type == TYPE_JMPREG)
31791 return true;
31792 return false;
31795 return false;
31798 /* The function returns true if OUT_INSN sets a value that is
31799 used in the address generation computation of IN_INSN. */
31800 static bool
31801 set_to_load_agen (rtx_insn *out_insn, rtx_insn *in_insn)
31803 rtx out_set, in_set;
31805 /* For performance reasons, only handle the simple case where
31806 both insns are a single_set. */
31807 out_set = single_set (out_insn);
31808 if (out_set)
31810 in_set = single_set (in_insn);
31811 if (in_set)
31812 return reg_mentioned_p (SET_DEST (out_set), SET_SRC (in_set));
31815 return false;
31818 /* Try to determine base/offset/size parts of the given MEM.
31819 Return true if successful, false if the values cannot all
31820 be determined.
31822 This function only looks for REG or REG+CONST address forms.
31823 REG+REG address form will return false. */
31825 static bool
31826 get_memref_parts (rtx mem, rtx *base, HOST_WIDE_INT *offset,
31827 HOST_WIDE_INT *size)
31829 rtx addr_rtx;
31830 if (MEM_SIZE_KNOWN_P (mem))
31831 *size = MEM_SIZE (mem);
31832 else
31833 return false;
31835 addr_rtx = (XEXP (mem, 0));
31836 if (GET_CODE (addr_rtx) == PRE_MODIFY)
31837 addr_rtx = XEXP (addr_rtx, 1);
31839 *offset = 0;
31840 while (GET_CODE (addr_rtx) == PLUS
31841 && CONST_INT_P (XEXP (addr_rtx, 1)))
31843 *offset += INTVAL (XEXP (addr_rtx, 1));
31844 addr_rtx = XEXP (addr_rtx, 0);
31846 if (!REG_P (addr_rtx))
31847 return false;
31849 *base = addr_rtx;
31850 return true;
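/* E.g. for a MEM whose address is (plus (plus (reg 9) (const_int 16))
   (const_int 4)) with a known 8-byte size, this returns base r9,
   offset 20, size 8 (an illustrative sketch; a reg+reg address would
   make it return false instead).  */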
31853 /* Return true if the target storage location of MEM1 is adjacent
31854 to the target storage location of MEM2. */
31857 static bool
31858 adjacent_mem_locations (rtx mem1, rtx mem2)
31860 rtx reg1, reg2;
31861 HOST_WIDE_INT off1, size1, off2, size2;
31863 if (get_memref_parts (mem1, &reg1, &off1, &size1)
31864 && get_memref_parts (mem2, &reg2, &off2, &size2))
31865 return ((REGNO (reg1) == REGNO (reg2))
31866 && ((off1 + size1 == off2)
31867 || (off2 + size2 == off1)));
31869 return false;
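/* So an 8-byte store at 0(r9) and a 4-byte store at 8(r9) are
   adjacent, while stores at 0(r9) and 0(r10) never are, even if r9
   and r10 happen to hold the same address, because only matching base
   registers are compared.  */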
31872 /* This function returns true if it can be determined that the two MEM
31873 locations overlap by at least 1 byte based on base reg/offset/size. */
31875 static bool
31876 mem_locations_overlap (rtx mem1, rtx mem2)
31878 rtx reg1, reg2;
31879 HOST_WIDE_INT off1, size1, off2, size2;
31881 if (get_memref_parts (mem1, &reg1, &off1, &size1)
31882 && get_memref_parts (mem2, &reg2, &off2, &size2))
31883 return ((REGNO (reg1) == REGNO (reg2))
31884 && (((off1 <= off2) && (off1 + size1 > off2))
31885 || ((off2 <= off1) && (off2 + size2 > off1))));
31887 return false;
31890 /* A C statement (sans semicolon) to update the integer scheduling
31891 priority INSN_PRIORITY (INSN). Increase the priority to execute the
31892 INSN earlier, reduce the priority to execute INSN later. Do not
31893 define this macro if you do not need to adjust the scheduling
31894 priorities of insns. */
31896 static int
31897 rs6000_adjust_priority (rtx_insn *insn ATTRIBUTE_UNUSED, int priority)
31899 rtx load_mem, str_mem;
31900 /* On machines (like the 750) which have asymmetric integer units,
31901 where one integer unit can do multiply and divides and the other
31902 can't, reduce the priority of multiply/divide so it is scheduled
31903 before other integer operations. */
31905 #if 0
31906 if (! INSN_P (insn))
31907 return priority;
31909 if (GET_CODE (PATTERN (insn)) == USE)
31910 return priority;
31912 switch (rs6000_cpu_attr) {
31913 case CPU_PPC750:
31914 switch (get_attr_type (insn))
31916 default:
31917 break;
31919 case TYPE_MUL:
31920 case TYPE_DIV:
31921 fprintf (stderr, "priority was %#x (%d) before adjustment\n",
31922 priority, priority);
31923 if (priority >= 0 && priority < 0x01000000)
31924 priority >>= 3;
31925 break;
31928 #endif
31930 if (insn_must_be_first_in_group (insn)
31931 && reload_completed
31932 && current_sched_info->sched_max_insns_priority
31933 && rs6000_sched_restricted_insns_priority)
31936 /* Prioritize insns that can be dispatched only in the first
31937 dispatch slot. */
31938 if (rs6000_sched_restricted_insns_priority == 1)
31939 /* Attach highest priority to insn. This means that in
31940 haifa-sched.c:ready_sort(), dispatch-slot restriction considerations
31941 precede 'priority' (critical path) considerations. */
31942 return current_sched_info->sched_max_insns_priority;
31943 else if (rs6000_sched_restricted_insns_priority == 2)
31944 /* Increase priority of insn by a minimal amount. This means that in
31945 haifa-sched.c:ready_sort(), only 'priority' (critical path)
31946 considerations precede dispatch-slot restriction considerations. */
31947 return (priority + 1);
31950 if (rs6000_cpu == PROCESSOR_POWER6
31951 && ((load_store_pendulum == -2 && is_load_insn (insn, &load_mem))
31952 || (load_store_pendulum == 2 && is_store_insn (insn, &str_mem))))
31953 /* Attach highest priority to insn if the scheduler has just issued two
31954 stores and this instruction is a load, or two loads and this instruction
31955 is a store. Power6 wants loads and stores scheduled alternately
31956 when possible */
31957 return current_sched_info->sched_max_insns_priority;
31959 return priority;
31962 /* Return true if the instruction is nonpipelined on the Cell. */
31963 static bool
31964 is_nonpipeline_insn (rtx_insn *insn)
31966 enum attr_type type;
31967 if (!insn || !NONDEBUG_INSN_P (insn)
31968 || GET_CODE (PATTERN (insn)) == USE
31969 || GET_CODE (PATTERN (insn)) == CLOBBER)
31970 return false;
31972 type = get_attr_type (insn);
31973 if (type == TYPE_MUL
31974 || type == TYPE_DIV
31975 || type == TYPE_SDIV
31976 || type == TYPE_DDIV
31977 || type == TYPE_SSQRT
31978 || type == TYPE_DSQRT
31979 || type == TYPE_MFCR
31980 || type == TYPE_MFCRF
31981 || type == TYPE_MFJMPR)
31983 return true;
31985 return false;
31989 /* Return how many instructions the machine can issue per cycle. */
31991 static int
31992 rs6000_issue_rate (void)
31994 /* Unless scheduling for register pressure, use issue rate of 1 for
31995 first scheduling pass to decrease degradation. */
31996 if (!reload_completed && !flag_sched_pressure)
31997 return 1;
31999 switch (rs6000_cpu_attr) {
32000 case CPU_RS64A:
32001 case CPU_PPC601: /* ? */
32002 case CPU_PPC7450:
32003 return 3;
32004 case CPU_PPC440:
32005 case CPU_PPC603:
32006 case CPU_PPC750:
32007 case CPU_PPC7400:
32008 case CPU_PPC8540:
32009 case CPU_PPC8548:
32010 case CPU_CELL:
32011 case CPU_PPCE300C2:
32012 case CPU_PPCE300C3:
32013 case CPU_PPCE500MC:
32014 case CPU_PPCE500MC64:
32015 case CPU_PPCE5500:
32016 case CPU_PPCE6500:
32017 case CPU_TITAN:
32018 return 2;
32019 case CPU_PPC476:
32020 case CPU_PPC604:
32021 case CPU_PPC604E:
32022 case CPU_PPC620:
32023 case CPU_PPC630:
32024 return 4;
32025 case CPU_POWER4:
32026 case CPU_POWER5:
32027 case CPU_POWER6:
32028 case CPU_POWER7:
32029 return 5;
32030 case CPU_POWER8:
32031 return 7;
32032 case CPU_POWER9:
32033 return 6;
32034 default:
32035 return 1;
32039 /* Return how many instructions to look ahead for better insn
32040 scheduling. */
32042 static int
32043 rs6000_use_sched_lookahead (void)
32045 switch (rs6000_cpu_attr)
32047 case CPU_PPC8540:
32048 case CPU_PPC8548:
32049 return 4;
32051 case CPU_CELL:
32052 return (reload_completed ? 8 : 0);
32054 default:
32055 return 0;
32059 /* We are choosing an insn from the ready queue. Return zero if INSN
32060 can be chosen. */
32061 static int
32062 rs6000_use_sched_lookahead_guard (rtx_insn *insn, int ready_index)
32064 if (ready_index == 0)
32065 return 0;
32067 if (rs6000_cpu_attr != CPU_CELL)
32068 return 0;
32070 gcc_assert (insn != NULL_RTX && INSN_P (insn));
32072 if (!reload_completed
32073 || is_nonpipeline_insn (insn)
32074 || is_microcoded_insn (insn))
32075 return 1;
32077 return 0;
32080 /* Determine if PAT refers to memory. If so, set MEM_REF to the MEM rtx
32081 and return true. */
32083 static bool
32084 find_mem_ref (rtx pat, rtx *mem_ref)
32086 const char * fmt;
32087 int i, j;
32089 /* stack_tie does not produce any real memory traffic. */
32090 if (tie_operand (pat, VOIDmode))
32091 return false;
32093 if (GET_CODE (pat) == MEM)
32095 *mem_ref = pat;
32096 return true;
32099 /* Recursively process the pattern. */
32100 fmt = GET_RTX_FORMAT (GET_CODE (pat));
32102 for (i = GET_RTX_LENGTH (GET_CODE (pat)) - 1; i >= 0; i--)
32104 if (fmt[i] == 'e')
32106 if (find_mem_ref (XEXP (pat, i), mem_ref))
32107 return true;
32109 else if (fmt[i] == 'E')
32110 for (j = XVECLEN (pat, i) - 1; j >= 0; j--)
32112 if (find_mem_ref (XVECEXP (pat, i, j), mem_ref))
32113 return true;
32117 return false;
32120 /* Determine if PAT is a PATTERN of a load insn. */
32122 static bool
32123 is_load_insn1 (rtx pat, rtx *load_mem)
32125 if (pat == NULL_RTX)
32126 return false;
32128 if (GET_CODE (pat) == SET)
32129 return find_mem_ref (SET_SRC (pat), load_mem);
32131 if (GET_CODE (pat) == PARALLEL)
32133 int i;
32135 for (i = 0; i < XVECLEN (pat, 0); i++)
32136 if (is_load_insn1 (XVECEXP (pat, 0, i), load_mem))
32137 return true;
32140 return false;
32143 /* Determine if INSN loads from memory. */
32145 static bool
32146 is_load_insn (rtx insn, rtx *load_mem)
32148 if (!insn || !INSN_P (insn))
32149 return false;
32151 if (CALL_P (insn))
32152 return false;
32154 return is_load_insn1 (PATTERN (insn), load_mem);
32157 /* Determine if PAT is a PATTERN of a store insn. */
32159 static bool
32160 is_store_insn1 (rtx pat, rtx *str_mem)
32162 if (pat == NULL_RTX)
32163 return false;
32165 if (GET_CODE (pat) == SET)
32166 return find_mem_ref (SET_DEST (pat), str_mem);
32168 if (GET_CODE (pat) == PARALLEL)
32170 int i;
32172 for (i = 0; i < XVECLEN (pat, 0); i++)
32173 if (is_store_insn1 (XVECEXP (pat, 0, i), str_mem))
32174 return true;
32177 return false;
32180 /* Determine if INSN stores to memory. */
32182 static bool
32183 is_store_insn (rtx insn, rtx *str_mem)
32185 if (!insn || !INSN_P (insn))
32186 return false;
32188 return is_store_insn1 (PATTERN (insn), str_mem);
32191 /* Return whether TYPE is a Power9 pairable vector instruction type. */
32193 static bool
32194 is_power9_pairable_vec_type (enum attr_type type)
32196 switch (type)
32198 case TYPE_VECSIMPLE:
32199 case TYPE_VECCOMPLEX:
32200 case TYPE_VECDIV:
32201 case TYPE_VECCMP:
32202 case TYPE_VECPERM:
32203 case TYPE_VECFLOAT:
32204 case TYPE_VECFDIV:
32205 case TYPE_VECDOUBLE:
32206 return true;
32207 default:
32208 break;
32210 return false;
32213 /* Returns whether the dependence between INSN and NEXT is considered
32214 costly by the given target. */
32216 static bool
32217 rs6000_is_costly_dependence (dep_t dep, int cost, int distance)
32219 rtx insn;
32220 rtx next;
32221 rtx load_mem, str_mem;
32223 /* If the flag is not enabled - no dependence is considered costly;
32224 allow all dependent insns in the same group.
32225 This is the most aggressive option. */
32226 if (rs6000_sched_costly_dep == no_dep_costly)
32227 return false;
32229 /* If the flag is set to 1 - a dependence is always considered costly;
32230 do not allow dependent instructions in the same group.
32231 This is the most conservative option. */
32232 if (rs6000_sched_costly_dep == all_deps_costly)
32233 return true;
32235 insn = DEP_PRO (dep);
32236 next = DEP_CON (dep);
32238 if (rs6000_sched_costly_dep == store_to_load_dep_costly
32239 && is_load_insn (next, &load_mem)
32240 && is_store_insn (insn, &str_mem))
32241 /* Prevent load after store in the same group. */
32242 return true;
32244 if (rs6000_sched_costly_dep == true_store_to_load_dep_costly
32245 && is_load_insn (next, &load_mem)
32246 && is_store_insn (insn, &str_mem)
32247 && DEP_TYPE (dep) == REG_DEP_TRUE
32248 && mem_locations_overlap(str_mem, load_mem))
32249 /* Prevent load after store in the same group if it is a true
32250 dependence. */
32251 return true;
32253 /* The flag is set to X; dependences with latency >= X are considered costly,
32254 and will not be scheduled in the same group. */
32255 if (rs6000_sched_costly_dep <= max_dep_latency
32256 && ((cost - distance) >= (int)rs6000_sched_costly_dep))
32257 return true;
32259 return false;
32262 /* Return the next insn after INSN that is found before TAIL is reached,
32263 skipping any "non-active" insns - insns that will not actually occupy
32264 an issue slot. Return NULL_RTX if such an insn is not found. */
32266 static rtx_insn *
32267 get_next_active_insn (rtx_insn *insn, rtx_insn *tail)
32269 if (insn == NULL_RTX || insn == tail)
32270 return NULL;
32272 while (1)
32274 insn = NEXT_INSN (insn);
32275 if (insn == NULL_RTX || insn == tail)
32276 return NULL;
32278 if (CALL_P (insn)
32279 || JUMP_P (insn) || JUMP_TABLE_DATA_P (insn)
32280 || (NONJUMP_INSN_P (insn)
32281 && GET_CODE (PATTERN (insn)) != USE
32282 && GET_CODE (PATTERN (insn)) != CLOBBER
32283 && INSN_CODE (insn) != CODE_FOR_stack_tie))
32284 break;
32286 return insn;
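/* The Power6/Power9 reorder hooks below repeatedly move a chosen insn
   to the end of the ready list, since the scheduler picks from the
   end. A sketch of that idiom, kept under #if 0 because the loops
   below are written out inline:  */
#if 0
static void
rotate_ready_to_end (rtx_insn **ready, int pos, int lastpos)
{
  rtx_insn *tmp = ready[pos];
  /* Shift everything after POS down one slot...  */
  for (int i = pos; i < lastpos; i++)
    ready[i] = ready[i + 1];
  /* ...and place the chosen insn where it will be picked next.  */
  ready[lastpos] = tmp;
}
#endif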
32289 /* Do Power9 specific sched_reorder2 reordering of ready list. */
32291 static int
32292 power9_sched_reorder2 (rtx_insn **ready, int lastpos)
32294 int pos;
32295 int i;
32296 rtx_insn *tmp;
32297 enum attr_type type;
32299 type = get_attr_type (last_scheduled_insn);
32301 /* Try to issue fixed point divides back-to-back in pairs so they will be
32302 routed to separate execution units and execute in parallel. */
32303 if (type == TYPE_DIV && divide_cnt == 0)
32305 /* First divide has been scheduled. */
32306 divide_cnt = 1;
32308 /* Scan the ready list looking for another divide, if found move it
32309 to the end of the list so it is chosen next. */
32310 pos = lastpos;
32311 while (pos >= 0)
32313 if (recog_memoized (ready[pos]) >= 0
32314 && get_attr_type (ready[pos]) == TYPE_DIV)
32316 tmp = ready[pos];
32317 for (i = pos; i < lastpos; i++)
32318 ready[i] = ready[i + 1];
32319 ready[lastpos] = tmp;
32320 break;
32322 pos--;
32325 else
32327 /* Last insn was the 2nd divide or not a divide, reset the counter. */
32328 divide_cnt = 0;
32330 /* Power9 can execute 2 vector operations and 2 vector loads in a single
32331 cycle. So try to pair up and alternate groups of vector and vector
32332 load instructions.
32334 To aid this formation, a counter is maintained to keep track of
32335 vec/vecload insns issued. The value of vec_load_pendulum maintains
32336 the current state with the following values:
32338 0 : Initial state, no vec/vecload group has been started.
32340 -1 : 1 vector load has been issued and another has been found on
32341 the ready list and moved to the end.
32343 -2 : 2 vector loads have been issued and a vector operation has
32344 been found and moved to the end of the ready list.
32346 -3 : 2 vector loads and a vector insn have been issued and a
32347 vector operation has been found and moved to the end of the
32348 ready list.
32350 1 : 1 vector insn has been issued and another has been found and
32351 moved to the end of the ready list.
32353 2 : 2 vector insns have been issued and a vector load has been
32354 found and moved to the end of the ready list.
32356 3 : 2 vector insns and a vector load have been issued and another
32357 vector load has been found and moved to the end of the ready
32358 list. */
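/* In short, as long as suitable partners keep turning up on the ready
   list, the pendulum walks 0 -> -1 -> -2 -> -3 -> 0 when a vecload
   group leads, or 0 -> 1 -> 2 -> 3 -> 0 when a vector-op group leads,
   aiming at a VL,VL,V,V or V,V,VL,VL issue pattern; otherwise it is
   reset to 0 below.  */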
32359 if (type == TYPE_VECLOAD)
32361 /* Issued a vecload. */
32362 if (vec_load_pendulum == 0)
32364 /* We issued a single vecload, look for another and move it to
32365 the end of the ready list so it will be scheduled next.
32366 Set pendulum if found. */
32367 pos = lastpos;
32368 while (pos >= 0)
32370 if (recog_memoized (ready[pos]) >= 0
32371 && get_attr_type (ready[pos]) == TYPE_VECLOAD)
32373 tmp = ready[pos];
32374 for (i = pos; i < lastpos; i++)
32375 ready[i] = ready[i + 1];
32376 ready[lastpos] = tmp;
32377 vec_load_pendulum = -1;
32378 return cached_can_issue_more;
32380 pos--;
32383 else if (vec_load_pendulum == -1)
32385 /* This is the second vecload we've issued, search the ready
32386 list for a vector operation so we can try to schedule a
32387 pair of those next. If found move to the end of the ready
32388 list so it is scheduled next and set the pendulum. */
32389 pos = lastpos;
32390 while (pos >= 0)
32392 if (recog_memoized (ready[pos]) >= 0
32393 && is_power9_pairable_vec_type (
32394 get_attr_type (ready[pos])))
32396 tmp = ready[pos];
32397 for (i = pos; i < lastpos; i++)
32398 ready[i] = ready[i + 1];
32399 ready[lastpos] = tmp;
32400 vec_load_pendulum = -2;
32401 return cached_can_issue_more;
32403 pos--;
32406 else if (vec_load_pendulum == 2)
32408 /* Two vector ops have been issued and we've just issued a
32409 vecload, look for another vecload and move to end of ready
32410 list if found. */
32411 pos = lastpos;
32412 while (pos >= 0)
32414 if (recog_memoized (ready[pos]) >= 0
32415 && get_attr_type (ready[pos]) == TYPE_VECLOAD)
32417 tmp = ready[pos];
32418 for (i = pos; i < lastpos; i++)
32419 ready[i] = ready[i + 1];
32420 ready[lastpos] = tmp;
32421 /* Set pendulum so that next vecload will be seen as
32422 finishing a group, not start of one. */
32423 vec_load_pendulum = 3;
32424 return cached_can_issue_more;
32426 pos--;
32430 else if (is_power9_pairable_vec_type (type))
32432 /* Issued a vector operation. */
32433 if (vec_load_pendulum == 0)
32434 /* We issued a single vec op, look for another and move it
32435 to the end of the ready list so it will be scheduled next.
32436 Set pendulum if found. */
32438 pos = lastpos;
32439 while (pos >= 0)
32441 if (recog_memoized (ready[pos]) >= 0
32442 && is_power9_pairable_vec_type (
32443 get_attr_type (ready[pos])))
32445 tmp = ready[pos];
32446 for (i = pos; i < lastpos; i++)
32447 ready[i] = ready[i + 1];
32448 ready[lastpos] = tmp;
32449 vec_load_pendulum = 1;
32450 return cached_can_issue_more;
32452 pos--;
32455 else if (vec_load_pendulum == 1)
32457 /* This is the second vec op we've issued, search the ready
32458 list for a vecload operation so we can try to schedule a
32459 pair of those next. If found move to the end of the ready
32460 list so it is scheduled next and set the pendulum. */
32461 pos = lastpos;
32462 while (pos >= 0)
32464 if (recog_memoized (ready[pos]) >= 0
32465 && get_attr_type (ready[pos]) == TYPE_VECLOAD)
32467 tmp = ready[pos];
32468 for (i = pos; i < lastpos; i++)
32469 ready[i] = ready[i + 1];
32470 ready[lastpos] = tmp;
32471 vec_load_pendulum = 2;
32472 return cached_can_issue_more;
32474 pos--;
32477 else if (vec_load_pendulum == -2)
32479 /* Two vecload ops have been issued and we've just issued a
32480 vec op, look for another vec op and move to end of ready
32481 list if found. */
32482 pos = lastpos;
32483 while (pos >= 0)
32485 if (recog_memoized (ready[pos]) >= 0
32486 && is_power9_pairable_vec_type (
32487 get_attr_type (ready[pos])))
32489 tmp = ready[pos];
32490 for (i = pos; i < lastpos; i++)
32491 ready[i] = ready[i + 1];
32492 ready[lastpos] = tmp;
32493 /* Set pendulum so that next vec op will be seen as
32494 finishing a group, not start of one. */
32495 vec_load_pendulum = -3;
32496 return cached_can_issue_more;
32498 pos--;
32503 /* We've either finished a vec/vecload group, couldn't find an insn to
32504 continue the current group, or the last insn had nothing to do
32505 with a group. In any case, reset the pendulum. */
32506 vec_load_pendulum = 0;
32509 return cached_can_issue_more;
32512 /* We are about to begin issuing insns for this clock cycle. */
32514 static int
32515 rs6000_sched_reorder (FILE *dump ATTRIBUTE_UNUSED, int sched_verbose,
32516 rtx_insn **ready ATTRIBUTE_UNUSED,
32517 int *pn_ready ATTRIBUTE_UNUSED,
32518 int clock_var ATTRIBUTE_UNUSED)
32520 int n_ready = *pn_ready;
32522 if (sched_verbose)
32523 fprintf (dump, "// rs6000_sched_reorder :\n");
32525 /* Reorder the ready list, if the second to last ready insn
32526 is a nonpipelined insn. */
32527 if (rs6000_cpu_attr == CPU_CELL && n_ready > 1)
32529 if (is_nonpipeline_insn (ready[n_ready - 1])
32530 && (recog_memoized (ready[n_ready - 2]) > 0))
32531 /* Simply swap first two insns. */
32532 std::swap (ready[n_ready - 1], ready[n_ready - 2]);
32535 if (rs6000_cpu == PROCESSOR_POWER6)
32536 load_store_pendulum = 0;
32538 return rs6000_issue_rate ();
32541 /* Like rs6000_sched_reorder, but called after issuing each insn. */
32543 static int
32544 rs6000_sched_reorder2 (FILE *dump, int sched_verbose, rtx_insn **ready,
32545 int *pn_ready, int clock_var ATTRIBUTE_UNUSED)
32547 if (sched_verbose)
32548 fprintf (dump, "// rs6000_sched_reorder2 :\n");
32550 /* For Power6, we need to handle some special cases to try and keep the
32551 store queue from overflowing and triggering expensive flushes.
32553 This code monitors how load and store instructions are being issued
32554 and skews the ready list one way or the other to increase the likelihood
32555 that a desired instruction is issued at the proper time.
32557 A couple of things are done. First, we maintain a "load_store_pendulum"
32558 to track the current state of load/store issue.
32560 - If the pendulum is at zero, then no loads or stores have been
32561 issued in the current cycle so we do nothing.
32563 - If the pendulum is 1, then a single load has been issued in this
32564 cycle and we attempt to locate another load in the ready list to
32565 issue with it.
32567 - If the pendulum is -2, then two stores have already been
32568 issued in this cycle, so we increase the priority of the first load
32569 in the ready list to increase its likelihood of being chosen first
32570 in the next cycle.
32572 - If the pendulum is -1, then a single store has been issued in this
32573 cycle and we attempt to locate another store in the ready list to
32574 issue with it, preferring a store to an adjacent memory location to
32575 facilitate store pairing in the store queue.
32577 - If the pendulum is 2, then two loads have already been
32578 issued in this cycle, so we increase the priority of the first store
32579 in the ready list to increase its likelihood of being chosen first
32580 in the next cycle.
32582 - If the pendulum < -2 or > 2, then do nothing.
32584 Note: This code covers the most common scenarios. There exist
32585 non-load/store instructions which make use of the LSU and which
32586 would need to be accounted for to strictly model the behavior
32587 of the machine. Those instructions are currently unaccounted
32588 for to help minimize the compile time overhead of this code. */
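/* For example, issuing two loads in one cycle drives the pendulum to
   2, at which point the first store found on the ready list gets a
   priority boost so it is more likely to issue in the following
   cycle.  */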
32590 if (rs6000_cpu == PROCESSOR_POWER6 && last_scheduled_insn)
32592 int pos;
32593 int i;
32594 rtx_insn *tmp;
32595 rtx load_mem, str_mem;
32597 if (is_store_insn (last_scheduled_insn, &str_mem))
32598 /* Issuing a store, swing the load_store_pendulum to the left */
32599 load_store_pendulum--;
32600 else if (is_load_insn (last_scheduled_insn, &load_mem))
32601 /* Issuing a load, swing the load_store_pendulum to the right */
32602 load_store_pendulum++;
32603 else
32604 return cached_can_issue_more;
32606 /* If the pendulum is balanced, or there is only one instruction on
32607 the ready list, then all is well, so return. */
32608 if ((load_store_pendulum == 0) || (*pn_ready <= 1))
32609 return cached_can_issue_more;
32611 if (load_store_pendulum == 1)
32613 /* A load has been issued in this cycle. Scan the ready list
32614 for another load to issue with it */
32615 pos = *pn_ready-1;
32617 while (pos >= 0)
32619 if (is_load_insn (ready[pos], &load_mem))
32621 /* Found a load. Move it to the head of the ready list,
32622 and adjust its priority so that it is more likely to
32623 stay there */
32624 tmp = ready[pos];
32625 for (i=pos; i<*pn_ready-1; i++)
32626 ready[i] = ready[i + 1];
32627 ready[*pn_ready-1] = tmp;
32629 if (!sel_sched_p () && INSN_PRIORITY_KNOWN (tmp))
32630 INSN_PRIORITY (tmp)++;
32631 break;
32633 pos--;
32636 else if (load_store_pendulum == -2)
32638 /* Two stores have been issued in this cycle. Increase the
32639 priority of the first load in the ready list to favor it for
32640 issuing in the next cycle. */
32641 pos = *pn_ready-1;
32643 while (pos >= 0)
32645 if (is_load_insn (ready[pos], &load_mem)
32646 && !sel_sched_p ()
32647 && INSN_PRIORITY_KNOWN (ready[pos]))
32649 INSN_PRIORITY (ready[pos])++;
32651 /* Adjust the pendulum to account for the fact that a load
32652 was found and increased in priority. This is to prevent
32653 increasing the priority of multiple loads */
32654 load_store_pendulum--;
32656 break;
32658 pos--;
32661 else if (load_store_pendulum == -1)
32663 /* A store has been issued in this cycle. Scan the ready list for
32664 another store to issue with it, preferring a store to an adjacent
32665 memory location */
32666 int first_store_pos = -1;
32668 pos = *pn_ready-1;
32670 while (pos >= 0)
32672 if (is_store_insn (ready[pos], &str_mem))
32674 rtx str_mem2;
32675 /* Maintain the index of the first store found on the
32676 list */
32677 if (first_store_pos == -1)
32678 first_store_pos = pos;
32680 if (is_store_insn (last_scheduled_insn, &str_mem2)
32681 && adjacent_mem_locations (str_mem, str_mem2))
32683 /* Found an adjacent store. Move it to the head of the
32684 ready list, and adjust its priority so that it is
32685 more likely to stay there */
32686 tmp = ready[pos];
32687 for (i=pos; i<*pn_ready-1; i++)
32688 ready[i] = ready[i + 1];
32689 ready[*pn_ready-1] = tmp;
32691 if (!sel_sched_p () && INSN_PRIORITY_KNOWN (tmp))
32692 INSN_PRIORITY (tmp)++;
32694 first_store_pos = -1;
32696 break;
32699 pos--;
32702 if (first_store_pos >= 0)
32704 /* An adjacent store wasn't found, but a non-adjacent store was,
32705 so move the non-adjacent store to the front of the ready
32706 list, and adjust its priority so that it is more likely to
32707 stay there. */
32708 tmp = ready[first_store_pos];
32709 for (i=first_store_pos; i<*pn_ready-1; i++)
32710 ready[i] = ready[i + 1];
32711 ready[*pn_ready-1] = tmp;
32712 if (!sel_sched_p () && INSN_PRIORITY_KNOWN (tmp))
32713 INSN_PRIORITY (tmp)++;
32716 else if (load_store_pendulum == 2)
32718 /* Two loads have been issued in this cycle. Increase the priority
32719 of the first store in the ready list to favor it for issuing in
32720 the next cycle. */
32721 pos = *pn_ready-1;
32723 while (pos >= 0)
32725 if (is_store_insn (ready[pos], &str_mem)
32726 && !sel_sched_p ()
32727 && INSN_PRIORITY_KNOWN (ready[pos]))
32729 INSN_PRIORITY (ready[pos])++;
32731 /* Adjust the pendulum to account for the fact that a store
32732 was found and increased in priority. This is to prevent
32733 increasing the priority of multiple stores */
32734 load_store_pendulum++;
32736 break;
32738 pos--;
32743 /* Do Power9 dependent reordering if necessary. */
32744 if (rs6000_cpu == PROCESSOR_POWER9 && last_scheduled_insn
32745 && recog_memoized (last_scheduled_insn) >= 0)
32746 return power9_sched_reorder2 (ready, *pn_ready - 1);
32748 return cached_can_issue_more;
32751 /* Return whether the presence of INSN causes a dispatch group termination
32752 of group WHICH_GROUP.
32754 If WHICH_GROUP == current_group, this function will return true if INSN
32755 causes the termination of the current group (i.e, the dispatch group to
32756 which INSN belongs). This means that INSN will be the last insn in the
32757 group it belongs to.
32759 If WHICH_GROUP == previous_group, this function will return true if INSN
32760 causes the termination of the previous group (i.e, the dispatch group that
32761 precedes the group to which INSN belongs). This means that INSN will be
32762 the first insn in the group it belongs to. */
32764 static bool
32765 insn_terminates_group_p (rtx_insn *insn, enum group_termination which_group)
32767 bool first, last;
32769 if (! insn)
32770 return false;
32772 first = insn_must_be_first_in_group (insn);
32773 last = insn_must_be_last_in_group (insn);
32775 if (first && last)
32776 return true;
32778 if (which_group == current_group)
32779 return last;
32780 else if (which_group == previous_group)
32781 return first;
32783 return false;
32787 static bool
32788 insn_must_be_first_in_group (rtx_insn *insn)
32790 enum attr_type type;
32792 if (!insn
32793 || NOTE_P (insn)
32794 || DEBUG_INSN_P (insn)
32795 || GET_CODE (PATTERN (insn)) == USE
32796 || GET_CODE (PATTERN (insn)) == CLOBBER)
32797 return false;
32799 switch (rs6000_cpu)
32801 case PROCESSOR_POWER5:
32802 if (is_cracked_insn (insn))
32803 return true;
32804 /* FALLTHRU */
32805 case PROCESSOR_POWER4:
32806 if (is_microcoded_insn (insn))
32807 return true;
32809 if (!rs6000_sched_groups)
32810 return false;
32812 type = get_attr_type (insn);
32814 switch (type)
32816 case TYPE_MFCR:
32817 case TYPE_MFCRF:
32818 case TYPE_MTCR:
32819 case TYPE_DELAYED_CR:
32820 case TYPE_CR_LOGICAL:
32821 case TYPE_MTJMPR:
32822 case TYPE_MFJMPR:
32823 case TYPE_DIV:
32824 case TYPE_LOAD_L:
32825 case TYPE_STORE_C:
32826 case TYPE_ISYNC:
32827 case TYPE_SYNC:
32828 return true;
32829 default:
32830 break;
32832 break;
32833 case PROCESSOR_POWER6:
32834 type = get_attr_type (insn);
32836 switch (type)
32838 case TYPE_EXTS:
32839 case TYPE_CNTLZ:
32840 case TYPE_TRAP:
32841 case TYPE_MUL:
32842 case TYPE_INSERT:
32843 case TYPE_FPCOMPARE:
32844 case TYPE_MFCR:
32845 case TYPE_MTCR:
32846 case TYPE_MFJMPR:
32847 case TYPE_MTJMPR:
32848 case TYPE_ISYNC:
32849 case TYPE_SYNC:
32850 case TYPE_LOAD_L:
32851 case TYPE_STORE_C:
32852 return true;
32853 case TYPE_SHIFT:
32854 if (get_attr_dot (insn) == DOT_NO
32855 || get_attr_var_shift (insn) == VAR_SHIFT_NO)
32856 return true;
32857 else
32858 break;
32859 case TYPE_DIV:
32860 if (get_attr_size (insn) == SIZE_32)
32861 return true;
32862 else
32863 break;
32864 case TYPE_LOAD:
32865 case TYPE_STORE:
32866 case TYPE_FPLOAD:
32867 case TYPE_FPSTORE:
32868 if (get_attr_update (insn) == UPDATE_YES)
32869 return true;
32870 else
32871 break;
32872 default:
32873 break;
32875 break;
32876 case PROCESSOR_POWER7:
32877 type = get_attr_type (insn);
32879 switch (type)
32881 case TYPE_CR_LOGICAL:
32882 case TYPE_MFCR:
32883 case TYPE_MFCRF:
32884 case TYPE_MTCR:
32885 case TYPE_DIV:
32886 case TYPE_ISYNC:
32887 case TYPE_LOAD_L:
32888 case TYPE_STORE_C:
32889 case TYPE_MFJMPR:
32890 case TYPE_MTJMPR:
32891 return true;
32892 case TYPE_MUL:
32893 case TYPE_SHIFT:
32894 case TYPE_EXTS:
32895 if (get_attr_dot (insn) == DOT_YES)
32896 return true;
32897 else
32898 break;
32899 case TYPE_LOAD:
32900 if (get_attr_sign_extend (insn) == SIGN_EXTEND_YES
32901 || get_attr_update (insn) == UPDATE_YES)
32902 return true;
32903 else
32904 break;
32905 case TYPE_STORE:
32906 case TYPE_FPLOAD:
32907 case TYPE_FPSTORE:
32908 if (get_attr_update (insn) == UPDATE_YES)
32909 return true;
32910 else
32911 break;
32912 default:
32913 break;
32915 break;
32916 case PROCESSOR_POWER8:
32917 type = get_attr_type (insn);
32919 switch (type)
32921 case TYPE_CR_LOGICAL:
32922 case TYPE_DELAYED_CR:
32923 case TYPE_MFCR:
32924 case TYPE_MFCRF:
32925 case TYPE_MTCR:
32926 case TYPE_SYNC:
32927 case TYPE_ISYNC:
32928 case TYPE_LOAD_L:
32929 case TYPE_STORE_C:
32930 case TYPE_VECSTORE:
32931 case TYPE_MFJMPR:
32932 case TYPE_MTJMPR:
32933 return true;
32934 case TYPE_SHIFT:
32935 case TYPE_EXTS:
32936 case TYPE_MUL:
32937 if (get_attr_dot (insn) == DOT_YES)
32938 return true;
32939 else
32940 break;
32941 case TYPE_LOAD:
32942 if (get_attr_sign_extend (insn) == SIGN_EXTEND_YES
32943 || get_attr_update (insn) == UPDATE_YES)
32944 return true;
32945 else
32946 break;
32947 case TYPE_STORE:
32948 if (get_attr_update (insn) == UPDATE_YES
32949 && get_attr_indexed (insn) == INDEXED_YES)
32950 return true;
32951 else
32952 break;
32953 default:
32954 break;
32956 break;
32957 default:
32958 break;
32961 return false;
32964 static bool
32965 insn_must_be_last_in_group (rtx_insn *insn)
32967 enum attr_type type;
32969 if (!insn
32970 || NOTE_P (insn)
32971 || DEBUG_INSN_P (insn)
32972 || GET_CODE (PATTERN (insn)) == USE
32973 || GET_CODE (PATTERN (insn)) == CLOBBER)
32974 return false;
32976 switch (rs6000_cpu) {
32977 case PROCESSOR_POWER4:
32978 case PROCESSOR_POWER5:
32979 if (is_microcoded_insn (insn))
32980 return true;
32982 if (is_branch_slot_insn (insn))
32983 return true;
32985 break;
32986 case PROCESSOR_POWER6:
32987 type = get_attr_type (insn);
32989 switch (type)
32991 case TYPE_EXTS:
32992 case TYPE_CNTLZ:
32993 case TYPE_TRAP:
32994 case TYPE_MUL:
32995 case TYPE_FPCOMPARE:
32996 case TYPE_MFCR:
32997 case TYPE_MTCR:
32998 case TYPE_MFJMPR:
32999 case TYPE_MTJMPR:
33000 case TYPE_ISYNC:
33001 case TYPE_SYNC:
33002 case TYPE_LOAD_L:
33003 case TYPE_STORE_C:
33004 return true;
33005 case TYPE_SHIFT:
33006 if (get_attr_dot (insn) == DOT_NO
33007 || get_attr_var_shift (insn) == VAR_SHIFT_NO)
33008 return true;
33009 else
33010 break;
33011 case TYPE_DIV:
33012 if (get_attr_size (insn) == SIZE_32)
33013 return true;
33014 else
33015 break;
33016 default:
33017 break;
33019 break;
33020 case PROCESSOR_POWER7:
33021 type = get_attr_type (insn);
33023 switch (type)
33025 case TYPE_ISYNC:
33026 case TYPE_SYNC:
33027 case TYPE_LOAD_L:
33028 case TYPE_STORE_C:
33029 return true;
33030 case TYPE_LOAD:
33031 if (get_attr_sign_extend (insn) == SIGN_EXTEND_YES
33032 && get_attr_update (insn) == UPDATE_YES)
33033 return true;
33034 else
33035 break;
33036 case TYPE_STORE:
33037 if (get_attr_update (insn) == UPDATE_YES
33038 && get_attr_indexed (insn) == INDEXED_YES)
33039 return true;
33040 else
33041 break;
33042 default:
33043 break;
33045 break;
33046 case PROCESSOR_POWER8:
33047 type = get_attr_type (insn);
33049 switch (type)
33051 case TYPE_MFCR:
33052 case TYPE_MTCR:
33053 case TYPE_ISYNC:
33054 case TYPE_SYNC:
33055 case TYPE_LOAD_L:
33056 case TYPE_STORE_C:
33057 return true;
33058 case TYPE_LOAD:
33059 if (get_attr_sign_extend (insn) == SIGN_EXTEND_YES
33060 && get_attr_update (insn) == UPDATE_YES)
33061 return true;
33062 else
33063 break;
33064 case TYPE_STORE:
33065 if (get_attr_update (insn) == UPDATE_YES
33066 && get_attr_indexed (insn) == INDEXED_YES)
33067 return true;
33068 else
33069 break;
33070 default:
33071 break;
33073 break;
33074 default:
33075 break;
33078 return false;
33081 /* Return true if it is recommended to keep NEXT_INSN "far" (in a separate
33082 dispatch group) from the insns in GROUP_INSNS. Return false otherwise. */
33084 static bool
33085 is_costly_group (rtx *group_insns, rtx next_insn)
33087 int i;
33088 int issue_rate = rs6000_issue_rate ();
33090 for (i = 0; i < issue_rate; i++)
33092 sd_iterator_def sd_it;
33093 dep_t dep;
33094 rtx insn = group_insns[i];
33096 if (!insn)
33097 continue;
33099 FOR_EACH_DEP (insn, SD_LIST_RES_FORW, sd_it, dep)
33101 rtx next = DEP_CON (dep);
33103 if (next == next_insn
33104 && rs6000_is_costly_dependence (dep, dep_cost (dep), 0))
33105 return true;
33109 return false;
33112 /* Utility of the function redefine_groups.
33113 Check if it is too costly to schedule NEXT_INSN together with GROUP_INSNS
33114 in the same dispatch group. If so, insert nops before NEXT_INSN, in order
33115 to keep it "far" (in a separate group) from GROUP_INSNS, following
33116 one of the following schemes, depending on the value of the flag
33117 -minsert-sched-nops = X:
33118 (1) X == sched_finish_regroup_exact: insert exactly as many nops as needed
33119 in order to force NEXT_INSN into a separate group.
33120 (2) X < sched_finish_regroup_exact: insert exactly X nops.
33121 GROUP_END, CAN_ISSUE_MORE and GROUP_COUNT record the state after nop
33122 insertion (whether a group has just ended, how many vacant issue slots remain
33123 in the last group, and how many dispatch groups have been encountered so far). */
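/* Worked example (illustrative): on a 4-issue processor with 3 vacant
   slots left when a costly dependence on NEXT_INSN is found:
   - with -minsert-sched-nops=sched_finish_regroup_exact and a
     non-branch NEXT_INSN, 2 nops are emitted (the branch slot need not
     be filled), or a single group-ending nop on POWER6/7/8;
   - with -minsert-sched-nops=2, exactly 2 nops are emitted.  */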
33125 static int
33126 force_new_group (int sched_verbose, FILE *dump, rtx *group_insns,
33127 rtx_insn *next_insn, bool *group_end, int can_issue_more,
33128 int *group_count)
33130 rtx nop;
33131 bool force;
33132 int issue_rate = rs6000_issue_rate ();
33133 bool end = *group_end;
33134 int i;
33136 if (next_insn == NULL_RTX || DEBUG_INSN_P (next_insn))
33137 return can_issue_more;
33139 if (rs6000_sched_insert_nops > sched_finish_regroup_exact)
33140 return can_issue_more;
33142 force = is_costly_group (group_insns, next_insn);
33143 if (!force)
33144 return can_issue_more;
33146 if (sched_verbose > 6)
33147 fprintf (dump,"force: group count = %d, can_issue_more = %d\n",
33148 *group_count ,can_issue_more);
33150 if (rs6000_sched_insert_nops == sched_finish_regroup_exact)
33152 if (*group_end)
33153 can_issue_more = 0;
33155 /* Since only a branch can be issued in the last issue_slot, it is
33156 sufficient to insert 'can_issue_more - 1' nops if next_insn is not
33157 a branch. If next_insn is a branch, we insert 'can_issue_more' nops;
33158 in this case the last nop will start a new group and the branch
33159 will be forced to the new group. */
33160 if (can_issue_more && !is_branch_slot_insn (next_insn))
33161 can_issue_more--;
33163 /* Do we have a special group ending nop? */
33164 if (rs6000_cpu_attr == CPU_POWER6 || rs6000_cpu_attr == CPU_POWER7
33165 || rs6000_cpu_attr == CPU_POWER8)
33167 nop = gen_group_ending_nop ();
33168 emit_insn_before (nop, next_insn);
33169 can_issue_more = 0;
33171 else
33172 while (can_issue_more > 0)
33174 nop = gen_nop ();
33175 emit_insn_before (nop, next_insn);
33176 can_issue_more--;
33179 *group_end = true;
33180 return 0;
33183 if (rs6000_sched_insert_nops < sched_finish_regroup_exact)
33185 int n_nops = rs6000_sched_insert_nops;
33187 /* Nops can't be issued from the branch slot, so the effective
33188 issue_rate for nops is 'issue_rate - 1'. */
33189 if (can_issue_more == 0)
33190 can_issue_more = issue_rate;
33191 can_issue_more--;
33192 if (can_issue_more == 0)
33194 can_issue_more = issue_rate - 1;
33195 (*group_count)++;
33196 end = true;
33197 for (i = 0; i < issue_rate; i++)
33199 group_insns[i] = 0;
33203 while (n_nops > 0)
33205 nop = gen_nop ();
33206 emit_insn_before (nop, next_insn);
33207 if (can_issue_more == issue_rate - 1) /* new group begins */
33208 end = false;
33209 can_issue_more--;
33210 if (can_issue_more == 0)
33212 can_issue_more = issue_rate - 1;
33213 (*group_count)++;
33214 end = true;
33215 for (i = 0; i < issue_rate; i++)
33217 group_insns[i] = 0;
33220 n_nops--;
33223 /* Scale back relative to 'issue_rate' (instead of 'issue_rate - 1'). */
33224 can_issue_more++;
33226 /* Is next_insn going to start a new group? */
33227 *group_end
33228 = (end
33229 || (can_issue_more == 1 && !is_branch_slot_insn (next_insn))
33230 || (can_issue_more <= 2 && is_cracked_insn (next_insn))
33231 || (can_issue_more < issue_rate &&
33232 insn_terminates_group_p (next_insn, previous_group)));
33233 if (*group_end && end)
33234 (*group_count)--;
33236 if (sched_verbose > 6)
33237 fprintf (dump, "done force: group count = %d, can_issue_more = %d\n",
33238 *group_count, can_issue_more);
33239 return can_issue_more;
33242 return can_issue_more;
33245 /* This function tries to synch the dispatch groups that the compiler "sees"
33246 with the dispatch groups that the processor dispatcher is expected to
33247 form in practice. It tries to achieve this synchronization by forcing the
33248 estimated processor grouping on the compiler (as opposed to the function
33249 'pad_groups' which tries to force the scheduler's grouping on the processor).
33251 The function scans the insn sequence between PREV_HEAD_INSN and TAIL and
33252 examines the (estimated) dispatch groups that will be formed by the processor
33253 dispatcher. It marks these group boundaries to reflect the estimated
33254 processor grouping, overriding the grouping that the scheduler had marked.
33255 Depending on the value of the flag '-minsert-sched-nops' this function can
33256 force certain insns into separate groups or force a certain distance between
33257 them by inserting nops, for example, if there exists a "costly dependence"
33258 between the insns.
33260 The function estimates the group boundaries that the processor will form as
33261 follows: It keeps track of how many vacant issue slots are available after
33262 each insn. A subsequent insn will start a new group if one of the following
33263 4 cases applies:
33264 - no more vacant issue slots remain in the current dispatch group.
33265 - only the last issue slot, which is the branch slot, is vacant, but the next
33266 insn is not a branch.
33267 - only the last 2 or fewer issue slots, including the branch slot, are vacant,
33268 which means that a cracked insn (which occupies two issue slots) can't be
33269 issued in this group.
33270 - fewer than 'issue_rate' slots are vacant, and the next insn always needs to
33271 start a new group. */
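/* Example (illustrative): with issue_rate == 4, after three non-branch
   insns have been placed only the branch slot remains vacant, so a
   following add must start a new group (the second case above), while
   a following conditional branch may still join the current group.  */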
33273 static int
33274 redefine_groups (FILE *dump, int sched_verbose, rtx_insn *prev_head_insn,
33275 rtx_insn *tail)
33277 rtx_insn *insn, *next_insn;
33278 int issue_rate;
33279 int can_issue_more;
33280 int slot, i;
33281 bool group_end;
33282 int group_count = 0;
33283 rtx *group_insns;
33285 /* Initialize. */
33286 issue_rate = rs6000_issue_rate ();
33287 group_insns = XALLOCAVEC (rtx, issue_rate);
33288 for (i = 0; i < issue_rate; i++)
33290 group_insns[i] = 0;
33292 can_issue_more = issue_rate;
33293 slot = 0;
33294 insn = get_next_active_insn (prev_head_insn, tail);
33295 group_end = false;
33297 while (insn != NULL_RTX)
33299 slot = (issue_rate - can_issue_more);
33300 group_insns[slot] = insn;
33301 can_issue_more =
33302 rs6000_variable_issue (dump, sched_verbose, insn, can_issue_more);
33303 if (insn_terminates_group_p (insn, current_group))
33304 can_issue_more = 0;
33306 next_insn = get_next_active_insn (insn, tail);
33307 if (next_insn == NULL_RTX)
33308 return group_count + 1;
33310 /* Is next_insn going to start a new group? */
33311 group_end
33312 = (can_issue_more == 0
33313 || (can_issue_more == 1 && !is_branch_slot_insn (next_insn))
33314 || (can_issue_more <= 2 && is_cracked_insn (next_insn))
33315 || (can_issue_more < issue_rate &&
33316 insn_terminates_group_p (next_insn, previous_group)));
33318 can_issue_more = force_new_group (sched_verbose, dump, group_insns,
33319 next_insn, &group_end, can_issue_more,
33320 &group_count);
33322 if (group_end)
33324 group_count++;
33325 can_issue_more = 0;
33326 for (i = 0; i < issue_rate; i++)
33328 group_insns[i] = 0;
33332 if (GET_MODE (next_insn) == TImode && can_issue_more)
33333 PUT_MODE (next_insn, VOIDmode);
33334 else if (!can_issue_more && GET_MODE (next_insn) != TImode)
33335 PUT_MODE (next_insn, TImode);
33337 insn = next_insn;
33338 if (can_issue_more == 0)
33339 can_issue_more = issue_rate;
33340 } /* while */
33342 return group_count;
33345 /* Scan the insn sequence between PREV_HEAD_INSN and TAIL and examine the
33346 dispatch group boundaries that the scheduler had marked. Pad with nops
33347 any dispatch groups which have vacant issue slots, in order to force the
33348 scheduler's grouping on the processor dispatcher. The function
33349 returns the number of dispatch groups found. */
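/* Example (illustrative): if the scheduler marked a group boundary
   while three issue slots were still vacant and the next insn is not
   a branch, two nops are emitted; only the branch slot is then left
   vacant, so the hardware dispatcher must also end the group there.  */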
33351 static int
33352 pad_groups (FILE *dump, int sched_verbose, rtx_insn *prev_head_insn,
33353 rtx_insn *tail)
33355 rtx_insn *insn, *next_insn;
33356 rtx nop;
33357 int issue_rate;
33358 int can_issue_more;
33359 int group_end;
33360 int group_count = 0;
33362 /* Initialize issue_rate. */
33363 issue_rate = rs6000_issue_rate ();
33364 can_issue_more = issue_rate;
33366 insn = get_next_active_insn (prev_head_insn, tail);
33367 next_insn = get_next_active_insn (insn, tail);
33369 while (insn != NULL_RTX)
33371 can_issue_more =
33372 rs6000_variable_issue (dump, sched_verbose, insn, can_issue_more);
33374 group_end = (next_insn == NULL_RTX || GET_MODE (next_insn) == TImode);
33376 if (next_insn == NULL_RTX)
33377 break;
33379 if (group_end)
33381 /* If the scheduler had marked group termination at this location
33382 (between insn and next_insn), and neither insn nor next_insn will
33383 force group termination, pad the group with nops to force group
33384 termination. */
33385 if (can_issue_more
33386 && (rs6000_sched_insert_nops == sched_finish_pad_groups)
33387 && !insn_terminates_group_p (insn, current_group)
33388 && !insn_terminates_group_p (next_insn, previous_group))
33390 if (!is_branch_slot_insn (next_insn))
33391 can_issue_more--;
33393 while (can_issue_more)
33395 nop = gen_nop ();
33396 emit_insn_before (nop, next_insn);
33397 can_issue_more--;
33401 can_issue_more = issue_rate;
33402 group_count++;
33405 insn = next_insn;
33406 next_insn = get_next_active_insn (insn, tail);
33409 return group_count;
33412 /* We're beginning a new block. Initialize data structures as necessary. */
33414 static void
33415 rs6000_sched_init (FILE *dump ATTRIBUTE_UNUSED,
33416 int sched_verbose ATTRIBUTE_UNUSED,
33417 int max_ready ATTRIBUTE_UNUSED)
33419 last_scheduled_insn = NULL;
33420 load_store_pendulum = 0;
33421 divide_cnt = 0;
33422 vec_load_pendulum = 0;
33425 /* The following function is called at the end of scheduling a BB.
33426 After reload, it inserts nops to enforce the insn group bundling. */
33428 static void
33429 rs6000_sched_finish (FILE *dump, int sched_verbose)
33431 int n_groups;
33433 if (sched_verbose)
33434 fprintf (dump, "=== Finishing schedule.\n");
33436 if (reload_completed && rs6000_sched_groups)
33438 /* Do not run the sched_finish hook when selective scheduling is enabled. */
33439 if (sel_sched_p ())
33440 return;
33442 if (rs6000_sched_insert_nops == sched_finish_none)
33443 return;
33445 if (rs6000_sched_insert_nops == sched_finish_pad_groups)
33446 n_groups = pad_groups (dump, sched_verbose,
33447 current_sched_info->prev_head,
33448 current_sched_info->next_tail);
33449 else
33450 n_groups = redefine_groups (dump, sched_verbose,
33451 current_sched_info->prev_head,
33452 current_sched_info->next_tail);
33454 if (sched_verbose >= 6)
33456 fprintf (dump, "ngroups = %d\n", n_groups);
33457 print_rtl (dump, current_sched_info->prev_head);
33458 fprintf (dump, "Done finish_sched\n");
33463 struct rs6000_sched_context
33465 short cached_can_issue_more;
33466 rtx_insn *last_scheduled_insn;
33467 int load_store_pendulum;
33468 int divide_cnt;
33469 int vec_load_pendulum;
33472 typedef struct rs6000_sched_context rs6000_sched_context_def;
33473 typedef rs6000_sched_context_def *rs6000_sched_context_t;
33475 /* Allocate store for new scheduling context. */
33476 static void *
33477 rs6000_alloc_sched_context (void)
33479 return xmalloc (sizeof (rs6000_sched_context_def));
33482 /* If CLEAN_P is true, initialize _SC with clean data;
33483 otherwise initialize it from the global context. */
33484 static void
33485 rs6000_init_sched_context (void *_sc, bool clean_p)
33487 rs6000_sched_context_t sc = (rs6000_sched_context_t) _sc;
33489 if (clean_p)
33491 sc->cached_can_issue_more = 0;
33492 sc->last_scheduled_insn = NULL;
33493 sc->load_store_pendulum = 0;
33494 sc->divide_cnt = 0;
33495 sc->vec_load_pendulum = 0;
33497 else
33499 sc->cached_can_issue_more = cached_can_issue_more;
33500 sc->last_scheduled_insn = last_scheduled_insn;
33501 sc->load_store_pendulum = load_store_pendulum;
33502 sc->divide_cnt = divide_cnt;
33503 sc->vec_load_pendulum = vec_load_pendulum;
33507 /* Sets the global scheduling context to the one pointed to by _SC. */
33508 static void
33509 rs6000_set_sched_context (void *_sc)
33511 rs6000_sched_context_t sc = (rs6000_sched_context_t) _sc;
33513 gcc_assert (sc != NULL);
33515 cached_can_issue_more = sc->cached_can_issue_more;
33516 last_scheduled_insn = sc->last_scheduled_insn;
33517 load_store_pendulum = sc->load_store_pendulum;
33518 divide_cnt = sc->divide_cnt;
33519 vec_load_pendulum = sc->vec_load_pendulum;
33522 /* Free _SC. */
33523 static void
33524 rs6000_free_sched_context (void *_sc)
33526 gcc_assert (_sc != NULL);
33528 free (_sc);
33532 /* Length in units of the trampoline for entering a nested function. */
33534 int
33535 rs6000_trampoline_size (void)
33537 int ret = 0;
33539 switch (DEFAULT_ABI)
33541 default:
33542 gcc_unreachable ();
33544 case ABI_AIX:
33545 ret = (TARGET_32BIT) ? 12 : 24;
33546 break;
33548 case ABI_ELFv2:
33549 gcc_assert (!TARGET_32BIT);
33550 ret = 32;
33551 break;
33553 case ABI_DARWIN:
33554 case ABI_V4:
33555 ret = (TARGET_32BIT) ? 40 : 48;
33556 break;
33559 return ret;
33562 /* Emit RTL insns to initialize the variable parts of a trampoline.
33563 FNADDR is an RTX for the address of the function's pure code.
33564 CXT is an RTX for the static chain value for the function. */
33566 static void
33567 rs6000_trampoline_init (rtx m_tramp, tree fndecl, rtx cxt)
33569 int regsize = (TARGET_32BIT) ? 4 : 8;
33570 rtx fnaddr = XEXP (DECL_RTL (fndecl), 0);
33571 rtx ctx_reg = force_reg (Pmode, cxt);
33572 rtx addr = force_reg (Pmode, XEXP (m_tramp, 0));
33574 switch (DEFAULT_ABI)
33576 default:
33577 gcc_unreachable ();
33579 /* Under AIX, just build the 3-word function descriptor. */
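/* The descriptor stored into M_TRAMP below is laid out as (one word
   being 4 bytes for -m32 and 8 bytes for -m64):
       word 0: code entry address, copied from FNADDR's descriptor
       word 1: TOC pointer, copied from FNADDR's descriptor
       word 2: static chain value (CXT)  */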
33580 case ABI_AIX:
33582 rtx fnmem, fn_reg, toc_reg;
33584 if (!TARGET_POINTERS_TO_NESTED_FUNCTIONS)
33585 error ("You cannot take the address of a nested function if you use "
33586 "the -mno-pointers-to-nested-functions option.");
33588 fnmem = gen_const_mem (Pmode, force_reg (Pmode, fnaddr));
33589 fn_reg = gen_reg_rtx (Pmode);
33590 toc_reg = gen_reg_rtx (Pmode);
33592 /* Macro to shorten the code expansions below. */
33593 # define MEM_PLUS(MEM, OFFSET) adjust_address (MEM, Pmode, OFFSET)
33595 m_tramp = replace_equiv_address (m_tramp, addr);
33597 emit_move_insn (fn_reg, MEM_PLUS (fnmem, 0));
33598 emit_move_insn (toc_reg, MEM_PLUS (fnmem, regsize));
33599 emit_move_insn (MEM_PLUS (m_tramp, 0), fn_reg);
33600 emit_move_insn (MEM_PLUS (m_tramp, regsize), toc_reg);
33601 emit_move_insn (MEM_PLUS (m_tramp, 2*regsize), ctx_reg);
33603 # undef MEM_PLUS
33605 break;
33607 /* Under V.4/eabi/darwin, __trampoline_setup does the real work. */
33608 case ABI_ELFv2:
33609 case ABI_DARWIN:
33610 case ABI_V4:
33611 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__trampoline_setup"),
33612 LCT_NORMAL, VOIDmode, 4,
33613 addr, Pmode,
33614 GEN_INT (rs6000_trampoline_size ()), SImode,
33615 fnaddr, Pmode,
33616 ctx_reg, Pmode);
33617 break;
33622 /* Returns TRUE iff the target attribute indicated by ATTR_ID takes a plain
33623 identifier as an argument, so the front end shouldn't look it up. */
33625 static bool
33626 rs6000_attribute_takes_identifier_p (const_tree attr_id)
33628 return is_attribute_p ("altivec", attr_id);
33631 /* Handle the "altivec" attribute. The attribute may have
33632 arguments as follows:
33634 __attribute__((altivec(vector__)))
33635 __attribute__((altivec(pixel__))) (always followed by 'unsigned short')
33636 __attribute__((altivec(bool__))) (always followed by 'unsigned')
33638 and may appear more than once (e.g., 'vector bool char') in a
33639 given declaration. */
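/* For example (illustrative), with the AltiVec keywords enabled the
   declaration

       vector unsigned int v;

   reaches this handler as

       __attribute__((altivec(vector__))) unsigned int v;

   and the 'v' case below rewrites the type of 'v' to
   unsigned_V4SI_type_node.  */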
33641 static tree
33642 rs6000_handle_altivec_attribute (tree *node,
33643 tree name ATTRIBUTE_UNUSED,
33644 tree args,
33645 int flags ATTRIBUTE_UNUSED,
33646 bool *no_add_attrs)
33648 tree type = *node, result = NULL_TREE;
33649 machine_mode mode;
33650 int unsigned_p;
33651 char altivec_type
33652 = ((args && TREE_CODE (args) == TREE_LIST && TREE_VALUE (args)
33653 && TREE_CODE (TREE_VALUE (args)) == IDENTIFIER_NODE)
33654 ? *IDENTIFIER_POINTER (TREE_VALUE (args))
33655 : '?');
33657 while (POINTER_TYPE_P (type)
33658 || TREE_CODE (type) == FUNCTION_TYPE
33659 || TREE_CODE (type) == METHOD_TYPE
33660 || TREE_CODE (type) == ARRAY_TYPE)
33661 type = TREE_TYPE (type);
33663 mode = TYPE_MODE (type);
33665 /* Check for invalid AltiVec type qualifiers. */
33666 if (type == long_double_type_node)
33667 error ("use of %<long double%> in AltiVec types is invalid");
33668 else if (type == boolean_type_node)
33669 error ("use of boolean types in AltiVec types is invalid");
33670 else if (TREE_CODE (type) == COMPLEX_TYPE)
33671 error ("use of %<complex%> in AltiVec types is invalid");
33672 else if (DECIMAL_FLOAT_MODE_P (mode))
33673 error ("use of decimal floating point types in AltiVec types is invalid");
33674 else if (!TARGET_VSX)
33676 if (type == long_unsigned_type_node || type == long_integer_type_node)
33678 if (TARGET_64BIT)
33679 error ("use of %<long%> in AltiVec types is invalid for "
33680 "64-bit code without -mvsx");
33681 else if (rs6000_warn_altivec_long)
33682 warning (0, "use of %<long%> in AltiVec types is deprecated; "
33683 "use %<int%>");
33685 else if (type == long_long_unsigned_type_node
33686 || type == long_long_integer_type_node)
33687 error ("use of %<long long%> in AltiVec types is invalid without "
33688 "-mvsx");
33689 else if (type == double_type_node)
33690 error ("use of %<double%> in AltiVec types is invalid without -mvsx");
33693 switch (altivec_type)
33695 case 'v':
33696 unsigned_p = TYPE_UNSIGNED (type);
33697 switch (mode)
33699 case TImode:
33700 result = (unsigned_p ? unsigned_V1TI_type_node : V1TI_type_node);
33701 break;
33702 case DImode:
33703 result = (unsigned_p ? unsigned_V2DI_type_node : V2DI_type_node);
33704 break;
33705 case SImode:
33706 result = (unsigned_p ? unsigned_V4SI_type_node : V4SI_type_node);
33707 break;
33708 case HImode:
33709 result = (unsigned_p ? unsigned_V8HI_type_node : V8HI_type_node);
33710 break;
33711 case QImode:
33712 result = (unsigned_p ? unsigned_V16QI_type_node : V16QI_type_node);
33713 break;
33714 case SFmode: result = V4SF_type_node; break;
33715 case DFmode: result = V2DF_type_node; break;
33716 /* If the user says 'vector int bool', we may be handed the 'bool'
33717 attribute _before_ the 'vector' attribute, and so select the
33718 proper type in the 'b' case below. */
33719 case V4SImode: case V8HImode: case V16QImode: case V4SFmode:
33720 case V2DImode: case V2DFmode:
33721 result = type;
33722 default: break;
33724 break;
33725 case 'b':
33726 switch (mode)
33728 case DImode: case V2DImode: result = bool_V2DI_type_node; break;
33729 case SImode: case V4SImode: result = bool_V4SI_type_node; break;
33730 case HImode: case V8HImode: result = bool_V8HI_type_node; break;
33731 case QImode: case V16QImode: result = bool_V16QI_type_node;
33732 default: break;
33734 break;
33735 case 'p':
33736 switch (mode)
33738 case V8HImode: result = pixel_V8HI_type_node;
33739 default: break;
33741 default: break;
33744 /* Propagate qualifiers attached to the element type
33745 onto the vector type. */
33746 if (result && result != type && TYPE_QUALS (type))
33747 result = build_qualified_type (result, TYPE_QUALS (type));
33749 *no_add_attrs = true; /* No need to hang on to the attribute. */
33751 if (result)
33752 *node = lang_hooks.types.reconstruct_complex_type (*node, result);
33754 return NULL_TREE;
33757 /* AltiVec defines four built-in scalar types that serve as vector
33758 elements; we must teach the compiler how to mangle them. */
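/* For example (illustrative), a C++ function taking a __pixel
   parameter, say

       void f (__pixel);

   mangles as _Z1fu7__pixel, using the "u7__pixel" string returned
   below instead of the default mangling for unsigned short.  */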
33760 static const char *
33761 rs6000_mangle_type (const_tree type)
33763 type = TYPE_MAIN_VARIANT (type);
33765 if (TREE_CODE (type) != VOID_TYPE && TREE_CODE (type) != BOOLEAN_TYPE
33766 && TREE_CODE (type) != INTEGER_TYPE && TREE_CODE (type) != REAL_TYPE)
33767 return NULL;
33769 if (type == bool_char_type_node) return "U6__boolc";
33770 if (type == bool_short_type_node) return "U6__bools";
33771 if (type == pixel_type_node) return "u7__pixel";
33772 if (type == bool_int_type_node) return "U6__booli";
33773 if (type == bool_long_type_node) return "U6__booll";
33775 /* Use a unique name for __float128 rather than trying to use "e" or "g". Use
33776 "g" for IBM extended double, no matter whether it is long double (using
33777 -mabi=ibmlongdouble) or the distinct __ibm128 type. */
33778 if (TARGET_FLOAT128_TYPE)
33780 if (type == ieee128_float_type_node)
33781 return "U10__float128";
33783 if (type == ibm128_float_type_node)
33784 return "g";
33786 if (type == long_double_type_node && TARGET_LONG_DOUBLE_128)
33787 return (TARGET_IEEEQUAD) ? "U10__float128" : "g";
33790 /* Mangle IBM extended float long double as `g' (__float128) on
33791 powerpc*-linux where long-double-64 previously was the default. */
33792 if (TYPE_MAIN_VARIANT (type) == long_double_type_node
33793 && TARGET_ELF
33794 && TARGET_LONG_DOUBLE_128
33795 && !TARGET_IEEEQUAD)
33796 return "g";
33798 /* For all other types, use normal C++ mangling. */
33799 return NULL;
33802 /* Handle a "longcall" or "shortcall" attribute; arguments as in
33803 struct attribute_spec.handler. */
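/* Example usage (illustrative):

       void far_away (void) __attribute__ ((longcall));

   forces calls to far_away to go through a register, so that they can
   reach targets outside the range of a direct 'bl' instruction.  */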
33805 static tree
33806 rs6000_handle_longcall_attribute (tree *node, tree name,
33807 tree args ATTRIBUTE_UNUSED,
33808 int flags ATTRIBUTE_UNUSED,
33809 bool *no_add_attrs)
33811 if (TREE_CODE (*node) != FUNCTION_TYPE
33812 && TREE_CODE (*node) != FIELD_DECL
33813 && TREE_CODE (*node) != TYPE_DECL)
33815 warning (OPT_Wattributes, "%qE attribute only applies to functions",
33816 name);
33817 *no_add_attrs = true;
33820 return NULL_TREE;
33823 /* Set longcall attributes on all functions declared when
33824 rs6000_default_long_calls is true. */
33825 static void
33826 rs6000_set_default_type_attributes (tree type)
33828 if (rs6000_default_long_calls
33829 && (TREE_CODE (type) == FUNCTION_TYPE
33830 || TREE_CODE (type) == METHOD_TYPE))
33831 TYPE_ATTRIBUTES (type) = tree_cons (get_identifier ("longcall"),
33832 NULL_TREE,
33833 TYPE_ATTRIBUTES (type));
33835 #if TARGET_MACHO
33836 darwin_set_default_type_attributes (type);
33837 #endif
33840 /* Return a reference suitable for calling a function with the
33841 longcall attribute. */
33843 rtx
33844 rs6000_longcall_ref (rtx call_ref)
33846 const char *call_name;
33847 tree node;
33849 if (GET_CODE (call_ref) != SYMBOL_REF)
33850 return call_ref;
33852 /* System V adds '.' to the internal name, so skip them. */
33853 call_name = XSTR (call_ref, 0);
33854 if (*call_name == '.')
33856 while (*call_name == '.')
33857 call_name++;
33859 node = get_identifier (call_name);
33860 call_ref = gen_rtx_SYMBOL_REF (VOIDmode, IDENTIFIER_POINTER (node));
33863 return force_reg (Pmode, call_ref);
33866 #ifndef TARGET_USE_MS_BITFIELD_LAYOUT
33867 #define TARGET_USE_MS_BITFIELD_LAYOUT 0
33868 #endif
33870 /* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
33871 struct attribute_spec.handler. */
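/* Example usage (illustrative):

       struct s { int a : 3; int b : 2; } __attribute__ ((ms_struct));

   selects the Microsoft bitfield layout rules for 's' instead of the
   default GCC rules.  */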
33872 static tree
33873 rs6000_handle_struct_attribute (tree *node, tree name,
33874 tree args ATTRIBUTE_UNUSED,
33875 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
33877 tree *type = NULL;
33878 if (DECL_P (*node))
33880 if (TREE_CODE (*node) == TYPE_DECL)
33881 type = &TREE_TYPE (*node);
33883 else
33884 type = node;
33886 if (!(type && (TREE_CODE (*type) == RECORD_TYPE
33887 || TREE_CODE (*type) == UNION_TYPE)))
33889 warning (OPT_Wattributes, "%qE attribute ignored", name);
33890 *no_add_attrs = true;
33893 else if ((is_attribute_p ("ms_struct", name)
33894 && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type)))
33895 || ((is_attribute_p ("gcc_struct", name)
33896 && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type)))))
33898 warning (OPT_Wattributes, "%qE incompatible attribute ignored",
33899 name);
33900 *no_add_attrs = true;
33903 return NULL_TREE;
33906 static bool
33907 rs6000_ms_bitfield_layout_p (const_tree record_type)
33909 return (TARGET_USE_MS_BITFIELD_LAYOUT &&
33910 !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
33911 || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type));
33914 #ifdef USING_ELFOS_H
33916 /* A get_unnamed_section callback, used for switching to toc_section. */
33918 static void
33919 rs6000_elf_output_toc_section_asm_op (const void *data ATTRIBUTE_UNUSED)
33921 if ((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
33922 && TARGET_MINIMAL_TOC)
33924 if (!toc_initialized)
33926 fprintf (asm_out_file, "%s\n", TOC_SECTION_ASM_OP);
33927 ASM_OUTPUT_ALIGN (asm_out_file, TARGET_64BIT ? 3 : 2);
33928 (*targetm.asm_out.internal_label) (asm_out_file, "LCTOC", 0);
33929 fprintf (asm_out_file, "\t.tc ");
33930 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (asm_out_file, "LCTOC1[TC],");
33931 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (asm_out_file, "LCTOC1");
33932 fprintf (asm_out_file, "\n");
33934 fprintf (asm_out_file, "%s\n", MINIMAL_TOC_SECTION_ASM_OP);
33935 ASM_OUTPUT_ALIGN (asm_out_file, TARGET_64BIT ? 3 : 2);
33936 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (asm_out_file, "LCTOC1");
33937 fprintf (asm_out_file, " = .+32768\n");
33938 toc_initialized = 1;
33940 else
33941 fprintf (asm_out_file, "%s\n", MINIMAL_TOC_SECTION_ASM_OP);
33943 else if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
33945 fprintf (asm_out_file, "%s\n", TOC_SECTION_ASM_OP);
33946 if (!toc_initialized)
33948 ASM_OUTPUT_ALIGN (asm_out_file, TARGET_64BIT ? 3 : 2);
33949 toc_initialized = 1;
33952 else
33954 fprintf (asm_out_file, "%s\n", MINIMAL_TOC_SECTION_ASM_OP);
33955 if (!toc_initialized)
33957 ASM_OUTPUT_ALIGN (asm_out_file, TARGET_64BIT ? 3 : 2);
33958 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (asm_out_file, "LCTOC1");
33959 fprintf (asm_out_file, " = .+32768\n");
33960 toc_initialized = 1;
33965 /* Implement TARGET_ASM_INIT_SECTIONS. */
33967 static void
33968 rs6000_elf_asm_init_sections (void)
33970 toc_section
33971 = get_unnamed_section (0, rs6000_elf_output_toc_section_asm_op, NULL);
33973 sdata2_section
33974 = get_unnamed_section (SECTION_WRITE, output_section_asm_op,
33975 SDATA2_SECTION_ASM_OP);
33978 /* Implement TARGET_SELECT_RTX_SECTION. */
33980 static section *
33981 rs6000_elf_select_rtx_section (machine_mode mode, rtx x,
33982 unsigned HOST_WIDE_INT align)
33984 if (ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (x, mode))
33985 return toc_section;
33986 else
33987 return default_elf_select_rtx_section (mode, x, align);
33990 /* For a SYMBOL_REF, set generic flags and then perform some
33991 target-specific processing.
33993 When the AIX ABI is requested on a non-AIX system, replace the
33994 function name with the real name (with a leading .) rather than the
33995 function descriptor name. This saves a lot of overriding code to
33996 read the prefixes. */
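/* For example (illustrative), when the AIX ABI is used on an ELF
   system, the SYMBOL_REF for a function "foo" is rewritten here to
   ".foo", the name of its code entry point.  */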
33998 static void rs6000_elf_encode_section_info (tree, rtx, int) ATTRIBUTE_UNUSED;
33999 static void
34000 rs6000_elf_encode_section_info (tree decl, rtx rtl, int first)
34002 default_encode_section_info (decl, rtl, first);
34004 if (first
34005 && TREE_CODE (decl) == FUNCTION_DECL
34006 && !TARGET_AIX
34007 && DEFAULT_ABI == ABI_AIX)
34009 rtx sym_ref = XEXP (rtl, 0);
34010 size_t len = strlen (XSTR (sym_ref, 0));
34011 char *str = XALLOCAVEC (char, len + 2);
34012 str[0] = '.';
34013 memcpy (str + 1, XSTR (sym_ref, 0), len + 1);
34014 XSTR (sym_ref, 0) = ggc_alloc_string (str, len + 1);
34018 static inline bool
34019 compare_section_name (const char *section, const char *templ)
34021 int len;
34023 len = strlen (templ);
34024 return (strncmp (section, templ, len) == 0
34025 && (section[len] == 0 || section[len] == '.'));
34028 bool
34029 rs6000_elf_in_small_data_p (const_tree decl)
34031 if (rs6000_sdata == SDATA_NONE)
34032 return false;
34034 /* We want to merge strings, so we never consider them small data. */
34035 if (TREE_CODE (decl) == STRING_CST)
34036 return false;
34038 /* Functions are never in the small data area. */
34039 if (TREE_CODE (decl) == FUNCTION_DECL)
34040 return false;
34042 if (TREE_CODE (decl) == VAR_DECL && DECL_SECTION_NAME (decl))
34044 const char *section = DECL_SECTION_NAME (decl);
34045 if (compare_section_name (section, ".sdata")
34046 || compare_section_name (section, ".sdata2")
34047 || compare_section_name (section, ".gnu.linkonce.s")
34048 || compare_section_name (section, ".sbss")
34049 || compare_section_name (section, ".sbss2")
34050 || compare_section_name (section, ".gnu.linkonce.sb")
34051 || strcmp (section, ".PPC.EMB.sdata0") == 0
34052 || strcmp (section, ".PPC.EMB.sbss0") == 0)
34053 return true;
34055 else
34057 HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (decl));
34059 if (size > 0
34060 && size <= g_switch_value
34061 /* If it's not public, and we're not going to reference it there,
34062 there's no need to put it in the small data section. */
34063 && (rs6000_sdata != SDATA_DATA || TREE_PUBLIC (decl)))
34064 return true;
34067 return false;
34070 #endif /* USING_ELFOS_H */
34072 /* Implement TARGET_USE_BLOCKS_FOR_CONSTANT_P. */
34074 static bool
34075 rs6000_use_blocks_for_constant_p (machine_mode mode, const_rtx x)
34077 return !ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (x, mode);
34080 /* Do not place thread-local symbol refs in the object blocks. */
34082 static bool
34083 rs6000_use_blocks_for_decl_p (const_tree decl)
34085 return !DECL_THREAD_LOCAL_P (decl);
34088 /* Return a REG that occurs in ADDR with coefficient 1.
34089 ADDR can be effectively incremented by incrementing REG.
34091 r0 is special and we must not select it as an address
34092 register by this routine since our caller will try to
34093 increment the returned register via an "la" instruction. */
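/* Example (illustrative): for ADDR == (plus (reg r9) (const_int 16))
   this returns r9, which the caller can then bump with an "la"
   instruction such as "la r9,16(r9)".  */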
34095 rtx
34096 find_addr_reg (rtx addr)
34098 while (GET_CODE (addr) == PLUS)
34100 if (GET_CODE (XEXP (addr, 0)) == REG
34101 && REGNO (XEXP (addr, 0)) != 0)
34102 addr = XEXP (addr, 0);
34103 else if (GET_CODE (XEXP (addr, 1)) == REG
34104 && REGNO (XEXP (addr, 1)) != 0)
34105 addr = XEXP (addr, 1);
34106 else if (CONSTANT_P (XEXP (addr, 0)))
34107 addr = XEXP (addr, 1);
34108 else if (CONSTANT_P (XEXP (addr, 1)))
34109 addr = XEXP (addr, 0);
34110 else
34111 gcc_unreachable ();
34113 gcc_assert (GET_CODE (addr) == REG && REGNO (addr) != 0);
34114 return addr;
34117 void
34118 rs6000_fatal_bad_address (rtx op)
34120 fatal_insn ("bad address", op);
34123 #if TARGET_MACHO
34125 typedef struct branch_island_d {
34126 tree function_name;
34127 tree label_name;
34128 int line_number;
34129 } branch_island;
34132 static vec<branch_island, va_gc> *branch_islands;
34134 /* Remember to generate a branch island for far calls to the given
34135 function. */
34137 static void
34138 add_compiler_branch_island (tree label_name, tree function_name,
34139 int line_number)
34141 branch_island bi = {function_name, label_name, line_number};
34142 vec_safe_push (branch_islands, bi);
34145 /* Generate far-jump branch islands for everything recorded in
34146 branch_islands. Invoked immediately after the last instruction of
34147 the epilogue has been emitted; the branch islands must be appended
34148 to, and contiguous with, the function body. Mach-O stubs are
34149 generated in machopic_output_stub(). */
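/* A non-PIC island for a function "bar" looks roughly like this
   (illustrative):

       L42:    lis r12,hi16(_bar)
               ori r12,r12,lo16(_bar)
               mtctr r12
               bctr  */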
34151 static void
34152 macho_branch_islands (void)
34154 char tmp_buf[512];
34156 while (!vec_safe_is_empty (branch_islands))
34158 branch_island *bi = &branch_islands->last ();
34159 const char *label = IDENTIFIER_POINTER (bi->label_name);
34160 const char *name = IDENTIFIER_POINTER (bi->function_name);
34161 char name_buf[512];
34162 /* Cheap copy of the details from the Darwin ASM_OUTPUT_LABELREF(). */
34163 if (name[0] == '*' || name[0] == '&')
34164 strcpy (name_buf, name+1);
34165 else
34167 name_buf[0] = '_';
34168 strcpy (name_buf+1, name);
34170 strcpy (tmp_buf, "\n");
34171 strcat (tmp_buf, label);
34172 #if defined (DBX_DEBUGGING_INFO) || defined (XCOFF_DEBUGGING_INFO)
34173 if (write_symbols == DBX_DEBUG || write_symbols == XCOFF_DEBUG)
34174 dbxout_stabd (N_SLINE, bi->line_number);
34175 #endif /* DBX_DEBUGGING_INFO || XCOFF_DEBUGGING_INFO */
34176 if (flag_pic)
34178 if (TARGET_LINK_STACK)
34180 char name[32];
34181 get_ppc476_thunk_name (name);
34182 strcat (tmp_buf, ":\n\tmflr r0\n\tbl ");
34183 strcat (tmp_buf, name);
34184 strcat (tmp_buf, "\n");
34185 strcat (tmp_buf, label);
34186 strcat (tmp_buf, "_pic:\n\tmflr r11\n");
34188 else
34190 strcat (tmp_buf, ":\n\tmflr r0\n\tbcl 20,31,");
34191 strcat (tmp_buf, label);
34192 strcat (tmp_buf, "_pic\n");
34193 strcat (tmp_buf, label);
34194 strcat (tmp_buf, "_pic:\n\tmflr r11\n");
34197 strcat (tmp_buf, "\taddis r11,r11,ha16(");
34198 strcat (tmp_buf, name_buf);
34199 strcat (tmp_buf, " - ");
34200 strcat (tmp_buf, label);
34201 strcat (tmp_buf, "_pic)\n");
34203 strcat (tmp_buf, "\tmtlr r0\n");
34205 strcat (tmp_buf, "\taddi r12,r11,lo16(");
34206 strcat (tmp_buf, name_buf);
34207 strcat (tmp_buf, " - ");
34208 strcat (tmp_buf, label);
34209 strcat (tmp_buf, "_pic)\n");
34211 strcat (tmp_buf, "\tmtctr r12\n\tbctr\n");
34213 else
34215 strcat (tmp_buf, ":\nlis r12,hi16(");
34216 strcat (tmp_buf, name_buf);
34217 strcat (tmp_buf, ")\n\tori r12,r12,lo16(");
34218 strcat (tmp_buf, name_buf);
34219 strcat (tmp_buf, ")\n\tmtctr r12\n\tbctr");
34221 output_asm_insn (tmp_buf, 0);
34222 #if defined (DBX_DEBUGGING_INFO) || defined (XCOFF_DEBUGGING_INFO)
34223 if (write_symbols == DBX_DEBUG || write_symbols == XCOFF_DEBUG)
34224 dbxout_stabd (N_SLINE, bi->line_number);
34225 #endif /* DBX_DEBUGGING_INFO || XCOFF_DEBUGGING_INFO */
34226 branch_islands->pop ();
34230 /* NO_PREVIOUS_DEF checks whether the function name is already in the
34231 list of recorded branch islands. */
34233 static int
34234 no_previous_def (tree function_name)
34236 branch_island *bi;
34237 unsigned ix;
34239 FOR_EACH_VEC_SAFE_ELT (branch_islands, ix, bi)
34240 if (function_name == bi->function_name)
34241 return 0;
34242 return 1;
34245 /* GET_PREV_LABEL gets the label name from the previous definition of
34246 the function. */
34248 static tree
34249 get_prev_label (tree function_name)
34251 branch_island *bi;
34252 unsigned ix;
34254 FOR_EACH_VEC_SAFE_ELT (branch_islands, ix, bi)
34255 if (function_name == bi->function_name)
34256 return bi->label_name;
34257 return NULL_TREE;
34260 /* INSN is either a function call or a millicode call. It may have an
34261 unconditional jump in its delay slot.
34263 CALL_DEST is the routine we are calling. */
34265 char *
34266 output_call (rtx_insn *insn, rtx *operands, int dest_operand_number,
34267 int cookie_operand_number)
34269 static char buf[256];
34270 if (darwin_emit_branch_islands
34271 && GET_CODE (operands[dest_operand_number]) == SYMBOL_REF
34272 && (INTVAL (operands[cookie_operand_number]) & CALL_LONG))
34274 tree labelname;
34275 tree funname = get_identifier (XSTR (operands[dest_operand_number], 0));
34277 if (no_previous_def (funname))
34279 rtx label_rtx = gen_label_rtx ();
34280 char *label_buf, temp_buf[256];
34281 ASM_GENERATE_INTERNAL_LABEL (temp_buf, "L",
34282 CODE_LABEL_NUMBER (label_rtx));
34283 label_buf = temp_buf[0] == '*' ? temp_buf + 1 : temp_buf;
34284 labelname = get_identifier (label_buf);
34285 add_compiler_branch_island (labelname, funname, insn_line (insn));
34287 else
34288 labelname = get_prev_label (funname);
34290 /* "jbsr foo, L42" is Mach-O for "Link as 'bl foo' if a 'bl'
34291 instruction will reach 'foo', otherwise link as 'bl L42'".
34292 "L42" should be a 'branch island', that will do a far jump to
34293 'foo'. Branch islands are generated in
34294 macho_branch_islands(). */
34295 sprintf (buf, "jbsr %%z%d,%.246s",
34296 dest_operand_number, IDENTIFIER_POINTER (labelname));
34298 else
34299 sprintf (buf, "bl %%z%d", dest_operand_number);
34300 return buf;
34303 /* Generate PIC and indirect symbol stubs. */
34305 void
34306 machopic_output_stub (FILE *file, const char *symb, const char *stub)
34308 unsigned int length;
34309 char *symbol_name, *lazy_ptr_name;
34310 char *local_label_0;
34311 static int label = 0;
34313 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
34314 symb = (*targetm.strip_name_encoding) (symb);
34317 length = strlen (symb);
34318 symbol_name = XALLOCAVEC (char, length + 32);
34319 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);
34321 lazy_ptr_name = XALLOCAVEC (char, length + 32);
34322 GEN_LAZY_PTR_NAME_FOR_SYMBOL (lazy_ptr_name, symb, length);
34324 if (flag_pic == 2)
34325 switch_to_section (darwin_sections[machopic_picsymbol_stub1_section]);
34326 else
34327 switch_to_section (darwin_sections[machopic_symbol_stub1_section]);
34329 if (flag_pic == 2)
34331 fprintf (file, "\t.align 5\n");
34333 fprintf (file, "%s:\n", stub);
34334 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
34336 label++;
34337 local_label_0 = XALLOCAVEC (char, sizeof ("\"L00000000000$spb\""));
34338 sprintf (local_label_0, "\"L%011d$spb\"", label);
34340 fprintf (file, "\tmflr r0\n");
34341 if (TARGET_LINK_STACK)
34343 char name[32];
34344 get_ppc476_thunk_name (name);
34345 fprintf (file, "\tbl %s\n", name);
34346 fprintf (file, "%s:\n\tmflr r11\n", local_label_0);
34348 else
34350 fprintf (file, "\tbcl 20,31,%s\n", local_label_0);
34351 fprintf (file, "%s:\n\tmflr r11\n", local_label_0);
34353 fprintf (file, "\taddis r11,r11,ha16(%s-%s)\n",
34354 lazy_ptr_name, local_label_0);
34355 fprintf (file, "\tmtlr r0\n");
34356 fprintf (file, "\t%s r12,lo16(%s-%s)(r11)\n",
34357 (TARGET_64BIT ? "ldu" : "lwzu"),
34358 lazy_ptr_name, local_label_0);
34359 fprintf (file, "\tmtctr r12\n");
34360 fprintf (file, "\tbctr\n");
34362 else
34364 fprintf (file, "\t.align 4\n");
34366 fprintf (file, "%s:\n", stub);
34367 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
34369 fprintf (file, "\tlis r11,ha16(%s)\n", lazy_ptr_name);
34370 fprintf (file, "\t%s r12,lo16(%s)(r11)\n",
34371 (TARGET_64BIT ? "ldu" : "lwzu"),
34372 lazy_ptr_name);
34373 fprintf (file, "\tmtctr r12\n");
34374 fprintf (file, "\tbctr\n");
34377 switch_to_section (darwin_sections[machopic_lazy_symbol_ptr_section]);
34378 fprintf (file, "%s:\n", lazy_ptr_name);
34379 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
34380 fprintf (file, "%sdyld_stub_binding_helper\n",
34381 (TARGET_64BIT ? DOUBLE_INT_ASM_OP : "\t.long\t"));
34384 /* Legitimize PIC addresses. If the address is already
34385 position-independent, we return ORIG. Newly generated
34386 position-independent addresses go into a reg. This is REG if nonzero;
34387 otherwise we allocate register(s) as necessary. */
34389 #define SMALL_INT(X) ((UINTVAL (X) + 0x8000) < 0x10000)
34391 rtx
34392 rs6000_machopic_legitimize_pic_address (rtx orig, machine_mode mode,
34393 rtx reg)
34395 rtx base, offset;
34397 if (reg == NULL && ! reload_in_progress && ! reload_completed)
34398 reg = gen_reg_rtx (Pmode);
34400 if (GET_CODE (orig) == CONST)
34402 rtx reg_temp;
34404 if (GET_CODE (XEXP (orig, 0)) == PLUS
34405 && XEXP (XEXP (orig, 0), 0) == pic_offset_table_rtx)
34406 return orig;
34408 gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS);
34410 /* Use a different reg for the intermediate value, as
34411 it will be marked UNCHANGING. */
34412 reg_temp = !can_create_pseudo_p () ? reg : gen_reg_rtx (Pmode);
34413 base = rs6000_machopic_legitimize_pic_address (XEXP (XEXP (orig, 0), 0),
34414 Pmode, reg_temp);
34415 offset =
34416 rs6000_machopic_legitimize_pic_address (XEXP (XEXP (orig, 0), 1),
34417 Pmode, reg);
34419 if (GET_CODE (offset) == CONST_INT)
34421 if (SMALL_INT (offset))
34422 return plus_constant (Pmode, base, INTVAL (offset));
34423 else if (! reload_in_progress && ! reload_completed)
34424 offset = force_reg (Pmode, offset);
34425 else
34427 rtx mem = force_const_mem (Pmode, orig);
34428 return machopic_legitimize_pic_address (mem, Pmode, reg);
34431 return gen_rtx_PLUS (Pmode, base, offset);
34434 /* Fall back on generic machopic code. */
34435 return machopic_legitimize_pic_address (orig, mode, reg);
34438 /* Output a .machine directive for the Darwin assembler, and call
34439 the generic start_file routine. */
34441 static void
34442 rs6000_darwin_file_start (void)
34444 static const struct
34446 const char *arg;
34447 const char *name;
34448 HOST_WIDE_INT if_set;
34449 } mapping[] = {
34450 { "ppc64", "ppc64", MASK_64BIT },
34451 { "970", "ppc970", MASK_PPC_GPOPT | MASK_MFCRF | MASK_POWERPC64 },
34452 { "power4", "ppc970", 0 },
34453 { "G5", "ppc970", 0 },
34454 { "7450", "ppc7450", 0 },
34455 { "7400", "ppc7400", MASK_ALTIVEC },
34456 { "G4", "ppc7400", 0 },
34457 { "750", "ppc750", 0 },
34458 { "740", "ppc750", 0 },
34459 { "G3", "ppc750", 0 },
34460 { "604e", "ppc604e", 0 },
34461 { "604", "ppc604", 0 },
34462 { "603e", "ppc603", 0 },
34463 { "603", "ppc603", 0 },
34464 { "601", "ppc601", 0 },
34465 { NULL, "ppc", 0 } };
34466 const char *cpu_id = "";
34467 size_t i;
34469 rs6000_file_start ();
34470 darwin_file_start ();
34472 /* Determine the argument to -mcpu=. Default to G3 if not specified. */
34474 if (rs6000_default_cpu != 0 && rs6000_default_cpu[0] != '\0')
34475 cpu_id = rs6000_default_cpu;
34477 if (global_options_set.x_rs6000_cpu_index)
34478 cpu_id = processor_target_table[rs6000_cpu_index].name;
34480 /* Look through the mapping array. Pick the first name that either
34481 matches the argument, has a bit set in IF_SET that is also set
34482 in the target flags, or has a NULL name. */
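/* For example (illustrative), -mcpu=G4 matches the "G4" entry above
   and emits ".machine ppc7400"; an unrecognized CPU whose target
   flags match none of the IF_SET masks falls through to the final
   NULL entry and emits ".machine ppc".  */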
34484 i = 0;
34485 while (mapping[i].arg != NULL
34486 && strcmp (mapping[i].arg, cpu_id) != 0
34487 && (mapping[i].if_set & rs6000_isa_flags) == 0)
34488 i++;
34490 fprintf (asm_out_file, "\t.machine %s\n", mapping[i].name);
34493 #endif /* TARGET_MACHO */
34495 #if TARGET_ELF
34496 static int
34497 rs6000_elf_reloc_rw_mask (void)
34499 if (flag_pic)
34500 return 3;
34501 else if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
34502 return 2;
34503 else
34504 return 0;
34507 /* Record an element in the table of global constructors. SYMBOL is
34508 a SYMBOL_REF of the function to be called; PRIORITY is a number
34509 between 0 and MAX_INIT_PRIORITY.
34511 This differs from default_named_section_asm_out_constructor in
34512 that we have special handling for -mrelocatable. */
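/* Example (illustrative): with MAX_INIT_PRIORITY == 65535, a
   constructor of priority 100 is placed in section ".ctors.65435"
   (65535 - 100); given the linker's increasing sort and right-to-left
   execution, this makes lower-numbered priorities run first.  */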
34514 static void rs6000_elf_asm_out_constructor (rtx, int) ATTRIBUTE_UNUSED;
34515 static void
34516 rs6000_elf_asm_out_constructor (rtx symbol, int priority)
34518 const char *section = ".ctors";
34519 char buf[18];
34521 if (priority != DEFAULT_INIT_PRIORITY)
34523 sprintf (buf, ".ctors.%.5u",
34524 /* Invert the numbering so the linker puts us in the proper
34525 order; constructors are run from right to left, and the
34526 linker sorts in increasing order. */
34527 MAX_INIT_PRIORITY - priority);
34528 section = buf;
34531 switch_to_section (get_section (section, SECTION_WRITE, NULL));
34532 assemble_align (POINTER_SIZE);
34534 if (DEFAULT_ABI == ABI_V4
34535 && (TARGET_RELOCATABLE || flag_pic > 1))
34537 fputs ("\t.long (", asm_out_file);
34538 output_addr_const (asm_out_file, symbol);
34539 fputs (")@fixup\n", asm_out_file);
34541 else
34542 assemble_integer (symbol, POINTER_SIZE / BITS_PER_UNIT, POINTER_SIZE, 1);
34545 static void rs6000_elf_asm_out_destructor (rtx, int) ATTRIBUTE_UNUSED;
34546 static void
34547 rs6000_elf_asm_out_destructor (rtx symbol, int priority)
34549 const char *section = ".dtors";
34550 char buf[18];
34552 if (priority != DEFAULT_INIT_PRIORITY)
34554 sprintf (buf, ".dtors.%.5u",
34555 /* Invert the numbering so the linker puts us in the proper
34556 order; constructors are run from right to left, and the
34557 linker sorts in increasing order. */
34558 MAX_INIT_PRIORITY - priority);
34559 section = buf;
34562 switch_to_section (get_section (section, SECTION_WRITE, NULL));
34563 assemble_align (POINTER_SIZE);
34565 if (DEFAULT_ABI == ABI_V4
34566 && (TARGET_RELOCATABLE || flag_pic > 1))
34568 fputs ("\t.long (", asm_out_file);
34569 output_addr_const (asm_out_file, symbol);
34570 fputs (")@fixup\n", asm_out_file);
34572 else
34573 assemble_integer (symbol, POINTER_SIZE / BITS_PER_UNIT, POINTER_SIZE, 1);
34576 void
34577 rs6000_elf_declare_function_name (FILE *file, const char *name, tree decl)
34579 if (TARGET_64BIT && DEFAULT_ABI != ABI_ELFv2)
34581 fputs ("\t.section\t\".opd\",\"aw\"\n\t.align 3\n", file);
34582 ASM_OUTPUT_LABEL (file, name);
34583 fputs (DOUBLE_INT_ASM_OP, file);
34584 rs6000_output_function_entry (file, name);
34585 fputs (",.TOC.@tocbase,0\n\t.previous\n", file);
34586 if (DOT_SYMBOLS)
34588 fputs ("\t.size\t", file);
34589 assemble_name (file, name);
34590 fputs (",24\n\t.type\t.", file);
34591 assemble_name (file, name);
34592 fputs (",@function\n", file);
34593 if (TREE_PUBLIC (decl) && ! DECL_WEAK (decl))
34595 fputs ("\t.globl\t.", file);
34596 assemble_name (file, name);
34597 putc ('\n', file);
34600 else
34601 ASM_OUTPUT_TYPE_DIRECTIVE (file, name, "function");
34602 ASM_DECLARE_RESULT (file, DECL_RESULT (decl));
34603 rs6000_output_function_entry (file, name);
34604 fputs (":\n", file);
34605 return;
34608 if (DEFAULT_ABI == ABI_V4
34609 && (TARGET_RELOCATABLE || flag_pic > 1)
34610 && !TARGET_SECURE_PLT
34611 && (get_pool_size () != 0 || crtl->profile)
34612 && uses_TOC ())
34614 char buf[256];
34616 (*targetm.asm_out.internal_label) (file, "LCL", rs6000_pic_labelno);
34618 fprintf (file, "\t.long ");
34619 assemble_name (file, toc_label_name);
34620 need_toc_init = 1;
34621 putc ('-', file);
34622 ASM_GENERATE_INTERNAL_LABEL (buf, "LCF", rs6000_pic_labelno);
34623 assemble_name (file, buf);
34624 putc ('\n', file);
34627 ASM_OUTPUT_TYPE_DIRECTIVE (file, name, "function");
34628 ASM_DECLARE_RESULT (file, DECL_RESULT (decl));
34630 if (TARGET_CMODEL == CMODEL_LARGE && rs6000_global_entry_point_needed_p ())
34632 char buf[256];
34634 (*targetm.asm_out.internal_label) (file, "LCL", rs6000_pic_labelno);
34636 fprintf (file, "\t.quad .TOC.-");
34637 ASM_GENERATE_INTERNAL_LABEL (buf, "LCF", rs6000_pic_labelno);
34638 assemble_name (file, buf);
34639 putc ('\n', file);
34642 if (DEFAULT_ABI == ABI_AIX)
34644 const char *desc_name, *orig_name;
34646 orig_name = (*targetm.strip_name_encoding) (name);
34647 desc_name = orig_name;
34648 while (*desc_name == '.')
34649 desc_name++;
34651 if (TREE_PUBLIC (decl))
34652 fprintf (file, "\t.globl %s\n", desc_name);
34654 fprintf (file, "%s\n", MINIMAL_TOC_SECTION_ASM_OP);
34655 fprintf (file, "%s:\n", desc_name);
34656 fprintf (file, "\t.long %s\n", orig_name);
34657 fputs ("\t.long _GLOBAL_OFFSET_TABLE_\n", file);
34658 fputs ("\t.long 0\n", file);
34659 fprintf (file, "\t.previous\n");
34661 ASM_OUTPUT_LABEL (file, name);
34664 static void rs6000_elf_file_end (void) ATTRIBUTE_UNUSED;
34665 static void
34666 rs6000_elf_file_end (void)
34668 #ifdef HAVE_AS_GNU_ATTRIBUTE
34669 /* ??? The value emitted depends on options active at file end.
34670 Assume anyone using #pragma or attributes that might change
34671 options knows what they are doing. */
34672 if ((TARGET_64BIT || DEFAULT_ABI == ABI_V4)
34673 && rs6000_passes_float)
34675 int fp;
34677 if (TARGET_DF_FPR | TARGET_DF_SPE)
34678 fp = 1;
34679 else if (TARGET_SF_FPR | TARGET_SF_SPE)
34680 fp = 3;
34681 else
34682 fp = 2;
34683 if (rs6000_passes_long_double)
34685 if (!TARGET_LONG_DOUBLE_128)
34686 fp |= 2 * 4;
34687 else if (TARGET_IEEEQUAD)
34688 fp |= 3 * 4;
34689 else
34690 fp |= 1 * 4;
34692 fprintf (asm_out_file, "\t.gnu_attribute 4, %d\n", fp);
34694 if (TARGET_32BIT && DEFAULT_ABI == ABI_V4)
34696 if (rs6000_passes_vector)
34697 fprintf (asm_out_file, "\t.gnu_attribute 8, %d\n",
34698 (TARGET_ALTIVEC_ABI ? 2
34699 : TARGET_SPE_ABI ? 3
34700 : 1));
34701 if (rs6000_returns_struct)
34702 fprintf (asm_out_file, "\t.gnu_attribute 12, %d\n",
34703 aix_struct_return ? 2 : 1);
34705 #endif
34706 #if defined (POWERPC_LINUX) || defined (POWERPC_FREEBSD)
34707 if (TARGET_32BIT || DEFAULT_ABI == ABI_ELFv2)
34708 file_end_indicate_exec_stack ();
34709 #endif
34711 if (flag_split_stack)
34712 file_end_indicate_split_stack ();
34714 if (cpu_builtin_p)
34716 /* We have expanded a CPU builtin, so we need to emit a reference to
34717 the special symbol that libc uses to declare that it supports the
34718 AT_PLATFORM and AT_HWCAP/AT_HWCAP2 features in the TCB. */
34719 switch_to_section (data_section);
34720 fprintf (asm_out_file, "\t.align %u\n", TARGET_32BIT ? 2 : 3);
34721 fprintf (asm_out_file, "\t%s %s\n",
34722 TARGET_32BIT ? ".long" : ".quad", tcb_verification_symbol);
34725 #endif
34727 #if TARGET_XCOFF
34729 #ifndef HAVE_XCOFF_DWARF_EXTRAS
34730 #define HAVE_XCOFF_DWARF_EXTRAS 0
34731 #endif
34733 static enum unwind_info_type
34734 rs6000_xcoff_debug_unwind_info (void)
34736 return UI_NONE;
34739 static void
34740 rs6000_xcoff_asm_output_anchor (rtx symbol)
34742 char buffer[100];
34744 sprintf (buffer, "$ + " HOST_WIDE_INT_PRINT_DEC,
34745 SYMBOL_REF_BLOCK_OFFSET (symbol));
34746 fprintf (asm_out_file, "%s", SET_ASM_OP);
34747 RS6000_OUTPUT_BASENAME (asm_out_file, XSTR (symbol, 0));
34748 fprintf (asm_out_file, ",");
34749 RS6000_OUTPUT_BASENAME (asm_out_file, buffer);
34750 fprintf (asm_out_file, "\n");
34753 static void
34754 rs6000_xcoff_asm_globalize_label (FILE *stream, const char *name)
34756 fputs (GLOBAL_ASM_OP, stream);
34757 RS6000_OUTPUT_BASENAME (stream, name);
34758 putc ('\n', stream);
34761 /* A get_unnamed_section callback, used for read-only sections.
34762 DIRECTIVE points to the section string variable. */
34764 static void
34765 rs6000_xcoff_output_readonly_section_asm_op (const void *directive)
34767 fprintf (asm_out_file, "\t.csect %s[RO],%s\n",
34768 *(const char *const *) directive,
34769 XCOFF_CSECT_DEFAULT_ALIGNMENT_STR);
34772 /* Likewise for read-write sections. */
34774 static void
34775 rs6000_xcoff_output_readwrite_section_asm_op (const void *directive)
34777 fprintf (asm_out_file, "\t.csect %s[RW],%s\n",
34778 *(const char *const *) directive,
34779 XCOFF_CSECT_DEFAULT_ALIGNMENT_STR);
34782 static void
34783 rs6000_xcoff_output_tls_section_asm_op (const void *directive)
34785 fprintf (asm_out_file, "\t.csect %s[TL],%s\n",
34786 *(const char *const *) directive,
34787 XCOFF_CSECT_DEFAULT_ALIGNMENT_STR);
34790 /* A get_unnamed_section callback, used for switching to toc_section. */
34792 static void
34793 rs6000_xcoff_output_toc_section_asm_op (const void *data ATTRIBUTE_UNUSED)
34795 if (TARGET_MINIMAL_TOC)
34797 /* toc_section is always selected at least once from
34798 rs6000_xcoff_file_start, so this is guaranteed to
34799 always be defined once and only once in each file. */
34800 if (!toc_initialized)
34802 fputs ("\t.toc\nLCTOC..1:\n", asm_out_file);
34803 fputs ("\t.tc toc_table[TC],toc_table[RW]\n", asm_out_file);
34804 toc_initialized = 1;
34806 fprintf (asm_out_file, "\t.csect toc_table[RW]%s\n",
34807 (TARGET_32BIT ? "" : ",3"));
34809 else
34810 fputs ("\t.toc\n", asm_out_file);
34813 /* Implement TARGET_ASM_INIT_SECTIONS. */
34815 static void
34816 rs6000_xcoff_asm_init_sections (void)
34818 read_only_data_section
34819 = get_unnamed_section (0, rs6000_xcoff_output_readonly_section_asm_op,
34820 &xcoff_read_only_section_name);
34822 private_data_section
34823 = get_unnamed_section (SECTION_WRITE,
34824 rs6000_xcoff_output_readwrite_section_asm_op,
34825 &xcoff_private_data_section_name);
34827 tls_data_section
34828 = get_unnamed_section (SECTION_TLS,
34829 rs6000_xcoff_output_tls_section_asm_op,
34830 &xcoff_tls_data_section_name);
34832 tls_private_data_section
34833 = get_unnamed_section (SECTION_TLS,
34834 rs6000_xcoff_output_tls_section_asm_op,
34835 &xcoff_private_data_section_name);
34837 read_only_private_data_section
34838 = get_unnamed_section (0, rs6000_xcoff_output_readonly_section_asm_op,
34839 &xcoff_private_data_section_name);
34841 toc_section
34842 = get_unnamed_section (0, rs6000_xcoff_output_toc_section_asm_op, NULL);
34844 readonly_data_section = read_only_data_section;
34847 static int
34848 rs6000_xcoff_reloc_rw_mask (void)
34850 return 3;
34853 static void
34854 rs6000_xcoff_asm_named_section (const char *name, unsigned int flags,
34855 tree decl ATTRIBUTE_UNUSED)
34857 int smclass;
34858 static const char * const suffix[5] = { "PR", "RO", "RW", "TL", "XO" };
34860 if (flags & SECTION_EXCLUDE)
34861 smclass = 4;
34862 else if (flags & SECTION_DEBUG)
34864 fprintf (asm_out_file, "\t.dwsect %s\n", name);
34865 return;
34867 else if (flags & SECTION_CODE)
34868 smclass = 0;
34869 else if (flags & SECTION_TLS)
34870 smclass = 3;
34871 else if (flags & SECTION_WRITE)
34872 smclass = 2;
34873 else
34874 smclass = 1;
34876 fprintf (asm_out_file, "\t.csect %s%s[%s],%u\n",
34877 (flags & SECTION_CODE) ? "." : "",
34878 name, suffix[smclass], flags & SECTION_ENTSIZE);
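/* As a stand-alone illustration of the mapping above (xcoff_smclass_demo is
   a hypothetical helper, not used by the compiler): fold the if/else chain
   into one function returning the XCOFF storage-mapping-class suffix.  A
   SECTION_CODE section named "foo" thus gets a directive like
   "\t.csect .foo[PR],0".  */
static const char *
xcoff_smclass_demo (unsigned int flags)
{
  if (flags & SECTION_EXCLUDE) return "XO";  /* takes no space in the object */
  if (flags & SECTION_DEBUG)   return NULL;  /* emitted via .dwsect instead */
  if (flags & SECTION_CODE)    return "PR";  /* program code */
  if (flags & SECTION_TLS)     return "TL";  /* thread-local data */
  if (flags & SECTION_WRITE)   return "RW";  /* read-write data */
  return "RO";                               /* read-only data */
}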
34881 #define IN_NAMED_SECTION(DECL) \
34882 ((TREE_CODE (DECL) == FUNCTION_DECL || TREE_CODE (DECL) == VAR_DECL) \
34883 && DECL_SECTION_NAME (DECL) != NULL)
34885 static section *
34886 rs6000_xcoff_select_section (tree decl, int reloc,
34887 unsigned HOST_WIDE_INT align)
34889 /* Place variables with alignment stricter than BIGGEST_ALIGNMENT into
34890 a named section. */
34891 if (align > BIGGEST_ALIGNMENT)
34893 resolve_unique_section (decl, reloc, true);
34894 if (IN_NAMED_SECTION (decl))
34895 return get_named_section (decl, NULL, reloc);
34898 if (decl_readonly_section (decl, reloc))
34900 if (TREE_PUBLIC (decl))
34901 return read_only_data_section;
34902 else
34903 return read_only_private_data_section;
34905 else
34907 #if HAVE_AS_TLS
34908 if (TREE_CODE (decl) == VAR_DECL && DECL_THREAD_LOCAL_P (decl))
34910 if (TREE_PUBLIC (decl))
34911 return tls_data_section;
34912 else if (bss_initializer_p (decl))
34914 /* Convert to COMMON to emit in BSS. */
34915 DECL_COMMON (decl) = 1;
34916 return tls_comm_section;
34918 else
34919 return tls_private_data_section;
34921 else
34922 #endif
34923 if (TREE_PUBLIC (decl))
34924 return data_section;
34925 else
34926 return private_data_section;
34930 static void
34931 rs6000_xcoff_unique_section (tree decl, int reloc ATTRIBUTE_UNUSED)
34933 const char *name;
34935 /* Use select_section for private data and uninitialized data with
34936 alignment <= BIGGEST_ALIGNMENT. */
34937 if (!TREE_PUBLIC (decl)
34938 || DECL_COMMON (decl)
34939 || (DECL_INITIAL (decl) == NULL_TREE
34940 && DECL_ALIGN (decl) <= BIGGEST_ALIGNMENT)
34941 || DECL_INITIAL (decl) == error_mark_node
34942 || (flag_zero_initialized_in_bss
34943 && initializer_zerop (DECL_INITIAL (decl))))
34944 return;
34946 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
34947 name = (*targetm.strip_name_encoding) (name);
34948 set_decl_section_name (decl, name);
34951 /* Select section for constant in constant pool.
34953 On RS/6000, all constants are in the private read-only data area.
34954 However, if this is being placed in the TOC it must be output as a
34955 toc entry. */
34957 static section *
34958 rs6000_xcoff_select_rtx_section (machine_mode mode, rtx x,
34959 unsigned HOST_WIDE_INT align ATTRIBUTE_UNUSED)
34961 if (ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (x, mode))
34962 return toc_section;
34963 else
34964 return read_only_private_data_section;
34967 /* Remove any trailing [DS] or the like from the symbol name. */
34969 static const char *
34970 rs6000_xcoff_strip_name_encoding (const char *name)
34972 size_t len;
34973 if (*name == '*')
34974 name++;
34975 len = strlen (name);
34976 if (name[len - 1] == ']')
34977 return ggc_alloc_string (name, len - 4);
34978 else
34979 return name;
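/* A minimal sketch of the stripping above (strip_name_demo is a
   hypothetical helper; it writes into a caller-supplied buffer where the
   real code allocates a GC string, and assumes the <string.h> declarations
   already pulled in by system.h).  The mapping class is two letters in
   brackets, so exactly four characters are dropped: "foo[DS]" becomes
   "foo" and "*bar[RW]" becomes "bar".  */
static const char *
strip_name_demo (const char *name, char *buf)
{
  if (*name == '*')                   /* drop the user-label prefix */
    name++;
  size_t len = strlen (name);
  if (len > 4 && name[len - 1] == ']')
    len -= 4;                         /* drop the "[XX]" suffix */
  memcpy (buf, name, len);
  buf[len] = '\0';
  return buf;
}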
34982 /* Section attributes. AIX is always PIC. */
34984 static unsigned int
34985 rs6000_xcoff_section_type_flags (tree decl, const char *name, int reloc)
34987 unsigned int align;
34988 unsigned int flags = default_section_type_flags (decl, name, reloc);
34990 /* Align to at least UNIT size. */
34991 if ((flags & SECTION_CODE) != 0 || !decl || !DECL_P (decl))
34992 align = MIN_UNITS_PER_WORD;
34993 else
34994 /* Increase alignment of large objects if not already stricter. */
34995 align = MAX ((DECL_ALIGN (decl) / BITS_PER_UNIT),
34996 int_size_in_bytes (TREE_TYPE (decl)) > MIN_UNITS_PER_WORD
34997 ? UNITS_PER_FP_WORD : MIN_UNITS_PER_WORD);
34999 return flags | (exact_log2 (align) & SECTION_ENTSIZE);
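/* The alignment travels in the SECTION_ENTSIZE bits of the flags word as a
   base-two logarithm: align = 8 bytes stores exact_log2 (8) = 3 there, and
   1u << (flags & SECTION_ENTSIZE) recovers it.  A stand-alone sketch of
   the packing (pack_align_demo is a hypothetical helper):  */
static unsigned int
pack_align_demo (unsigned int flags, unsigned int align)
{
  unsigned int log2 = 0;
  while ((1u << log2) < align)        /* align is a power of two, in bytes */
    log2++;
  return flags | (log2 & SECTION_ENTSIZE);  /* as in the return above */
}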
35002 /* Output at beginning of assembler file.
35004 Initialize the section names for the RS/6000 at this point.
35006 Specify filename, including full path, to assembler.
35008 We want to go into the TOC section so at least one .toc will be emitted.
35009 Also, in order to output proper .bs/.es pairs, we need at least one static
35010 [RW] section emitted.
35012 Finally, declare mcount when profiling to make the assembler happy. */
35014 static void
35015 rs6000_xcoff_file_start (void)
35017 rs6000_gen_section_name (&xcoff_bss_section_name,
35018 main_input_filename, ".bss_");
35019 rs6000_gen_section_name (&xcoff_private_data_section_name,
35020 main_input_filename, ".rw_");
35021 rs6000_gen_section_name (&xcoff_read_only_section_name,
35022 main_input_filename, ".ro_");
35023 rs6000_gen_section_name (&xcoff_tls_data_section_name,
35024 main_input_filename, ".tls_");
35025 rs6000_gen_section_name (&xcoff_tbss_section_name,
35026 main_input_filename, ".tbss_[UL]");
35028 fputs ("\t.file\t", asm_out_file);
35029 output_quoted_string (asm_out_file, main_input_filename);
35030 fputc ('\n', asm_out_file);
35031 if (write_symbols != NO_DEBUG)
35032 switch_to_section (private_data_section);
35033 switch_to_section (toc_section);
35034 switch_to_section (text_section);
35035 if (profile_flag)
35036 fprintf (asm_out_file, "\t.extern %s\n", RS6000_MCOUNT);
35037 rs6000_file_start ();
35040 /* Output at end of assembler file.
35041 On the RS/6000, referencing data should automatically pull in text. */
35043 static void
35044 rs6000_xcoff_file_end (void)
35046 switch_to_section (text_section);
35047 fputs ("_section_.text:\n", asm_out_file);
35048 switch_to_section (data_section);
35049 fputs (TARGET_32BIT
35050 ? "\t.long _section_.text\n" : "\t.llong _section_.text\n",
35051 asm_out_file);
35054 struct declare_alias_data
35056 FILE *file;
35057 bool function_descriptor;
35060 /* Declare alias N.  A callback for symtab_node::call_for_symbol_and_aliases. */
35062 static bool
35063 rs6000_declare_alias (struct symtab_node *n, void *d)
35065 struct declare_alias_data *data = (struct declare_alias_data *)d;
35066 /* Main symbol is output specially, because varasm machinery does part of
35067 the job for us - we do not need to declare .globl/lglobs and such. */
35068 if (!n->alias || n->weakref)
35069 return false;
35071 if (lookup_attribute ("ifunc", DECL_ATTRIBUTES (n->decl)))
35072 return false;
35074 /* Prevent assemble_alias from trying to use .set pseudo operation
35075 that does not behave as expected by the middle-end. */
35076 TREE_ASM_WRITTEN (n->decl) = true;
35078 const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (n->decl));
35079 char *buffer = (char *) alloca (strlen (name) + 2);
35080 char *p;
35081 int dollar_inside = 0;
35083 strcpy (buffer, name);
35084 p = strchr (buffer, '$');
35085 while (p) {
35086 *p = '_';
35087 dollar_inside++;
35088 p = strchr (p + 1, '$');
35090 if (TREE_PUBLIC (n->decl))
35092 if (!RS6000_WEAK || !DECL_WEAK (n->decl))
35094 if (dollar_inside) {
35095 if (data->function_descriptor)
35096 fprintf(data->file, "\t.rename .%s,\".%s\"\n", buffer, name);
35097 fprintf(data->file, "\t.rename %s,\"%s\"\n", buffer, name);
35099 if (data->function_descriptor)
35101 fputs ("\t.globl .", data->file);
35102 RS6000_OUTPUT_BASENAME (data->file, buffer);
35103 putc ('\n', data->file);
35105 fputs ("\t.globl ", data->file);
35106 RS6000_OUTPUT_BASENAME (data->file, buffer);
35107 putc ('\n', data->file);
35109 #ifdef ASM_WEAKEN_DECL
35110 else if (DECL_WEAK (n->decl) && !data->function_descriptor)
35111 ASM_WEAKEN_DECL (data->file, n->decl, name, NULL);
35112 #endif
35114 else
35116 if (dollar_inside)
35118 if (data->function_descriptor)
35119 fprintf(data->file, "\t.rename .%s,\".%s\"\n", buffer, name);
35120 fprintf(data->file, "\t.rename %s,\"%s\"\n", buffer, name);
35122 if (data->function_descriptor)
35124 fputs ("\t.lglobl .", data->file);
35125 RS6000_OUTPUT_BASENAME (data->file, buffer);
35126 putc ('\n', data->file);
35128 fputs ("\t.lglobl ", data->file);
35129 RS6000_OUTPUT_BASENAME (data->file, buffer);
35130 putc ('\n', data->file);
35132 if (data->function_descriptor)
35133 fputs (".", data->file);
35134 RS6000_OUTPUT_BASENAME (data->file, buffer);
35135 fputs (":\n", data->file);
35136 return false;
35139 /* This macro produces the initial definition of a function name.
35140 On the RS/6000, we need to place an extra '.' in the function name and
35141 output the function descriptor.
35142 Dollar signs are converted to underscores.
35144 The csect for the function will have already been created when
35145 text_section was selected. We do have to go back to that csect, however.
35147 The third and fourth parameters to the .function pseudo-op (16 and 044)
35148 are placeholders which no longer have any use.
35150 Because AIX assembler's .set command has unexpected semantics, we output
35151 all aliases as alternative labels in front of the definition. */
35153 void
35154 rs6000_xcoff_declare_function_name (FILE *file, const char *name, tree decl)
35156 char *buffer = (char *) alloca (strlen (name) + 1);
35157 char *p;
35158 int dollar_inside = 0;
35159 struct declare_alias_data data = {file, false};
35161 strcpy (buffer, name);
35162 p = strchr (buffer, '$');
35163 while (p) {
35164 *p = '_';
35165 dollar_inside++;
35166 p = strchr (p + 1, '$');
35168 if (TREE_PUBLIC (decl))
35170 if (!RS6000_WEAK || !DECL_WEAK (decl))
35172 if (dollar_inside) {
35173 fprintf(file, "\t.rename .%s,\".%s\"\n", buffer, name);
35174 fprintf(file, "\t.rename %s,\"%s\"\n", buffer, name);
35176 fputs ("\t.globl .", file);
35177 RS6000_OUTPUT_BASENAME (file, buffer);
35178 putc ('\n', file);
35181 else
35183 if (dollar_inside) {
35184 fprintf(file, "\t.rename .%s,\".%s\"\n", buffer, name);
35185 fprintf(file, "\t.rename %s,\"%s\"\n", buffer, name);
35187 fputs ("\t.lglobl .", file);
35188 RS6000_OUTPUT_BASENAME (file, buffer);
35189 putc ('\n', file);
35191 fputs ("\t.csect ", file);
35192 RS6000_OUTPUT_BASENAME (file, buffer);
35193 fputs (TARGET_32BIT ? "[DS]\n" : "[DS],3\n", file);
35194 RS6000_OUTPUT_BASENAME (file, buffer);
35195 fputs (":\n", file);
35196 symtab_node::get (decl)->call_for_symbol_and_aliases (rs6000_declare_alias, &data, true);
35197 fputs (TARGET_32BIT ? "\t.long ." : "\t.llong .", file);
35198 RS6000_OUTPUT_BASENAME (file, buffer);
35199 fputs (", TOC[tc0], 0\n", file);
35200 in_section = NULL;
35201 switch_to_section (function_section (decl));
35202 putc ('.', file);
35203 RS6000_OUTPUT_BASENAME (file, buffer);
35204 fputs (":\n", file);
35205 data.function_descriptor = true;
35206 symtab_node::get (decl)->call_for_symbol_and_aliases (rs6000_declare_alias, &data, true);
35207 if (!DECL_IGNORED_P (decl))
35209 if (write_symbols == DBX_DEBUG || write_symbols == XCOFF_DEBUG)
35210 xcoffout_declare_function (file, decl, buffer);
35211 else if (write_symbols == DWARF2_DEBUG)
35213 name = (*targetm.strip_name_encoding) (name);
35214 fprintf (file, "\t.function .%s,.%s,2,0\n", name, name);
35217 return;
35220 /* This macro produces the initial definition of an object (variable) name.
35221 Because AIX assembler's .set command has unexpected semantics, we output
35222 all aliases as alternative labels in front of the definition. */
35224 void
35225 rs6000_xcoff_declare_object_name (FILE *file, const char *name, tree decl)
35227 struct declare_alias_data data = {file, false};
35228 RS6000_OUTPUT_BASENAME (file, name);
35229 fputs (":\n", file);
35230 symtab_node::get (decl)->call_for_symbol_and_aliases (rs6000_declare_alias, &data, true);
35233 /* Override the default 'SYMBOL-.' syntax with AIX compatible 'SYMBOL-$'. */
35235 void
35236 rs6000_asm_output_dwarf_pcrel (FILE *file, int size, const char *label)
35238 fputs (integer_asm_op (size, FALSE), file);
35239 assemble_name (file, label);
35240 fputs ("-$", file);
35243 /* Output a symbol offset relative to the dbase for the current object.
35244 We use __gcc_unwind_dbase as an arbitrary base for dbase and assume
35245 signed offsets.
35247 __gcc_unwind_dbase is embedded in all executables/libraries through
35248 libgcc/config/rs6000/crtdbase.S. */
35250 void
35251 rs6000_asm_output_dwarf_datarel (FILE *file, int size, const char *label)
35253 fputs (integer_asm_op (size, FALSE), file);
35254 assemble_name (file, label);
35255 fputs("-__gcc_unwind_dbase", file);
35258 #ifdef HAVE_AS_TLS
35259 static void
35260 rs6000_xcoff_encode_section_info (tree decl, rtx rtl, int first)
35262 rtx symbol;
35263 int flags;
35264 const char *symname;
35266 default_encode_section_info (decl, rtl, first);
35268 /* Careful not to prod global register variables. */
35269 if (!MEM_P (rtl))
35270 return;
35271 symbol = XEXP (rtl, 0);
35272 if (GET_CODE (symbol) != SYMBOL_REF)
35273 return;
35275 flags = SYMBOL_REF_FLAGS (symbol);
35277 if (TREE_CODE (decl) == VAR_DECL && DECL_THREAD_LOCAL_P (decl))
35278 flags &= ~SYMBOL_FLAG_HAS_BLOCK_INFO;
35280 SYMBOL_REF_FLAGS (symbol) = flags;
35282 /* Append mapping class to extern decls. */
35283 symname = XSTR (symbol, 0);
35284 if (decl /* sync condition with assemble_external () */
35285 && DECL_P (decl) && DECL_EXTERNAL (decl) && TREE_PUBLIC (decl)
35286 && ((TREE_CODE (decl) == VAR_DECL && !DECL_THREAD_LOCAL_P (decl))
35287 || TREE_CODE (decl) == FUNCTION_DECL)
35288 && symname[strlen (symname) - 1] != ']')
35290 char *newname = (char *) alloca (strlen (symname) + 5);
35291 strcpy (newname, symname);
35292 strcat (newname, (TREE_CODE (decl) == FUNCTION_DECL
35293 ? "[DS]" : "[UA]"));
35294 XSTR (symbol, 0) = ggc_strdup (newname);
35297 #endif /* HAVE_AS_TLS */
35298 #endif /* TARGET_XCOFF */
35300 /* Return true if INSN should not be copied. */
35302 static bool
35303 rs6000_cannot_copy_insn_p (rtx_insn *insn)
35305 return recog_memoized (insn) >= 0
35306 && get_attr_cannot_copy (insn);
35309 /* Compute a (partial) cost for rtx X. Return true if the complete
35310 cost has been computed, and false if subexpressions should be
35311 scanned. In either case, *TOTAL contains the cost result. */
35313 static bool
35314 rs6000_rtx_costs (rtx x, machine_mode mode, int outer_code,
35315 int opno ATTRIBUTE_UNUSED, int *total, bool speed)
35317 int code = GET_CODE (x);
35319 switch (code)
35321 /* On the RS/6000, if it is valid in the insn, it is free. */
35322 case CONST_INT:
35323 if (((outer_code == SET
35324 || outer_code == PLUS
35325 || outer_code == MINUS)
35326 && (satisfies_constraint_I (x)
35327 || satisfies_constraint_L (x)))
35328 || (outer_code == AND
35329 && (satisfies_constraint_K (x)
35330 || (mode == SImode
35331 ? satisfies_constraint_L (x)
35332 : satisfies_constraint_J (x))))
35333 || ((outer_code == IOR || outer_code == XOR)
35334 && (satisfies_constraint_K (x)
35335 || (mode == SImode
35336 ? satisfies_constraint_L (x)
35337 : satisfies_constraint_J (x))))
35338 || outer_code == ASHIFT
35339 || outer_code == ASHIFTRT
35340 || outer_code == LSHIFTRT
35341 || outer_code == ROTATE
35342 || outer_code == ROTATERT
35343 || outer_code == ZERO_EXTRACT
35344 || (outer_code == MULT
35345 && satisfies_constraint_I (x))
35346 || ((outer_code == DIV || outer_code == UDIV
35347 || outer_code == MOD || outer_code == UMOD)
35348 && exact_log2 (INTVAL (x)) >= 0)
35349 || (outer_code == COMPARE
35350 && (satisfies_constraint_I (x)
35351 || satisfies_constraint_K (x)))
35352 || ((outer_code == EQ || outer_code == NE)
35353 && (satisfies_constraint_I (x)
35354 || satisfies_constraint_K (x)
35355 || (mode == SImode
35356 ? satisfies_constraint_L (x)
35357 : satisfies_constraint_J (x))))
35358 || (outer_code == GTU
35359 && satisfies_constraint_I (x))
35360 || (outer_code == LTU
35361 && satisfies_constraint_P (x)))
35363 *total = 0;
35364 return true;
35366 else if ((outer_code == PLUS
35367 && reg_or_add_cint_operand (x, VOIDmode))
35368 || (outer_code == MINUS
35369 && reg_or_sub_cint_operand (x, VOIDmode))
35370 || ((outer_code == SET
35371 || outer_code == IOR
35372 || outer_code == XOR)
35373 && (INTVAL (x)
35374 & ~ (unsigned HOST_WIDE_INT) 0xffffffff) == 0))
35376 *total = COSTS_N_INSNS (1);
35377 return true;
35379 /* FALLTHRU */
35381 case CONST_DOUBLE:
35382 case CONST_WIDE_INT:
35383 case CONST:
35384 case HIGH:
35385 case SYMBOL_REF:
35386 *total = !speed ? COSTS_N_INSNS (1) + 1 : COSTS_N_INSNS (2);
35387 return true;
35389 case MEM:
35390 /* When optimizing for size, MEM should be slightly more expensive
35391 than generating the address, e.g., (plus (reg) (const)).
35392 L1 cache latency is about two instructions. */
35393 *total = !speed ? COSTS_N_INSNS (1) + 1 : COSTS_N_INSNS (2);
35394 if (SLOW_UNALIGNED_ACCESS (mode, MEM_ALIGN (x)))
35395 *total += COSTS_N_INSNS (100);
35396 return true;
35398 case LABEL_REF:
35399 *total = 0;
35400 return true;
35402 case PLUS:
35403 case MINUS:
35404 if (FLOAT_MODE_P (mode))
35405 *total = rs6000_cost->fp;
35406 else
35407 *total = COSTS_N_INSNS (1);
35408 return false;
35410 case MULT:
35411 if (GET_CODE (XEXP (x, 1)) == CONST_INT
35412 && satisfies_constraint_I (XEXP (x, 1)))
35414 if (INTVAL (XEXP (x, 1)) >= -256
35415 && INTVAL (XEXP (x, 1)) <= 255)
35416 *total = rs6000_cost->mulsi_const9;
35417 else
35418 *total = rs6000_cost->mulsi_const;
35420 else if (mode == SFmode)
35421 *total = rs6000_cost->fp;
35422 else if (FLOAT_MODE_P (mode))
35423 *total = rs6000_cost->dmul;
35424 else if (mode == DImode)
35425 *total = rs6000_cost->muldi;
35426 else
35427 *total = rs6000_cost->mulsi;
35428 return false;
35430 case FMA:
35431 if (mode == SFmode)
35432 *total = rs6000_cost->fp;
35433 else
35434 *total = rs6000_cost->dmul;
35435 break;
35437 case DIV:
35438 case MOD:
35439 if (FLOAT_MODE_P (mode))
35441 *total = mode == DFmode ? rs6000_cost->ddiv
35442 : rs6000_cost->sdiv;
35443 return false;
35445 /* FALLTHRU */
35447 case UDIV:
35448 case UMOD:
35449 if (GET_CODE (XEXP (x, 1)) == CONST_INT
35450 && exact_log2 (INTVAL (XEXP (x, 1))) >= 0)
35452 if (code == DIV || code == MOD)
35453 /* Shift, addze */
35454 *total = COSTS_N_INSNS (2);
35455 else
35456 /* Shift */
35457 *total = COSTS_N_INSNS (1);
35459 else
35461 if (GET_MODE (XEXP (x, 1)) == DImode)
35462 *total = rs6000_cost->divdi;
35463 else
35464 *total = rs6000_cost->divsi;
35466 /* Add in shift and subtract for MOD unless we have a mod instruction. */
35467 if (!TARGET_MODULO && (code == MOD || code == UMOD))
35468 *total += COSTS_N_INSNS (2);
35469 return false;
35471 case CTZ:
35472 *total = COSTS_N_INSNS (TARGET_CTZ ? 1 : 4);
35473 return false;
35475 case FFS:
35476 *total = COSTS_N_INSNS (4);
35477 return false;
35479 case POPCOUNT:
35480 *total = COSTS_N_INSNS (TARGET_POPCNTD ? 1 : 6);
35481 return false;
35483 case PARITY:
35484 *total = COSTS_N_INSNS (TARGET_CMPB ? 2 : 6);
35485 return false;
35487 case NOT:
35488 if (outer_code == AND || outer_code == IOR || outer_code == XOR)
35489 *total = 0;
35490 else
35491 *total = COSTS_N_INSNS (1);
35492 return false;
35494 case AND:
35495 if (CONST_INT_P (XEXP (x, 1)))
35497 rtx left = XEXP (x, 0);
35498 rtx_code left_code = GET_CODE (left);
35500 /* rotate-and-mask: 1 insn. */
35501 if ((left_code == ROTATE
35502 || left_code == ASHIFT
35503 || left_code == LSHIFTRT)
35504 && rs6000_is_valid_shift_mask (XEXP (x, 1), left, mode))
35506 *total = rtx_cost (XEXP (left, 0), mode, left_code, 0, speed);
35507 if (!CONST_INT_P (XEXP (left, 1)))
35508 *total += rtx_cost (XEXP (left, 1), SImode, left_code, 1, speed);
35509 *total += COSTS_N_INSNS (1);
35510 return true;
35513 /* rotate-and-mask (no rotate), andi., andis.: 1 insn. */
35514 HOST_WIDE_INT val = INTVAL (XEXP (x, 1));
35515 if (rs6000_is_valid_and_mask (XEXP (x, 1), mode)
35516 || (val & 0xffff) == val
35517 || (val & 0xffff0000) == val
35518 || ((val & 0xffff) == 0 && mode == SImode))
35520 *total = rtx_cost (left, mode, AND, 0, speed);
35521 *total += COSTS_N_INSNS (1);
35522 return true;
35525 /* 2 insns. */
35526 if (rs6000_is_valid_2insn_and (XEXP (x, 1), mode))
35528 *total = rtx_cost (left, mode, AND, 0, speed);
35529 *total += COSTS_N_INSNS (2);
35530 return true;
35534 *total = COSTS_N_INSNS (1);
35535 return false;
35537 case IOR:
35538 /* FIXME */
35539 *total = COSTS_N_INSNS (1);
35540 return true;
35542 case CLZ:
35543 case XOR:
35544 case ZERO_EXTRACT:
35545 *total = COSTS_N_INSNS (1);
35546 return false;
35548 case ASHIFT:
35549 /* The EXTSWSLI instruction is a combined instruction. Don't count both
35550 the sign extend and shift separately within the insn. */
35551 if (TARGET_EXTSWSLI && mode == DImode
35552 && GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
35553 && GET_MODE (XEXP (XEXP (x, 0), 0)) == SImode)
35555 *total = 0;
35556 return false;
35558 /* fall through */
35560 case ASHIFTRT:
35561 case LSHIFTRT:
35562 case ROTATE:
35563 case ROTATERT:
35564 /* Handle mul_highpart. */
35565 if (outer_code == TRUNCATE
35566 && GET_CODE (XEXP (x, 0)) == MULT)
35568 if (mode == DImode)
35569 *total = rs6000_cost->muldi;
35570 else
35571 *total = rs6000_cost->mulsi;
35572 return true;
35574 else if (outer_code == AND)
35575 *total = 0;
35576 else
35577 *total = COSTS_N_INSNS (1);
35578 return false;
35580 case SIGN_EXTEND:
35581 case ZERO_EXTEND:
35582 if (GET_CODE (XEXP (x, 0)) == MEM)
35583 *total = 0;
35584 else
35585 *total = COSTS_N_INSNS (1);
35586 return false;
35588 case COMPARE:
35589 case NEG:
35590 case ABS:
35591 if (!FLOAT_MODE_P (mode))
35593 *total = COSTS_N_INSNS (1);
35594 return false;
35596 /* FALLTHRU */
35598 case FLOAT:
35599 case UNSIGNED_FLOAT:
35600 case FIX:
35601 case UNSIGNED_FIX:
35602 case FLOAT_TRUNCATE:
35603 *total = rs6000_cost->fp;
35604 return false;
35606 case FLOAT_EXTEND:
35607 if (mode == DFmode)
35608 *total = rs6000_cost->sfdf_convert;
35609 else
35610 *total = rs6000_cost->fp;
35611 return false;
35613 case UNSPEC:
35614 switch (XINT (x, 1))
35616 case UNSPEC_FRSP:
35617 *total = rs6000_cost->fp;
35618 return true;
35620 default:
35621 break;
35623 break;
35625 case CALL:
35626 case IF_THEN_ELSE:
35627 if (!speed)
35629 *total = COSTS_N_INSNS (1);
35630 return true;
35632 else if (FLOAT_MODE_P (mode)
35633 && TARGET_PPC_GFXOPT && TARGET_HARD_FLOAT && TARGET_FPRS)
35635 *total = rs6000_cost->fp;
35636 return false;
35638 break;
35640 case NE:
35641 case EQ:
35642 case GTU:
35643 case LTU:
35644 /* Carry bit requires mode == Pmode.
35645 NEG or PLUS already counted so only add one. */
35646 if (mode == Pmode
35647 && (outer_code == NEG || outer_code == PLUS))
35649 *total = COSTS_N_INSNS (1);
35650 return true;
35652 if (outer_code == SET)
35654 if (XEXP (x, 1) == const0_rtx)
35656 if (TARGET_ISEL && !TARGET_MFCRF)
35657 *total = COSTS_N_INSNS (8);
35658 else
35659 *total = COSTS_N_INSNS (2);
35660 return true;
35662 else
35664 *total = COSTS_N_INSNS (3);
35665 return false;
35668 /* FALLTHRU */
35670 case GT:
35671 case LT:
35672 case UNORDERED:
35673 if (outer_code == SET && (XEXP (x, 1) == const0_rtx))
35675 if (TARGET_ISEL && !TARGET_MFCRF)
35676 *total = COSTS_N_INSNS (8);
35677 else
35678 *total = COSTS_N_INSNS (2);
35679 return true;
35681 /* CC COMPARE. */
35682 if (outer_code == COMPARE)
35684 *total = 0;
35685 return true;
35687 break;
35689 default:
35690 break;
35693 return false;
35696 /* Debug form of rs6000_rtx_costs that is selected if -mdebug=cost. */
35698 static bool
35699 rs6000_debug_rtx_costs (rtx x, machine_mode mode, int outer_code,
35700 int opno, int *total, bool speed)
35702 bool ret = rs6000_rtx_costs (x, mode, outer_code, opno, total, speed);
35704 fprintf (stderr,
35705 "\nrs6000_rtx_costs, return = %s, mode = %s, outer_code = %s, "
35706 "opno = %d, total = %d, speed = %s, x:\n",
35707 ret ? "complete" : "scan inner",
35708 GET_MODE_NAME (mode),
35709 GET_RTX_NAME (outer_code),
35710 opno,
35711 *total,
35712 speed ? "true" : "false");
35714 debug_rtx (x);
35716 return ret;
35719 /* Debug form of ADDRESS_COST that is selected if -mdebug=cost. */
35721 static int
35722 rs6000_debug_address_cost (rtx x, machine_mode mode,
35723 addr_space_t as, bool speed)
35725 int ret = TARGET_ADDRESS_COST (x, mode, as, speed);
35727 fprintf (stderr, "\nrs6000_address_cost, return = %d, speed = %s, x:\n",
35728 ret, speed ? "true" : "false");
35729 debug_rtx (x);
35731 return ret;
35735 /* A C expression returning the cost of moving data from a register of class
35736 CLASS1 to one of CLASS2. */
35738 static int
35739 rs6000_register_move_cost (machine_mode mode,
35740 reg_class_t from, reg_class_t to)
35742 int ret;
35744 if (TARGET_DEBUG_COST)
35745 dbg_cost_ctrl++;
35747 /* Moves from/to GENERAL_REGS. */
35748 if (reg_classes_intersect_p (to, GENERAL_REGS)
35749 || reg_classes_intersect_p (from, GENERAL_REGS))
35751 reg_class_t rclass = from;
35753 if (! reg_classes_intersect_p (to, GENERAL_REGS))
35754 rclass = to;
35756 if (rclass == FLOAT_REGS || rclass == ALTIVEC_REGS || rclass == VSX_REGS)
35757 ret = (rs6000_memory_move_cost (mode, rclass, false)
35758 + rs6000_memory_move_cost (mode, GENERAL_REGS, false));
35760 /* It's more expensive to move CR_REGS than CR0_REGS because of the
35761 shift. */
35762 else if (rclass == CR_REGS)
35763 ret = 4;
35765 /* For those processors that have slow LR/CTR moves, make them more
35766 expensive than memory in order to bias spills to memory. */
35767 else if ((rs6000_cpu == PROCESSOR_POWER6
35768 || rs6000_cpu == PROCESSOR_POWER7
35769 || rs6000_cpu == PROCESSOR_POWER8
35770 || rs6000_cpu == PROCESSOR_POWER9)
35771 && reg_classes_intersect_p (rclass, LINK_OR_CTR_REGS))
35772 ret = 6 * hard_regno_nregs[0][mode];
35774 else
35775 /* A move will cost one instruction per GPR moved. */
35776 ret = 2 * hard_regno_nregs[0][mode];
35779 /* If we have VSX, we can easily move between FPR or Altivec registers. */
35780 else if (VECTOR_MEM_VSX_P (mode)
35781 && reg_classes_intersect_p (to, VSX_REGS)
35782 && reg_classes_intersect_p (from, VSX_REGS))
35783 ret = 2 * hard_regno_nregs[32][mode];
35785 /* Moving between two similar registers is just one instruction. */
35786 else if (reg_classes_intersect_p (to, from))
35787 ret = (FLOAT128_2REG_P (mode)) ? 4 : 2;
35789 /* Everything else has to go through GENERAL_REGS. */
35790 else
35791 ret = (rs6000_register_move_cost (mode, GENERAL_REGS, to)
35792 + rs6000_register_move_cost (mode, from, GENERAL_REGS));
35794 if (TARGET_DEBUG_COST)
35796 if (dbg_cost_ctrl == 1)
35797 fprintf (stderr,
35798 "rs6000_register_move_cost:, ret=%d, mode=%s, from=%s, to=%s\n",
35799 ret, GET_MODE_NAME (mode), reg_class_names[from],
35800 reg_class_names[to]);
35801 dbg_cost_ctrl--;
35804 return ret;
35807 /* A C expression returning the cost of moving data of MODE between a
35808 register and memory. */
35810 static int
35811 rs6000_memory_move_cost (machine_mode mode, reg_class_t rclass,
35812 bool in ATTRIBUTE_UNUSED)
35814 int ret;
35816 if (TARGET_DEBUG_COST)
35817 dbg_cost_ctrl++;
35819 if (reg_classes_intersect_p (rclass, GENERAL_REGS))
35820 ret = 4 * hard_regno_nregs[0][mode];
35821 else if ((reg_classes_intersect_p (rclass, FLOAT_REGS)
35822 || reg_classes_intersect_p (rclass, VSX_REGS)))
35823 ret = 4 * hard_regno_nregs[32][mode];
35824 else if (reg_classes_intersect_p (rclass, ALTIVEC_REGS))
35825 ret = 4 * hard_regno_nregs[FIRST_ALTIVEC_REGNO][mode];
35826 else
35827 ret = 4 + rs6000_register_move_cost (mode, rclass, GENERAL_REGS);
35829 if (TARGET_DEBUG_COST)
35831 if (dbg_cost_ctrl == 1)
35832 fprintf (stderr,
35833 "rs6000_memory_move_cost: ret=%d, mode=%s, rclass=%s, in=%d\n",
35834 ret, GET_MODE_NAME (mode), reg_class_names[rclass], in);
35835 dbg_cost_ctrl--;
35838 return ret;
35841 /* Returns a code for a target-specific builtin that implements
35842 reciprocal of the function, or NULL_TREE if not available. */
35844 static tree
35845 rs6000_builtin_reciprocal (tree fndecl)
35847 switch (DECL_FUNCTION_CODE (fndecl))
35849 case VSX_BUILTIN_XVSQRTDP:
35850 if (!RS6000_RECIP_AUTO_RSQRTE_P (V2DFmode))
35851 return NULL_TREE;
35853 return rs6000_builtin_decls[VSX_BUILTIN_RSQRT_2DF];
35855 case VSX_BUILTIN_XVSQRTSP:
35856 if (!RS6000_RECIP_AUTO_RSQRTE_P (V4SFmode))
35857 return NULL_TREE;
35859 return rs6000_builtin_decls[VSX_BUILTIN_RSQRT_4SF];
35861 default:
35862 return NULL_TREE;
35866 /* Load up a constant. If the mode is a vector mode, splat the value across
35867 all of the vector elements. */
35869 static rtx
35870 rs6000_load_constant_and_splat (machine_mode mode, REAL_VALUE_TYPE dconst)
35872 rtx reg;
35874 if (mode == SFmode || mode == DFmode)
35876 rtx d = const_double_from_real_value (dconst, mode);
35877 reg = force_reg (mode, d);
35879 else if (mode == V4SFmode)
35881 rtx d = const_double_from_real_value (dconst, SFmode);
35882 rtvec v = gen_rtvec (4, d, d, d, d);
35883 reg = gen_reg_rtx (mode);
35884 rs6000_expand_vector_init (reg, gen_rtx_PARALLEL (mode, v));
35886 else if (mode == V2DFmode)
35888 rtx d = const_double_from_real_value (dconst, DFmode);
35889 rtvec v = gen_rtvec (2, d, d);
35890 reg = gen_reg_rtx (mode);
35891 rs6000_expand_vector_init (reg, gen_rtx_PARALLEL (mode, v));
35893 else
35894 gcc_unreachable ();
35896 return reg;
35899 /* Generate an FMA instruction. */
35901 static void
35902 rs6000_emit_madd (rtx target, rtx m1, rtx m2, rtx a)
35904 machine_mode mode = GET_MODE (target);
35905 rtx dst;
35907 dst = expand_ternary_op (mode, fma_optab, m1, m2, a, target, 0);
35908 gcc_assert (dst != NULL);
35910 if (dst != target)
35911 emit_move_insn (target, dst);
35914 /* Generate a FNMSUB instruction: dst = -fma(m1, m2, -a). */
35916 static void
35917 rs6000_emit_nmsub (rtx dst, rtx m1, rtx m2, rtx a)
35919 machine_mode mode = GET_MODE (dst);
35920 rtx r;
35922 /* This is a tad more complicated, since the fnma_optab is for
35923 a different expression: fma(-m1, m2, a), which is the same
35924 thing except in the case of signed zeros.
35926 Fortunately we know that if FMA is supported that FNMSUB is
35927 also supported in the ISA. Just expand it directly. */
35929 gcc_assert (optab_handler (fma_optab, mode) != CODE_FOR_nothing);
35931 r = gen_rtx_NEG (mode, a);
35932 r = gen_rtx_FMA (mode, m1, m2, r);
35933 r = gen_rtx_NEG (mode, r);
35934 emit_insn (gen_rtx_SET (dst, r));
35937 /* Newton-Raphson approximation of floating point divide DST = N/D. If NOTE_P,
35938 add a reg_note saying that this was a division. Support both scalar and
35939 vector divide. Assumes no trapping math and finite arguments. */
35941 void
35942 rs6000_emit_swdiv (rtx dst, rtx n, rtx d, bool note_p)
35944 machine_mode mode = GET_MODE (dst);
35945 rtx one, x0, e0, x1, xprev, eprev, xnext, enext, u, v;
35946 int i;
35948 /* Low precision estimates guarantee 5 bits of accuracy. High
35949 precision estimates guarantee 14 bits of accuracy. SFmode
35950 requires 23 bits of accuracy. DFmode requires 52 bits of
35951 accuracy. Each pass at least doubles the accuracy, leading
35952 to the following. */
35953 int passes = (TARGET_RECIP_PRECISION) ? 1 : 3;
35954 if (mode == DFmode || mode == V2DFmode)
35955 passes++;
35957 enum insn_code code = optab_handler (smul_optab, mode);
35958 insn_gen_fn gen_mul = GEN_FCN (code);
35960 gcc_assert (code != CODE_FOR_nothing);
35962 one = rs6000_load_constant_and_splat (mode, dconst1);
35964 /* x0 = 1./d estimate */
35965 x0 = gen_reg_rtx (mode);
35966 emit_insn (gen_rtx_SET (x0, gen_rtx_UNSPEC (mode, gen_rtvec (1, d),
35967 UNSPEC_FRES)));
35969 /* Each iteration but the last calculates x_(i+1) = x_i * (2 - d * x_i). */
35970 if (passes > 1) {
35972 /* e0 = 1. - d * x0 */
35973 e0 = gen_reg_rtx (mode);
35974 rs6000_emit_nmsub (e0, d, x0, one);
35976 /* x1 = x0 + e0 * x0 */
35977 x1 = gen_reg_rtx (mode);
35978 rs6000_emit_madd (x1, e0, x0, x0);
35980 for (i = 0, xprev = x1, eprev = e0; i < passes - 2;
35981 ++i, xprev = xnext, eprev = enext) {
35983 /* enext = eprev * eprev */
35984 enext = gen_reg_rtx (mode);
35985 emit_insn (gen_mul (enext, eprev, eprev));
35987 /* xnext = xprev + enext * xprev */
35988 xnext = gen_reg_rtx (mode);
35989 rs6000_emit_madd (xnext, enext, xprev, xprev);
35992 } else
35993 xprev = x0;
35995 /* The last iteration calculates x_(i+1) = n * x_i * (2 - d * x_i). */
35997 /* u = n * xprev */
35998 u = gen_reg_rtx (mode);
35999 emit_insn (gen_mul (u, n, xprev));
36001 /* v = n - (d * u) */
36002 v = gen_reg_rtx (mode);
36003 rs6000_emit_nmsub (v, d, u, n);
36005 /* dst = (v * xprev) + u */
36006 rs6000_emit_madd (dst, v, xprev, u);
36008 if (note_p)
36009 add_reg_note (get_last_insn (), REG_EQUAL, gen_rtx_DIV (mode, n, d));
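/* A scalar sketch of the recurrence this expander emits (swdiv_demo is a
   hypothetical helper; double stands in for every mode).  x0 is the
   hardware estimate (fres/fre), good to ~5 bits, or ~14 with
   TARGET_RECIP_PRECISION.  Each pass squares the relative error, so the
   good bits double: 5 -> 10 -> 20 -> 40 covers SFmode's 23 bits in three
   passes and DFmode's 52 in four, while 14 -> 28 covers SFmode in one.  */
static double
swdiv_demo (double n, double d, double x0, int passes)
{
  double x = x0;
  if (passes > 1)
    {
      double e = 1.0 - d * x;         /* e0 = 1 - d*x0   (fnmsub) */
      x = x + e * x;                  /* x1 = x0 + e0*x0 (fmadd)  */
      for (int i = 0; i < passes - 2; i++)
        {
          e = e * e;                  /* e_{i+1} = e_i * e_i: squaring the
                                         error avoids another use of d */
          x = x + e * x;              /* x_{i+1} = x_i + e_{i+1}*x_i */
        }
    }
  double u = n * x;                   /* u = n * x */
  double v = n - d * u;               /* residual  (fnmsub) */
  return v * x + u;                   /* n/d ~= v*x + u (fmadd) */
}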
36012 /* Goldschmidt's Algorithm for single/double-precision floating point
36013 sqrt and rsqrt. Assumes no trapping math and finite arguments. */
36015 void
36016 rs6000_emit_swsqrt (rtx dst, rtx src, bool recip)
36018 machine_mode mode = GET_MODE (src);
36019 rtx e = gen_reg_rtx (mode);
36020 rtx g = gen_reg_rtx (mode);
36021 rtx h = gen_reg_rtx (mode);
36023 /* Low precision estimates guarantee 5 bits of accuracy. High
36024 precision estimates guarantee 14 bits of accuracy. SFmode
36025 requires 23 bits of accuracy. DFmode requires 52 bits of
36026 accuracy. Each pass at least doubles the accuracy, leading
36027 to the following. */
36028 int passes = (TARGET_RECIP_PRECISION) ? 1 : 3;
36029 if (mode == DFmode || mode == V2DFmode)
36030 passes++;
36032 int i;
36033 rtx mhalf;
36034 enum insn_code code = optab_handler (smul_optab, mode);
36035 insn_gen_fn gen_mul = GEN_FCN (code);
36037 gcc_assert (code != CODE_FOR_nothing);
36039 mhalf = rs6000_load_constant_and_splat (mode, dconsthalf);
36041 /* e = rsqrt estimate */
36042 emit_insn (gen_rtx_SET (e, gen_rtx_UNSPEC (mode, gen_rtvec (1, src),
36043 UNSPEC_RSQRT)));
36045 /* If (src == 0.0) filter infinity to prevent NaN for sqrt(0.0). */
36046 if (!recip)
36048 rtx zero = force_reg (mode, CONST0_RTX (mode));
36050 if (mode == SFmode)
36052 rtx target = emit_conditional_move (e, GT, src, zero, mode,
36053 e, zero, mode, 0);
36054 if (target != e)
36055 emit_move_insn (e, target);
36057 else
36059 rtx cond = gen_rtx_GT (VOIDmode, e, zero);
36060 rs6000_emit_vector_cond_expr (e, e, zero, cond, src, zero);
36064 /* g = sqrt estimate. */
36065 emit_insn (gen_mul (g, e, src));
36066 /* h = 1/(2*sqrt) estimate. */
36067 emit_insn (gen_mul (h, e, mhalf));
36069 if (recip)
36071 if (passes == 1)
36073 rtx t = gen_reg_rtx (mode);
36074 rs6000_emit_nmsub (t, g, h, mhalf);
36075 /* Apply correction directly to 1/rsqrt estimate. */
36076 rs6000_emit_madd (dst, e, t, e);
36078 else
36080 for (i = 0; i < passes; i++)
36082 rtx t1 = gen_reg_rtx (mode);
36083 rtx g1 = gen_reg_rtx (mode);
36084 rtx h1 = gen_reg_rtx (mode);
36086 rs6000_emit_nmsub (t1, g, h, mhalf);
36087 rs6000_emit_madd (g1, g, t1, g);
36088 rs6000_emit_madd (h1, h, t1, h);
36090 g = g1;
36091 h = h1;
36093 /* Multiply by 2 for 1/rsqrt. */
36094 emit_insn (gen_add3_insn (dst, h, h));
36097 else
36099 rtx t = gen_reg_rtx (mode);
36100 rs6000_emit_nmsub (t, g, h, mhalf);
36101 rs6000_emit_madd (dst, g, t, g);
36104 return;
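/* A scalar sketch of the Goldschmidt refinement used above (swsqrt_demo is
   a hypothetical helper; the expander specializes the final step for the
   recip and non-recip cases and, for sqrt, first filters src == 0.0 so no
   NaN is produced).  e0 is the hardware 1/sqrt estimate (frsqrte).  */
static double
swsqrt_demo (double src, double e0, int passes, int recip)
{
  double g = e0 * src;                /* g ~= sqrt (src) */
  double h = e0 * 0.5;                /* h ~= 1 / (2 * sqrt (src)) */
  for (int i = 0; i < passes; i++)
    {
      double t = 0.5 - g * h;         /* residual (fnmsub against mhalf) */
      g = g + g * t;                  /* refine the sqrt estimate */
      h = h + h * t;                  /* refine the 1/(2*sqrt) estimate */
    }
  return recip ? h + h : g;           /* rsqrt = 2*h; sqrt = g */
}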
36107 /* Emit popcount intrinsic on TARGET_POPCNTB (Power5) and TARGET_POPCNTD
36108 (Power7) targets. DST is the target, and SRC is the argument operand. */
36110 void
36111 rs6000_emit_popcount (rtx dst, rtx src)
36113 machine_mode mode = GET_MODE (dst);
36114 rtx tmp1, tmp2;
36116 /* Use the PPC ISA 2.06 popcnt{w,d} instruction if we can. */
36117 if (TARGET_POPCNTD)
36119 if (mode == SImode)
36120 emit_insn (gen_popcntdsi2 (dst, src));
36121 else
36122 emit_insn (gen_popcntddi2 (dst, src));
36123 return;
36126 tmp1 = gen_reg_rtx (mode);
36128 if (mode == SImode)
36130 emit_insn (gen_popcntbsi2 (tmp1, src));
36131 tmp2 = expand_mult (SImode, tmp1, GEN_INT (0x01010101),
36132 NULL_RTX, 0);
36133 tmp2 = force_reg (SImode, tmp2);
36134 emit_insn (gen_lshrsi3 (dst, tmp2, GEN_INT (24)));
36136 else
36138 emit_insn (gen_popcntbdi2 (tmp1, src));
36139 tmp2 = expand_mult (DImode, tmp1,
36140 GEN_INT ((HOST_WIDE_INT)
36141 0x01010101 << 32 | 0x01010101),
36142 NULL_RTX, 0);
36143 tmp2 = force_reg (DImode, tmp2);
36144 emit_insn (gen_lshrdi3 (dst, tmp2, GEN_INT (56)));
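/* A 32-bit sketch of the popcntb fallback (popcount32_demo is a
   hypothetical helper).  The first three steps stand in for popcntb,
   which leaves each byte holding its own population count; multiplying by
   0x01010101 then sums the four byte counts into the top byte, and the
   shift by 24 extracts it.  E.g. x = 0xF0F00001 has byte counts
   4, 4, 0, 1 and the function returns 9.  */
static unsigned int
popcount32_demo (unsigned int x)
{
  unsigned int b = x - ((x >> 1) & 0x55555555u);
  b = (b & 0x33333333u) + ((b >> 2) & 0x33333333u);
  b = (b + (b >> 4)) & 0x0f0f0f0fu;   /* per-byte counts, as popcntb gives */
  return (b * 0x01010101u) >> 24;     /* mult + shift, as emitted above */
}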
36149 /* Emit parity intrinsic on TARGET_POPCNTB targets. DST is the
36150 target, and SRC is the argument operand. */
36152 void
36153 rs6000_emit_parity (rtx dst, rtx src)
36155 machine_mode mode = GET_MODE (dst);
36156 rtx tmp;
36158 tmp = gen_reg_rtx (mode);
36160 /* Use the PPC ISA 2.05 prtyw/prtyd instruction if we can. */
36161 if (TARGET_CMPB)
36163 if (mode == SImode)
36165 emit_insn (gen_popcntbsi2 (tmp, src));
36166 emit_insn (gen_paritysi2_cmpb (dst, tmp));
36168 else
36170 emit_insn (gen_popcntbdi2 (tmp, src));
36171 emit_insn (gen_paritydi2_cmpb (dst, tmp));
36173 return;
36176 if (mode == SImode)
36178 /* Is mult+shift >= shift+xor+shift+xor? */
36179 if (rs6000_cost->mulsi_const >= COSTS_N_INSNS (3))
36181 rtx tmp1, tmp2, tmp3, tmp4;
36183 tmp1 = gen_reg_rtx (SImode);
36184 emit_insn (gen_popcntbsi2 (tmp1, src));
36186 tmp2 = gen_reg_rtx (SImode);
36187 emit_insn (gen_lshrsi3 (tmp2, tmp1, GEN_INT (16)));
36188 tmp3 = gen_reg_rtx (SImode);
36189 emit_insn (gen_xorsi3 (tmp3, tmp1, tmp2));
36191 tmp4 = gen_reg_rtx (SImode);
36192 emit_insn (gen_lshrsi3 (tmp4, tmp3, GEN_INT (8)));
36193 emit_insn (gen_xorsi3 (tmp, tmp3, tmp4));
36195 else
36196 rs6000_emit_popcount (tmp, src);
36197 emit_insn (gen_andsi3 (dst, tmp, const1_rtx));
36199 else
36201 /* Is mult+shift >= shift+xor+shift+xor+shift+xor? */
36202 if (rs6000_cost->muldi >= COSTS_N_INSNS (5))
36204 rtx tmp1, tmp2, tmp3, tmp4, tmp5, tmp6;
36206 tmp1 = gen_reg_rtx (DImode);
36207 emit_insn (gen_popcntbdi2 (tmp1, src));
36209 tmp2 = gen_reg_rtx (DImode);
36210 emit_insn (gen_lshrdi3 (tmp2, tmp1, GEN_INT (32)));
36211 tmp3 = gen_reg_rtx (DImode);
36212 emit_insn (gen_xordi3 (tmp3, tmp1, tmp2));
36214 tmp4 = gen_reg_rtx (DImode);
36215 emit_insn (gen_lshrdi3 (tmp4, tmp3, GEN_INT (16)));
36216 tmp5 = gen_reg_rtx (DImode);
36217 emit_insn (gen_xordi3 (tmp5, tmp3, tmp4));
36219 tmp6 = gen_reg_rtx (DImode);
36220 emit_insn (gen_lshrdi3 (tmp6, tmp5, GEN_INT (8)));
36221 emit_insn (gen_xordi3 (tmp, tmp5, tmp6));
36223 else
36224 rs6000_emit_popcount (tmp, src);
36225 emit_insn (gen_anddi3 (dst, tmp, const1_rtx));
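/* The same trick for parity, using the shift+xor sequence chosen above
   when the multiply is slow (parity32_demo is a hypothetical 32-bit
   helper).  XOR preserves the parity of a sum, so folding the per-byte
   counts together leaves the overall parity in bit 0.  */
static unsigned int
parity32_demo (unsigned int x)
{
  unsigned int b = x - ((x >> 1) & 0x55555555u);
  b = (b & 0x33333333u) + ((b >> 2) & 0x33333333u);
  b = (b + (b >> 4)) & 0x0f0f0f0fu;   /* per-byte counts, as popcntb gives */
  b ^= b >> 16;                       /* fold halfwords: shift + xor */
  b ^= b >> 8;                        /* fold bytes:     shift + xor */
  return b & 1;
}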
36229 /* Expand an Altivec constant permutation for little endian mode.
36230 There are two issues: First, the two input operands must be
36231 swapped so that together they form a double-wide array in LE
36232 order. Second, the vperm instruction has surprising behavior
36233 in LE mode: it interprets the elements of the source vectors
36234 in BE mode ("left to right") and interprets the elements of
36235 the destination vector in LE mode ("right to left"). To
36236 correct for this, we must subtract each element of the permute
36237 control vector from 31.
36239 For example, suppose we want to concatenate vr10 = {0, 1, 2, 3}
36240 with vr11 = {4, 5, 6, 7} and extract {0, 2, 4, 6} using a vperm.
36241 We place {0,1,2,3,8,9,10,11,16,17,18,19,24,25,26,27} in vr12 to
36242 serve as the permute control vector. Then, in BE mode,
36244 vperm 9,10,11,12
36246 places the desired result in vr9. However, in LE mode the
36247 vector contents will be
36249 vr10 = 00000003 00000002 00000001 00000000
36250 vr11 = 00000007 00000006 00000005 00000004
36252 The result of the vperm using the same permute control vector is
36254 vr9 = 05000000 07000000 01000000 03000000
36256 That is, the leftmost 4 bytes of vr10 are interpreted as the
36257 source for the rightmost 4 bytes of vr9, and so on.
36259 If we change the permute control vector to
36261 vr12 = {31,30,29,28,23,22,21,20,15,14,13,12,7,6,5,4}
36263 and issue
36265 vperm 9,11,10,12
36267 we get the desired
36269 vr9 = 00000006 00000004 00000002 00000000. */
36271 void
36272 altivec_expand_vec_perm_const_le (rtx operands[4])
36274 unsigned int i;
36275 rtx perm[16];
36276 rtx constv, unspec;
36277 rtx target = operands[0];
36278 rtx op0 = operands[1];
36279 rtx op1 = operands[2];
36280 rtx sel = operands[3];
36282 /* Unpack and adjust the constant selector. */
36283 for (i = 0; i < 16; ++i)
36285 rtx e = XVECEXP (sel, 0, i);
36286 unsigned int elt = 31 - (INTVAL (e) & 31);
36287 perm[i] = GEN_INT (elt);
36290 /* Expand to a permute, swapping the inputs and using the
36291 adjusted selector. */
36292 if (!REG_P (op0))
36293 op0 = force_reg (V16QImode, op0);
36294 if (!REG_P (op1))
36295 op1 = force_reg (V16QImode, op1);
36297 constv = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, perm));
36298 constv = force_reg (V16QImode, constv);
36299 unspec = gen_rtx_UNSPEC (V16QImode, gen_rtvec (3, op1, op0, constv),
36300 UNSPEC_VPERM);
36301 if (!REG_P (target))
36303 rtx tmp = gen_reg_rtx (V16QImode);
36304 emit_move_insn (tmp, unspec);
36305 unspec = tmp;
36308 emit_move_insn (target, unspec);
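/* A byte-level sketch of why the adjustment works (both functions are
   hypothetical helpers, not used by the compiler).  vperm_hw_demo models
   the hardware over LE-ordered arrays: physical byte k of a register is
   LE byte 15-k, and physical byte m of the double-wide source vA:vB is LE
   byte 31-m of concat(vB,vA).  With vc[i] = 31 - sel[i] and the operands
   swapped, out[i] ends up equal to LE byte sel[i] of concat(op0,op1),
   which is exactly what the caller asked for.  */
static void
vperm_hw_demo (const unsigned char va[16], const unsigned char vb[16],
               const unsigned char vc[16], unsigned char out[16])
{
  for (int k = 0; k < 16; k++)
    {
      int m = vc[15 - k] & 31;                  /* physical control byte k */
      out[15 - k] = (m < 16
                     ? va[15 - m]               /* physical vA byte m */
                     : vb[15 - (m - 16)]);      /* physical vB byte m-16 */
    }
}

static void
vperm_le_demo (const unsigned char op0[16], const unsigned char op1[16],
               const unsigned char sel[16], unsigned char out[16])
{
  unsigned char vc[16];
  for (int i = 0; i < 16; i++)
    vc[i] = 31 - (sel[i] & 31);                 /* adjusted selector */
  vperm_hw_demo (op1, op0, vc, out);            /* swapped operands */
}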
36311 /* Similarly to altivec_expand_vec_perm_const_le, we must adjust the
36312 permute control vector. But here it's not a constant, so we must
36313 generate a vector NAND or NOR to do the adjustment. */
36315 void
36316 altivec_expand_vec_perm_le (rtx operands[4])
36318 rtx notx, iorx, unspec;
36319 rtx target = operands[0];
36320 rtx op0 = operands[1];
36321 rtx op1 = operands[2];
36322 rtx sel = operands[3];
36323 rtx tmp = target;
36324 rtx norreg = gen_reg_rtx (V16QImode);
36325 machine_mode mode = GET_MODE (target);
36327 /* Get everything in regs so the pattern matches. */
36328 if (!REG_P (op0))
36329 op0 = force_reg (mode, op0);
36330 if (!REG_P (op1))
36331 op1 = force_reg (mode, op1);
36332 if (!REG_P (sel))
36333 sel = force_reg (V16QImode, sel);
36334 if (!REG_P (target))
36335 tmp = gen_reg_rtx (mode);
36337 if (TARGET_P9_VECTOR)
36339 unspec = gen_rtx_UNSPEC (mode, gen_rtvec (3, op0, op1, sel),
36340 UNSPEC_VPERMR);
36342 else
36344 /* Invert the selector with a VNAND if available, else a VNOR.
36345 The VNAND is preferred for future fusion opportunities. */
36346 notx = gen_rtx_NOT (V16QImode, sel);
36347 iorx = (TARGET_P8_VECTOR
36348 ? gen_rtx_IOR (V16QImode, notx, notx)
36349 : gen_rtx_AND (V16QImode, notx, notx));
36350 emit_insn (gen_rtx_SET (norreg, iorx));
36352 /* Permute with operands reversed and adjusted selector. */
36353 unspec = gen_rtx_UNSPEC (mode, gen_rtvec (3, op1, op0, norreg),
36354 UNSPEC_VPERM);
36357 /* Copy into target, possibly by way of a register. */
36358 if (!REG_P (target))
36360 emit_move_insn (tmp, unspec);
36361 unspec = tmp;
36364 emit_move_insn (target, unspec);
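/* The NOT suffices because vperm reads only the low five bits of each
   selector byte, and for any byte e, (~e) & 31 == 31 - (e & 31); e.g.
   e = 9 gives ~e = 0xF6 and 0xF6 & 31 == 22 == 31 - 9.  A one-line check
   (invert_sel_demo is a hypothetical helper):  */
static int
invert_sel_demo (unsigned char e)
{
  return (((unsigned char) ~e) & 31) == 31 - (e & 31);  /* always 1 */
}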
36367 /* Expand an Altivec constant permutation. Return true if we match
36368 an efficient implementation; false to fall back to VPERM. */
36370 bool
36371 altivec_expand_vec_perm_const (rtx operands[4])
36373 struct altivec_perm_insn {
36374 HOST_WIDE_INT mask;
36375 enum insn_code impl;
36376 unsigned char perm[16];
36378 static const struct altivec_perm_insn patterns[] = {
36379 { OPTION_MASK_ALTIVEC, CODE_FOR_altivec_vpkuhum_direct,
36380 { 1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31 } },
36381 { OPTION_MASK_ALTIVEC, CODE_FOR_altivec_vpkuwum_direct,
36382 { 2, 3, 6, 7, 10, 11, 14, 15, 18, 19, 22, 23, 26, 27, 30, 31 } },
36383 { OPTION_MASK_ALTIVEC,
36384 (BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrghb_direct
36385 : CODE_FOR_altivec_vmrglb_direct),
36386 { 0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23 } },
36387 { OPTION_MASK_ALTIVEC,
36388 (BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrghh_direct
36389 : CODE_FOR_altivec_vmrglh_direct),
36390 { 0, 1, 16, 17, 2, 3, 18, 19, 4, 5, 20, 21, 6, 7, 22, 23 } },
36391 { OPTION_MASK_ALTIVEC,
36392 (BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrghw_direct
36393 : CODE_FOR_altivec_vmrglw_direct),
36394 { 0, 1, 2, 3, 16, 17, 18, 19, 4, 5, 6, 7, 20, 21, 22, 23 } },
36395 { OPTION_MASK_ALTIVEC,
36396 (BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrglb_direct
36397 : CODE_FOR_altivec_vmrghb_direct),
36398 { 8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31 } },
36399 { OPTION_MASK_ALTIVEC,
36400 (BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrglh_direct
36401 : CODE_FOR_altivec_vmrghh_direct),
36402 { 8, 9, 24, 25, 10, 11, 26, 27, 12, 13, 28, 29, 14, 15, 30, 31 } },
36403 { OPTION_MASK_ALTIVEC,
36404 (BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrglw_direct
36405 : CODE_FOR_altivec_vmrghw_direct),
36406 { 8, 9, 10, 11, 24, 25, 26, 27, 12, 13, 14, 15, 28, 29, 30, 31 } },
36407 { OPTION_MASK_P8_VECTOR, CODE_FOR_p8_vmrgew,
36408 { 0, 1, 2, 3, 16, 17, 18, 19, 8, 9, 10, 11, 24, 25, 26, 27 } },
36409 { OPTION_MASK_P8_VECTOR, CODE_FOR_p8_vmrgow,
36410 { 4, 5, 6, 7, 20, 21, 22, 23, 12, 13, 14, 15, 28, 29, 30, 31 } }
36413 unsigned int i, j, elt, which;
36414 unsigned char perm[16];
36415 rtx target, op0, op1, sel, x;
36416 bool one_vec;
36418 target = operands[0];
36419 op0 = operands[1];
36420 op1 = operands[2];
36421 sel = operands[3];
36423 /* Unpack the constant selector. */
36424 for (i = which = 0; i < 16; ++i)
36426 rtx e = XVECEXP (sel, 0, i);
36427 elt = INTVAL (e) & 31;
36428 which |= (elt < 16 ? 1 : 2);
36429 perm[i] = elt;
36432 /* Simplify the constant selector based on operands. */
36433 switch (which)
36435 default:
36436 gcc_unreachable ();
36438 case 3:
36439 one_vec = false;
36440 if (!rtx_equal_p (op0, op1))
36441 break;
36442 /* FALLTHRU */
36444 case 2:
36445 for (i = 0; i < 16; ++i)
36446 perm[i] &= 15;
36447 op0 = op1;
36448 one_vec = true;
36449 break;
36451 case 1:
36452 op1 = op0;
36453 one_vec = true;
36454 break;
36457 /* Look for splat patterns. */
36458 if (one_vec)
36460 elt = perm[0];
36462 for (i = 0; i < 16; ++i)
36463 if (perm[i] != elt)
36464 break;
36465 if (i == 16)
36467 if (!BYTES_BIG_ENDIAN)
36468 elt = 15 - elt;
36469 emit_insn (gen_altivec_vspltb_direct (target, op0, GEN_INT (elt)));
36470 return true;
36473 if (elt % 2 == 0)
36475 for (i = 0; i < 16; i += 2)
36476 if (perm[i] != elt || perm[i + 1] != elt + 1)
36477 break;
36478 if (i == 16)
36480 int field = BYTES_BIG_ENDIAN ? elt / 2 : 7 - elt / 2;
36481 x = gen_reg_rtx (V8HImode);
36482 emit_insn (gen_altivec_vsplth_direct (x, gen_lowpart (V8HImode, op0),
36483 GEN_INT (field)));
36484 emit_move_insn (target, gen_lowpart (V16QImode, x));
36485 return true;
36489 if (elt % 4 == 0)
36491 for (i = 0; i < 16; i += 4)
36492 if (perm[i] != elt
36493 || perm[i + 1] != elt + 1
36494 || perm[i + 2] != elt + 2
36495 || perm[i + 3] != elt + 3)
36496 break;
36497 if (i == 16)
36499 int field = BYTES_BIG_ENDIAN ? elt / 4 : 3 - elt / 4;
36500 x = gen_reg_rtx (V4SImode);
36501 emit_insn (gen_altivec_vspltw_direct (x, gen_lowpart (V4SImode, op0),
36502 GEN_INT (field)));
36503 emit_move_insn (target, gen_lowpart (V16QImode, x));
36504 return true;
36509 /* Look for merge and pack patterns. */
36510 for (j = 0; j < ARRAY_SIZE (patterns); ++j)
36512 bool swapped;
36514 if ((patterns[j].mask & rs6000_isa_flags) == 0)
36515 continue;
36517 elt = patterns[j].perm[0];
36518 if (perm[0] == elt)
36519 swapped = false;
36520 else if (perm[0] == elt + 16)
36521 swapped = true;
36522 else
36523 continue;
36524 for (i = 1; i < 16; ++i)
36526 elt = patterns[j].perm[i];
36527 if (swapped)
36528 elt = (elt >= 16 ? elt - 16 : elt + 16);
36529 else if (one_vec && elt >= 16)
36530 elt -= 16;
36531 if (perm[i] != elt)
36532 break;
36534 if (i == 16)
36536 enum insn_code icode = patterns[j].impl;
36537 machine_mode omode = insn_data[icode].operand[0].mode;
36538 machine_mode imode = insn_data[icode].operand[1].mode;
36540 /* For little-endian, don't use vpkuwum and vpkuhum if the
36541 underlying vector type is not V4SI and V8HI, respectively.
36542 For example, using vpkuwum with a V8HI picks up the even
36543 halfwords (BE numbering) when the even halfwords (LE
36544 numbering) are what we need. */
36545 if (!BYTES_BIG_ENDIAN
36546 && icode == CODE_FOR_altivec_vpkuwum_direct
36547 && ((GET_CODE (op0) == REG
36548 && GET_MODE (op0) != V4SImode)
36549 || (GET_CODE (op0) == SUBREG
36550 && GET_MODE (XEXP (op0, 0)) != V4SImode)))
36551 continue;
36552 if (!BYTES_BIG_ENDIAN
36553 && icode == CODE_FOR_altivec_vpkuhum_direct
36554 && ((GET_CODE (op0) == REG
36555 && GET_MODE (op0) != V8HImode)
36556 || (GET_CODE (op0) == SUBREG
36557 && GET_MODE (XEXP (op0, 0)) != V8HImode)))
36558 continue;
36560 /* For little-endian, the two input operands must be swapped
36561 (or swapped back) to ensure proper right-to-left numbering
36562 from 0 to 2N-1. */
36563 if (swapped ^ !BYTES_BIG_ENDIAN)
36564 std::swap (op0, op1);
36565 if (imode != V16QImode)
36567 op0 = gen_lowpart (imode, op0);
36568 op1 = gen_lowpart (imode, op1);
36570 if (omode == V16QImode)
36571 x = target;
36572 else
36573 x = gen_reg_rtx (omode);
36574 emit_insn (GEN_FCN (icode) (x, op0, op1));
36575 if (omode != V16QImode)
36576 emit_move_insn (target, gen_lowpart (V16QImode, x));
36577 return true;
36581 if (!BYTES_BIG_ENDIAN)
36583 altivec_expand_vec_perm_const_le (operands);
36584 return true;
36587 return false;
36590 /* Expand a Paired Single, VSX Permute Doubleword, or SPE constant permutation.
36591 Return true if we match an efficient implementation. */
36593 static bool
36594 rs6000_expand_vec_perm_const_1 (rtx target, rtx op0, rtx op1,
36595 unsigned char perm0, unsigned char perm1)
36597 rtx x;
36599 /* If both selectors come from the same operand, fold to single op. */
36600 if ((perm0 & 2) == (perm1 & 2))
36602 if (perm0 & 2)
36603 op0 = op1;
36604 else
36605 op1 = op0;
36607 /* If both operands are equal, fold to simpler permutation. */
36608 if (rtx_equal_p (op0, op1))
36610 perm0 = perm0 & 1;
36611 perm1 = (perm1 & 1) + 2;
36613 /* If the first selector comes from the second operand, swap. */
36614 else if (perm0 & 2)
36616 if (perm1 & 2)
36617 return false;
36618 perm0 -= 2;
36619 perm1 += 2;
36620 std::swap (op0, op1);
36622 /* If the second selector does not come from the second operand, fail. */
36623 else if ((perm1 & 2) == 0)
36624 return false;
36626 /* Success! */
36627 if (target != NULL)
36629 machine_mode vmode, dmode;
36630 rtvec v;
36632 vmode = GET_MODE (target);
36633 gcc_assert (GET_MODE_NUNITS (vmode) == 2);
36634 dmode = mode_for_vector (GET_MODE_INNER (vmode), 4);
36635 x = gen_rtx_VEC_CONCAT (dmode, op0, op1);
36636 v = gen_rtvec (2, GEN_INT (perm0), GEN_INT (perm1));
36637 x = gen_rtx_VEC_SELECT (vmode, x, gen_rtx_PARALLEL (VOIDmode, v));
36638 emit_insn (gen_rtx_SET (target, x));
36640 return true;
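/* A sketch of the selector normalization above (v2_perm_norm_demo is a
   hypothetical helper).  Each selector is two bits: bit 1 picks the
   operand, bit 0 the lane.  same_op models rtx_equal_p on the operands.
   On success *perm0 is in {0,1}, *perm1 in {2,3}, and *swap_ops says
   whether the operands must be exchanged; e.g. {3,1} over distinct
   operands becomes {1,3} with a swap.  */
static int
v2_perm_norm_demo (unsigned char *perm0, unsigned char *perm1,
                   int *swap_ops, int same_op)
{
  *swap_ops = 0;
  if ((*perm0 & 2) == (*perm1 & 2))
    same_op = 1;                      /* both lanes read one operand */
  if (same_op)
    {
      *perm0 &= 1;
      *perm1 = (*perm1 & 1) + 2;
    }
  else if (*perm0 & 2)
    {
      if (*perm1 & 2)
        return 0;                     /* defensive, as in the code above */
      *perm0 -= 2;
      *perm1 += 2;
      *swap_ops = 1;
    }
  else if ((*perm1 & 2) == 0)
    return 0;                         /* not expressible as one VEC_SELECT */
  return 1;
}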
36643 bool
36644 rs6000_expand_vec_perm_const (rtx operands[4])
36646 rtx target, op0, op1, sel;
36647 unsigned char perm0, perm1;
36649 target = operands[0];
36650 op0 = operands[1];
36651 op1 = operands[2];
36652 sel = operands[3];
36654 /* Unpack the constant selector. */
36655 perm0 = INTVAL (XVECEXP (sel, 0, 0)) & 3;
36656 perm1 = INTVAL (XVECEXP (sel, 0, 1)) & 3;
36658 return rs6000_expand_vec_perm_const_1 (target, op0, op1, perm0, perm1);
36661 /* Test whether a constant permutation is supported. */
36663 static bool
36664 rs6000_vectorize_vec_perm_const_ok (machine_mode vmode,
36665 const unsigned char *sel)
36667 /* AltiVec (and thus VSX) can handle arbitrary permutations. */
36668 if (TARGET_ALTIVEC)
36669 return true;
36671 /* Check for ps_merge* or evmerge* insns. */
36672 if ((TARGET_PAIRED_FLOAT && vmode == V2SFmode)
36673 || (TARGET_SPE && vmode == V2SImode))
36675 rtx op0 = gen_raw_REG (vmode, LAST_VIRTUAL_REGISTER + 1);
36676 rtx op1 = gen_raw_REG (vmode, LAST_VIRTUAL_REGISTER + 2);
36677 return rs6000_expand_vec_perm_const_1 (NULL, op0, op1, sel[0], sel[1]);
36680 return false;
36683 /* A subroutine for rs6000_expand_extract_even & rs6000_expand_interleave. */
36685 static void
36686 rs6000_do_expand_vec_perm (rtx target, rtx op0, rtx op1,
36687 machine_mode vmode, unsigned nelt, rtx perm[])
36689 machine_mode imode;
36690 rtx x;
36692 imode = vmode;
36693 if (GET_MODE_CLASS (vmode) != MODE_VECTOR_INT)
36695 imode = mode_for_size (GET_MODE_UNIT_BITSIZE (vmode), MODE_INT, 0);
36696 imode = mode_for_vector (imode, nelt);
36699 x = gen_rtx_CONST_VECTOR (imode, gen_rtvec_v (nelt, perm));
36700 x = expand_vec_perm (vmode, op0, op1, x, target);
36701 if (x != target)
36702 emit_move_insn (target, x);
36705 /* Expand an extract even operation. */
36707 void
36708 rs6000_expand_extract_even (rtx target, rtx op0, rtx op1)
36710 machine_mode vmode = GET_MODE (target);
36711 unsigned i, nelt = GET_MODE_NUNITS (vmode);
36712 rtx perm[16];
36714 for (i = 0; i < nelt; i++)
36715 perm[i] = GEN_INT (i * 2);
36717 rs6000_do_expand_vec_perm (target, op0, op1, vmode, nelt, perm);
36720 /* Expand a vector interleave operation. */
36722 void
36723 rs6000_expand_interleave (rtx target, rtx op0, rtx op1, bool highp)
36725 machine_mode vmode = GET_MODE (target);
36726 unsigned i, high, nelt = GET_MODE_NUNITS (vmode);
36727 rtx perm[16];
36729 high = (highp ? 0 : nelt / 2);
36730 for (i = 0; i < nelt / 2; i++)
36732 perm[i * 2] = GEN_INT (i + high);
36733 perm[i * 2 + 1] = GEN_INT (i + nelt + high);
36736 rs6000_do_expand_vec_perm (target, op0, op1, vmode, nelt, perm);
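/* The index vectors built above, worked for nelt == 4
   (interleave_perm_demo is a hypothetical helper).
   rs6000_expand_extract_even produces {0, 2, 4, 6}; interleave with highp
   produces {0, 4, 1, 5} and without it {2, 6, 3, 7}, i.e. the result
   alternates between corresponding elements of the two sources.  */
static void
interleave_perm_demo (unsigned int nelt, int highp, unsigned char perm[])
{
  unsigned int high = highp ? 0 : nelt / 2;
  for (unsigned int i = 0; i < nelt / 2; i++)
    {
      perm[i * 2] = i + high;             /* element from op0 */
      perm[i * 2 + 1] = i + nelt + high;  /* matching element from op1 */
    }
}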
36739 /* Scale a V2DF vector SRC by two to the SCALE and place in TGT. */
36740 void
36741 rs6000_scale_v2df (rtx tgt, rtx src, int scale)
36743 HOST_WIDE_INT hwi_scale (scale);
36744 REAL_VALUE_TYPE r_pow;
36745 rtvec v = rtvec_alloc (2);
36746 rtx elt;
36747 rtx scale_vec = gen_reg_rtx (V2DFmode);
36748 (void)real_powi (&r_pow, DFmode, &dconst2, hwi_scale);
36749 elt = const_double_from_real_value (r_pow, DFmode);
36750 RTVEC_ELT (v, 0) = elt;
36751 RTVEC_ELT (v, 1) = elt;
36752 rs6000_expand_vector_init (scale_vec, gen_rtx_PARALLEL (V2DFmode, v));
36753 emit_insn (gen_mulv2df3 (tgt, src, scale_vec));
36756 /* Return an RTX representing where to find the function value of a
36757 function returning MODE. */
36758 static rtx
36759 rs6000_complex_function_value (machine_mode mode)
36761 unsigned int regno;
36762 rtx r1, r2;
36763 machine_mode inner = GET_MODE_INNER (mode);
36764 unsigned int inner_bytes = GET_MODE_UNIT_SIZE (mode);
36766 if (TARGET_FLOAT128_TYPE
36767 && (mode == KCmode
36768 || (mode == TCmode && TARGET_IEEEQUAD)))
36769 regno = ALTIVEC_ARG_RETURN;
36771 else if (FLOAT_MODE_P (mode) && TARGET_HARD_FLOAT && TARGET_FPRS)
36772 regno = FP_ARG_RETURN;
36774 else
36776 regno = GP_ARG_RETURN;
36778 /* 32-bit is OK since it'll go in r3/r4. */
36779 if (TARGET_32BIT && inner_bytes >= 4)
36780 return gen_rtx_REG (mode, regno);
36783 if (inner_bytes >= 8)
36784 return gen_rtx_REG (mode, regno);
36786 r1 = gen_rtx_EXPR_LIST (inner, gen_rtx_REG (inner, regno),
36787 const0_rtx);
36788 r2 = gen_rtx_EXPR_LIST (inner, gen_rtx_REG (inner, regno + 1),
36789 GEN_INT (inner_bytes));
36790 return gen_rtx_PARALLEL (mode, gen_rtvec (2, r1, r2));
36793 /* Return an rtx describing a return value of MODE as a PARALLEL
36794 in N_ELTS registers, each of mode ELT_MODE, starting at REGNO,
36795 stride REG_STRIDE. */
36797 static rtx
36798 rs6000_parallel_return (machine_mode mode,
36799 int n_elts, machine_mode elt_mode,
36800 unsigned int regno, unsigned int reg_stride)
36802 rtx par = gen_rtx_PARALLEL (mode, rtvec_alloc (n_elts));
36804 int i;
36805 for (i = 0; i < n_elts; i++)
36807 rtx r = gen_rtx_REG (elt_mode, regno);
36808 rtx off = GEN_INT (i * GET_MODE_SIZE (elt_mode));
36809 XVECEXP (par, 0, i) = gen_rtx_EXPR_LIST (VOIDmode, r, off);
36810 regno += reg_stride;
36813 return par;
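/* A sketch of the register/offset pairs the PARALLEL above describes
   (parallel_return_demo is a hypothetical helper).  Element i of the
   value sits at byte offset i * elt_size and in hard register
   regno + i * stride; e.g. four DFmode elements returned in FPRs starting
   at f1 with stride 1 give (f1,0) (f2,8) (f3,16) (f4,24).  */
static void
parallel_return_demo (int n_elts, int elt_size, unsigned int regno,
                      unsigned int stride, unsigned int regs[], int offs[])
{
  for (int i = 0; i < n_elts; i++)
    {
      regs[i] = regno + i * stride;   /* register holding element i */
      offs[i] = i * elt_size;         /* its byte offset in the value */
    }
}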
36816 /* Target hook for TARGET_FUNCTION_VALUE.
36818 On the SPE, both FPs and vectors are returned in r3.
36820 On RS/6000 an integer value is in r3 and a floating-point value is in
36821 fp1, unless -msoft-float. */
36823 static rtx
36824 rs6000_function_value (const_tree valtype,
36825 const_tree fn_decl_or_type ATTRIBUTE_UNUSED,
36826 bool outgoing ATTRIBUTE_UNUSED)
36828 machine_mode mode;
36829 unsigned int regno;
36830 machine_mode elt_mode;
36831 int n_elts;
36833 /* Special handling for structs in darwin64. */
36834 if (TARGET_MACHO
36835 && rs6000_darwin64_struct_check_p (TYPE_MODE (valtype), valtype))
36837 CUMULATIVE_ARGS valcum;
36838 rtx valret;
36840 valcum.words = 0;
36841 valcum.fregno = FP_ARG_MIN_REG;
36842 valcum.vregno = ALTIVEC_ARG_MIN_REG;
36843 /* Do a trial code generation as if this were going to be passed as
36844 an argument; if any part goes in memory, we return NULL. */
36845 valret = rs6000_darwin64_record_arg (&valcum, valtype, true, /* retval= */ true);
36846 if (valret)
36847 return valret;
36848 /* Otherwise fall through to standard ABI rules. */
36851 mode = TYPE_MODE (valtype);
36853 /* The ELFv2 ABI returns homogeneous VFP aggregates in registers. */
36854 if (rs6000_discover_homogeneous_aggregate (mode, valtype, &elt_mode, &n_elts))
36856 int first_reg, n_regs;
36858 if (SCALAR_FLOAT_MODE_NOT_VECTOR_P (elt_mode))
36860 /* _Decimal128 must use even/odd register pairs. */
36861 first_reg = (elt_mode == TDmode) ? FP_ARG_RETURN + 1 : FP_ARG_RETURN;
36862 n_regs = (GET_MODE_SIZE (elt_mode) + 7) >> 3;
36864 else
36866 first_reg = ALTIVEC_ARG_RETURN;
36867 n_regs = 1;
36870 return rs6000_parallel_return (mode, n_elts, elt_mode, first_reg, n_regs);
36873 /* Some return value types need to be split in the -mpowerpc64, 32-bit ABI. */
36874 if (TARGET_32BIT && TARGET_POWERPC64)
36875 switch (mode)
36877 default:
36878 break;
36879 case DImode:
36880 case SCmode:
36881 case DCmode:
36882 case TCmode:
36883 int count = GET_MODE_SIZE (mode) / 4;
36884 return rs6000_parallel_return (mode, count, SImode, GP_ARG_RETURN, 1);
36887 if ((INTEGRAL_TYPE_P (valtype)
36888 && GET_MODE_BITSIZE (mode) < (TARGET_32BIT ? 32 : 64))
36889 || POINTER_TYPE_P (valtype))
36890 mode = TARGET_32BIT ? SImode : DImode;
36892 if (DECIMAL_FLOAT_MODE_P (mode) && TARGET_HARD_FLOAT && TARGET_FPRS)
36893 /* _Decimal128 must use an even/odd register pair. */
36894 regno = (mode == TDmode) ? FP_ARG_RETURN + 1 : FP_ARG_RETURN;
36895 else if (SCALAR_FLOAT_TYPE_P (valtype) && TARGET_HARD_FLOAT && TARGET_FPRS
36896 && !FLOAT128_VECTOR_P (mode)
36897 && ((TARGET_SINGLE_FLOAT && (mode == SFmode)) || TARGET_DOUBLE_FLOAT))
36898 regno = FP_ARG_RETURN;
36899 else if (TREE_CODE (valtype) == COMPLEX_TYPE
36900 && targetm.calls.split_complex_arg)
36901 return rs6000_complex_function_value (mode);
36902 /* VSX is a superset of Altivec and adds V2DImode/V2DFmode. Since the same
36903 return register is used in both cases, and we won't see V2DImode/V2DFmode
36904 for pure altivec, combine the two cases. */
36905 else if ((TREE_CODE (valtype) == VECTOR_TYPE || FLOAT128_VECTOR_P (mode))
36906 && TARGET_ALTIVEC && TARGET_ALTIVEC_ABI
36907 && ALTIVEC_OR_VSX_VECTOR_MODE (mode))
36908 regno = ALTIVEC_ARG_RETURN;
36909 else if (TARGET_E500_DOUBLE && TARGET_HARD_FLOAT
36910 && (mode == DFmode || mode == DCmode
36911 || FLOAT128_IBM_P (mode) || mode == TCmode))
36912 return spe_build_register_parallel (mode, GP_ARG_RETURN);
36913 else
36914 regno = GP_ARG_RETURN;
36916 return gen_rtx_REG (mode, regno);
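/* Editorial summary sketch of the dispatch above, assuming default ABI
   flags: small integers and pointers widen and land in r3; float/double
   values in fp1; _Decimal128 in the even/odd pair starting at fp2;
   AltiVec/VSX vectors in v2; split complex values go through
   rs6000_complex_function_value; anything else falls back to r3.  */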
36919 /* Define how to find the value returned by a library function
36920 assuming the value has mode MODE. */
36921 rtx
36922 rs6000_libcall_value (machine_mode mode)
36924 unsigned int regno;
36926 /* A long long return value needs to be split in the -mpowerpc64, 32-bit ABI. */
36927 if (TARGET_32BIT && TARGET_POWERPC64 && mode == DImode)
36928 return rs6000_parallel_return (mode, 2, SImode, GP_ARG_RETURN, 1);
36930 if (DECIMAL_FLOAT_MODE_P (mode) && TARGET_HARD_FLOAT && TARGET_FPRS)
36931 /* _Decimal128 must use an even/odd register pair. */
36932 regno = (mode == TDmode) ? FP_ARG_RETURN + 1 : FP_ARG_RETURN;
36933 else if (SCALAR_FLOAT_MODE_NOT_VECTOR_P (mode)
36934 && TARGET_HARD_FLOAT && TARGET_FPRS
36935 && ((TARGET_SINGLE_FLOAT && mode == SFmode) || TARGET_DOUBLE_FLOAT))
36936 regno = FP_ARG_RETURN;
36937 /* VSX is a superset of Altivec and adds V2DImode/V2DFmode. Since the same
36938 return register is used in both cases, and we won't see V2DImode/V2DFmode
36939 for pure altivec, combine the two cases. */
36940 else if (ALTIVEC_OR_VSX_VECTOR_MODE (mode)
36941 && TARGET_ALTIVEC && TARGET_ALTIVEC_ABI)
36942 regno = ALTIVEC_ARG_RETURN;
36943 else if (COMPLEX_MODE_P (mode) && targetm.calls.split_complex_arg)
36944 return rs6000_complex_function_value (mode);
36945 else if (TARGET_E500_DOUBLE && TARGET_HARD_FLOAT
36946 && (mode == DFmode || mode == DCmode
36947 || FLOAT128_IBM_P (mode) || mode == TCmode))
36948 return spe_build_register_parallel (mode, GP_ARG_RETURN);
36949 else
36950 regno = GP_ARG_RETURN;
36952 return gen_rtx_REG (mode, regno);
36956 /* Return true if we use LRA instead of reload pass. */
36957 static bool
36958 rs6000_lra_p (void)
36960 return TARGET_LRA;
36963 /* Given FROM and TO register numbers, say whether this elimination is allowed.
36964 Frame pointer elimination is automatically handled.
36966 For the RS/6000, if frame pointer elimination is being done, we would like
36967 to convert ap into fp, not sp.
36969 We need r30 if -mminimal-toc was specified, and there are constant pool
36970 references. */
36972 static bool
36973 rs6000_can_eliminate (const int from, const int to)
36975 return (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM
36976 ? ! frame_pointer_needed
36977 : from == RS6000_PIC_OFFSET_TABLE_REGNUM
36978 ? ! TARGET_MINIMAL_TOC || TARGET_NO_TOC || get_pool_size () == 0
36979 : true);
36982 /* Define the offset between two registers, FROM to be eliminated and its
36983 replacement TO, at the start of a routine. */
36984 HOST_WIDE_INT
36985 rs6000_initial_elimination_offset (int from, int to)
36987 rs6000_stack_t *info = rs6000_stack_info ();
36988 HOST_WIDE_INT offset;
36990 if (from == HARD_FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
36991 offset = info->push_p ? 0 : -info->total_size;
36992 else if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
36994 offset = info->push_p ? 0 : -info->total_size;
36995 if (FRAME_GROWS_DOWNWARD)
36996 offset += info->fixed_size + info->vars_size + info->parm_size;
36998 else if (from == FRAME_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
36999 offset = FRAME_GROWS_DOWNWARD
37000 ? info->fixed_size + info->vars_size + info->parm_size
37001 : 0;
37002 else if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
37003 offset = info->total_size;
37004 else if (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
37005 offset = info->push_p ? info->total_size : 0;
37006 else if (from == RS6000_PIC_OFFSET_TABLE_REGNUM)
37007 offset = 0;
37008 else
37009 gcc_unreachable ();
37011 return offset;
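/* Editorial worked example: eliminating the argument pointer to the
   stack pointer in a function that allocates a frame (push_p set,
   total_size == 128, say) yields offset 128, i.e. the incoming arguments
   are found 128 bytes above the adjusted stack pointer.  */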
37014 static rtx
37015 rs6000_dwarf_register_span (rtx reg)
37017 rtx parts[8];
37018 int i, words;
37019 unsigned regno = REGNO (reg);
37020 machine_mode mode = GET_MODE (reg);
37022 if (TARGET_SPE
37023 && regno < 32
37024 && (SPE_VECTOR_MODE (GET_MODE (reg))
37025 || (TARGET_E500_DOUBLE && FLOAT_MODE_P (mode)
37026 && mode != SFmode && mode != SDmode && mode != SCmode)))
37027 ;
37028 else
37029 return NULL_RTX;
37031 regno = REGNO (reg);
37033 /* The duality of the SPE register size wreaks all kinds of havoc.
37034 This is a way of distinguishing r0 in 32-bits from r0 in
37035 64-bits. */
37036 words = (GET_MODE_SIZE (mode) + UNITS_PER_FP_WORD - 1) / UNITS_PER_FP_WORD;
37037 gcc_assert (words <= 4);
37038 for (i = 0; i < words; i++, regno++)
37040 if (BYTES_BIG_ENDIAN)
37042 parts[2 * i] = gen_rtx_REG (SImode, regno + FIRST_SPE_HIGH_REGNO);
37043 parts[2 * i + 1] = gen_rtx_REG (SImode, regno);
37045 else
37047 parts[2 * i] = gen_rtx_REG (SImode, regno);
37048 parts[2 * i + 1] = gen_rtx_REG (SImode, regno + FIRST_SPE_HIGH_REGNO);
37052 return gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (words * 2, parts));
37055 /* Fill in sizes for SPE register high parts in table used by unwinder. */
37057 static void
37058 rs6000_init_dwarf_reg_sizes_extra (tree address)
37060 if (TARGET_SPE)
37062 int i;
37063 machine_mode mode = TYPE_MODE (char_type_node);
37064 rtx addr = expand_expr (address, NULL_RTX, VOIDmode, EXPAND_NORMAL);
37065 rtx mem = gen_rtx_MEM (BLKmode, addr);
37066 rtx value = gen_int_mode (4, mode);
37068 for (i = FIRST_SPE_HIGH_REGNO; i < LAST_SPE_HIGH_REGNO+1; i++)
37070 int column = DWARF_REG_TO_UNWIND_COLUMN
37071 (DWARF2_FRAME_REG_OUT (DWARF_FRAME_REGNUM (i), true));
37072 HOST_WIDE_INT offset = column * GET_MODE_SIZE (mode);
37074 emit_move_insn (adjust_address (mem, mode, offset), value);
37078 if (TARGET_MACHO && ! TARGET_ALTIVEC)
37080 int i;
37081 machine_mode mode = TYPE_MODE (char_type_node);
37082 rtx addr = expand_expr (address, NULL_RTX, VOIDmode, EXPAND_NORMAL);
37083 rtx mem = gen_rtx_MEM (BLKmode, addr);
37084 rtx value = gen_int_mode (16, mode);
37086 /* On Darwin, libgcc may be built to run on both G3 and G4/5.
37087 The unwinder still needs to know the size of Altivec registers. */
37089 for (i = FIRST_ALTIVEC_REGNO; i < LAST_ALTIVEC_REGNO+1; i++)
37091 int column = DWARF_REG_TO_UNWIND_COLUMN
37092 (DWARF2_FRAME_REG_OUT (DWARF_FRAME_REGNUM (i), true));
37093 HOST_WIDE_INT offset = column * GET_MODE_SIZE (mode);
37095 emit_move_insn (adjust_address (mem, mode, offset), value);
37100 /* Map internal gcc register numbers to debug format register numbers.
37101 FORMAT specifies the type of debug register number to use:
37102 0 -- debug information, except for frame-related sections
37103 1 -- DWARF .debug_frame section
37104 2 -- DWARF .eh_frame section */
37106 unsigned int
37107 rs6000_dbx_register_number (unsigned int regno, unsigned int format)
37109 /* We never use the GCC internal number for SPE high registers.
37110 Those are mapped to the 1200..1231 range for all debug formats. */
37111 if (SPE_HIGH_REGNO_P (regno))
37112 return regno - FIRST_SPE_HIGH_REGNO + 1200;
37114 /* Except for the above, we use the internal number for non-DWARF
37115 debug information, and also for .eh_frame. */
37116 if ((format == 0 && write_symbols != DWARF2_DEBUG) || format == 2)
37117 return regno;
37119 /* On some platforms, we use the standard DWARF register
37120 numbering for .debug_info and .debug_frame. */
37121 #ifdef RS6000_USE_DWARF_NUMBERING
37122 if (regno <= 63)
37123 return regno;
37124 if (regno == LR_REGNO)
37125 return 108;
37126 if (regno == CTR_REGNO)
37127 return 109;
37128 /* Special handling for CR for .debug_frame: rs6000_emit_prologue has
37129 translated any combination of CR2, CR3, CR4 saves to a save of CR2.
37130 The actual code emitted saves the whole of CR, so we map CR2_REGNO
37131 to the DWARF reg for CR. */
37132 if (format == 1 && regno == CR2_REGNO)
37133 return 64;
37134 if (CR_REGNO_P (regno))
37135 return regno - CR0_REGNO + 86;
37136 if (regno == CA_REGNO)
37137 return 101; /* XER */
37138 if (ALTIVEC_REGNO_P (regno))
37139 return regno - FIRST_ALTIVEC_REGNO + 1124;
37140 if (regno == VRSAVE_REGNO)
37141 return 356;
37142 if (regno == VSCR_REGNO)
37143 return 67;
37144 if (regno == SPE_ACC_REGNO)
37145 return 99;
37146 if (regno == SPEFSCR_REGNO)
37147 return 612;
37148 #endif
37149 return regno;
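/* Editorial examples of the mapping above, assuming
   RS6000_USE_DWARF_NUMBERING is defined: (LR_REGNO, 1) -> 108,
   (CTR_REGNO, 1) -> 109, (CR2_REGNO, 1) -> 64 (the whole-CR column),
   (CA_REGNO, 1) -> 101 (XER), and the first AltiVec register -> 1124.  */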
37152 /* target hook eh_return_filter_mode */
37153 static machine_mode
37154 rs6000_eh_return_filter_mode (void)
37156 return TARGET_32BIT ? SImode : word_mode;
37159 /* Target hook for scalar_mode_supported_p. */
37160 static bool
37161 rs6000_scalar_mode_supported_p (machine_mode mode)
37163 /* -m32 does not support TImode. This is the default, from
37164 default_scalar_mode_supported_p. For -m32 -mpowerpc64 we want the
37165 same ABI as for -m32. But default_scalar_mode_supported_p allows
37166 integer modes of precision 2 * BITS_PER_WORD, which matches TImode
37167 for -mpowerpc64. */
37168 if (TARGET_32BIT && mode == TImode)
37169 return false;
37171 if (DECIMAL_FLOAT_MODE_P (mode))
37172 return default_decimal_float_supported_p ();
37173 else if (TARGET_FLOAT128_TYPE && (mode == KFmode || mode == IFmode))
37174 return true;
37175 else
37176 return default_scalar_mode_supported_p (mode);
37179 /* Target hook for vector_mode_supported_p. */
37180 static bool
37181 rs6000_vector_mode_supported_p (machine_mode mode)
37184 if (TARGET_PAIRED_FLOAT && PAIRED_VECTOR_MODE (mode))
37185 return true;
37187 if (TARGET_SPE && SPE_VECTOR_MODE (mode))
37188 return true;
37190 /* There is no vector form for IEEE 128-bit. If we return true for IEEE
37191 128-bit, the compiler might try to widen IEEE 128-bit to IBM
37192 double-double. */
37193 else if (VECTOR_MEM_ALTIVEC_OR_VSX_P (mode) && !FLOAT128_IEEE_P (mode))
37194 return true;
37196 else
37197 return false;
37200 /* Target hook for floatn_mode. */
37201 static machine_mode
37202 rs6000_floatn_mode (int n, bool extended)
37204 if (extended)
37206 switch (n)
37208 case 32:
37209 return DFmode;
37211 case 64:
37212 if (TARGET_FLOAT128_KEYWORD)
37213 return (FLOAT128_IEEE_P (TFmode)) ? TFmode : KFmode;
37214 else
37215 return VOIDmode;
37217 case 128:
37218 return VOIDmode;
37220 default:
37221 /* Those are the only valid _FloatNx types. */
37222 gcc_unreachable ();
37225 else
37227 switch (n)
37229 case 32:
37230 return SFmode;
37232 case 64:
37233 return DFmode;
37235 case 128:
37236 if (TARGET_FLOAT128_KEYWORD)
37237 return (FLOAT128_IEEE_P (TFmode)) ? TFmode : KFmode;
37238 else
37239 return VOIDmode;
37241 default:
37242 return VOIDmode;
37248 /* Target hook for c_mode_for_suffix. */
37249 static machine_mode
37250 rs6000_c_mode_for_suffix (char suffix)
37252 if (TARGET_FLOAT128_TYPE)
37254 if (suffix == 'q' || suffix == 'Q')
37255 return (FLOAT128_IEEE_P (TFmode)) ? TFmode : KFmode;
37257 /* At the moment, we are not defining a suffix for IBM extended double.
37258 If/when the default for -mabi=ieeelongdouble is changed, and we want
37259 to support __ibm128 constants in legacy library code, we may need to
37260 re-evaluate this decision. Currently, c-lex.c only supports 'w' and
37261 'q' as machine dependent suffixes. The x86_64 port uses 'w' for
37262 __float80 constants. */
37265 return VOIDmode;
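/* Editorial note: in effect this is what lets the C front end accept a
   literal such as 1.0q when -mfloat128 is enabled, giving it KFmode (or
   TFmode when long double is already IEEE 128-bit); with the keyword
   support off, VOIDmode is returned and the suffix is not recognized.  */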
37268 /* Target hook for invalid_arg_for_unprototyped_fn. */
37269 static const char *
37270 invalid_arg_for_unprototyped_fn (const_tree typelist, const_tree funcdecl, const_tree val)
37272 return (!rs6000_darwin64_abi
37273 && typelist == 0
37274 && TREE_CODE (TREE_TYPE (val)) == VECTOR_TYPE
37275 && (funcdecl == NULL_TREE
37276 || (TREE_CODE (funcdecl) == FUNCTION_DECL
37277 && DECL_BUILT_IN_CLASS (funcdecl) != BUILT_IN_MD)))
37278 ? N_("AltiVec argument passed to unprototyped function")
37279 : NULL;
37282 /* For TARGET_SECURE_PLT 32-bit PIC code we can save PIC register
37283 setup by using __stack_chk_fail_local hidden function instead of
37284 calling __stack_chk_fail directly. Otherwise it is better to call
37285 __stack_chk_fail directly. */
37287 static tree ATTRIBUTE_UNUSED
37288 rs6000_stack_protect_fail (void)
37290 return (DEFAULT_ABI == ABI_V4 && TARGET_SECURE_PLT && flag_pic)
37291 ? default_hidden_stack_protect_fail ()
37292 : default_external_stack_protect_fail ();
37295 void
37296 rs6000_final_prescan_insn (rtx_insn *insn, rtx *operand ATTRIBUTE_UNUSED,
37297 int num_operands ATTRIBUTE_UNUSED)
37299 if (rs6000_warn_cell_microcode)
37301 const char *temp;
37302 int insn_code_number = recog_memoized (insn);
37303 location_t location = INSN_LOCATION (insn);
37305 /* Punt on insns we cannot recognize. */
37306 if (insn_code_number < 0)
37307 return;
37309 temp = get_insn_template (insn_code_number, insn);
37311 if (get_attr_cell_micro (insn) == CELL_MICRO_ALWAYS)
37312 warning_at (location, OPT_mwarn_cell_microcode,
37313 "emitting microcode insn %s\t[%s] #%d",
37314 temp, insn_data[INSN_CODE (insn)].name, INSN_UID (insn));
37315 else if (get_attr_cell_micro (insn) == CELL_MICRO_CONDITIONAL)
37316 warning_at (location, OPT_mwarn_cell_microcode,
37317 "emitting conditional microcode insn %s\t[%s] #%d",
37318 temp, insn_data[INSN_CODE (insn)].name, INSN_UID (insn));
37322 /* Implement the TARGET_ASAN_SHADOW_OFFSET hook. */
37324 #if TARGET_ELF
37325 static unsigned HOST_WIDE_INT
37326 rs6000_asan_shadow_offset (void)
37328 return (unsigned HOST_WIDE_INT) 1 << (TARGET_64BIT ? 41 : 29);
37330 #endif
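/* Editorial sketch: with the offset above, the 64-bit ELF ASan shadow
   mapping becomes

     shadow = (addr >> 3) + ((unsigned HOST_WIDE_INT) 1 << 41);

   (the 3-bit shift is ASan's default shadow scale); 32-bit uses 1 << 29.  */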
37332 /* Mask options that we want to support inside of attribute((target)) and
37333 #pragma GCC target operations. Note, we do not include things like
37334 64/32-bit, endianness, hard/soft floating point, etc. that would have
37335 different calling sequences. */
37337 struct rs6000_opt_mask {
37338 const char *name; /* option name */
37339 HOST_WIDE_INT mask; /* mask to set */
37340 bool invert; /* invert sense of mask */
37341 bool valid_target; /* option is a target option */
37344 static struct rs6000_opt_mask const rs6000_opt_masks[] =
37346 { "altivec", OPTION_MASK_ALTIVEC, false, true },
37347 { "cmpb", OPTION_MASK_CMPB, false, true },
37348 { "crypto", OPTION_MASK_CRYPTO, false, true },
37349 { "direct-move", OPTION_MASK_DIRECT_MOVE, false, true },
37350 { "dlmzb", OPTION_MASK_DLMZB, false, true },
37351 { "efficient-unaligned-vsx", OPTION_MASK_EFFICIENT_UNALIGNED_VSX,
37352 false, true },
37353 { "float128", OPTION_MASK_FLOAT128_KEYWORD, false, false },
37354 { "float128-type", OPTION_MASK_FLOAT128_TYPE, false, false },
37355 { "float128-hardware", OPTION_MASK_FLOAT128_HW, false, false },
37356 { "fprnd", OPTION_MASK_FPRND, false, true },
37357 { "hard-dfp", OPTION_MASK_DFP, false, true },
37358 { "htm", OPTION_MASK_HTM, false, true },
37359 { "isel", OPTION_MASK_ISEL, false, true },
37360 { "mfcrf", OPTION_MASK_MFCRF, false, true },
37361 { "mfpgpr", OPTION_MASK_MFPGPR, false, true },
37362 { "modulo", OPTION_MASK_MODULO, false, true },
37363 { "mulhw", OPTION_MASK_MULHW, false, true },
37364 { "multiple", OPTION_MASK_MULTIPLE, false, true },
37365 { "popcntb", OPTION_MASK_POPCNTB, false, true },
37366 { "popcntd", OPTION_MASK_POPCNTD, false, true },
37367 { "power8-fusion", OPTION_MASK_P8_FUSION, false, true },
37368 { "power8-fusion-sign", OPTION_MASK_P8_FUSION_SIGN, false, true },
37369 { "power8-vector", OPTION_MASK_P8_VECTOR, false, true },
37370 { "power9-dform-scalar", OPTION_MASK_P9_DFORM_SCALAR, false, true },
37371 { "power9-dform-vector", OPTION_MASK_P9_DFORM_VECTOR, false, true },
37372 { "power9-fusion", OPTION_MASK_P9_FUSION, false, true },
37373 { "power9-minmax", OPTION_MASK_P9_MINMAX, false, true },
37374 { "power9-misc", OPTION_MASK_P9_MISC, false, true },
37375 { "power9-vector", OPTION_MASK_P9_VECTOR, false, true },
37376 { "powerpc-gfxopt", OPTION_MASK_PPC_GFXOPT, false, true },
37377 { "powerpc-gpopt", OPTION_MASK_PPC_GPOPT, false, true },
37378 { "quad-memory", OPTION_MASK_QUAD_MEMORY, false, true },
37379 { "quad-memory-atomic", OPTION_MASK_QUAD_MEMORY_ATOMIC, false, true },
37380 { "recip-precision", OPTION_MASK_RECIP_PRECISION, false, true },
37381 { "save-toc-indirect", OPTION_MASK_SAVE_TOC_INDIRECT, false, true },
37382 { "string", OPTION_MASK_STRING, false, true },
37383 { "toc-fusion", OPTION_MASK_TOC_FUSION, false, true },
37384 { "update", OPTION_MASK_NO_UPDATE, true , true },
37385 { "upper-regs-di", OPTION_MASK_UPPER_REGS_DI, false, true },
37386 { "upper-regs-df", OPTION_MASK_UPPER_REGS_DF, false, true },
37387 { "upper-regs-sf", OPTION_MASK_UPPER_REGS_SF, false, true },
37388 { "vsx", OPTION_MASK_VSX, false, true },
37389 { "vsx-timode", OPTION_MASK_VSX_TIMODE, false, true },
37390 #ifdef OPTION_MASK_64BIT
37391 #if TARGET_AIX_OS
37392 { "aix64", OPTION_MASK_64BIT, false, false },
37393 { "aix32", OPTION_MASK_64BIT, true, false },
37394 #else
37395 { "64", OPTION_MASK_64BIT, false, false },
37396 { "32", OPTION_MASK_64BIT, true, false },
37397 #endif
37398 #endif
37399 #ifdef OPTION_MASK_EABI
37400 { "eabi", OPTION_MASK_EABI, false, false },
37401 #endif
37402 #ifdef OPTION_MASK_LITTLE_ENDIAN
37403 { "little", OPTION_MASK_LITTLE_ENDIAN, false, false },
37404 { "big", OPTION_MASK_LITTLE_ENDIAN, true, false },
37405 #endif
37406 #ifdef OPTION_MASK_RELOCATABLE
37407 { "relocatable", OPTION_MASK_RELOCATABLE, false, false },
37408 #endif
37409 #ifdef OPTION_MASK_STRICT_ALIGN
37410 { "strict-align", OPTION_MASK_STRICT_ALIGN, false, false },
37411 #endif
37412 { "soft-float", OPTION_MASK_SOFT_FLOAT, false, false },
37413 { "string", OPTION_MASK_STRING, false, false },
37416 /* Builtin mask mapping for printing the flags. */
37417 static struct rs6000_opt_mask const rs6000_builtin_mask_names[] =
37419 { "altivec", RS6000_BTM_ALTIVEC, false, false },
37420 { "vsx", RS6000_BTM_VSX, false, false },
37421 { "spe", RS6000_BTM_SPE, false, false },
37422 { "paired", RS6000_BTM_PAIRED, false, false },
37423 { "fre", RS6000_BTM_FRE, false, false },
37424 { "fres", RS6000_BTM_FRES, false, false },
37425 { "frsqrte", RS6000_BTM_FRSQRTE, false, false },
37426 { "frsqrtes", RS6000_BTM_FRSQRTES, false, false },
37427 { "popcntd", RS6000_BTM_POPCNTD, false, false },
37428 { "cell", RS6000_BTM_CELL, false, false },
37429 { "power8-vector", RS6000_BTM_P8_VECTOR, false, false },
37430 { "power9-vector", RS6000_BTM_P9_VECTOR, false, false },
37431 { "power9-misc", RS6000_BTM_P9_MISC, false, false },
37432 { "crypto", RS6000_BTM_CRYPTO, false, false },
37433 { "htm", RS6000_BTM_HTM, false, false },
37434 { "hard-dfp", RS6000_BTM_DFP, false, false },
37435 { "hard-float", RS6000_BTM_HARD_FLOAT, false, false },
37436 { "long-double-128", RS6000_BTM_LDBL128, false, false },
37437 { "float128", RS6000_BTM_FLOAT128, false, false },
37440 /* Option variables that we want to support inside attribute((target)) and
37441 #pragma GCC target operations. */
37443 struct rs6000_opt_var {
37444 const char *name; /* option name */
37445 size_t global_offset; /* offset of the option in global_options. */
37446 size_t target_offset; /* offset of the option in target options. */
37449 static struct rs6000_opt_var const rs6000_opt_vars[] =
37451 { "friz",
37452 offsetof (struct gcc_options, x_TARGET_FRIZ),
37453 offsetof (struct cl_target_option, x_TARGET_FRIZ), },
37454 { "avoid-indexed-addresses",
37455 offsetof (struct gcc_options, x_TARGET_AVOID_XFORM),
37456 offsetof (struct cl_target_option, x_TARGET_AVOID_XFORM) },
37457 { "paired",
37458 offsetof (struct gcc_options, x_rs6000_paired_float),
37459 offsetof (struct cl_target_option, x_rs6000_paired_float), },
37460 { "longcall",
37461 offsetof (struct gcc_options, x_rs6000_default_long_calls),
37462 offsetof (struct cl_target_option, x_rs6000_default_long_calls), },
37463 { "optimize-swaps",
37464 offsetof (struct gcc_options, x_rs6000_optimize_swaps),
37465 offsetof (struct cl_target_option, x_rs6000_optimize_swaps), },
37466 { "allow-movmisalign",
37467 offsetof (struct gcc_options, x_TARGET_ALLOW_MOVMISALIGN),
37468 offsetof (struct cl_target_option, x_TARGET_ALLOW_MOVMISALIGN), },
37469 { "allow-df-permute",
37470 offsetof (struct gcc_options, x_TARGET_ALLOW_DF_PERMUTE),
37471 offsetof (struct cl_target_option, x_TARGET_ALLOW_DF_PERMUTE), },
37472 { "sched-groups",
37473 offsetof (struct gcc_options, x_TARGET_SCHED_GROUPS),
37474 offsetof (struct cl_target_option, x_TARGET_SCHED_GROUPS), },
37475 { "always-hint",
37476 offsetof (struct gcc_options, x_TARGET_ALWAYS_HINT),
37477 offsetof (struct cl_target_option, x_TARGET_ALWAYS_HINT), },
37478 { "align-branch-targets",
37479 offsetof (struct gcc_options, x_TARGET_ALIGN_BRANCH_TARGETS),
37480 offsetof (struct cl_target_option, x_TARGET_ALIGN_BRANCH_TARGETS), },
37481 { "vectorize-builtins",
37482 offsetof (struct gcc_options, x_TARGET_VECTORIZE_BUILTINS),
37483 offsetof (struct cl_target_option, x_TARGET_VECTORIZE_BUILTINS), },
37484 { "tls-markers",
37485 offsetof (struct gcc_options, x_tls_markers),
37486 offsetof (struct cl_target_option, x_tls_markers), },
37487 { "sched-prolog",
37488 offsetof (struct gcc_options, x_TARGET_SCHED_PROLOG),
37489 offsetof (struct cl_target_option, x_TARGET_SCHED_PROLOG), },
37490 { "sched-epilog",
37491 offsetof (struct gcc_options, x_TARGET_SCHED_PROLOG),
37492 offsetof (struct cl_target_option, x_TARGET_SCHED_PROLOG), },
37493 { "gen-cell-microcode",
37494 offsetof (struct gcc_options, x_rs6000_gen_cell_microcode),
37495 offsetof (struct cl_target_option, x_rs6000_gen_cell_microcode), },
37496 { "warn-cell-microcode",
37497 offsetof (struct gcc_options, x_rs6000_warn_cell_microcode),
37498 offsetof (struct cl_target_option, x_rs6000_warn_cell_microcode), },
37501 /* Inner function to handle attribute((target("..."))) and #pragma GCC target
37502 parsing. Return true if there were no errors. */
37504 static bool
37505 rs6000_inner_target_options (tree args, bool attr_p)
37507 bool ret = true;
37509 if (args == NULL_TREE)
37512 else if (TREE_CODE (args) == STRING_CST)
37514 char *p = ASTRDUP (TREE_STRING_POINTER (args));
37515 char *q;
37517 while ((q = strtok (p, ",")) != NULL)
37519 bool error_p = false;
37520 bool not_valid_p = false;
37521 const char *cpu_opt = NULL;
37523 p = NULL;
37524 if (strncmp (q, "cpu=", 4) == 0)
37526 int cpu_index = rs6000_cpu_name_lookup (q+4);
37527 if (cpu_index >= 0)
37528 rs6000_cpu_index = cpu_index;
37529 else
37531 error_p = true;
37532 cpu_opt = q+4;
37535 else if (strncmp (q, "tune=", 5) == 0)
37537 int tune_index = rs6000_cpu_name_lookup (q+5);
37538 if (tune_index >= 0)
37539 rs6000_tune_index = tune_index;
37540 else
37542 error_p = true;
37543 cpu_opt = q+5;
37546 else
37548 size_t i;
37549 bool invert = false;
37550 char *r = q;
37552 error_p = true;
37553 if (strncmp (r, "no-", 3) == 0)
37555 invert = true;
37556 r += 3;
37559 for (i = 0; i < ARRAY_SIZE (rs6000_opt_masks); i++)
37560 if (strcmp (r, rs6000_opt_masks[i].name) == 0)
37562 HOST_WIDE_INT mask = rs6000_opt_masks[i].mask;
37564 if (!rs6000_opt_masks[i].valid_target)
37565 not_valid_p = true;
37566 else
37568 error_p = false;
37569 rs6000_isa_flags_explicit |= mask;
37571 /* VSX needs altivec, so -mvsx automagically sets
37572 altivec and disables -mavoid-indexed-addresses. */
37573 if (!invert)
37575 if (mask == OPTION_MASK_VSX)
37577 mask |= OPTION_MASK_ALTIVEC;
37578 TARGET_AVOID_XFORM = 0;
37582 if (rs6000_opt_masks[i].invert)
37583 invert = !invert;
37585 if (invert)
37586 rs6000_isa_flags &= ~mask;
37587 else
37588 rs6000_isa_flags |= mask;
37590 break;
37593 if (error_p && !not_valid_p)
37595 for (i = 0; i < ARRAY_SIZE (rs6000_opt_vars); i++)
37596 if (strcmp (r, rs6000_opt_vars[i].name) == 0)
37598 size_t j = rs6000_opt_vars[i].global_offset;
37599 *((int *) ((char *)&global_options + j)) = !invert;
37600 error_p = false;
37601 not_valid_p = false;
37602 break;
37607 if (error_p)
37609 const char *eprefix, *esuffix;
37611 ret = false;
37612 if (attr_p)
37614 eprefix = "__attribute__((__target__(";
37615 esuffix = ")))";
37617 else
37619 eprefix = "#pragma GCC target ";
37620 esuffix = "";
37623 if (cpu_opt)
37624 error ("invalid cpu \"%s\" for %s\"%s\"%s", cpu_opt, eprefix,
37625 q, esuffix);
37626 else if (not_valid_p)
37627 error ("%s\"%s\"%s is not allowed", eprefix, q, esuffix);
37628 else
37629 error ("%s\"%s\"%s is invalid", eprefix, q, esuffix);
37634 else if (TREE_CODE (args) == TREE_LIST)
37638 tree value = TREE_VALUE (args);
37639 if (value)
37641 bool ret2 = rs6000_inner_target_options (value, attr_p);
37642 if (!ret2)
37643 ret = false;
37645 args = TREE_CHAIN (args);
37647 while (args != NULL_TREE);
37650 else
37651 gcc_unreachable ();
37653 return ret;
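/* Editorial usage sketch: the source-level forms whose strings reach this
   parser (the names below are arbitrary examples).  */
#if 0
__attribute__((__target__("cpu=power8,vsx,no-htm")))
int foo (void);				/* attr_p == true path.  */

#pragma GCC target ("tune=power9,altivec")	/* attr_p == false path.  */
#endif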
37656 /* Print out the target options as a list for -mdebug=target. */
37658 static void
37659 rs6000_debug_target_options (tree args, const char *prefix)
37661 if (args == NULL_TREE)
37662 fprintf (stderr, "%s<NULL>", prefix);
37664 else if (TREE_CODE (args) == STRING_CST)
37666 char *p = ASTRDUP (TREE_STRING_POINTER (args));
37667 char *q;
37669 while ((q = strtok (p, ",")) != NULL)
37671 p = NULL;
37672 fprintf (stderr, "%s\"%s\"", prefix, q);
37673 prefix = ", ";
37677 else if (TREE_CODE (args) == TREE_LIST)
37681 tree value = TREE_VALUE (args);
37682 if (value)
37684 rs6000_debug_target_options (value, prefix);
37685 prefix = ", ";
37687 args = TREE_CHAIN (args);
37689 while (args != NULL_TREE);
37692 else
37693 gcc_unreachable ();
37695 return;
37699 /* Hook to validate attribute((target("..."))). */
37701 static bool
37702 rs6000_valid_attribute_p (tree fndecl,
37703 tree ARG_UNUSED (name),
37704 tree args,
37705 int flags)
37707 struct cl_target_option cur_target;
37708 bool ret;
37709 tree old_optimize = build_optimization_node (&global_options);
37710 tree new_target, new_optimize;
37711 tree func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl);
37713 gcc_assert ((fndecl != NULL_TREE) && (args != NULL_TREE));
37715 if (TARGET_DEBUG_TARGET)
37717 tree tname = DECL_NAME (fndecl);
37718 fprintf (stderr, "\n==================== rs6000_valid_attribute_p:\n");
37719 if (tname)
37720 fprintf (stderr, "function: %.*s\n",
37721 (int) IDENTIFIER_LENGTH (tname),
37722 IDENTIFIER_POINTER (tname));
37723 else
37724 fprintf (stderr, "function: unknown\n");
37726 fprintf (stderr, "args:");
37727 rs6000_debug_target_options (args, " ");
37728 fprintf (stderr, "\n");
37730 if (flags)
37731 fprintf (stderr, "flags: 0x%x\n", flags);
37733 fprintf (stderr, "--------------------\n");
37736 old_optimize = build_optimization_node (&global_options);
37737 func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl);
37739 /* If the function changed the optimization levels as well as setting target
37740 options, start with the optimizations specified. */
37741 if (func_optimize && func_optimize != old_optimize)
37742 cl_optimization_restore (&global_options,
37743 TREE_OPTIMIZATION (func_optimize));
37745 /* The target attributes may also change some optimization flags, so update
37746 the optimization options if necessary. */
37747 cl_target_option_save (&cur_target, &global_options);
37748 rs6000_cpu_index = rs6000_tune_index = -1;
37749 ret = rs6000_inner_target_options (args, true);
37751 /* Set up any additional state. */
37752 if (ret)
37754 ret = rs6000_option_override_internal (false);
37755 new_target = build_target_option_node (&global_options);
37757 else
37758 new_target = NULL;
37760 new_optimize = build_optimization_node (&global_options);
37762 if (!new_target)
37763 ret = false;
37765 else if (fndecl)
37767 DECL_FUNCTION_SPECIFIC_TARGET (fndecl) = new_target;
37769 if (old_optimize != new_optimize)
37770 DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl) = new_optimize;
37773 cl_target_option_restore (&global_options, &cur_target);
37775 if (old_optimize != new_optimize)
37776 cl_optimization_restore (&global_options,
37777 TREE_OPTIMIZATION (old_optimize));
37779 return ret;
37783 /* Hook to validate the current #pragma GCC target and set the state, and
37784 update the macros based on what was changed. If ARGS is NULL, then
37785 POP_TARGET is used to reset the options. */
37787 bool
37788 rs6000_pragma_target_parse (tree args, tree pop_target)
37790 tree prev_tree = build_target_option_node (&global_options);
37791 tree cur_tree;
37792 struct cl_target_option *prev_opt, *cur_opt;
37793 HOST_WIDE_INT prev_flags, cur_flags, diff_flags;
37794 HOST_WIDE_INT prev_bumask, cur_bumask, diff_bumask;
37796 if (TARGET_DEBUG_TARGET)
37798 fprintf (stderr, "\n==================== rs6000_pragma_target_parse\n");
37799 fprintf (stderr, "args:");
37800 rs6000_debug_target_options (args, " ");
37801 fprintf (stderr, "\n");
37803 if (pop_target)
37805 fprintf (stderr, "pop_target:\n");
37806 debug_tree (pop_target);
37808 else
37809 fprintf (stderr, "pop_target: <NULL>\n");
37811 fprintf (stderr, "--------------------\n");
37814 if (! args)
37816 cur_tree = ((pop_target)
37817 ? pop_target
37818 : target_option_default_node);
37819 cl_target_option_restore (&global_options,
37820 TREE_TARGET_OPTION (cur_tree));
37822 else
37824 rs6000_cpu_index = rs6000_tune_index = -1;
37825 if (!rs6000_inner_target_options (args, false)
37826 || !rs6000_option_override_internal (false)
37827 || (cur_tree = build_target_option_node (&global_options))
37828 == NULL_TREE)
37830 if (TARGET_DEBUG_BUILTIN || TARGET_DEBUG_TARGET)
37831 fprintf (stderr, "invalid pragma\n");
37833 return false;
37837 target_option_current_node = cur_tree;
37839 /* If we have the preprocessor linked in (i.e. C or C++ languages), possibly
37840 change the macros that are defined. */
37841 if (rs6000_target_modify_macros_ptr)
37843 prev_opt = TREE_TARGET_OPTION (prev_tree);
37844 prev_bumask = prev_opt->x_rs6000_builtin_mask;
37845 prev_flags = prev_opt->x_rs6000_isa_flags;
37847 cur_opt = TREE_TARGET_OPTION (cur_tree);
37848 cur_flags = cur_opt->x_rs6000_isa_flags;
37849 cur_bumask = cur_opt->x_rs6000_builtin_mask;
37851 diff_bumask = (prev_bumask ^ cur_bumask);
37852 diff_flags = (prev_flags ^ cur_flags);
37854 if ((diff_flags != 0) || (diff_bumask != 0))
37856 /* Delete old macros. */
37857 rs6000_target_modify_macros_ptr (false,
37858 prev_flags & diff_flags,
37859 prev_bumask & diff_bumask);
37861 /* Define new macros. */
37862 rs6000_target_modify_macros_ptr (true,
37863 cur_flags & diff_flags,
37864 cur_bumask & diff_bumask);
37868 return true;
37872 /* Remember the last target of rs6000_set_current_function. */
37873 static GTY(()) tree rs6000_previous_fndecl;
37875 /* Establish appropriate back-end context for processing the function
37876 FNDECL. The argument might be NULL to indicate processing at top
37877 level, outside of any function scope. */
37878 static void
37879 rs6000_set_current_function (tree fndecl)
37881 tree old_tree = (rs6000_previous_fndecl
37882 ? DECL_FUNCTION_SPECIFIC_TARGET (rs6000_previous_fndecl)
37883 : NULL_TREE);
37885 tree new_tree = (fndecl
37886 ? DECL_FUNCTION_SPECIFIC_TARGET (fndecl)
37887 : NULL_TREE);
37889 if (TARGET_DEBUG_TARGET)
37891 bool print_final = false;
37892 fprintf (stderr, "\n==================== rs6000_set_current_function");
37894 if (fndecl)
37895 fprintf (stderr, ", fndecl %s (%p)",
37896 (DECL_NAME (fndecl)
37897 ? IDENTIFIER_POINTER (DECL_NAME (fndecl))
37898 : "<unknown>"), (void *)fndecl);
37900 if (rs6000_previous_fndecl)
37901 fprintf (stderr, ", prev_fndecl (%p)", (void *)rs6000_previous_fndecl);
37903 fprintf (stderr, "\n");
37904 if (new_tree)
37906 fprintf (stderr, "\nnew fndecl target specific options:\n");
37907 debug_tree (new_tree);
37908 print_final = true;
37911 if (old_tree)
37913 fprintf (stderr, "\nold fndecl target specific options:\n");
37914 debug_tree (old_tree);
37915 print_final = true;
37918 if (print_final)
37919 fprintf (stderr, "--------------------\n");
37922 /* Only change the context if the function changes. This hook is called
37923 several times in the course of compiling a function, and we don't want to
37924 slow things down too much or call target_reinit when it isn't safe. */
37925 if (fndecl && fndecl != rs6000_previous_fndecl)
37927 rs6000_previous_fndecl = fndecl;
37928 if (old_tree == new_tree)
37931 else if (new_tree && new_tree != target_option_default_node)
37933 cl_target_option_restore (&global_options,
37934 TREE_TARGET_OPTION (new_tree));
37935 if (TREE_TARGET_GLOBALS (new_tree))
37936 restore_target_globals (TREE_TARGET_GLOBALS (new_tree));
37937 else
37938 TREE_TARGET_GLOBALS (new_tree)
37939 = save_target_globals_default_opts ();
37942 else if (old_tree && old_tree != target_option_default_node)
37944 new_tree = target_option_current_node;
37945 cl_target_option_restore (&global_options,
37946 TREE_TARGET_OPTION (new_tree));
37947 if (TREE_TARGET_GLOBALS (new_tree))
37948 restore_target_globals (TREE_TARGET_GLOBALS (new_tree));
37949 else if (new_tree == target_option_default_node)
37950 restore_target_globals (&default_target_globals);
37951 else
37952 TREE_TARGET_GLOBALS (new_tree)
37953 = save_target_globals_default_opts ();
37959 /* Save the current options */
37961 static void
37962 rs6000_function_specific_save (struct cl_target_option *ptr,
37963 struct gcc_options *opts)
37965 ptr->x_rs6000_isa_flags = opts->x_rs6000_isa_flags;
37966 ptr->x_rs6000_isa_flags_explicit = opts->x_rs6000_isa_flags_explicit;
37969 /* Restore the current options */
37971 static void
37972 rs6000_function_specific_restore (struct gcc_options *opts,
37973 struct cl_target_option *ptr)
37976 opts->x_rs6000_isa_flags = ptr->x_rs6000_isa_flags;
37977 opts->x_rs6000_isa_flags_explicit = ptr->x_rs6000_isa_flags_explicit;
37978 (void) rs6000_option_override_internal (false);
37981 /* Print the current options */
37983 static void
37984 rs6000_function_specific_print (FILE *file, int indent,
37985 struct cl_target_option *ptr)
37987 rs6000_print_isa_options (file, indent, "Isa options set",
37988 ptr->x_rs6000_isa_flags);
37990 rs6000_print_isa_options (file, indent, "Isa options explicit",
37991 ptr->x_rs6000_isa_flags_explicit);
37994 /* Helper function to print the current isa or misc options on a line. */
37996 static void
37997 rs6000_print_options_internal (FILE *file,
37998 int indent,
37999 const char *string,
38000 HOST_WIDE_INT flags,
38001 const char *prefix,
38002 const struct rs6000_opt_mask *opts,
38003 size_t num_elements)
38005 size_t i;
38006 size_t start_column = 0;
38007 size_t cur_column;
38008 size_t max_column = 120;
38009 size_t prefix_len = strlen (prefix);
38010 size_t comma_len = 0;
38011 const char *comma = "";
38013 if (indent)
38014 start_column += fprintf (file, "%*s", indent, "");
38016 if (!flags)
38018 fprintf (file, DEBUG_FMT_S, string, "<none>");
38019 return;
38022 start_column += fprintf (file, DEBUG_FMT_WX, string, flags);
38024 /* Print the various mask options. */
38025 cur_column = start_column;
38026 for (i = 0; i < num_elements; i++)
38028 bool invert = opts[i].invert;
38029 const char *name = opts[i].name;
38030 const char *no_str = "";
38031 HOST_WIDE_INT mask = opts[i].mask;
38032 size_t len = comma_len + prefix_len + strlen (name);
38034 if (!invert)
38036 if ((flags & mask) == 0)
38038 no_str = "no-";
38039 len += sizeof ("no-") - 1;
38042 flags &= ~mask;
38045 else
38047 if ((flags & mask) != 0)
38049 no_str = "no-";
38050 len += sizeof ("no-") - 1;
38053 flags |= mask;
38056 cur_column += len;
38057 if (cur_column > max_column)
38059 fprintf (file, ", \\\n%*s", (int)start_column, "");
38060 cur_column = start_column + len;
38061 comma = "";
38064 fprintf (file, "%s%s%s%s", comma, prefix, no_str, name);
38065 comma = ", ";
38066 comma_len = sizeof (", ") - 1;
38069 fputs ("\n", file);
38072 /* Helper function to print the current isa options on a line. */
38074 static void
38075 rs6000_print_isa_options (FILE *file, int indent, const char *string,
38076 HOST_WIDE_INT flags)
38078 rs6000_print_options_internal (file, indent, string, flags, "-m",
38079 &rs6000_opt_masks[0],
38080 ARRAY_SIZE (rs6000_opt_masks));
38083 static void
38084 rs6000_print_builtin_options (FILE *file, int indent, const char *string,
38085 HOST_WIDE_INT flags)
38087 rs6000_print_options_internal (file, indent, string, flags, "",
38088 &rs6000_builtin_mask_names[0],
38089 ARRAY_SIZE (rs6000_builtin_mask_names));
38093 /* Hook to determine if one function can safely inline another. */
38095 static bool
38096 rs6000_can_inline_p (tree caller, tree callee)
38098 bool ret = false;
38099 tree caller_tree = DECL_FUNCTION_SPECIFIC_TARGET (caller);
38100 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (callee);
38102 /* If callee has no option attributes, then it is ok to inline. */
38103 if (!callee_tree)
38104 ret = true;
38106 /* If caller has no option attributes, but callee does then it is not ok to
38107 inline. */
38108 else if (!caller_tree)
38109 ret = false;
38111 else
38113 struct cl_target_option *caller_opts = TREE_TARGET_OPTION (caller_tree);
38114 struct cl_target_option *callee_opts = TREE_TARGET_OPTION (callee_tree);
38116 /* Callee's options should be a subset of the caller's, i.e. a vsx function
38117 can inline an altivec function but a non-vsx function can't inline a
38118 vsx function. */
38119 if ((caller_opts->x_rs6000_isa_flags & callee_opts->x_rs6000_isa_flags)
38120 == callee_opts->x_rs6000_isa_flags)
38121 ret = true;
38124 if (TARGET_DEBUG_TARGET)
38125 fprintf (stderr, "rs6000_can_inline_p:, caller %s, callee %s, %s inline\n",
38126 (DECL_NAME (caller)
38127 ? IDENTIFIER_POINTER (DECL_NAME (caller))
38128 : "<unknown>"),
38129 (DECL_NAME (callee)
38130 ? IDENTIFIER_POINTER (DECL_NAME (callee))
38131 : "<unknown>"),
38132 (ret ? "can" : "cannot"));
38134 return ret;
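/* Editorial worked example of the subset test above: a caller compiled
   with -mvsx (whose ISA flags include both VSX and ALTIVEC) may inline a
   callee marked target("altivec"), since caller & callee == callee; the
   reverse fails because the altivec-only caller lacks the VSX bit.  */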
38137 /* Allocate a stack temp and fixup the address so it meets the particular
38138 memory requirements (either offsettable or REG+REG addressing). */
38140 rtx
38141 rs6000_allocate_stack_temp (machine_mode mode,
38142 bool offsettable_p,
38143 bool reg_reg_p)
38145 rtx stack = assign_stack_temp (mode, GET_MODE_SIZE (mode));
38146 rtx addr = XEXP (stack, 0);
38147 int strict_p = (reload_in_progress || reload_completed);
38149 if (!legitimate_indirect_address_p (addr, strict_p))
38151 if (offsettable_p
38152 && !rs6000_legitimate_offset_address_p (mode, addr, strict_p, true))
38153 stack = replace_equiv_address (stack, copy_addr_to_reg (addr));
38155 else if (reg_reg_p && !legitimate_indexed_address_p (addr, strict_p))
38156 stack = replace_equiv_address (stack, copy_addr_to_reg (addr));
38159 return stack;
38162 /* Given a memory reference, if it is not a reg or reg+reg addressing, convert
38163 to such a form to deal with memory reference instructions like STFIWX that
38164 only take reg+reg addressing. */
38166 rtx
38167 rs6000_address_for_fpconvert (rtx x)
38169 int strict_p = (reload_in_progress || reload_completed);
38170 rtx addr;
38172 gcc_assert (MEM_P (x));
38173 addr = XEXP (x, 0);
38174 if (! legitimate_indirect_address_p (addr, strict_p)
38175 && ! legitimate_indexed_address_p (addr, strict_p))
38177 if (GET_CODE (addr) == PRE_INC || GET_CODE (addr) == PRE_DEC)
38179 rtx reg = XEXP (addr, 0);
38180 HOST_WIDE_INT size = GET_MODE_SIZE (GET_MODE (x));
38181 rtx size_rtx = GEN_INT ((GET_CODE (addr) == PRE_DEC) ? -size : size);
38182 gcc_assert (REG_P (reg));
38183 emit_insn (gen_add3_insn (reg, reg, size_rtx));
38184 addr = reg;
38186 else if (GET_CODE (addr) == PRE_MODIFY)
38188 rtx reg = XEXP (addr, 0);
38189 rtx expr = XEXP (addr, 1);
38190 gcc_assert (REG_P (reg));
38191 gcc_assert (GET_CODE (expr) == PLUS);
38192 emit_insn (gen_add3_insn (reg, XEXP (expr, 0), XEXP (expr, 1)));
38193 addr = reg;
38196 x = replace_equiv_address (x, copy_addr_to_reg (addr));
38199 return x;
38202 /* Given a memory reference, if it is not in the form for altivec memory
38203 reference instructions (i.e. reg or reg+reg addressing with AND of -16),
38204 convert to the altivec format. */
38206 rtx
38207 rs6000_address_for_altivec (rtx x)
38209 gcc_assert (MEM_P (x));
38210 if (!altivec_indexed_or_indirect_operand (x, GET_MODE (x)))
38212 rtx addr = XEXP (x, 0);
38213 int strict_p = (reload_in_progress || reload_completed);
38215 if (!legitimate_indexed_address_p (addr, strict_p)
38216 && !legitimate_indirect_address_p (addr, strict_p))
38217 addr = copy_to_mode_reg (Pmode, addr);
38219 addr = gen_rtx_AND (Pmode, addr, GEN_INT (-16));
38220 x = change_address (x, GET_MODE (x), addr);
38223 return x;
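/* Editorial note: the AND with -16 clears the low four address bits,
   mirroring the masking that lvx/stvx perform in hardware; e.g. an
   address of 0x1008 is forced down to the 16-byte boundary 0x1000.  */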
38226 /* Implement TARGET_LEGITIMATE_CONSTANT_P.
38228 On the RS/6000, all integer constants are acceptable, most won't be valid
38229 for particular insns, though. Only easy FP constants are acceptable. */
38231 static bool
38232 rs6000_legitimate_constant_p (machine_mode mode, rtx x)
38234 if (TARGET_ELF && tls_referenced_p (x))
38235 return false;
38237 return ((GET_CODE (x) != CONST_DOUBLE && GET_CODE (x) != CONST_VECTOR)
38238 || GET_MODE (x) == VOIDmode
38239 || (TARGET_POWERPC64 && mode == DImode)
38240 || easy_fp_constant (x, mode)
38241 || easy_vector_constant (x, mode));
38245 /* Return TRUE iff the sequence ending in LAST sets the static chain. */
38247 static bool
38248 chain_already_loaded (rtx_insn *last)
38250 for (; last != NULL; last = PREV_INSN (last))
38252 if (NONJUMP_INSN_P (last))
38254 rtx patt = PATTERN (last);
38256 if (GET_CODE (patt) == SET)
38258 rtx lhs = XEXP (patt, 0);
38260 if (REG_P (lhs) && REGNO (lhs) == STATIC_CHAIN_REGNUM)
38261 return true;
38265 return false;
38268 /* Expand code to perform a call under the AIX or ELFv2 ABI. */
38270 void
38271 rs6000_call_aix (rtx value, rtx func_desc, rtx flag, rtx cookie)
38273 const bool direct_call_p
38274 = GET_CODE (func_desc) == SYMBOL_REF && SYMBOL_REF_FUNCTION_P (func_desc);
38275 rtx toc_reg = gen_rtx_REG (Pmode, TOC_REGNUM);
38276 rtx toc_load = NULL_RTX;
38277 rtx toc_restore = NULL_RTX;
38278 rtx func_addr;
38279 rtx abi_reg = NULL_RTX;
38280 rtx call[4];
38281 int n_call;
38282 rtx insn;
38284 /* Handle longcall attributes. */
38285 if (INTVAL (cookie) & CALL_LONG)
38286 func_desc = rs6000_longcall_ref (func_desc);
38288 /* Handle indirect calls. */
38289 if (GET_CODE (func_desc) != SYMBOL_REF
38290 || (DEFAULT_ABI == ABI_AIX && !SYMBOL_REF_FUNCTION_P (func_desc)))
38292 /* Save the TOC into its reserved slot before the call,
38293 and prepare to restore it after the call. */
38294 rtx stack_ptr = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
38295 rtx stack_toc_offset = GEN_INT (RS6000_TOC_SAVE_SLOT);
38296 rtx stack_toc_mem = gen_frame_mem (Pmode,
38297 gen_rtx_PLUS (Pmode, stack_ptr,
38298 stack_toc_offset));
38299 rtx stack_toc_unspec = gen_rtx_UNSPEC (Pmode,
38300 gen_rtvec (1, stack_toc_offset),
38301 UNSPEC_TOCSLOT);
38302 toc_restore = gen_rtx_SET (toc_reg, stack_toc_unspec);
38304 /* Can we optimize saving the TOC in the prologue or
38305 do we need to do it at every call? */
38306 if (TARGET_SAVE_TOC_INDIRECT && !cfun->calls_alloca)
38307 cfun->machine->save_toc_in_prologue = true;
38308 else
38310 MEM_VOLATILE_P (stack_toc_mem) = 1;
38311 emit_move_insn (stack_toc_mem, toc_reg);
38314 if (DEFAULT_ABI == ABI_ELFv2)
38316 /* A function pointer in the ELFv2 ABI is just a plain address, but
38317 the ABI requires it to be loaded into r12 before the call. */
38318 func_addr = gen_rtx_REG (Pmode, 12);
38319 emit_move_insn (func_addr, func_desc);
38320 abi_reg = func_addr;
38322 else
38324 /* A function pointer under AIX is a pointer to a data area whose
38325 first word contains the actual address of the function, whose
38326 second word contains a pointer to its TOC, and whose third word
38327 contains a value to place in the static chain register (r11).
38328 Note that if we load the static chain, our "trampoline" need
38329 not have any executable code. */
38331 /* Load up address of the actual function. */
38332 func_desc = force_reg (Pmode, func_desc);
38333 func_addr = gen_reg_rtx (Pmode);
38334 emit_move_insn (func_addr, gen_rtx_MEM (Pmode, func_desc));
38336 /* Prepare to load the TOC of the called function. Note that the
38337 TOC load must happen immediately before the actual call so
38338 that unwinding the TOC registers works correctly. See the
38339 comment in frob_update_context. */
38340 rtx func_toc_offset = GEN_INT (GET_MODE_SIZE (Pmode));
38341 rtx func_toc_mem = gen_rtx_MEM (Pmode,
38342 gen_rtx_PLUS (Pmode, func_desc,
38343 func_toc_offset));
38344 toc_load = gen_rtx_USE (VOIDmode, func_toc_mem);
38346 /* If we have a static chain, load it up. But, if the call was
38347 originally direct, the 3rd word has not been written since no
38348 trampoline has been built, so we ought not to load it, lest we
38349 override a static chain value. */
38350 if (!direct_call_p
38351 && TARGET_POINTERS_TO_NESTED_FUNCTIONS
38352 && !chain_already_loaded (get_current_sequence ()->next->last))
38354 rtx sc_reg = gen_rtx_REG (Pmode, STATIC_CHAIN_REGNUM);
38355 rtx func_sc_offset = GEN_INT (2 * GET_MODE_SIZE (Pmode));
38356 rtx func_sc_mem = gen_rtx_MEM (Pmode,
38357 gen_rtx_PLUS (Pmode, func_desc,
38358 func_sc_offset));
38359 emit_move_insn (sc_reg, func_sc_mem);
38360 abi_reg = sc_reg;
38364 else
38366 /* Direct calls use the TOC: for local calls, the callee will
38367 assume the TOC register is set; for non-local calls, the
38368 PLT stub needs the TOC register. */
38369 abi_reg = toc_reg;
38370 func_addr = func_desc;
38373 /* Create the call. */
38374 call[0] = gen_rtx_CALL (VOIDmode, gen_rtx_MEM (SImode, func_addr), flag);
38375 if (value != NULL_RTX)
38376 call[0] = gen_rtx_SET (value, call[0]);
38377 n_call = 1;
38379 if (toc_load)
38380 call[n_call++] = toc_load;
38381 if (toc_restore)
38382 call[n_call++] = toc_restore;
38384 call[n_call++] = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (Pmode, LR_REGNO));
38386 insn = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (n_call, call));
38387 insn = emit_call_insn (insn);
38389 /* Mention all registers defined by the ABI to hold information
38390 as uses in CALL_INSN_FUNCTION_USAGE. */
38391 if (abi_reg)
38392 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), abi_reg);
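/* Editorial sketch (shape only, not from the original source) of the RTL
   emitted for an indirect ELFv2 call that returns a value: a store of r2
   to the TOC save slot, then

     (parallel [(set value (call (mem:SI r12) flag))
                (set toc_reg (unspec [offset] UNSPEC_TOCSLOT))
                (clobber (reg LR_REGNO))])

   with r12 recorded in CALL_INSN_FUNCTION_USAGE as abi_reg.  */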
38395 /* Expand code to perform a sibling call under the AIX or ELFv2 ABI. */
38397 void
38398 rs6000_sibcall_aix (rtx value, rtx func_desc, rtx flag, rtx cookie)
38400 rtx call[2];
38401 rtx insn;
38403 gcc_assert (INTVAL (cookie) == 0);
38405 /* Create the call. */
38406 call[0] = gen_rtx_CALL (VOIDmode, gen_rtx_MEM (SImode, func_desc), flag);
38407 if (value != NULL_RTX)
38408 call[0] = gen_rtx_SET (value, call[0]);
38410 call[1] = simple_return_rtx;
38412 insn = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (2, call));
38413 insn = emit_call_insn (insn);
38415 /* Note use of the TOC register. */
38416 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), gen_rtx_REG (Pmode, TOC_REGNUM));
38419 /* Return whether we need to always update the saved TOC pointer when we update
38420 the stack pointer. */
38422 static bool
38423 rs6000_save_toc_in_prologue_p (void)
38425 return (cfun && cfun->machine && cfun->machine->save_toc_in_prologue);
38428 #ifdef HAVE_GAS_HIDDEN
38429 # define USE_HIDDEN_LINKONCE 1
38430 #else
38431 # define USE_HIDDEN_LINKONCE 0
38432 #endif
38434 /* Fills in the label name that should be used for a 476 link stack thunk. */
38436 void
38437 get_ppc476_thunk_name (char name[32])
38439 gcc_assert (TARGET_LINK_STACK);
38441 if (USE_HIDDEN_LINKONCE)
38442 sprintf (name, "__ppc476.get_thunk");
38443 else
38444 ASM_GENERATE_INTERNAL_LABEL (name, "LPPC476_", 0);
38447 /* This function emits the simple thunk routine that is used to preserve
38448 the link stack on the 476 cpu. */
38450 static void rs6000_code_end (void) ATTRIBUTE_UNUSED;
38451 static void
38452 rs6000_code_end (void)
38454 char name[32];
38455 tree decl;
38457 if (!TARGET_LINK_STACK)
38458 return;
38460 get_ppc476_thunk_name (name);
38462 decl = build_decl (BUILTINS_LOCATION, FUNCTION_DECL, get_identifier (name),
38463 build_function_type_list (void_type_node, NULL_TREE));
38464 DECL_RESULT (decl) = build_decl (BUILTINS_LOCATION, RESULT_DECL,
38465 NULL_TREE, void_type_node);
38466 TREE_PUBLIC (decl) = 1;
38467 TREE_STATIC (decl) = 1;
38469 #if RS6000_WEAK
38470 if (USE_HIDDEN_LINKONCE)
38472 cgraph_node::create (decl)->set_comdat_group (DECL_ASSEMBLER_NAME (decl));
38473 targetm.asm_out.unique_section (decl, 0);
38474 switch_to_section (get_named_section (decl, NULL, 0));
38475 DECL_WEAK (decl) = 1;
38476 ASM_WEAKEN_DECL (asm_out_file, decl, name, 0);
38477 targetm.asm_out.globalize_label (asm_out_file, name);
38478 targetm.asm_out.assemble_visibility (decl, VISIBILITY_HIDDEN);
38479 ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
38481 else
38482 #endif
38484 switch_to_section (text_section);
38485 ASM_OUTPUT_LABEL (asm_out_file, name);
38488 DECL_INITIAL (decl) = make_node (BLOCK);
38489 current_function_decl = decl;
38490 allocate_struct_function (decl, false);
38491 init_function_start (decl);
38492 first_function_block_is_cold = false;
38493 /* Make sure unwind info is emitted for the thunk if needed. */
38494 final_start_function (emit_barrier (), asm_out_file, 1);
38496 fputs ("\tblr\n", asm_out_file);
38498 final_end_function ();
38499 init_insn_lengths ();
38500 free_after_compilation (cfun);
38501 set_cfun (NULL);
38502 current_function_decl = NULL;
38505 /* Add r30 to hard reg set if the prologue sets it up and it is not
38506 pic_offset_table_rtx. */
38508 static void
38509 rs6000_set_up_by_prologue (struct hard_reg_set_container *set)
38511 if (!TARGET_SINGLE_PIC_BASE
38512 && TARGET_TOC
38513 && TARGET_MINIMAL_TOC
38514 && get_pool_size () != 0)
38515 add_to_hard_reg_set (&set->set, Pmode, RS6000_PIC_OFFSET_TABLE_REGNUM);
38516 if (cfun->machine->split_stack_argp_used)
38517 add_to_hard_reg_set (&set->set, Pmode, 12);
38521 /* Helper function for rs6000_split_logical to emit a logical instruction after
38522 splitting the operation into single GPR registers.
38524 DEST is the destination register.
38525 OP1 and OP2 are the input source registers.
38526 CODE is the base operation (AND, IOR, XOR, NOT).
38527 MODE is the machine mode.
38528 If COMPLEMENT_FINAL_P is true, wrap the whole operation with NOT.
38529 If COMPLEMENT_OP1_P is true, wrap operand1 with NOT.
38530 If COMPLEMENT_OP2_P is true, wrap operand2 with NOT. */
38532 static void
38533 rs6000_split_logical_inner (rtx dest,
38534 rtx op1,
38535 rtx op2,
38536 enum rtx_code code,
38537 machine_mode mode,
38538 bool complement_final_p,
38539 bool complement_op1_p,
38540 bool complement_op2_p)
38542 rtx bool_rtx;
38544 /* Optimize AND of 0/0xffffffff and IOR/XOR of 0. */
38545 if (op2 && GET_CODE (op2) == CONST_INT
38546 && (mode == SImode || (mode == DImode && TARGET_POWERPC64))
38547 && !complement_final_p && !complement_op1_p && !complement_op2_p)
38549 HOST_WIDE_INT mask = GET_MODE_MASK (mode);
38550 HOST_WIDE_INT value = INTVAL (op2) & mask;
38552 /* Optimize AND of 0 to just set 0. Optimize AND of -1 to be a move. */
38553 if (code == AND)
38555 if (value == 0)
38557 emit_insn (gen_rtx_SET (dest, const0_rtx));
38558 return;
38561 else if (value == mask)
38563 if (!rtx_equal_p (dest, op1))
38564 emit_insn (gen_rtx_SET (dest, op1));
38565 return;
38569 /* Optimize IOR/XOR of 0 to be a simple move. Split large operations
38570 into separate ORI/ORIS or XORI/XORIS instructions. */
38571 else if (code == IOR || code == XOR)
38573 if (value == 0)
38575 if (!rtx_equal_p (dest, op1))
38576 emit_insn (gen_rtx_SET (dest, op1));
38577 return;
38582 if (code == AND && mode == SImode
38583 && !complement_final_p && !complement_op1_p && !complement_op2_p)
38585 emit_insn (gen_andsi3 (dest, op1, op2));
38586 return;
38589 if (complement_op1_p)
38590 op1 = gen_rtx_NOT (mode, op1);
38592 if (complement_op2_p)
38593 op2 = gen_rtx_NOT (mode, op2);
38595 /* For canonical RTL, if only one arm is inverted it is the first. */
38596 if (!complement_op1_p && complement_op2_p)
38597 std::swap (op1, op2);
38599 bool_rtx = ((code == NOT)
38600 ? gen_rtx_NOT (mode, op1)
38601 : gen_rtx_fmt_ee (code, mode, op1, op2));
38603 if (complement_final_p)
38604 bool_rtx = gen_rtx_NOT (mode, bool_rtx);
38606 emit_insn (gen_rtx_SET (dest, bool_rtx));
38609 /* Split a DImode AND/IOR/XOR with a constant on a 32-bit system. These
38610 operations are split immediately during RTL generation to allow for more
38611 optimizations of the AND/IOR/XOR.
38613 OPERANDS is an array containing the destination and two input operands.
38614 CODE is the base operation (AND, IOR, XOR, NOT).
38615 MODE is the machine mode.
38616 If COMPLEMENT_FINAL_P is true, wrap the whole operation with NOT.
38617 If COMPLEMENT_OP1_P is true, wrap operand1 with NOT.
38618 If COMPLEMENT_OP2_P is true, wrap operand2 with NOT.
38619 CLOBBER_REG is either NULL or a scratch register of type CC to allow
38620 formation of the AND instructions. */
38622 static void
38623 rs6000_split_logical_di (rtx operands[3],
38624 enum rtx_code code,
38625 bool complement_final_p,
38626 bool complement_op1_p,
38627 bool complement_op2_p)
38629 const HOST_WIDE_INT lower_32bits = HOST_WIDE_INT_C(0xffffffff);
38630 const HOST_WIDE_INT upper_32bits = ~ lower_32bits;
38631 const HOST_WIDE_INT sign_bit = HOST_WIDE_INT_C(0x80000000);
38632 enum hi_lo { hi = 0, lo = 1 };
38633 rtx op0_hi_lo[2], op1_hi_lo[2], op2_hi_lo[2];
38634 size_t i;
38636 op0_hi_lo[hi] = gen_highpart (SImode, operands[0]);
38637 op1_hi_lo[hi] = gen_highpart (SImode, operands[1]);
38638 op0_hi_lo[lo] = gen_lowpart (SImode, operands[0]);
38639 op1_hi_lo[lo] = gen_lowpart (SImode, operands[1]);
38641 if (code == NOT)
38642 op2_hi_lo[hi] = op2_hi_lo[lo] = NULL_RTX;
38643 else
38645 if (GET_CODE (operands[2]) != CONST_INT)
38647 op2_hi_lo[hi] = gen_highpart_mode (SImode, DImode, operands[2]);
38648 op2_hi_lo[lo] = gen_lowpart (SImode, operands[2]);
38650 else
38652 HOST_WIDE_INT value = INTVAL (operands[2]);
38653 HOST_WIDE_INT value_hi_lo[2];
38655 gcc_assert (!complement_final_p);
38656 gcc_assert (!complement_op1_p);
38657 gcc_assert (!complement_op2_p);
38659 value_hi_lo[hi] = value >> 32;
38660 value_hi_lo[lo] = value & lower_32bits;
38662 for (i = 0; i < 2; i++)
38664 HOST_WIDE_INT sub_value = value_hi_lo[i];
38666 if (sub_value & sign_bit)
38667 sub_value |= upper_32bits;
38669 op2_hi_lo[i] = GEN_INT (sub_value);
38671 /* If this is an AND instruction, check to see if we need to load
38672 the value in a register. */
38673 if (code == AND && sub_value != -1 && sub_value != 0
38674 && !and_operand (op2_hi_lo[i], SImode))
38675 op2_hi_lo[i] = force_reg (SImode, op2_hi_lo[i]);
38680 for (i = 0; i < 2; i++)
38682 /* Split large IOR/XOR operations. */
38683 if ((code == IOR || code == XOR)
38684 && GET_CODE (op2_hi_lo[i]) == CONST_INT
38685 && !complement_final_p
38686 && !complement_op1_p
38687 && !complement_op2_p
38688 && !logical_const_operand (op2_hi_lo[i], SImode))
38690 HOST_WIDE_INT value = INTVAL (op2_hi_lo[i]);
38691 HOST_WIDE_INT hi_16bits = value & HOST_WIDE_INT_C(0xffff0000);
38692 HOST_WIDE_INT lo_16bits = value & HOST_WIDE_INT_C(0x0000ffff);
38693 rtx tmp = gen_reg_rtx (SImode);
38695 /* Make sure the constant is sign extended. */
38696 if ((hi_16bits & sign_bit) != 0)
38697 hi_16bits |= upper_32bits;
38699 rs6000_split_logical_inner (tmp, op1_hi_lo[i], GEN_INT (hi_16bits),
38700 code, SImode, false, false, false);
38702 rs6000_split_logical_inner (op0_hi_lo[i], tmp, GEN_INT (lo_16bits),
38703 code, SImode, false, false, false);
38705 else
38706 rs6000_split_logical_inner (op0_hi_lo[i], op1_hi_lo[i], op2_hi_lo[i],
38707 code, SImode, complement_final_p,
38708 complement_op1_p, complement_op2_p);
38711 return;
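/* Editorial example (a sketch, not part of the original source): on a
   32-bit target, a DImode XOR with a large constant, e.g.

     unsigned long long f (unsigned long long x) { return x ^ 0x12345678ULL; }

   is split above into two SImode operations: the high word is XORed with 0
   (a plain move), and the low word's constant, which is not a
   logical_const_operand, is split into its two halves, giving roughly

     xoris 3,3,0x1234    # XOR with 0x12340000
     xori  3,3,0x5678    # XOR with 0x00005678

   Register numbers are illustrative only. */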
38714 /* Split the insns that make up boolean operations operating on multiple GPR
38715 registers. The boolean MD patterns ensure that the inputs either are
38716 exactly the same as the output registers, or there is no overlap.
38718 OPERANDS is an array containing the destination and two input operands.
38719 CODE is the base operation (AND, IOR, XOR, NOT).
38720 If COMPLEMENT_FINAL_P is true, wrap the whole operation with NOT.
38721 If COMPLEMENT_OP1_P is true, wrap operand1 with NOT.
38722 If COMPLEMENT_OP2_P is true, wrap operand2 with NOT. */
38724 void
38725 rs6000_split_logical (rtx operands[3],
38726 enum rtx_code code,
38727 bool complement_final_p,
38728 bool complement_op1_p,
38729 bool complement_op2_p)
38731 machine_mode mode = GET_MODE (operands[0]);
38732 machine_mode sub_mode;
38733 rtx op0, op1, op2;
38734 int sub_size, regno0, regno1, nregs, i;
38736 /* If this is DImode, use the specialized version that can run before
38737 register allocation. */
38738 if (mode == DImode && !TARGET_POWERPC64)
38740 rs6000_split_logical_di (operands, code, complement_final_p,
38741 complement_op1_p, complement_op2_p);
38742 return;
38745 op0 = operands[0];
38746 op1 = operands[1];
38747 op2 = (code == NOT) ? NULL_RTX : operands[2];
38748 sub_mode = (TARGET_POWERPC64) ? DImode : SImode;
38749 sub_size = GET_MODE_SIZE (sub_mode);
38750 regno0 = REGNO (op0);
38751 regno1 = REGNO (op1);
38753 gcc_assert (reload_completed);
38754 gcc_assert (IN_RANGE (regno0, FIRST_GPR_REGNO, LAST_GPR_REGNO));
38755 gcc_assert (IN_RANGE (regno1, FIRST_GPR_REGNO, LAST_GPR_REGNO));
38757 nregs = rs6000_hard_regno_nregs[(int)mode][regno0];
38758 gcc_assert (nregs > 1);
38760 if (op2 && REG_P (op2))
38761 gcc_assert (IN_RANGE (REGNO (op2), FIRST_GPR_REGNO, LAST_GPR_REGNO));
38763 for (i = 0; i < nregs; i++)
38765 int offset = i * sub_size;
38766 rtx sub_op0 = simplify_subreg (sub_mode, op0, mode, offset);
38767 rtx sub_op1 = simplify_subreg (sub_mode, op1, mode, offset);
38768 rtx sub_op2 = ((code == NOT)
38769 ? NULL_RTX
38770 : simplify_subreg (sub_mode, op2, mode, offset));
38772 rs6000_split_logical_inner (sub_op0, sub_op1, sub_op2, code, sub_mode,
38773 complement_final_p, complement_op1_p,
38774 complement_op2_p);
38777 return;
38781 /* Return true if the peephole2 can combine an addis instruction and a
38782 load with an offset into a pair that can be fused together on
38783 a power8. */
38785 bool
38786 fusion_gpr_load_p (rtx addis_reg, /* register set via addis. */
38787 rtx addis_value, /* addis value. */
38788 rtx target, /* target register that is loaded. */
38789 rtx mem) /* bottom part of the memory addr. */
38791 rtx addr;
38792 rtx base_reg;
38794 /* Validate arguments. */
38795 if (!base_reg_operand (addis_reg, GET_MODE (addis_reg)))
38796 return false;
38798 if (!base_reg_operand (target, GET_MODE (target)))
38799 return false;
38801 if (!fusion_gpr_addis (addis_value, GET_MODE (addis_value)))
38802 return false;
38804 /* Allow sign/zero extension. */
38805 if (GET_CODE (mem) == ZERO_EXTEND
38806 || (GET_CODE (mem) == SIGN_EXTEND && TARGET_P8_FUSION_SIGN))
38807 mem = XEXP (mem, 0);
38809 if (!MEM_P (mem))
38810 return false;
38812 if (!fusion_gpr_mem_load (mem, GET_MODE (mem)))
38813 return false;
38815 addr = XEXP (mem, 0); /* either PLUS or LO_SUM. */
38816 if (GET_CODE (addr) != PLUS && GET_CODE (addr) != LO_SUM)
38817 return false;
38819 /* Validate that the register used to load the high value is either the
38820 register being loaded, or we can safely replace its use.
38822 This function is only called from the peephole2 pass and we assume that
38823 there are 2 instructions in the peephole (addis and load), so we want to
38824 check if the target register was not used in the memory address and the
38825 register to hold the addis result is dead after the peephole. */
38826 if (REGNO (addis_reg) != REGNO (target))
38828 if (reg_mentioned_p (target, mem))
38829 return false;
38831 if (!peep2_reg_dead_p (2, addis_reg))
38832 return false;
38834 /* If the target register being loaded is the stack pointer, we must
38835 avoid loading any other value into it, even temporarily. */
38836 if (REG_P (target) && REGNO (target) == STACK_POINTER_REGNUM)
38837 return false;
38840 base_reg = XEXP (addr, 0);
38841 return REGNO (addis_reg) == REGNO (base_reg);
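/* Editorial example (assumed, not from the original source): a qualifying
   addis/load pair looks like

     addis 9,2,sym@toc@ha    # addis_reg = r9, addis_value = high part
     ld    9,sym@toc@l(9)    # target = r9, base register = r9

   where the addis result is both the base register of the load and the
   register being loaded, so a power8 can fuse the two instructions. */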
38844 /* During the peephole2 pass, adjust and expand the insns for a load fusion
38845 sequence. We adjust the addis register to use the target register. If the
38846 load sign extends, we instead emit a zero-extending load followed by an
38847 explicit sign extension, since the fusion only covers zero-extending
38848 loads.
38850 The operands are:
38851 operands[0] register set with addis (to be replaced with target)
38852 operands[1] value set via addis
38853 operands[2] target register being loaded
38854 operands[3] D-form memory reference using operands[0]. */
38856 void
38857 expand_fusion_gpr_load (rtx *operands)
38859 rtx addis_value = operands[1];
38860 rtx target = operands[2];
38861 rtx orig_mem = operands[3];
38862 rtx new_addr, new_mem, orig_addr, offset;
38863 enum rtx_code plus_or_lo_sum;
38864 machine_mode target_mode = GET_MODE (target);
38865 machine_mode extend_mode = target_mode;
38866 machine_mode ptr_mode = Pmode;
38867 enum rtx_code extend = UNKNOWN;
38869 if (GET_CODE (orig_mem) == ZERO_EXTEND
38870 || (TARGET_P8_FUSION_SIGN && GET_CODE (orig_mem) == SIGN_EXTEND))
38872 extend = GET_CODE (orig_mem);
38873 orig_mem = XEXP (orig_mem, 0);
38874 target_mode = GET_MODE (orig_mem);
38877 gcc_assert (MEM_P (orig_mem));
38879 orig_addr = XEXP (orig_mem, 0);
38880 plus_or_lo_sum = GET_CODE (orig_addr);
38881 gcc_assert (plus_or_lo_sum == PLUS || plus_or_lo_sum == LO_SUM);
38883 offset = XEXP (orig_addr, 1);
38884 new_addr = gen_rtx_fmt_ee (plus_or_lo_sum, ptr_mode, addis_value, offset);
38885 new_mem = replace_equiv_address_nv (orig_mem, new_addr, false);
38887 if (extend != UNKNOWN)
38888 new_mem = gen_rtx_fmt_e (ZERO_EXTEND, extend_mode, new_mem);
38890 new_mem = gen_rtx_UNSPEC (extend_mode, gen_rtvec (1, new_mem),
38891 UNSPEC_FUSION_GPR);
38892 emit_insn (gen_rtx_SET (target, new_mem));
38894 if (extend == SIGN_EXTEND)
38896 int sub_off = ((BYTES_BIG_ENDIAN)
38897 ? GET_MODE_SIZE (extend_mode) - GET_MODE_SIZE (target_mode)
38898 : 0);
38899 rtx sign_reg
38900 = simplify_subreg (target_mode, target, extend_mode, sub_off);
38902 emit_insn (gen_rtx_SET (target,
38903 gen_rtx_SIGN_EXTEND (extend_mode, sign_reg)));
38906 return;
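/* Editorial sketch: for a sign-extending halfword load, the expansion above
   yields the fusable zero-extending load plus an explicit sign extension,
   e.g. (register numbers illustrative)

     addis 9,2,sym@toc@ha
     lhz   9,sym@toc@l(9)    # fused zero-extending load
     extsh 9,9               # sign extension emitted separately */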
38909 /* Emit the addis instruction that will be part of a fused instruction
38910 sequence. */
38912 void
38913 emit_fusion_addis (rtx target, rtx addis_value, const char *comment,
38914 const char *mode_name)
38916 rtx fuse_ops[10];
38917 char insn_template[80];
38918 const char *addis_str = NULL;
38919 const char *comment_str = ASM_COMMENT_START;
38921 if (*comment_str == ' ')
38922 comment_str++;
38924 /* Emit the addis instruction. */
38925 fuse_ops[0] = target;
38926 if (satisfies_constraint_L (addis_value))
38928 fuse_ops[1] = addis_value;
38929 addis_str = "lis %0,%v1";
38932 else if (GET_CODE (addis_value) == PLUS)
38934 rtx op0 = XEXP (addis_value, 0);
38935 rtx op1 = XEXP (addis_value, 1);
38937 if (REG_P (op0) && CONST_INT_P (op1)
38938 && satisfies_constraint_L (op1))
38940 fuse_ops[1] = op0;
38941 fuse_ops[2] = op1;
38942 addis_str = "addis %0,%1,%v2";
38946 else if (GET_CODE (addis_value) == HIGH)
38948 rtx value = XEXP (addis_value, 0);
38949 if (GET_CODE (value) == UNSPEC && XINT (value, 1) == UNSPEC_TOCREL)
38951 fuse_ops[1] = XVECEXP (value, 0, 0); /* symbol ref. */
38952 fuse_ops[2] = XVECEXP (value, 0, 1); /* TOC register. */
38953 if (TARGET_ELF)
38954 addis_str = "addis %0,%2,%1@toc@ha";
38956 else if (TARGET_XCOFF)
38957 addis_str = "addis %0,%1@u(%2)";
38959 else
38960 gcc_unreachable ();
38963 else if (GET_CODE (value) == PLUS)
38965 rtx op0 = XEXP (value, 0);
38966 rtx op1 = XEXP (value, 1);
38968 if (GET_CODE (op0) == UNSPEC
38969 && XINT (op0, 1) == UNSPEC_TOCREL
38970 && CONST_INT_P (op1))
38972 fuse_ops[1] = XVECEXP (op0, 0, 0); /* symbol ref. */
38973 fuse_ops[2] = XVECEXP (op0, 0, 1); /* TOC register. */
38974 fuse_ops[3] = op1;
38975 if (TARGET_ELF)
38976 addis_str = "addis %0,%2,%1+%3@toc@ha";
38978 else if (TARGET_XCOFF)
38979 addis_str = "addis %0,%1+%3@u(%2)";
38981 else
38982 gcc_unreachable ();
38986 else if (satisfies_constraint_L (value))
38988 fuse_ops[1] = value;
38989 addis_str = "lis %0,%v1";
38992 else if (TARGET_ELF && !TARGET_POWERPC64 && CONSTANT_P (value))
38994 fuse_ops[1] = value;
38995 addis_str = "lis %0,%1@ha";
38999 if (!addis_str)
39000 fatal_insn ("Could not generate addis value for fusion", addis_value);
39002 sprintf (insn_template, "%s\t\t%s %s, type %s", addis_str, comment_str,
39003 comment, mode_name);
39004 output_asm_insn (insn_template, fuse_ops);
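/* Editorial example: for a TOC-relative high part on ELF, with COMMENT
   "gpr load fusion" and MODE_NAME "int", the template above prints
   something like

     addis 9,2,sym@toc@ha		# gpr load fusion, type int */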
39007 /* Emit a D-form load or store instruction that is the second instruction
39008 of a fusion sequence. */
39010 void
39011 emit_fusion_load_store (rtx load_store_reg, rtx addis_reg, rtx offset,
39012 const char *insn_str)
39014 rtx fuse_ops[10];
39015 char insn_template[80];
39017 fuse_ops[0] = load_store_reg;
39018 fuse_ops[1] = addis_reg;
39020 if (CONST_INT_P (offset) && satisfies_constraint_I (offset))
39022 sprintf (insn_template, "%s %%0,%%2(%%1)", insn_str);
39023 fuse_ops[2] = offset;
39024 output_asm_insn (insn_template, fuse_ops);
39027 else if (GET_CODE (offset) == UNSPEC
39028 && XINT (offset, 1) == UNSPEC_TOCREL)
39030 if (TARGET_ELF)
39031 sprintf (insn_template, "%s %%0,%%2@toc@l(%%1)", insn_str);
39033 else if (TARGET_XCOFF)
39034 sprintf (insn_template, "%s %%0,%%2@l(%%1)", insn_str);
39036 else
39037 gcc_unreachable ();
39039 fuse_ops[2] = XVECEXP (offset, 0, 0);
39040 output_asm_insn (insn_template, fuse_ops);
39043 else if (GET_CODE (offset) == PLUS
39044 && GET_CODE (XEXP (offset, 0)) == UNSPEC
39045 && XINT (XEXP (offset, 0), 1) == UNSPEC_TOCREL
39046 && CONST_INT_P (XEXP (offset, 1)))
39048 rtx tocrel_unspec = XEXP (offset, 0);
39049 if (TARGET_ELF)
39050 sprintf (insn_template, "%s %%0,%%2+%%3@toc@l(%%1)", insn_str);
39052 else if (TARGET_XCOFF)
39053 sprintf (insn_template, "%s %%0,%%2+%%3@l(%%1)", insn_str);
39055 else
39056 gcc_unreachable ();
39058 fuse_ops[2] = XVECEXP (tocrel_unspec, 0, 0);
39059 fuse_ops[3] = XEXP (offset, 1);
39060 output_asm_insn (insn_template, fuse_ops);
39063 else if (TARGET_ELF && !TARGET_POWERPC64 && CONSTANT_P (offset))
39065 sprintf (insn_template, "%s %%0,%%2@l(%%1)", insn_str);
39067 fuse_ops[2] = offset;
39068 output_asm_insn (insn_template, fuse_ops);
39071 else
39072 fatal_insn ("Unable to generate load/store offset for fusion", offset);
39074 return;
39077 /* Wrap a TOC address that can be fused to indicate that special fusion
39078 processing is needed. */
39080 static rtx
39081 fusion_wrap_memory_address (rtx old_mem)
39083 rtx old_addr = XEXP (old_mem, 0);
39084 rtvec v = gen_rtvec (1, old_addr);
39085 rtx new_addr = gen_rtx_UNSPEC (Pmode, v, UNSPEC_FUSION_ADDIS);
39086 return replace_equiv_address_nv (old_mem, new_addr, false);
39089 /* Given an address, convert it into the addis and load offset parts. Addresses
39090 created during the peephole2 process look like:
39091 (lo_sum (high (unspec [(sym)] UNSPEC_TOCREL))
39092 (unspec [(...)] UNSPEC_TOCREL))
39094 Addresses created via toc fusion look like:
39095 (unspec [(unspec [(...)] UNSPEC_TOCREL)] UNSPEC_FUSION_ADDIS)) */
39097 static void
39098 fusion_split_address (rtx addr, rtx *p_hi, rtx *p_lo)
39100 rtx hi, lo;
39102 if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_FUSION_ADDIS)
39104 lo = XVECEXP (addr, 0, 0);
39105 hi = gen_rtx_HIGH (Pmode, lo);
39107 else if (GET_CODE (addr) == PLUS || GET_CODE (addr) == LO_SUM)
39109 hi = XEXP (addr, 0);
39110 lo = XEXP (addr, 1);
39112 else
39113 gcc_unreachable ();
39115 *p_hi = hi;
39116 *p_lo = lo;
39119 /* Return a string to fuse an addis instruction with a gpr load into the
39120 same register that the addis instruction set. The address that is used
39121 is the logical address that was formed during peephole2:
39122 (lo_sum (high) (low-part))
39124 Or the address is the TOC address that is wrapped before register allocation:
39125 (unspec [(addr) (toc-reg)] UNSPEC_FUSION_ADDIS)
39127 The code is complicated, so we call output_asm_insn directly, and just
39128 return "". */
39130 const char *
39131 emit_fusion_gpr_load (rtx target, rtx mem)
39133 rtx addis_value;
39134 rtx addr;
39135 rtx load_offset;
39136 const char *load_str = NULL;
39137 const char *mode_name = NULL;
39138 machine_mode mode;
39140 if (GET_CODE (mem) == ZERO_EXTEND)
39141 mem = XEXP (mem, 0);
39143 gcc_assert (REG_P (target) && MEM_P (mem));
39145 addr = XEXP (mem, 0);
39146 fusion_split_address (addr, &addis_value, &load_offset);
39148 /* Now emit the load instruction to the same register. */
39149 mode = GET_MODE (mem);
39150 switch (mode)
39152 case QImode:
39153 mode_name = "char";
39154 load_str = "lbz";
39155 break;
39157 case HImode:
39158 mode_name = "short";
39159 load_str = "lhz";
39160 break;
39162 case SImode:
39163 case SFmode:
39164 mode_name = (mode == SFmode) ? "float" : "int";
39165 load_str = "lwz";
39166 break;
39168 case DImode:
39169 case DFmode:
39170 gcc_assert (TARGET_POWERPC64);
39171 mode_name = (mode == DFmode) ? "double" : "long";
39172 load_str = "ld";
39173 break;
39175 default:
39176 fatal_insn ("Bad GPR fusion", gen_rtx_SET (target, mem));
39179 /* Emit the addis instruction. */
39180 emit_fusion_addis (target, addis_value, "gpr load fusion", mode_name);
39182 /* Emit the D-form load instruction. */
39183 emit_fusion_load_store (target, target, load_offset, load_str);
39185 return "";
39189 /* Return true if the peephole2 can combine an addis instruction with a
39190 load or store memory operation to form a fused pair. This fusion was
39191 added in ISA 3.0 (power9) hardware. */
39193 bool
39194 fusion_p9_p (rtx addis_reg, /* register set via addis. */
39195 rtx addis_value, /* addis value. */
39196 rtx dest, /* destination (memory or register). */
39197 rtx src) /* source (register or memory). */
39199 rtx addr, mem, offset;
39200 enum machine_mode mode = GET_MODE (src);
39202 /* Validate arguments. */
39203 if (!base_reg_operand (addis_reg, GET_MODE (addis_reg)))
39204 return false;
39206 if (!fusion_gpr_addis (addis_value, GET_MODE (addis_value)))
39207 return false;
39209 /* Ignore extend operations that are part of the load. */
39210 if (GET_CODE (src) == FLOAT_EXTEND || GET_CODE (src) == ZERO_EXTEND)
39211 src = XEXP (src, 0);
39213 /* Test for memory<-register or register<-memory. */
39214 if (fpr_reg_operand (src, mode) || int_reg_operand (src, mode))
39216 if (!MEM_P (dest))
39217 return false;
39219 mem = dest;
39222 else if (MEM_P (src))
39224 if (!fpr_reg_operand (dest, mode) && !int_reg_operand (dest, mode))
39225 return false;
39227 mem = src;
39230 else
39231 return false;
39233 addr = XEXP (mem, 0); /* either PLUS or LO_SUM. */
39234 if (GET_CODE (addr) == PLUS)
39236 if (!rtx_equal_p (addis_reg, XEXP (addr, 0)))
39237 return false;
39239 return satisfies_constraint_I (XEXP (addr, 1));
39242 else if (GET_CODE (addr) == LO_SUM)
39244 if (!rtx_equal_p (addis_reg, XEXP (addr, 0)))
39245 return false;
39247 offset = XEXP (addr, 1);
39248 if (TARGET_XCOFF || (TARGET_ELF && TARGET_POWERPC64))
39249 return small_toc_ref (offset, GET_MODE (offset));
39251 else if (TARGET_ELF && !TARGET_POWERPC64)
39252 return CONSTANT_P (offset);
39255 return false;
39258 /* During the peephole2 pass, adjust and expand the insns for an extended fusion
39259 load sequence.
39261 The operands are:
39262 operands[0] register set with addis
39263 operands[1] value set via addis
39264 operands[2] target register being loaded
39265 operands[3] D-form memory reference using operands[0].
39267 This is similar to the fusion introduced with power8, except it scales to
39268 both loads/stores and does not require the result register to be the same as
39269 the base register. At the moment, we only do this if the register set
39270 with addis is dead. */
39272 void
39273 expand_fusion_p9_load (rtx *operands)
39275 rtx tmp_reg = operands[0];
39276 rtx addis_value = operands[1];
39277 rtx target = operands[2];
39278 rtx orig_mem = operands[3];
39279 rtx new_addr, new_mem, orig_addr, offset, set, clobber, insn;
39280 enum rtx_code plus_or_lo_sum;
39281 machine_mode target_mode = GET_MODE (target);
39282 machine_mode extend_mode = target_mode;
39283 machine_mode ptr_mode = Pmode;
39284 enum rtx_code extend = UNKNOWN;
39286 if (GET_CODE (orig_mem) == FLOAT_EXTEND || GET_CODE (orig_mem) == ZERO_EXTEND)
39288 extend = GET_CODE (orig_mem);
39289 orig_mem = XEXP (orig_mem, 0);
39290 target_mode = GET_MODE (orig_mem);
39293 gcc_assert (MEM_P (orig_mem));
39295 orig_addr = XEXP (orig_mem, 0);
39296 plus_or_lo_sum = GET_CODE (orig_addr);
39297 gcc_assert (plus_or_lo_sum == PLUS || plus_or_lo_sum == LO_SUM);
39299 offset = XEXP (orig_addr, 1);
39300 new_addr = gen_rtx_fmt_ee (plus_or_lo_sum, ptr_mode, addis_value, offset);
39301 new_mem = replace_equiv_address_nv (orig_mem, new_addr, false);
39303 if (extend != UNKNOWN)
39304 new_mem = gen_rtx_fmt_e (extend, extend_mode, new_mem);
39306 new_mem = gen_rtx_UNSPEC (extend_mode, gen_rtvec (1, new_mem),
39307 UNSPEC_FUSION_P9);
39309 set = gen_rtx_SET (target, new_mem);
39310 clobber = gen_rtx_CLOBBER (VOIDmode, tmp_reg);
39311 insn = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, set, clobber));
39312 emit_insn (insn);
39314 return;
39317 /* During the peephole2 pass, adjust and expand the insns for an extended fusion
39318 store sequence.
39320 The operands are:
39321 operands[0] register set with addis
39322 operands[1] value set via addis
39323 operands[2] target D-form memory being stored to
39324 operands[3] register being stored
39326 This is similar to the fusion introduced with power8, except it scales to
39327 both loads/stores and does not require the result register to be the same as
39328 the base register. At the moment, we only do this if the register set
39329 with addis is dead. */
39331 void
39332 expand_fusion_p9_store (rtx *operands)
39334 rtx tmp_reg = operands[0];
39335 rtx addis_value = operands[1];
39336 rtx orig_mem = operands[2];
39337 rtx src = operands[3];
39338 rtx new_addr, new_mem, orig_addr, offset, set, clobber, insn, new_src;
39339 enum rtx_code plus_or_lo_sum;
39340 machine_mode target_mode = GET_MODE (orig_mem);
39341 machine_mode ptr_mode = Pmode;
39343 gcc_assert (MEM_P (orig_mem));
39345 orig_addr = XEXP (orig_mem, 0);
39346 plus_or_lo_sum = GET_CODE (orig_addr);
39347 gcc_assert (plus_or_lo_sum == PLUS || plus_or_lo_sum == LO_SUM);
39349 offset = XEXP (orig_addr, 1);
39350 new_addr = gen_rtx_fmt_ee (plus_or_lo_sum, ptr_mode, addis_value, offset);
39351 new_mem = replace_equiv_address_nv (orig_mem, new_addr, false);
39353 new_src = gen_rtx_UNSPEC (target_mode, gen_rtvec (1, src),
39354 UNSPEC_FUSION_P9);
39356 set = gen_rtx_SET (new_mem, new_src);
39357 clobber = gen_rtx_CLOBBER (VOIDmode, tmp_reg);
39358 insn = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, set, clobber));
39359 emit_insn (insn);
39361 return;
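/* Editorial sketch of the insn constructed above (operand names follow the
   function's variables):

     (parallel [(set (mem (lo_sum addis_value offset))
                     (unspec [(src)] UNSPEC_FUSION_P9))
                (clobber tmp_reg)])

   TMP_REG, the register originally set by addis, is clobbered so the final
   addis/store pair can materialize the high part into it. */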
39364 /* Return a string to fuse an addis instruction with a load using extended
39365 fusion. The address that is used is the logical address that was formed
39366 during peephole2: (lo_sum (high) (low-part))
39368 The code is complicated, so we call output_asm_insn directly, and just
39369 return "". */
39371 const char *
39372 emit_fusion_p9_load (rtx reg, rtx mem, rtx tmp_reg)
39374 enum machine_mode mode = GET_MODE (reg);
39375 rtx hi;
39376 rtx lo;
39377 rtx addr;
39378 const char *load_string;
39379 int r;
39381 if (GET_CODE (mem) == FLOAT_EXTEND || GET_CODE (mem) == ZERO_EXTEND)
39383 mem = XEXP (mem, 0);
39384 mode = GET_MODE (mem);
39387 if (GET_CODE (reg) == SUBREG)
39389 gcc_assert (SUBREG_BYTE (reg) == 0);
39390 reg = SUBREG_REG (reg);
39393 if (!REG_P (reg))
39394 fatal_insn ("emit_fusion_p9_load, bad reg #1", reg);
39396 r = REGNO (reg);
39397 if (FP_REGNO_P (r))
39399 if (mode == SFmode)
39400 load_string = "lfs";
39401 else if (mode == DFmode || mode == DImode)
39402 load_string = "lfd";
39403 else
39404 gcc_unreachable ();
39406 else if (INT_REGNO_P (r))
39408 switch (mode)
39410 case QImode:
39411 load_string = "lbz";
39412 break;
39413 case HImode:
39414 load_string = "lhz";
39415 break;
39416 case SImode:
39417 case SFmode:
39418 load_string = "lwz";
39419 break;
39420 case DImode:
39421 case DFmode:
39422 if (!TARGET_POWERPC64)
39423 gcc_unreachable ();
39424 load_string = "ld";
39425 break;
39426 default:
39427 gcc_unreachable ();
39430 else
39431 fatal_insn ("emit_fusion_p9_load, bad reg #2", reg);
39433 if (!MEM_P (mem))
39434 fatal_insn ("emit_fusion_p9_load not MEM", mem);
39436 addr = XEXP (mem, 0);
39437 fusion_split_address (addr, &hi, &lo);
39439 /* Emit the addis instruction. */
39440 emit_fusion_addis (tmp_reg, hi, "power9 load fusion", GET_MODE_NAME (mode));
39442 /* Emit the D-form load instruction. */
39443 emit_fusion_load_store (reg, tmp_reg, lo, load_string);
39445 return "";
39448 /* Return a string to fuse an addis instruction with a store using extended
39449 fusion. The address that is used is the logical address that was formed
39450 during peephole2: (lo_sum (high) (low-part))
39452 The code is complicated, so we call output_asm_insn directly, and just
39453 return "". */
39455 const char *
39456 emit_fusion_p9_store (rtx mem, rtx reg, rtx tmp_reg)
39458 enum machine_mode mode = GET_MODE (reg);
39459 rtx hi;
39460 rtx lo;
39461 rtx addr;
39462 const char *store_string;
39463 int r;
39465 if (GET_CODE (reg) == SUBREG)
39467 gcc_assert (SUBREG_BYTE (reg) == 0);
39468 reg = SUBREG_REG (reg);
39471 if (!REG_P (reg))
39472 fatal_insn ("emit_fusion_p9_store, bad reg #1", reg);
39474 r = REGNO (reg);
39475 if (FP_REGNO_P (r))
39477 if (mode == SFmode)
39478 store_string = "stfs";
39479 else if (mode == DFmode)
39480 store_string = "stfd";
39481 else
39482 gcc_unreachable ();
39484 else if (INT_REGNO_P (r))
39486 switch (mode)
39488 case QImode:
39489 store_string = "stb";
39490 break;
39491 case HImode:
39492 store_string = "sth";
39493 break;
39494 case SImode:
39495 case SFmode:
39496 store_string = "stw";
39497 break;
39498 case DImode:
39499 case DFmode:
39500 if (!TARGET_POWERPC64)
39501 gcc_unreachable ();
39502 store_string = "std";
39503 break;
39504 default:
39505 gcc_unreachable ();
39508 else
39509 fatal_insn ("emit_fusion_p9_store, bad reg #2", reg);
39511 if (!MEM_P (mem))
39512 fatal_insn ("emit_fusion_p9_store not MEM", mem);
39514 addr = XEXP (mem, 0);
39515 fusion_split_address (addr, &hi, &lo);
39517 /* Emit the addis instruction. */
39518 emit_fusion_addis (tmp_reg, hi, "power9 store fusion", GET_MODE_NAME (mode));
39520 /* Emit the D-form store instruction. */
39521 emit_fusion_load_store (reg, tmp_reg, lo, store_string);
39523 return "";
39527 /* Analyze vector computations and remove unnecessary doubleword
39528 swaps (xxswapdi instructions). This pass is performed only
39529 for little-endian VSX code generation.
39531 For this specific case, loads and stores of 4x32 and 2x64 vectors
39532 are inefficient. These are implemented using the lxvd2x and
39533 stxvd2x instructions, which invert the order of doublewords in
39534 a vector register. Thus the code generation inserts an xxswapdi
39535 after each such load, and prior to each such store. (For spill
39536 code after register assignment, an additional xxswapdi is inserted
39537 following each store in order to return a hard register to its
39538 unpermuted value.)
39540 The extra xxswapdi instructions reduce performance. This can be
39541 particularly bad for vectorized code. The purpose of this pass
39542 is to reduce the number of xxswapdi instructions required for
39543 correctness.
39545 The primary insight is that much code that operates on vectors
39546 does not care about the relative order of elements in a register,
39547 so long as the correct memory order is preserved. If we have
39548 a computation where all input values are provided by lxvd2x/xxswapdi
39549 sequences, all outputs are stored using xxswapdi/stxvd2x sequences,
39550 and all intermediate computations are pure SIMD (independent of
39551 element order), then all the xxswapdi's associated with the loads
39552 and stores may be removed.
39554 This pass uses some of the infrastructure and logical ideas from
39555 the "web" pass in web.c. We create maximal webs of computations
39556 fitting the description above using union-find. Each such web is
39557 then optimized by removing its unnecessary xxswapdi instructions.
39559 The pass is placed prior to global optimization so that we can
39560 perform the optimization in the safest and simplest way possible;
39561 that is, by replacing each xxswapdi insn with a register copy insn.
39562 Subsequent forward propagation will remove copies where possible.
39564 There are some operations sensitive to element order for which we
39565 can still allow the operation, provided we modify those operations.
39566 These include CONST_VECTORs, for which we must swap the first and
39567 second halves of the constant vector; and SUBREGs, for which we
39568 must adjust the byte offset to account for the swapped doublewords.
39569 A remaining opportunity would be non-immediate-form splats, for
39570 which we should adjust the selected lane of the input. We should
39571 also make code generation adjustments for sum-across operations,
39572 since this is a common vectorizer reduction.
39574 Because we run prior to the first split, we can see loads and stores
39575 here that match *vsx_le_perm_{load,store}_<mode>. These are vanilla
39576 vector loads and stores that have not yet been split into a permuting
39577 load/store and a swap. (One way this can happen is with a builtin
39578 call to vec_vsx_{ld,st}.) We can handle these as well, but rather
39579 than deleting a swap, we convert the load/store into a permuting
39580 load/store (which effectively removes the swap). */
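/* Editorial example (assumed): for little-endian V4SI code such as

     void f (vector int *a, vector int *b, vector int *c, int n)
     {
       for (int i = 0; i < n; i++)
         c[i] = a[i] + b[i];
     }

   each load becomes lxvd2x+xxswapdi and each store xxswapdi+stxvd2x, while
   the element-wise add is insensitive to element order. The whole web
   therefore qualifies and every xxswapdi in it is removed. */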
39582 /* Notes on Permutes
39584 We do not currently handle computations that contain permutes. There
39585 is a general transformation that can be performed correctly, but it
39586 may introduce more expensive code than it replaces. To handle these
39587 would require a cost model to determine when to perform the optimization.
39588 This commentary records how this could be done if desired.
39590 The most general permute is something like this (example for V16QI):
39592 (vec_select:V16QI (vec_concat:V32QI (op1:V16QI) (op2:V16QI))
39593 (parallel [(const_int a0) (const_int a1)
39594 ...
39595 (const_int a14) (const_int a15)]))
39597 where a0,...,a15 are in [0,31] and select which elements from op1 and
39598 op2 appear in the result.
39600 Regardless of mode, we can convert the PARALLEL to a mask of 16
39601 byte-element selectors. Let's call this M, with M[i] representing
39602 the ith byte-element selector value. Then if we swap doublewords
39603 throughout the computation, we can get correct behavior by replacing
39604 M with M' as follows:
39606 M'[i] = { (M[i]+8)%16 : M[i] in [0,15]
39607 { ((M[i]+8)%16)+16 : M[i] in [16,31]
39609 This seems promising at first, since we are just replacing one mask
39610 with another. But certain masks are preferable to others. If M
39611 is a mask that matches a vmrghh pattern, for example, M' certainly
39612 will not. Instead of a single vmrghh, we would generate a load of
39613 M' and a vperm. So we would need to know how many xxswapd's we can
39614 remove as a result of this transformation to determine if it's
39615 profitable; and preferably the logic would need to be aware of all
39616 the special preferable masks.
39618 Another form of permute is an UNSPEC_VPERM, in which the mask is
39619 already in a register. In some cases, this mask may be a constant
39620 that we can discover with ud-chains, in which case the above
39621 transformation is ok. However, the common usage here is for the
39622 mask to be produced by an UNSPEC_LVSL, in which case the mask
39623 cannot be known at compile time. In such a case we would have to
39624 generate several instructions to compute M' as above at run time,
39625 and a cost model is needed again.
39627 However, when the mask M for an UNSPEC_VPERM is loaded from the
39628 constant pool, we can replace M with M' as above at no cost
39629 beyond adding a constant pool entry. */
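/* Editorial worked example of the M -> M' mapping above: if M is the
   identity mask [0 1 2 ... 15], all selectors are in [0,15], so
   M'[i] = (M[i]+8)%16 and

     M' = [8 9 10 11 12 13 14 15 0 1 2 3 4 5 6 7]

   i.e. the identity permute on doubleword-swapped inputs must itself
   become a doubleword swap. */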
39631 /* This is based on the union-find logic in web.c. web_entry_base is
39632 defined in df.h. */
39633 class swap_web_entry : public web_entry_base
39635 public:
39636 /* Pointer to the insn. */
39637 rtx_insn *insn;
39638 /* Set if insn contains a mention of a vector register. All other
39639 fields are undefined if this field is unset. */
39640 unsigned int is_relevant : 1;
39641 /* Set if insn is a load. */
39642 unsigned int is_load : 1;
39643 /* Set if insn is a store. */
39644 unsigned int is_store : 1;
39645 /* Set if insn is a doubleword swap. This can either be a register swap
39646 or a permuting load or store (test is_load and is_store for this). */
39647 unsigned int is_swap : 1;
39648 /* Set if the insn has a live-in use of a parameter register. */
39649 unsigned int is_live_in : 1;
39650 /* Set if the insn has a live-out def of a return register. */
39651 unsigned int is_live_out : 1;
39652 /* Set if the insn contains a subreg reference of a vector register. */
39653 unsigned int contains_subreg : 1;
39654 /* Set if the insn contains a 128-bit integer operand. */
39655 unsigned int is_128_int : 1;
39656 /* Set if this is a call-insn. */
39657 unsigned int is_call : 1;
39658 /* Set if this insn does not perform a vector operation for which
39659 element order matters, or if we know how to fix it up if it does.
39660 Undefined if is_swap is set. */
39661 unsigned int is_swappable : 1;
39662 /* A nonzero value indicates what kind of special handling for this
39663 insn is required if doublewords are swapped. Undefined if
39664 is_swappable is not set. */
39665 unsigned int special_handling : 4;
39666 /* Set if the web represented by this entry cannot be optimized. */
39667 unsigned int web_not_optimizable : 1;
39668 /* Set if this insn should be deleted. */
39669 unsigned int will_delete : 1;
39672 enum special_handling_values {
39673 SH_NONE = 0,
39674 SH_CONST_VECTOR,
39675 SH_SUBREG,
39676 SH_NOSWAP_LD,
39677 SH_NOSWAP_ST,
39678 SH_EXTRACT,
39679 SH_SPLAT,
39680 SH_XXPERMDI,
39681 SH_CONCAT,
39682 SH_VPERM
39685 /* Union INSN with all insns containing definitions that reach USE.
39686 Detect whether USE is live-in to the current function. */
39687 static void
39688 union_defs (swap_web_entry *insn_entry, rtx insn, df_ref use)
39690 struct df_link *link = DF_REF_CHAIN (use);
39692 if (!link)
39693 insn_entry[INSN_UID (insn)].is_live_in = 1;
39695 while (link)
39697 if (DF_REF_IS_ARTIFICIAL (link->ref))
39698 insn_entry[INSN_UID (insn)].is_live_in = 1;
39700 if (DF_REF_INSN_INFO (link->ref))
39702 rtx def_insn = DF_REF_INSN (link->ref);
39703 (void)unionfind_union (insn_entry + INSN_UID (insn),
39704 insn_entry + INSN_UID (def_insn));
39707 link = link->next;
39711 /* Union INSN with all insns containing uses reached from DEF.
39712 Detect whether DEF is live-out from the current function. */
39713 static void
39714 union_uses (swap_web_entry *insn_entry, rtx insn, df_ref def)
39716 struct df_link *link = DF_REF_CHAIN (def);
39718 if (!link)
39719 insn_entry[INSN_UID (insn)].is_live_out = 1;
39721 while (link)
39723 /* This could be an eh use or some other artificial use;
39724 we treat these all the same (killing the optimization). */
39725 if (DF_REF_IS_ARTIFICIAL (link->ref))
39726 insn_entry[INSN_UID (insn)].is_live_out = 1;
39728 if (DF_REF_INSN_INFO (link->ref))
39730 rtx use_insn = DF_REF_INSN (link->ref);
39731 (void)unionfind_union (insn_entry + INSN_UID (insn),
39732 insn_entry + INSN_UID (use_insn));
39735 link = link->next;
39739 /* Return 1 iff INSN is a load insn, including permuting loads that
39740 represent an lxvd2x instruction; else return 0. */
39741 static unsigned int
39742 insn_is_load_p (rtx insn)
39744 rtx body = PATTERN (insn);
39746 if (GET_CODE (body) == SET)
39748 if (GET_CODE (SET_SRC (body)) == MEM)
39749 return 1;
39751 if (GET_CODE (SET_SRC (body)) == VEC_SELECT
39752 && GET_CODE (XEXP (SET_SRC (body), 0)) == MEM)
39753 return 1;
39755 return 0;
39758 if (GET_CODE (body) != PARALLEL)
39759 return 0;
39761 rtx set = XVECEXP (body, 0, 0);
39763 if (GET_CODE (set) == SET && GET_CODE (SET_SRC (set)) == MEM)
39764 return 1;
39766 return 0;
39769 /* Return 1 iff INSN is a store insn, including permuting stores that
39770 represent an stxvd2x instruction; else return 0. */
39771 static unsigned int
39772 insn_is_store_p (rtx insn)
39774 rtx body = PATTERN (insn);
39775 if (GET_CODE (body) == SET && GET_CODE (SET_DEST (body)) == MEM)
39776 return 1;
39777 if (GET_CODE (body) != PARALLEL)
39778 return 0;
39779 rtx set = XVECEXP (body, 0, 0);
39780 if (GET_CODE (set) == SET && GET_CODE (SET_DEST (set)) == MEM)
39781 return 1;
39782 return 0;
39785 /* Return 1 iff INSN swaps doublewords. This may be a reg-reg swap,
39786 a permuting load, or a permuting store. */
39787 static unsigned int
39788 insn_is_swap_p (rtx insn)
39790 rtx body = PATTERN (insn);
39791 if (GET_CODE (body) != SET)
39792 return 0;
39793 rtx rhs = SET_SRC (body);
39794 if (GET_CODE (rhs) != VEC_SELECT)
39795 return 0;
39796 rtx parallel = XEXP (rhs, 1);
39797 if (GET_CODE (parallel) != PARALLEL)
39798 return 0;
39799 unsigned int len = XVECLEN (parallel, 0);
39800 if (len != 2 && len != 4 && len != 8 && len != 16)
39801 return 0;
39802 for (unsigned int i = 0; i < len / 2; ++i)
39804 rtx op = XVECEXP (parallel, 0, i);
39805 if (GET_CODE (op) != CONST_INT || INTVAL (op) != len / 2 + i)
39806 return 0;
39808 for (unsigned int i = len / 2; i < len; ++i)
39810 rtx op = XVECEXP (parallel, 0, i);
39811 if (GET_CODE (op) != CONST_INT || INTVAL (op) != i - len / 2)
39812 return 0;
39814 return 1;
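/* Editorial example: for V4SI (len == 4), the test above accepts exactly

     (vec_select:V4SI (reg:V4SI x)
                      (parallel [(const_int 2) (const_int 3)
                                 (const_int 0) (const_int 1)]))

   since elements 0..1 must select len/2+i = 2,3 and elements 2..3 must
   select i-len/2 = 0,1. */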
39817 /* Return TRUE if insn is a swap fed by a load from the constant pool. */
39818 static bool
39819 const_load_sequence_p (swap_web_entry *insn_entry, rtx insn)
39821 unsigned uid = INSN_UID (insn);
39822 if (!insn_entry[uid].is_swap || insn_entry[uid].is_load)
39823 return false;
39825 /* Find the unique use in the swap and locate its def. If the def
39826 isn't unique, punt. */
39827 struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
39828 df_ref use;
39829 FOR_EACH_INSN_INFO_USE (use, insn_info)
39831 struct df_link *def_link = DF_REF_CHAIN (use);
39832 if (!def_link || def_link->next)
39833 return false;
39835 rtx def_insn = DF_REF_INSN (def_link->ref);
39836 unsigned uid2 = INSN_UID (def_insn);
39837 if (!insn_entry[uid2].is_load || !insn_entry[uid2].is_swap)
39838 return false;
39840 rtx body = PATTERN (def_insn);
39841 if (GET_CODE (body) != SET
39842 || GET_CODE (SET_SRC (body)) != VEC_SELECT
39843 || GET_CODE (XEXP (SET_SRC (body), 0)) != MEM)
39844 return false;
39846 rtx mem = XEXP (SET_SRC (body), 0);
39847 rtx base_reg = XEXP (mem, 0);
39849 df_ref base_use;
39850 insn_info = DF_INSN_INFO_GET (def_insn);
39851 FOR_EACH_INSN_INFO_USE (base_use, insn_info)
39853 if (!rtx_equal_p (DF_REF_REG (base_use), base_reg))
39854 continue;
39856 struct df_link *base_def_link = DF_REF_CHAIN (base_use);
39857 if (!base_def_link || base_def_link->next)
39858 return false;
39860 rtx tocrel_insn = DF_REF_INSN (base_def_link->ref);
39861 rtx tocrel_body = PATTERN (tocrel_insn);
39862 rtx base, offset;
39863 if (GET_CODE (tocrel_body) != SET)
39864 return false;
39865 /* There is an extra level of indirection for small/large
39866 code models. */
39867 rtx tocrel_expr = SET_SRC (tocrel_body);
39868 if (GET_CODE (tocrel_expr) == MEM)
39869 tocrel_expr = XEXP (tocrel_expr, 0);
39870 if (!toc_relative_expr_p (tocrel_expr, false))
39871 return false;
39872 split_const (XVECEXP (tocrel_base, 0, 0), &base, &offset);
39873 if (GET_CODE (base) != SYMBOL_REF || !CONSTANT_POOL_ADDRESS_P (base))
39874 return false;
39877 return true;
39880 /* Return TRUE iff OP matches a V2DF reduction pattern. See the
39881 definition of vsx_reduc_<VEC_reduc_name>_v2df in vsx.md. */
39882 static bool
39883 v2df_reduction_p (rtx op)
39885 if (GET_MODE (op) != V2DFmode)
39886 return false;
39888 enum rtx_code code = GET_CODE (op);
39889 if (code != PLUS && code != SMIN && code != SMAX)
39890 return false;
39892 rtx concat = XEXP (op, 0);
39893 if (GET_CODE (concat) != VEC_CONCAT)
39894 return false;
39896 rtx select0 = XEXP (concat, 0);
39897 rtx select1 = XEXP (concat, 1);
39898 if (GET_CODE (select0) != VEC_SELECT || GET_CODE (select1) != VEC_SELECT)
39899 return false;
39901 rtx reg0 = XEXP (select0, 0);
39902 rtx reg1 = XEXP (select1, 0);
39903 if (!rtx_equal_p (reg0, reg1) || !REG_P (reg0))
39904 return false;
39906 rtx parallel0 = XEXP (select0, 1);
39907 rtx parallel1 = XEXP (select1, 1);
39908 if (GET_CODE (parallel0) != PARALLEL || GET_CODE (parallel1) != PARALLEL)
39909 return false;
39911 if (!rtx_equal_p (XVECEXP (parallel0, 0, 0), const1_rtx)
39912 || !rtx_equal_p (XVECEXP (parallel1, 0, 0), const0_rtx))
39913 return false;
39915 return true;
39918 /* Return 1 iff OP is an operand that will not be affected by having
39919 vector doublewords swapped in memory. */
39920 static unsigned int
39921 rtx_is_swappable_p (rtx op, unsigned int *special)
39923 enum rtx_code code = GET_CODE (op);
39924 int i, j;
39925 rtx parallel;
39927 switch (code)
39929 case LABEL_REF:
39930 case SYMBOL_REF:
39931 case CLOBBER:
39932 case REG:
39933 return 1;
39935 case VEC_CONCAT:
39936 case ASM_INPUT:
39937 case ASM_OPERANDS:
39938 return 0;
39940 case CONST_VECTOR:
39942 *special = SH_CONST_VECTOR;
39943 return 1;
39946 case VEC_DUPLICATE:
39947 /* Opportunity: If XEXP (op, 0) has the same mode as the result,
39948 and XEXP (op, 1) is a PARALLEL with a single QImode const int,
39949 it represents a vector splat for which we can do special
39950 handling. */
39951 if (GET_CODE (XEXP (op, 0)) == CONST_INT)
39952 return 1;
39953 else if (REG_P (XEXP (op, 0))
39954 && GET_MODE_INNER (GET_MODE (op)) == GET_MODE (XEXP (op, 0)))
39955 /* This catches V2DF and V2DI splat, at a minimum. */
39956 return 1;
39957 else if (GET_CODE (XEXP (op, 0)) == TRUNCATE
39958 && REG_P (XEXP (XEXP (op, 0), 0))
39959 && GET_MODE_INNER (GET_MODE (op)) == GET_MODE (XEXP (op, 0)))
39960 /* This catches splat of a truncated value. */
39961 return 1;
39962 else if (GET_CODE (XEXP (op, 0)) == VEC_SELECT)
39963 /* If the duplicated item is from a select, defer to the select
39964 processing to see if we can change the lane for the splat. */
39965 return rtx_is_swappable_p (XEXP (op, 0), special);
39966 else
39967 return 0;
39969 case VEC_SELECT:
39970 /* A vec_extract operation is ok if we change the lane. */
39971 if (GET_CODE (XEXP (op, 0)) == REG
39972 && GET_MODE_INNER (GET_MODE (XEXP (op, 0))) == GET_MODE (op)
39973 && GET_CODE ((parallel = XEXP (op, 1))) == PARALLEL
39974 && XVECLEN (parallel, 0) == 1
39975 && GET_CODE (XVECEXP (parallel, 0, 0)) == CONST_INT)
39977 *special = SH_EXTRACT;
39978 return 1;
39980 /* An XXPERMDI is ok if we adjust the lanes. Note that if the
39981 XXPERMDI is a swap operation, it will be identified by
39982 insn_is_swap_p and therefore we won't get here. */
39983 else if (GET_CODE (XEXP (op, 0)) == VEC_CONCAT
39984 && (GET_MODE (XEXP (op, 0)) == V4DFmode
39985 || GET_MODE (XEXP (op, 0)) == V4DImode)
39986 && GET_CODE ((parallel = XEXP (op, 1))) == PARALLEL
39987 && XVECLEN (parallel, 0) == 2
39988 && GET_CODE (XVECEXP (parallel, 0, 0)) == CONST_INT
39989 && GET_CODE (XVECEXP (parallel, 0, 1)) == CONST_INT)
39991 *special = SH_XXPERMDI;
39992 return 1;
39994 else if (v2df_reduction_p (op))
39995 return 1;
39996 else
39997 return 0;
39999 case UNSPEC:
40001 /* Various operations are unsafe for this optimization, at least
40002 without significant additional work. Permutes are obviously
40003 problematic, as both the permute control vector and the ordering
40004 of the target values are invalidated by doubleword swapping.
40005 Vector pack and unpack modify the number of vector lanes.
40006 Merge-high/low will not operate correctly on swapped operands.
40007 Vector shifts across element boundaries are clearly uncool,
40008 as are vector select and concatenate operations. Vector
40009 sum-across instructions define one operand with a specific
40010 order-dependent element, so additional fixup code would be
40011 needed to make those work. Vector set and non-immediate-form
40012 vector splat are element-order sensitive. A few of these
40013 cases might be workable with special handling if required.
40014 Adding cost modeling would be appropriate in some cases. */
40015 int val = XINT (op, 1);
40016 switch (val)
40018 default:
40019 break;
40020 case UNSPEC_VMRGH_DIRECT:
40021 case UNSPEC_VMRGL_DIRECT:
40022 case UNSPEC_VPACK_SIGN_SIGN_SAT:
40023 case UNSPEC_VPACK_SIGN_UNS_SAT:
40024 case UNSPEC_VPACK_UNS_UNS_MOD:
40025 case UNSPEC_VPACK_UNS_UNS_MOD_DIRECT:
40026 case UNSPEC_VPACK_UNS_UNS_SAT:
40027 case UNSPEC_VPERM:
40028 case UNSPEC_VPERM_UNS:
40029 case UNSPEC_VPERMHI:
40030 case UNSPEC_VPERMSI:
40031 case UNSPEC_VPKPX:
40032 case UNSPEC_VSLDOI:
40033 case UNSPEC_VSLO:
40034 case UNSPEC_VSRO:
40035 case UNSPEC_VSUM2SWS:
40036 case UNSPEC_VSUM4S:
40037 case UNSPEC_VSUM4UBS:
40038 case UNSPEC_VSUMSWS:
40039 case UNSPEC_VSUMSWS_DIRECT:
40040 case UNSPEC_VSX_CONCAT:
40041 case UNSPEC_VSX_SET:
40042 case UNSPEC_VSX_SLDWI:
40043 case UNSPEC_VUNPACK_HI_SIGN:
40044 case UNSPEC_VUNPACK_HI_SIGN_DIRECT:
40045 case UNSPEC_VUNPACK_LO_SIGN:
40046 case UNSPEC_VUNPACK_LO_SIGN_DIRECT:
40047 case UNSPEC_VUPKHPX:
40048 case UNSPEC_VUPKHS_V4SF:
40049 case UNSPEC_VUPKHU_V4SF:
40050 case UNSPEC_VUPKLPX:
40051 case UNSPEC_VUPKLS_V4SF:
40052 case UNSPEC_VUPKLU_V4SF:
40053 case UNSPEC_VSX_CVDPSPN:
40054 case UNSPEC_VSX_CVSPDP:
40055 case UNSPEC_VSX_CVSPDPN:
40056 case UNSPEC_VSX_EXTRACT:
40057 case UNSPEC_VSX_VSLO:
40058 case UNSPEC_VSX_VEC_INIT:
40059 return 0;
40060 case UNSPEC_VSPLT_DIRECT:
40061 *special = SH_SPLAT;
40062 return 1;
40063 case UNSPEC_REDUC_PLUS:
40064 case UNSPEC_REDUC:
40065 return 1;
40069 default:
40070 break;
40073 const char *fmt = GET_RTX_FORMAT (code);
40074 int ok = 1;
40076 for (i = 0; i < GET_RTX_LENGTH (code); ++i)
40077 if (fmt[i] == 'e' || fmt[i] == 'u')
40079 unsigned int special_op = SH_NONE;
40080 ok &= rtx_is_swappable_p (XEXP (op, i), &special_op);
40081 if (special_op == SH_NONE)
40082 continue;
40083 /* Ensure we never have two kinds of special handling
40084 for the same insn. */
40085 if (*special != SH_NONE && *special != special_op)
40086 return 0;
40087 *special = special_op;
40089 else if (fmt[i] == 'E')
40090 for (j = 0; j < XVECLEN (op, i); ++j)
40092 unsigned int special_op = SH_NONE;
40093 ok &= rtx_is_swappable_p (XVECEXP (op, i, j), &special_op);
40094 if (special_op == SH_NONE)
40095 continue;
40096 /* Ensure we never have two kinds of special handling
40097 for the same insn. */
40098 if (*special != SH_NONE && *special != special_op)
40099 return 0;
40100 *special = special_op;
40103 return ok;
40106 /* Return 1 iff INSN is an insn that will not be affected by having
40107 vector doublewords swapped in memory (in which case
40108 *SPECIAL is unchanged), or that can be modified to be correct
40109 if vector doublewords are swapped in memory (in which case
40110 *SPECIAL is changed to a value indicating how). */
40111 static unsigned int
40112 insn_is_swappable_p (swap_web_entry *insn_entry, rtx insn,
40113 unsigned int *special)
40115 /* Calls are always bad. */
40116 if (GET_CODE (insn) == CALL_INSN)
40117 return 0;
40119 /* Loads and stores seen here are not permuting, but we can still
40120 fix them up by converting them to permuting ones. Exceptions:
40121 UNSPEC_LVE, UNSPEC_LVX, and UNSPEC_STVX, which have a PARALLEL
40122 body instead of a SET; and UNSPEC_STVE, which has an UNSPEC
40123 for the SET source. Also we must now make an exception for lvx
40124 and stvx when they are not in the UNSPEC_LVX/STVX form (with the
40125 explicit "& -16") since this leads to unrecognizable insns. */
40126 rtx body = PATTERN (insn);
40127 int i = INSN_UID (insn);
40129 if (insn_entry[i].is_load)
40131 if (GET_CODE (body) == SET)
40133 rtx rhs = SET_SRC (body);
40134 gcc_assert (GET_CODE (rhs) == MEM);
40135 if (GET_CODE (XEXP (rhs, 0)) == AND)
40136 return 0;
40138 *special = SH_NOSWAP_LD;
40139 return 1;
40141 else
40142 return 0;
40145 if (insn_entry[i].is_store)
40147 if (GET_CODE (body) == SET
40148 && GET_CODE (SET_SRC (body)) != UNSPEC)
40150 rtx lhs = SET_DEST (body);
40151 gcc_assert (GET_CODE (lhs) == MEM);
40152 if (GET_CODE (XEXP (lhs, 0)) == AND)
40153 return 0;
40155 *special = SH_NOSWAP_ST;
40156 return 1;
40158 else
40159 return 0;
40162 /* A convert to single precision can be left as is provided that
40163 all of its uses are in xxspltw instructions that splat BE element
40164 zero. */
40165 if (GET_CODE (body) == SET
40166 && GET_CODE (SET_SRC (body)) == UNSPEC
40167 && XINT (SET_SRC (body), 1) == UNSPEC_VSX_CVDPSPN)
40169 df_ref def;
40170 struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
40172 FOR_EACH_INSN_INFO_DEF (def, insn_info)
40174 struct df_link *link = DF_REF_CHAIN (def);
40175 if (!link)
40176 return 0;
40178 for (; link; link = link->next) {
40179 rtx use_insn = DF_REF_INSN (link->ref);
40180 rtx use_body = PATTERN (use_insn);
40181 if (GET_CODE (use_body) != SET
40182 || GET_CODE (SET_SRC (use_body)) != UNSPEC
40183 || XINT (SET_SRC (use_body), 1) != UNSPEC_VSX_XXSPLTW
40184 || XEXP (XEXP (SET_SRC (use_body), 0), 1) != const0_rtx)
40185 return 0;
40189 return 1;
40192 /* A concatenation of two doublewords is ok if we reverse the
40193 order of the inputs. */
40194 if (GET_CODE (body) == SET
40195 && GET_CODE (SET_SRC (body)) == VEC_CONCAT
40196 && (GET_MODE (SET_SRC (body)) == V2DFmode
40197 || GET_MODE (SET_SRC (body)) == V2DImode))
40199 *special = SH_CONCAT;
40200 return 1;
40203 /* V2DF reductions are always swappable. */
40204 if (GET_CODE (body) == PARALLEL)
40206 rtx expr = XVECEXP (body, 0, 0);
40207 if (GET_CODE (expr) == SET
40208 && v2df_reduction_p (SET_SRC (expr)))
40209 return 1;
40212 /* An UNSPEC_VPERM is ok if the mask operand is loaded from the
40213 constant pool. */
40214 if (GET_CODE (body) == SET
40215 && GET_CODE (SET_SRC (body)) == UNSPEC
40216 && XINT (SET_SRC (body), 1) == UNSPEC_VPERM
40217 && XVECLEN (SET_SRC (body), 0) == 3
40218 && GET_CODE (XVECEXP (SET_SRC (body), 0, 2)) == REG)
40220 rtx mask_reg = XVECEXP (SET_SRC (body), 0, 2);
40221 struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
40222 df_ref use;
40223 FOR_EACH_INSN_INFO_USE (use, insn_info)
40224 if (rtx_equal_p (DF_REF_REG (use), mask_reg))
40226 struct df_link *def_link = DF_REF_CHAIN (use);
40227 /* Punt if multiple definitions for this reg. */
40228 if (def_link && !def_link->next &&
40229 const_load_sequence_p (insn_entry,
40230 DF_REF_INSN (def_link->ref)))
40232 *special = SH_VPERM;
40233 return 1;
40238 /* Otherwise check the operands for vector lane violations. */
40239 return rtx_is_swappable_p (body, special);
40242 enum chain_purpose { FOR_LOADS, FOR_STORES };
40244 /* Return true if the UD or DU chain headed by LINK is non-empty,
40245 and every entry on the chain references an insn that is a
40246 register swap. Furthermore, if PURPOSE is FOR_LOADS, each such
40247 register swap must have only permuting loads as reaching defs.
40248 If PURPOSE is FOR_STORES, each such register swap must have only
40249 register swaps or permuting stores as reached uses. */
40250 static bool
40251 chain_contains_only_swaps (swap_web_entry *insn_entry, struct df_link *link,
40252 enum chain_purpose purpose)
40254 if (!link)
40255 return false;
40257 for (; link; link = link->next)
40259 if (!ALTIVEC_OR_VSX_VECTOR_MODE (GET_MODE (DF_REF_REG (link->ref))))
40260 continue;
40262 if (DF_REF_IS_ARTIFICIAL (link->ref))
40263 return false;
40265 rtx reached_insn = DF_REF_INSN (link->ref);
40266 unsigned uid = INSN_UID (reached_insn);
40267 struct df_insn_info *insn_info = DF_INSN_INFO_GET (reached_insn);
40269 if (!insn_entry[uid].is_swap || insn_entry[uid].is_load
40270 || insn_entry[uid].is_store)
40271 return false;
40273 if (purpose == FOR_LOADS)
40275 df_ref use;
40276 FOR_EACH_INSN_INFO_USE (use, insn_info)
40278 struct df_link *swap_link = DF_REF_CHAIN (use);
40280 while (swap_link)
40282 if (DF_REF_IS_ARTIFICIAL (swap_link->ref))
40283 return false;
40285 rtx swap_def_insn = DF_REF_INSN (swap_link->ref);
40286 unsigned uid2 = INSN_UID (swap_def_insn);
40288 /* Only permuting loads are allowed. */
40289 if (!insn_entry[uid2].is_swap || !insn_entry[uid2].is_load)
40290 return false;
40292 swap_link = swap_link->next;
40296 else if (purpose == FOR_STORES)
40298 df_ref def;
40299 FOR_EACH_INSN_INFO_DEF (def, insn_info)
40301 struct df_link *swap_link = DF_REF_CHAIN (def);
40303 while (swap_link)
40305 if (DF_REF_IS_ARTIFICIAL (swap_link->ref))
40306 return false;
40308 rtx swap_use_insn = DF_REF_INSN (swap_link->ref);
40309 unsigned uid2 = INSN_UID (swap_use_insn);
40311 /* Permuting stores or register swaps are allowed. */
40312 if (!insn_entry[uid2].is_swap || insn_entry[uid2].is_load)
40313 return false;
40315 swap_link = swap_link->next;
40321 return true;
40324 /* Mark the xxswapdi instructions associated with permuting loads and
40325 stores for removal. Note that we only flag them for deletion here,
40326 as there is a possibility of a swap being reached from multiple
40327 loads, etc. */
40328 static void
40329 mark_swaps_for_removal (swap_web_entry *insn_entry, unsigned int i)
40331 rtx insn = insn_entry[i].insn;
40332 struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
40334 if (insn_entry[i].is_load)
40336 df_ref def;
40337 FOR_EACH_INSN_INFO_DEF (def, insn_info)
40339 struct df_link *link = DF_REF_CHAIN (def);
40341 /* We know by now that these are swaps, so we can delete
40342 them confidently. */
40343 while (link)
40345 rtx use_insn = DF_REF_INSN (link->ref);
40346 insn_entry[INSN_UID (use_insn)].will_delete = 1;
40347 link = link->next;
40351 else if (insn_entry[i].is_store)
40353 df_ref use;
40354 FOR_EACH_INSN_INFO_USE (use, insn_info)
40356 /* Ignore uses for addressability. */
40357 machine_mode mode = GET_MODE (DF_REF_REG (use));
40358 if (!ALTIVEC_OR_VSX_VECTOR_MODE (mode))
40359 continue;
40361 struct df_link *link = DF_REF_CHAIN (use);
40363 /* We know by now that these are swaps, so we can delete
40364 them confidently. */
40365 while (link)
40367 rtx def_insn = DF_REF_INSN (link->ref);
40368 insn_entry[INSN_UID (def_insn)].will_delete = 1;
40369 link = link->next;
40375 /* OP is either a CONST_VECTOR or an expression containing one.
40376 Swap the first half of the vector with the second in the first
40377 case. Recurse to find it in the second. */
40378 static void
40379 swap_const_vector_halves (rtx op)
40381 int i;
40382 enum rtx_code code = GET_CODE (op);
40383 if (GET_CODE (op) == CONST_VECTOR)
40385 int half_units = GET_MODE_NUNITS (GET_MODE (op)) / 2;
40386 for (i = 0; i < half_units; ++i)
40388 rtx temp = CONST_VECTOR_ELT (op, i);
40389 CONST_VECTOR_ELT (op, i) = CONST_VECTOR_ELT (op, i + half_units);
40390 CONST_VECTOR_ELT (op, i + half_units) = temp;
40393 else
40395 int j;
40396 const char *fmt = GET_RTX_FORMAT (code);
40397 for (i = 0; i < GET_RTX_LENGTH (code); ++i)
40398 if (fmt[i] == 'e' || fmt[i] == 'u')
40399 swap_const_vector_halves (XEXP (op, i));
40400 else if (fmt[i] == 'E')
40401 for (j = 0; j < XVECLEN (op, i); ++j)
40402 swap_const_vector_halves (XVECEXP (op, i, j));
40406 /* Find all subregs of a vector expression that perform a narrowing,
40407 and adjust the subreg index to account for doubleword swapping. */
40408 static void
40409 adjust_subreg_index (rtx op)
40411 enum rtx_code code = GET_CODE (op);
40412 if (code == SUBREG
40413 && (GET_MODE_SIZE (GET_MODE (op))
40414 < GET_MODE_SIZE (GET_MODE (XEXP (op, 0)))))
40416 unsigned int index = SUBREG_BYTE (op);
40417 if (index < 8)
40418 index += 8;
40419 else
40420 index -= 8;
40421 SUBREG_BYTE (op) = index;
40424 const char *fmt = GET_RTX_FORMAT (code);
40425 int i,j;
40426 for (i = 0; i < GET_RTX_LENGTH (code); ++i)
40427 if (fmt[i] == 'e' || fmt[i] == 'u')
40428 adjust_subreg_index (XEXP (op, i));
40429 else if (fmt[i] == 'E')
40430 for (j = 0; j < XVECLEN (op, i); ++j)
40431 adjust_subreg_index (XVECEXP (op, i, j));
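/* Editorial example: a narrowing subreg such as (subreg:DF (reg:V2DF v) 8)
   names bytes 8..15 of the register. Once the register holds swapped
   doublewords those bytes contain the other half, so the function above
   rewrites the index to 0 (and, symmetrically, index 0 to 8). */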
40434 /* Convert the non-permuting load INSN to a permuting one. */
40435 static void
40436 permute_load (rtx_insn *insn)
40438 rtx body = PATTERN (insn);
40439 rtx mem_op = SET_SRC (body);
40440 rtx tgt_reg = SET_DEST (body);
40441 machine_mode mode = GET_MODE (tgt_reg);
40442 int n_elts = GET_MODE_NUNITS (mode);
40443 int half_elts = n_elts / 2;
40444 rtx par = gen_rtx_PARALLEL (mode, rtvec_alloc (n_elts));
40445 int i, j;
40446 for (i = 0, j = half_elts; i < half_elts; ++i, ++j)
40447 XVECEXP (par, 0, i) = GEN_INT (j);
40448 for (i = half_elts, j = 0; j < half_elts; ++i, ++j)
40449 XVECEXP (par, 0, i) = GEN_INT (j);
40450 rtx sel = gen_rtx_VEC_SELECT (mode, mem_op, par);
40451 SET_SRC (body) = sel;
40452 INSN_CODE (insn) = -1; /* Force re-recognition. */
40453 df_insn_rescan (insn);
40455 if (dump_file)
40456 fprintf (dump_file, "Replacing load %d with permuted load\n",
40457 INSN_UID (insn));
40460 /* Convert the non-permuting store INSN to a permuting one. */
40461 static void
40462 permute_store (rtx_insn *insn)
40464 rtx body = PATTERN (insn);
40465 rtx src_reg = SET_SRC (body);
40466 machine_mode mode = GET_MODE (src_reg);
40467 int n_elts = GET_MODE_NUNITS (mode);
40468 int half_elts = n_elts / 2;
40469 rtx par = gen_rtx_PARALLEL (mode, rtvec_alloc (n_elts));
40470 int i, j;
40471 for (i = 0, j = half_elts; i < half_elts; ++i, ++j)
40472 XVECEXP (par, 0, i) = GEN_INT (j);
40473 for (i = half_elts, j = 0; j < half_elts; ++i, ++j)
40474 XVECEXP (par, 0, i) = GEN_INT (j);
40475 rtx sel = gen_rtx_VEC_SELECT (mode, src_reg, par);
40476 SET_SRC (body) = sel;
40477 INSN_CODE (insn) = -1; /* Force re-recognition. */
40478 df_insn_rescan (insn);
40480 if (dump_file)
40481 fprintf (dump_file, "Replacing store %d with permuted store\n",
40482 INSN_UID (insn));
40485 /* Given INSN that contains a vector extract operation, adjust the index
40486 of the extracted lane to account for the doubleword swap. */
40487 static void
40488 adjust_extract (rtx_insn *insn)
40490 rtx pattern = PATTERN (insn);
40491 if (GET_CODE (pattern) == PARALLEL)
40492 pattern = XVECEXP (pattern, 0, 0);
40493 rtx src = SET_SRC (pattern);
40494 /* The vec_select may be wrapped in a vec_duplicate for a splat, so
40495 account for that. */
40496 rtx sel = GET_CODE (src) == VEC_DUPLICATE ? XEXP (src, 0) : src;
40497 rtx par = XEXP (sel, 1);
40498 int half_elts = GET_MODE_NUNITS (GET_MODE (XEXP (sel, 0))) >> 1;
40499 int lane = INTVAL (XVECEXP (par, 0, 0));
40500 lane = lane >= half_elts ? lane - half_elts : lane + half_elts;
40501 XVECEXP (par, 0, 0) = GEN_INT (lane);
40502 INSN_CODE (insn) = -1; /* Force re-recognition. */
40503 df_insn_rescan (insn);
40505 if (dump_file)
40506 fprintf (dump_file, "Changing lane for extract %d\n", INSN_UID (insn));
40509 /* Given INSN that contains a vector direct-splat operation, adjust the index
40510 of the source lane to account for the doubleword swap. */
40511 static void
40512 adjust_splat (rtx_insn *insn)
40514 rtx body = PATTERN (insn);
40515 rtx unspec = XEXP (body, 1);
40516 int half_elts = GET_MODE_NUNITS (GET_MODE (unspec)) >> 1;
40517 int lane = INTVAL (XVECEXP (unspec, 0, 1));
40518 lane = lane >= half_elts ? lane - half_elts : lane + half_elts;
40519 XVECEXP (unspec, 0, 1) = GEN_INT (lane);
40520 INSN_CODE (insn) = -1; /* Force re-recognition. */
40521 df_insn_rescan (insn);
40523 if (dump_file)
40524 fprintf (dump_file, "Changing lane for splat %d\n", INSN_UID (insn));
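/* For illustration (assuming V4SI mode): the lane mapping is the same
   as for extracts, so a direct splat of lane 1 becomes a splat of
   lane 3.  */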
40527 /* Given INSN that contains an XXPERMDI operation (that is not a doubleword
40528 swap), reverse the order of the source operands and adjust the indices
40529 of the source lanes to account for doubleword reversal. */
40530 static void
40531 adjust_xxpermdi (rtx_insn *insn)
40533 rtx set = PATTERN (insn);
40534 rtx select = XEXP (set, 1);
40535 rtx concat = XEXP (select, 0);
40536 rtx src0 = XEXP (concat, 0);
40537 XEXP (concat, 0) = XEXP (concat, 1);
40538 XEXP (concat, 1) = src0;
40539 rtx parallel = XEXP (select, 1);
40540 int lane0 = INTVAL (XVECEXP (parallel, 0, 0));
40541 int lane1 = INTVAL (XVECEXP (parallel, 0, 1));
40542 int new_lane0 = 3 - lane1;
40543 int new_lane1 = 3 - lane0;
40544 XVECEXP (parallel, 0, 0) = GEN_INT (new_lane0);
40545 XVECEXP (parallel, 0, 1) = GEN_INT (new_lane1);
40546 INSN_CODE (insn) = -1; /* Force re-recognition. */
40547 df_insn_rescan (insn);
40549 if (dump_file)
40550 fprintf (dump_file, "Changing lanes for xxpermdi %d\n", INSN_UID (insn));
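/* For illustration: after the doubleword swap, element K of the
   original (vec_concat src0 src1) sits at position 3-K of
   (vec_concat src1 src0), and the two output lanes are exchanged as
   well, giving new_lane0 = 3 - lane1 and new_lane1 = 3 - lane0.
   A selector of (0, 2), for example, becomes (1, 3).  */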
40553 /* Given INSN that contains a VEC_CONCAT operation of two doublewords,
40554 reverse the order of those inputs. */
40555 static void
40556 adjust_concat (rtx_insn *insn)
40558 rtx set = PATTERN (insn);
40559 rtx concat = XEXP (set, 1);
40560 rtx src0 = XEXP (concat, 0);
40561 XEXP (concat, 0) = XEXP (concat, 1);
40562 XEXP (concat, 1) = src0;
40563 INSN_CODE (insn) = -1; /* Force re-recognition. */
40564 df_insn_rescan (insn);
40566 if (dump_file)
40567 fprintf (dump_file, "Reversing inputs for concat %d\n", INSN_UID (insn));
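/* For illustration (assuming V2DF mode):
   (set T (vec_concat:V2DF A B)) becomes
   (set T (vec_concat:V2DF B A)), since swapping T's doublewords
   reverses which input ends up in which half.  */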
40570 /* Given an UNSPEC_VPERM insn, modify the mask loaded from the
40571 constant pool to reflect swapped doublewords. */
40572 static void
40573 adjust_vperm (rtx_insn *insn)
40575 /* We previously determined that the UNSPEC_VPERM was fed by a
40576 swap of a swapping load of a TOC-relative constant pool symbol.
40577 Find the MEM in the swapping load and replace it with a MEM for
40578 the adjusted mask constant. */
40579 rtx set = PATTERN (insn);
40580 rtx mask_reg = XVECEXP (SET_SRC (set), 0, 2);
40582 /* Find the swap. */
40583 struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
40584 df_ref use;
40585 rtx_insn *swap_insn = 0;
40586 FOR_EACH_INSN_INFO_USE (use, insn_info)
40587 if (rtx_equal_p (DF_REF_REG (use), mask_reg))
40589 struct df_link *def_link = DF_REF_CHAIN (use);
40590 gcc_assert (def_link && !def_link->next);
40591 swap_insn = DF_REF_INSN (def_link->ref);
40592 break;
40594 gcc_assert (swap_insn);
40596 /* Find the load. */
40597 insn_info = DF_INSN_INFO_GET (swap_insn);
40598 rtx_insn *load_insn = 0;
40599 FOR_EACH_INSN_INFO_USE (use, insn_info)
40601 struct df_link *def_link = DF_REF_CHAIN (use);
40602 gcc_assert (def_link && !def_link->next);
40603 load_insn = DF_REF_INSN (def_link->ref);
40604 break;
40606 gcc_assert (load_insn);
40608 /* Find the TOC-relative symbol access. */
40609 insn_info = DF_INSN_INFO_GET (load_insn);
40610 rtx_insn *tocrel_insn = 0;
40611 FOR_EACH_INSN_INFO_USE (use, insn_info)
40613 struct df_link *def_link = DF_REF_CHAIN (use);
40614 gcc_assert (def_link && !def_link->next);
40615 tocrel_insn = DF_REF_INSN (def_link->ref);
40616 break;
40618 gcc_assert (tocrel_insn);
40620 /* Find the embedded CONST_VECTOR. We must call toc_relative_expr_p
40621 here for its side effect of setting tocrel_base; we have already
40622 established that it will return true. */
40623 rtx base, offset;
40624 rtx tocrel_expr = SET_SRC (PATTERN (tocrel_insn));
40625 /* There is an extra level of indirection for small/large code models. */
40626 if (GET_CODE (tocrel_expr) == MEM)
40627 tocrel_expr = XEXP (tocrel_expr, 0);
40628 if (!toc_relative_expr_p (tocrel_expr, false))
40629 gcc_unreachable ();
40630 split_const (XVECEXP (tocrel_base, 0, 0), &base, &offset);
40631 rtx const_vector = get_pool_constant (base);
40632 /* With the extra indirection, get_pool_constant may yield another
40633 SYMBOL_REF (from the reg_equal expression); if so, look it up
40634 again to obtain the real constant. */
40635 if (GET_CODE (const_vector) == SYMBOL_REF)
40636 const_vector = get_pool_constant (const_vector);
40637 gcc_assert (GET_CODE (const_vector) == CONST_VECTOR);
40639 /* Create an adjusted mask from the initial mask. */
40640 unsigned int new_mask[16], i, val;
40641 for (i = 0; i < 16; ++i) {
40642 val = INTVAL (XVECEXP (const_vector, 0, i));
40643 if (val < 16)
40644 new_mask[i] = (val + 8) % 16;
40645 else
40646 new_mask[i] = ((val + 8) % 16) + 16;
40647 }
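/* For illustration: each selector byte is rotated by 8 within its
   16-byte source, e.g. 0 becomes 8, 8 becomes 0, and 27 becomes 19,
   so the mask picks out the same bytes once both vperm inputs have
   had their doublewords swapped.  */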
40649 /* Create a new CONST_VECTOR and a MEM that references it. */
40650 rtx vals = gen_rtx_PARALLEL (V16QImode, rtvec_alloc (16));
40651 for (i = 0; i < 16; ++i)
40652 XVECEXP (vals, 0, i) = GEN_INT (new_mask[i]);
40653 rtx new_const_vector = gen_rtx_CONST_VECTOR (V16QImode, XVEC (vals, 0));
40654 rtx new_mem = force_const_mem (V16QImode, new_const_vector);
40655 /* This gives us a MEM whose base operand is a SYMBOL_REF, which we
40656 can't recognize. Force the SYMBOL_REF into a register. */
40657 if (!REG_P (XEXP (new_mem, 0))) {
40658 rtx base_reg = force_reg (Pmode, XEXP (new_mem, 0));
40659 XEXP (new_mem, 0) = base_reg;
40660 /* Move the newly created insn ahead of the load insn. */
40661 rtx_insn *force_insn = get_last_insn ();
40662 remove_insn (force_insn);
40663 rtx_insn *before_load_insn = PREV_INSN (load_insn);
40664 add_insn_after (force_insn, before_load_insn, BLOCK_FOR_INSN (load_insn));
40665 df_insn_rescan (before_load_insn);
40666 df_insn_rescan (force_insn);
40667 }
40669 /* Replace the MEM in the load instruction and rescan it. */
40670 XEXP (SET_SRC (PATTERN (load_insn)), 0) = new_mem;
40671 INSN_CODE (load_insn) = -1; /* Force re-recognition. */
40672 df_insn_rescan (load_insn);
40674 if (dump_file)
40675 fprintf (dump_file, "Adjusting mask for vperm %d\n", INSN_UID (insn));
40678 /* The insn described by INSN_ENTRY[I] can be swapped, but only
40679 with special handling. Take care of that here. */
40680 static void
40681 handle_special_swappables (swap_web_entry *insn_entry, unsigned i)
40683 rtx_insn *insn = insn_entry[i].insn;
40684 rtx body = PATTERN (insn);
40686 switch (insn_entry[i].special_handling)
40688 default:
40689 gcc_unreachable ();
40690 case SH_CONST_VECTOR:
40692 /* A CONST_VECTOR will only show up somewhere in the RHS of a SET. */
40693 gcc_assert (GET_CODE (body) == SET);
40694 rtx rhs = SET_SRC (body);
40695 swap_const_vector_halves (rhs);
40696 if (dump_file)
40697 fprintf (dump_file, "Swapping constant halves in insn %d\n", i);
40698 break;
40700 case SH_SUBREG:
40701 /* A subreg of the same size is already safe. For subregs that
40702 select a smaller portion of a reg, adjust the index for
40703 swapped doublewords. */
40704 adjust_subreg_index (body);
40705 if (dump_file)
40706 fprintf (dump_file, "Adjusting subreg in insn %d\n", i);
40707 break;
40708 case SH_NOSWAP_LD:
40709 /* Convert a non-permuting load to a permuting one. */
40710 permute_load (insn);
40711 break;
40712 case SH_NOSWAP_ST:
40713 /* Convert a non-permuting store to a permuting one. */
40714 permute_store (insn);
40715 break;
40716 case SH_EXTRACT:
40717 /* Change the lane on an extract operation. */
40718 adjust_extract (insn);
40719 break;
40720 case SH_SPLAT:
40721 /* Change the lane on a direct-splat operation. */
40722 adjust_splat (insn);
40723 break;
40724 case SH_XXPERMDI:
40725 /* Change the lanes on an XXPERMDI operation. */
40726 adjust_xxpermdi (insn);
40727 break;
40728 case SH_CONCAT:
40729 /* Reverse the order of a concatenation operation. */
40730 adjust_concat (insn);
40731 break;
40732 case SH_VPERM:
40733 /* Change the mask loaded from the constant pool for a VPERM. */
40734 adjust_vperm (insn);
40735 break;
40739 /* Find the insn from the Ith table entry, which is known to be a
40740 register swap Y = SWAP(X). Replace it with a copy Y = X. */
40741 static void
40742 replace_swap_with_copy (swap_web_entry *insn_entry, unsigned i)
40744 rtx_insn *insn = insn_entry[i].insn;
40745 rtx body = PATTERN (insn);
40746 rtx src_reg = XEXP (SET_SRC (body), 0);
40747 rtx copy = gen_rtx_SET (SET_DEST (body), src_reg);
40748 rtx_insn *new_insn = emit_insn_before (copy, insn);
40749 set_block_for_insn (new_insn, BLOCK_FOR_INSN (insn));
40750 df_insn_rescan (new_insn);
40752 if (dump_file)
40754 unsigned int new_uid = INSN_UID (new_insn);
40755 fprintf (dump_file, "Replacing swap %d with copy %d\n", i, new_uid);
40758 df_insn_delete (insn);
40759 remove_insn (insn);
40760 insn->set_deleted ();
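/* For illustration (assuming V2DI mode): the deleted insn had the
   shape

     (set (reg:V2DI Y)
          (vec_select:V2DI (reg:V2DI X) (parallel [1 0])))

   and is replaced by the plain copy (set (reg:V2DI Y) (reg:V2DI X)),
   which later passes can usually propagate away.  */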
40763 /* Dump the swap table to DUMP_FILE. */
40764 static void
40765 dump_swap_insn_table (swap_web_entry *insn_entry)
40767 int e = get_max_uid ();
40768 fprintf (dump_file, "\nRelevant insns with their flag settings\n\n");
40770 for (int i = 0; i < e; ++i)
40771 if (insn_entry[i].is_relevant)
40773 swap_web_entry *pred_entry = (swap_web_entry *)insn_entry[i].pred ();
40774 fprintf (dump_file, "%6d %6d ", i,
40775 pred_entry && pred_entry->insn
40776 ? INSN_UID (pred_entry->insn) : 0);
40777 if (insn_entry[i].is_load)
40778 fputs ("load ", dump_file);
40779 if (insn_entry[i].is_store)
40780 fputs ("store ", dump_file);
40781 if (insn_entry[i].is_swap)
40782 fputs ("swap ", dump_file);
40783 if (insn_entry[i].is_live_in)
40784 fputs ("live-in ", dump_file);
40785 if (insn_entry[i].is_live_out)
40786 fputs ("live-out ", dump_file);
40787 if (insn_entry[i].contains_subreg)
40788 fputs ("subreg ", dump_file);
40789 if (insn_entry[i].is_128_int)
40790 fputs ("int128 ", dump_file);
40791 if (insn_entry[i].is_call)
40792 fputs ("call ", dump_file);
40793 if (insn_entry[i].is_swappable)
40795 fputs ("swappable ", dump_file);
40796 if (insn_entry[i].special_handling == SH_CONST_VECTOR)
40797 fputs ("special:constvec ", dump_file);
40798 else if (insn_entry[i].special_handling == SH_SUBREG)
40799 fputs ("special:subreg ", dump_file);
40800 else if (insn_entry[i].special_handling == SH_NOSWAP_LD)
40801 fputs ("special:load ", dump_file);
40802 else if (insn_entry[i].special_handling == SH_NOSWAP_ST)
40803 fputs ("special:store ", dump_file);
40804 else if (insn_entry[i].special_handling == SH_EXTRACT)
40805 fputs ("special:extract ", dump_file);
40806 else if (insn_entry[i].special_handling == SH_SPLAT)
40807 fputs ("special:splat ", dump_file);
40808 else if (insn_entry[i].special_handling == SH_XXPERMDI)
40809 fputs ("special:xxpermdi ", dump_file);
40810 else if (insn_entry[i].special_handling == SH_CONCAT)
40811 fputs ("special:concat ", dump_file);
40812 else if (insn_entry[i].special_handling == SH_VPERM)
40813 fputs ("special:vperm ", dump_file);
40815 if (insn_entry[i].web_not_optimizable)
40816 fputs ("unoptimizable ", dump_file);
40817 if (insn_entry[i].will_delete)
40818 fputs ("delete ", dump_file);
40819 fputs ("\n", dump_file);
40821 fputs ("\n", dump_file);
40824 /* Given ALIGN, an rtx of the form (and addr (const_int -16)), return
40825 a copy with the address canonicalized to (reg) or (plus reg reg).
40826 Always return a new rtx to avoid problems with combine. */
40827 static rtx
40828 alignment_with_canonical_addr (rtx align)
40830 rtx canon;
40831 rtx addr = XEXP (align, 0);
40833 if (REG_P (addr))
40834 canon = addr;
40836 else if (GET_CODE (addr) == PLUS)
40838 rtx addrop0 = XEXP (addr, 0);
40839 rtx addrop1 = XEXP (addr, 1);
40841 if (!REG_P (addrop0))
40842 addrop0 = force_reg (GET_MODE (addrop0), addrop0);
40844 if (!REG_P (addrop1))
40845 addrop1 = force_reg (GET_MODE (addrop1), addrop1);
40847 canon = gen_rtx_PLUS (GET_MODE (addr), addrop0, addrop1);
40850 else
40851 canon = force_reg (GET_MODE (addr), addr);
40853 return gen_rtx_AND (GET_MODE (align), canon, GEN_INT (-16));
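/* For illustration: if the address under the AND is
   (plus (reg r3) X) and X is not already a register, X is forced
   into a new register Rk, and the rtx returned is
   (and (plus (reg r3) (reg Rk)) (const_int -16)), the indexed form
   that lvx/stvx addressing accepts.  */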
40856 /* Check whether an rtx is an alignment mask, and if so, return
40857 a fully-expanded rtx for the masking operation. */
40858 static rtx
40859 alignment_mask (rtx_insn *insn)
40861 rtx body = PATTERN (insn);
40863 if (GET_CODE (body) != SET
40864 || GET_CODE (SET_SRC (body)) != AND
40865 || !REG_P (XEXP (SET_SRC (body), 0)))
40866 return 0;
40868 rtx mask = XEXP (SET_SRC (body), 1);
40870 if (GET_CODE (mask) == CONST_INT)
40872 if (INTVAL (mask) == -16)
40873 return alignment_with_canonical_addr (SET_SRC (body));
40874 else
40875 return 0;
40878 if (!REG_P (mask))
40879 return 0;
40881 struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
40882 df_ref use;
40883 rtx real_mask = 0;
40885 FOR_EACH_INSN_INFO_USE (use, insn_info)
40887 if (!rtx_equal_p (DF_REF_REG (use), mask))
40888 continue;
40890 struct df_link *def_link = DF_REF_CHAIN (use);
40891 if (!def_link || def_link->next)
40892 return 0;
40894 rtx_insn *const_insn = DF_REF_INSN (def_link->ref);
40895 rtx const_body = PATTERN (const_insn);
40896 if (GET_CODE (const_body) != SET)
40897 return 0;
40899 real_mask = SET_SRC (const_body);
40901 if (GET_CODE (real_mask) != CONST_INT
40902 || INTVAL (real_mask) != -16)
40903 return 0;
40906 if (real_mask == 0)
40907 return 0;
40909 return alignment_with_canonical_addr (SET_SRC (body));
40912 /* Given INSN that's a load or store based at BASE_REG, look for a
40913 feeding computation that aligns its address on a 16-byte boundary. */
40914 static rtx
40915 find_alignment_op (rtx_insn *insn, rtx base_reg)
40917 df_ref base_use;
40918 struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
40919 rtx and_operation = 0;
40921 FOR_EACH_INSN_INFO_USE (base_use, insn_info)
40923 if (!rtx_equal_p (DF_REF_REG (base_use), base_reg))
40924 continue;
40926 struct df_link *base_def_link = DF_REF_CHAIN (base_use);
40927 if (!base_def_link || base_def_link->next)
40928 break;
40930 rtx_insn *and_insn = DF_REF_INSN (base_def_link->ref);
40931 and_operation = alignment_mask (and_insn);
40932 if (and_operation != 0)
40933 break;
40936 return and_operation;
40939 struct del_info { bool replace; rtx_insn *replace_insn; };
40941 /* If INSN is the load for an lvx pattern, put it in canonical form. */
40942 static void
40943 recombine_lvx_pattern (rtx_insn *insn, del_info *to_delete)
40945 rtx body = PATTERN (insn);
40946 gcc_assert (GET_CODE (body) == SET
40947 && GET_CODE (SET_SRC (body)) == VEC_SELECT
40948 && GET_CODE (XEXP (SET_SRC (body), 0)) == MEM);
40950 rtx mem = XEXP (SET_SRC (body), 0);
40951 rtx base_reg = XEXP (mem, 0);
40953 rtx and_operation = find_alignment_op (insn, base_reg);
40955 if (and_operation != 0)
40957 df_ref def;
40958 struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
40959 FOR_EACH_INSN_INFO_DEF (def, insn_info)
40961 struct df_link *link = DF_REF_CHAIN (def);
40962 if (!link || link->next)
40963 break;
40965 rtx_insn *swap_insn = DF_REF_INSN (link->ref);
40966 if (!insn_is_swap_p (swap_insn)
40967 || insn_is_load_p (swap_insn)
40968 || insn_is_store_p (swap_insn))
40969 break;
40971 /* Expected lvx pattern found. Change the swap to
40972 a copy, and propagate the AND operation into the
40973 load. */
40974 to_delete[INSN_UID (swap_insn)].replace = true;
40975 to_delete[INSN_UID (swap_insn)].replace_insn = swap_insn;
40977 XEXP (mem, 0) = and_operation;
40978 SET_SRC (body) = mem;
40979 INSN_CODE (insn) = -1; /* Force re-recognition. */
40980 df_insn_rescan (insn);
40982 if (dump_file)
40983 fprintf (dump_file, "lvx opportunity found at %d\n",
40984 INSN_UID (insn));
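/* For illustration (a sketch assuming V4SI mode): the sequence

     (set (reg B) (and (reg A) (const_int -16)))
     (set (reg V) (vec_select:V4SI (mem:V4SI (reg B))
                                   (parallel [2 3 0 1])))
     (set (reg W) (vec_select:V4SI (reg V) (parallel [2 3 0 1])))

   is canonicalized so that the load becomes the straight

     (set (reg V) (mem:V4SI (and (reg A) (const_int -16))))

   and the trailing swap is turned into a copy, the form the lvx
   patterns recognize.  */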
40989 /* If INSN is the store for an stvx pattern, put it in canonical form. */
40990 static void
40991 recombine_stvx_pattern (rtx_insn *insn, del_info *to_delete)
40993 rtx body = PATTERN (insn);
40994 gcc_assert (GET_CODE (body) == SET
40995 && GET_CODE (SET_DEST (body)) == MEM
40996 && GET_CODE (SET_SRC (body)) == VEC_SELECT);
40997 rtx mem = SET_DEST (body);
40998 rtx base_reg = XEXP (mem, 0);
41000 rtx and_operation = find_alignment_op (insn, base_reg);
41002 if (and_operation != 0)
41004 rtx src_reg = XEXP (SET_SRC (body), 0);
41005 df_ref src_use;
41006 struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
41007 FOR_EACH_INSN_INFO_USE (src_use, insn_info)
41009 if (!rtx_equal_p (DF_REF_REG (src_use), src_reg))
41010 continue;
41012 struct df_link *link = DF_REF_CHAIN (src_use);
41013 if (!link || link->next)
41014 break;
41016 rtx_insn *swap_insn = DF_REF_INSN (link->ref);
41017 if (!insn_is_swap_p (swap_insn)
41018 || insn_is_load_p (swap_insn)
41019 || insn_is_store_p (swap_insn))
41020 break;
41022 /* Expected stvx pattern found. Change the swap to
41023 a copy, and propagate the AND operation into the
41024 store. */
41025 to_delete[INSN_UID (swap_insn)].replace = true;
41026 to_delete[INSN_UID (swap_insn)].replace_insn = swap_insn;
41028 XEXP (mem, 0) = and_operation;
41029 SET_SRC (body) = src_reg;
41030 INSN_CODE (insn) = -1; /* Force re-recognition. */
41031 df_insn_rescan (insn);
41033 if (dump_file)
41034 fprintf (dump_file, "stvx opportunity found at %d\n",
41035 INSN_UID (insn));
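/* For illustration: the store case mirrors the load case above; the
   AND is folded into the store's address, the vec_select is dropped
   from the stored value, and the feeding swap becomes a copy,
   yielding the canonical stvx form.  */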
41040 /* Look for patterns created from builtin lvx and stvx calls, and
41041 canonicalize them to be properly recognized as such. */
41042 static void
41043 recombine_lvx_stvx_patterns (function *fun)
41045 int i;
41046 basic_block bb;
41047 rtx_insn *insn;
41049 int num_insns = get_max_uid ();
41050 del_info *to_delete = XCNEWVEC (del_info, num_insns);
41052 FOR_ALL_BB_FN (bb, fun)
41053 FOR_BB_INSNS (bb, insn)
41055 if (!NONDEBUG_INSN_P (insn))
41056 continue;
41058 if (insn_is_load_p (insn) && insn_is_swap_p (insn))
41059 recombine_lvx_pattern (insn, to_delete);
41060 else if (insn_is_store_p (insn) && insn_is_swap_p (insn))
41061 recombine_stvx_pattern (insn, to_delete);
41064 /* Turning swaps into copies is delayed until now, to avoid problems
41065 with deleting instructions during the insn walk. */
41066 for (i = 0; i < num_insns; i++)
41067 if (to_delete[i].replace)
41069 rtx swap_body = PATTERN (to_delete[i].replace_insn);
41070 rtx src_reg = XEXP (SET_SRC (swap_body), 0);
41071 rtx copy = gen_rtx_SET (SET_DEST (swap_body), src_reg);
41072 rtx_insn *new_insn = emit_insn_before (copy,
41073 to_delete[i].replace_insn);
41074 set_block_for_insn (new_insn,
41075 BLOCK_FOR_INSN (to_delete[i].replace_insn));
41076 df_insn_rescan (new_insn);
41077 df_insn_delete (to_delete[i].replace_insn);
41078 remove_insn (to_delete[i].replace_insn);
41079 to_delete[i].replace_insn->set_deleted ();
41082 free (to_delete);
41085 /* Main entry point for this pass. */
41086 unsigned int
41087 rs6000_analyze_swaps (function *fun)
41089 swap_web_entry *insn_entry;
41090 basic_block bb;
41091 rtx_insn *insn, *curr_insn = 0;
41093 /* Dataflow analysis for use-def chains. */
41094 df_set_flags (DF_RD_PRUNE_DEAD_DEFS);
41095 df_chain_add_problem (DF_DU_CHAIN | DF_UD_CHAIN);
41096 df_analyze ();
41097 df_set_flags (DF_DEFER_INSN_RESCAN);
41099 /* Pre-pass to recombine lvx and stvx patterns so we don't lose info. */
41100 recombine_lvx_stvx_patterns (fun);
41102 /* Allocate structure to represent webs of insns. */
41103 insn_entry = XCNEWVEC (swap_web_entry, get_max_uid ());
41105 /* Walk the insns to gather basic data. */
41106 FOR_ALL_BB_FN (bb, fun)
41107 FOR_BB_INSNS_SAFE (bb, insn, curr_insn)
41109 unsigned int uid = INSN_UID (insn);
41110 if (NONDEBUG_INSN_P (insn))
41112 insn_entry[uid].insn = insn;
41114 if (GET_CODE (insn) == CALL_INSN)
41115 insn_entry[uid].is_call = 1;
41117 /* Walk the uses and defs to see if we mention vector regs.
41118 Record any constraints on optimization of such mentions. */
41119 struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
41120 df_ref mention;
41121 FOR_EACH_INSN_INFO_USE (mention, insn_info)
41123 /* We use DF_REF_REAL_REG here to get inside any subregs. */
41124 machine_mode mode = GET_MODE (DF_REF_REAL_REG (mention));
41126 /* If a use gets its value from a call insn, it will be
41127 a hard register and will look like (reg:V4SI 3 3).
41128 The df analysis creates two mentions for GPR3 and GPR4,
41129 both DImode. We must recognize this and treat it as a
41130 vector mention to ensure the call is unioned with this
41131 use. */
41132 if (mode == DImode && DF_REF_INSN_INFO (mention))
41134 rtx feeder = DF_REF_INSN (mention);
41135 /* FIXME: It is pretty hard to get from the df mention
41136 to the mode of the use in the insn. We arbitrarily
41137 pick a vector mode here, even though the use might
41138 be a real DImode. We can be too conservative
41139 (create a web larger than necessary) because of
41140 this, so consider eventually fixing this. */
41141 if (GET_CODE (feeder) == CALL_INSN)
41142 mode = V4SImode;
41145 if (ALTIVEC_OR_VSX_VECTOR_MODE (mode) || mode == TImode)
41147 insn_entry[uid].is_relevant = 1;
41148 if (mode == TImode || mode == V1TImode
41149 || FLOAT128_VECTOR_P (mode))
41150 insn_entry[uid].is_128_int = 1;
41151 if (DF_REF_INSN_INFO (mention))
41152 insn_entry[uid].contains_subreg
41153 = !rtx_equal_p (DF_REF_REG (mention),
41154 DF_REF_REAL_REG (mention));
41155 union_defs (insn_entry, insn, mention);
41158 FOR_EACH_INSN_INFO_DEF (mention, insn_info)
41160 /* We use DF_REF_REAL_REG here to get inside any subregs. */
41161 machine_mode mode = GET_MODE (DF_REF_REAL_REG (mention));
41163 /* If we're loading up a hard vector register for a call,
41164 it looks like (set (reg:V4SI 9 9) (...)). The df
41165 analysis creates two mentions for GPR9 and GPR10, both
41166 DImode. So relying on the mode from the mentions
41167 isn't sufficient to ensure we union the call into the
41168 web with the parameter setup code. */
41169 if (mode == DImode && GET_CODE (PATTERN (insn)) == SET
41170 && ALTIVEC_OR_VSX_VECTOR_MODE (GET_MODE (SET_DEST (PATTERN (insn)))))
41171 mode = GET_MODE (SET_DEST (PATTERN (insn)));
41173 if (ALTIVEC_OR_VSX_VECTOR_MODE (mode) || mode == TImode)
41175 insn_entry[uid].is_relevant = 1;
41176 if (mode == TImode || mode == V1TImode
41177 || FLOAT128_VECTOR_P (mode))
41178 insn_entry[uid].is_128_int = 1;
41179 if (DF_REF_INSN_INFO (mention))
41180 insn_entry[uid].contains_subreg
41181 = !rtx_equal_p (DF_REF_REG (mention),
41182 DF_REF_REAL_REG (mention));
41183 /* REG_FUNCTION_VALUE_P is not valid for subregs. */
41184 else if (REG_FUNCTION_VALUE_P (DF_REF_REG (mention)))
41185 insn_entry[uid].is_live_out = 1;
41186 union_uses (insn_entry, insn, mention);
41190 if (insn_entry[uid].is_relevant)
41192 /* Determine if this is a load or store. */
41193 insn_entry[uid].is_load = insn_is_load_p (insn);
41194 insn_entry[uid].is_store = insn_is_store_p (insn);
41196 /* Determine if this is a doubleword swap. If not,
41197 determine whether it can legally be swapped. */
41198 if (insn_is_swap_p (insn))
41199 insn_entry[uid].is_swap = 1;
41200 else
41202 unsigned int special = SH_NONE;
41203 insn_entry[uid].is_swappable
41204 = insn_is_swappable_p (insn_entry, insn, &special);
41205 if (special != SH_NONE && insn_entry[uid].contains_subreg)
41206 insn_entry[uid].is_swappable = 0;
41207 else if (special != SH_NONE)
41208 insn_entry[uid].special_handling = special;
41209 else if (insn_entry[uid].contains_subreg)
41210 insn_entry[uid].special_handling = SH_SUBREG;
41216 if (dump_file)
41218 fprintf (dump_file, "\nSwap insn entry table when first built\n");
41219 dump_swap_insn_table (insn_entry);
41222 /* Record unoptimizable webs. */
41223 unsigned e = get_max_uid (), i;
41224 for (i = 0; i < e; ++i)
41226 if (!insn_entry[i].is_relevant)
41227 continue;
41229 swap_web_entry *root
41230 = (swap_web_entry*)(&insn_entry[i])->unionfind_root ();
41232 if (insn_entry[i].is_live_in || insn_entry[i].is_live_out
41233 || (insn_entry[i].contains_subreg
41234 && insn_entry[i].special_handling != SH_SUBREG)
41235 || insn_entry[i].is_128_int || insn_entry[i].is_call
41236 || !(insn_entry[i].is_swappable || insn_entry[i].is_swap))
41237 root->web_not_optimizable = 1;
41239 /* If we have loads or stores that aren't permuting then the
41240 optimization isn't appropriate. */
41241 else if ((insn_entry[i].is_load || insn_entry[i].is_store)
41242 && !insn_entry[i].is_swap && !insn_entry[i].is_swappable)
41243 root->web_not_optimizable = 1;
41245 /* If we have permuting loads or stores that are not accompanied
41246 by a register swap, the optimization isn't appropriate. */
41247 else if (insn_entry[i].is_load && insn_entry[i].is_swap)
41249 rtx insn = insn_entry[i].insn;
41250 struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
41251 df_ref def;
41253 FOR_EACH_INSN_INFO_DEF (def, insn_info)
41255 struct df_link *link = DF_REF_CHAIN (def);
41257 if (!chain_contains_only_swaps (insn_entry, link, FOR_LOADS))
41259 root->web_not_optimizable = 1;
41260 break;
41264 else if (insn_entry[i].is_store && insn_entry[i].is_swap)
41266 rtx insn = insn_entry[i].insn;
41267 struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
41268 df_ref use;
41270 FOR_EACH_INSN_INFO_USE (use, insn_info)
41272 struct df_link *link = DF_REF_CHAIN (use);
41274 if (!chain_contains_only_swaps (insn_entry, link, FOR_STORES))
41276 root->web_not_optimizable = 1;
41277 break;
41283 if (dump_file)
41285 fprintf (dump_file, "\nSwap insn entry table after web analysis\n");
41286 dump_swap_insn_table (insn_entry);
41289 /* For each load and store in an optimizable web (which implies
41290 the loads and stores are permuting), find the associated
41291 register swaps and mark them for removal. Due to various
41292 optimizations we may mark the same swap more than once. Also
41293 perform special handling for swappable insns that require it. */
41294 for (i = 0; i < e; ++i)
41295 if ((insn_entry[i].is_load || insn_entry[i].is_store)
41296 && insn_entry[i].is_swap)
41298 swap_web_entry* root_entry
41299 = (swap_web_entry*)((&insn_entry[i])->unionfind_root ());
41300 if (!root_entry->web_not_optimizable)
41301 mark_swaps_for_removal (insn_entry, i);
41303 else if (insn_entry[i].is_swappable && insn_entry[i].special_handling)
41305 swap_web_entry* root_entry
41306 = (swap_web_entry*)((&insn_entry[i])->unionfind_root ());
41307 if (!root_entry->web_not_optimizable)
41308 handle_special_swappables (insn_entry, i);
41311 /* Now delete the swaps marked for removal. */
41312 for (i = 0; i < e; ++i)
41313 if (insn_entry[i].will_delete)
41314 replace_swap_with_copy (insn_entry, i);
41316 /* Clean up. */
41317 free (insn_entry);
41318 return 0;
41321 const pass_data pass_data_analyze_swaps =
41323 RTL_PASS, /* type */
41324 "swaps", /* name */
41325 OPTGROUP_NONE, /* optinfo_flags */
41326 TV_NONE, /* tv_id */
41327 0, /* properties_required */
41328 0, /* properties_provided */
41329 0, /* properties_destroyed */
41330 0, /* todo_flags_start */
41331 TODO_df_finish, /* todo_flags_finish */
41334 class pass_analyze_swaps : public rtl_opt_pass
41336 public:
41337 pass_analyze_swaps(gcc::context *ctxt)
41338 : rtl_opt_pass(pass_data_analyze_swaps, ctxt)
41341 /* opt_pass methods: */
41342 virtual bool gate (function *)
41344 return (optimize > 0 && !BYTES_BIG_ENDIAN && TARGET_VSX
41345 && !TARGET_P9_VECTOR && rs6000_optimize_swaps);
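/* Note: this gate limits the pass to little-endian VSX code prior to
   Power9. Assuming the ISA 3.0 lxvx/stxvx instructions are used
   there, Power9 vector accesses are already element-correct and need
   no swap cleanup.  */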
41348 virtual unsigned int execute (function *fun)
41350 return rs6000_analyze_swaps (fun);
41353 }; // class pass_analyze_swaps
41355 rtl_opt_pass *
41356 make_pass_analyze_swaps (gcc::context *ctxt)
41358 return new pass_analyze_swaps (ctxt);
41361 #ifdef RS6000_GLIBC_ATOMIC_FENV
41362 /* Function declarations for rs6000_atomic_assign_expand_fenv. */
41363 static tree atomic_hold_decl, atomic_clear_decl, atomic_update_decl;
41364 #endif
41366 /* Implement TARGET_ATOMIC_ASSIGN_EXPAND_FENV hook. */
41368 static void
41369 rs6000_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update)
41371 if (!TARGET_HARD_FLOAT || !TARGET_FPRS)
41373 #ifdef RS6000_GLIBC_ATOMIC_FENV
41374 if (atomic_hold_decl == NULL_TREE)
41376 atomic_hold_decl
41377 = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
41378 get_identifier ("__atomic_feholdexcept"),
41379 build_function_type_list (void_type_node,
41380 double_ptr_type_node,
41381 NULL_TREE));
41382 TREE_PUBLIC (atomic_hold_decl) = 1;
41383 DECL_EXTERNAL (atomic_hold_decl) = 1;
41386 if (atomic_clear_decl == NULL_TREE)
41388 atomic_clear_decl
41389 = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
41390 get_identifier ("__atomic_feclearexcept"),
41391 build_function_type_list (void_type_node,
41392 NULL_TREE));
41393 TREE_PUBLIC (atomic_clear_decl) = 1;
41394 DECL_EXTERNAL (atomic_clear_decl) = 1;
41397 tree const_double = build_qualified_type (double_type_node,
41398 TYPE_QUAL_CONST);
41399 tree const_double_ptr = build_pointer_type (const_double);
41400 if (atomic_update_decl == NULL_TREE)
41402 atomic_update_decl
41403 = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
41404 get_identifier ("__atomic_feupdateenv"),
41405 build_function_type_list (void_type_node,
41406 const_double_ptr,
41407 NULL_TREE));
41408 TREE_PUBLIC (atomic_update_decl) = 1;
41409 DECL_EXTERNAL (atomic_update_decl) = 1;
41412 tree fenv_var = create_tmp_var_raw (double_type_node);
41413 TREE_ADDRESSABLE (fenv_var) = 1;
41414 tree fenv_addr = build1 (ADDR_EXPR, double_ptr_type_node, fenv_var);
41416 *hold = build_call_expr (atomic_hold_decl, 1, fenv_addr);
41417 *clear = build_call_expr (atomic_clear_decl, 0);
41418 *update = build_call_expr (atomic_update_decl, 1,
41419 fold_convert (const_double_ptr, fenv_addr));
41420 #endif
41421 return;
41424 tree mffs = rs6000_builtin_decls[RS6000_BUILTIN_MFFS];
41425 tree mtfsf = rs6000_builtin_decls[RS6000_BUILTIN_MTFSF];
41426 tree call_mffs = build_call_expr (mffs, 0);
41428 /* Generates the equivalent of feholdexcept (&fenv_var):
41430 fenv_var = __builtin_mffs ();
41431 double fenv_hold;
41432 *(uint64_t*)&fenv_hold = *(uint64_t*)&fenv_var & 0xffffffff00000007LL;
41433 __builtin_mtfsf (0xff, fenv_hold); */
41435 /* Mask to clear everything except for the rounding modes and non-IEEE
41436 arithmetic flag. */
41437 const unsigned HOST_WIDE_INT hold_exception_mask =
41438 HOST_WIDE_INT_C (0xffffffff00000007);
41440 tree fenv_var = create_tmp_var_raw (double_type_node);
41442 tree hold_mffs = build2 (MODIFY_EXPR, void_type_node, fenv_var, call_mffs);
41444 tree fenv_llu = build1 (VIEW_CONVERT_EXPR, uint64_type_node, fenv_var);
41445 tree fenv_llu_and = build2 (BIT_AND_EXPR, uint64_type_node, fenv_llu,
41446 build_int_cst (uint64_type_node,
41447 hold_exception_mask));
41449 tree fenv_hold_mtfsf = build1 (VIEW_CONVERT_EXPR, double_type_node,
41450 fenv_llu_and);
41452 tree hold_mtfsf = build_call_expr (mtfsf, 2,
41453 build_int_cst (unsigned_type_node, 0xff),
41454 fenv_hold_mtfsf);
41456 *hold = build2 (COMPOUND_EXPR, void_type_node, hold_mffs, hold_mtfsf);
41458 /* Generates the equivalent of feclearexcept (FE_ALL_EXCEPT):
41460 double fenv_clear = __builtin_mffs ();
41461 *(uint64_t*)&fenv_clear &= 0xffffffff00000000LL;
41462 __builtin_mtfsf (0xff, fenv_clear); */
41464 /* Mask that zeroes the lower 32 bits, which hold the FPSCR image,
41465 thereby clearing all of the exception bits. */
41466 const unsigned HOST_WIDE_INT clear_exception_mask =
41467 HOST_WIDE_INT_C (0xffffffff00000000);
41469 tree fenv_clear = create_tmp_var_raw (double_type_node);
41471 tree clear_mffs = build2 (MODIFY_EXPR, void_type_node, fenv_clear, call_mffs);
41473 tree fenv_clean_llu = build1 (VIEW_CONVERT_EXPR, uint64_type_node, fenv_clear);
41474 tree fenv_clear_llu_and = build2 (BIT_AND_EXPR, uint64_type_node,
41475 fenv_clean_llu,
41476 build_int_cst (uint64_type_node,
41477 clear_exception_mask));
41479 tree fenv_clear_mtfsf = build1 (VIEW_CONVERT_EXPR, double_type_node,
41480 fenv_clear_llu_and);
41482 tree clear_mtfsf = build_call_expr (mtfsf, 2,
41483 build_int_cst (unsigned_type_node, 0xff),
41484 fenv_clear_mtfsf);
41486 *clear = build2 (COMPOUND_EXPR, void_type_node, clear_mffs, clear_mtfsf);
41488 /* Generates the equivalent of feupdateenv (&fenv_var):
41490 double old_fenv = __builtin_mffs ();
41491 double fenv_update;
41492 *(uint64_t*)&fenv_update = (*(uint64_t*)&old_fenv & 0xffffffff1fffff00LL) |
41493 (*(uint64_t*)&fenv_var & 0x1ff80fff);
41494 __builtin_mtfsf (0xff, fenv_update); */
41496 const unsigned HOST_WIDE_INT update_exception_mask =
41497 HOST_WIDE_INT_C (0xffffffff1fffff00);
41498 const unsigned HOST_WIDE_INT new_exception_mask =
41499 HOST_WIDE_INT_C (0x1ff80fff);
41501 tree old_fenv = create_tmp_var_raw (double_type_node);
41502 tree update_mffs = build2 (MODIFY_EXPR, void_type_node, old_fenv, call_mffs);
41504 tree old_llu = build1 (VIEW_CONVERT_EXPR, uint64_type_node, old_fenv);
41505 tree old_llu_and = build2 (BIT_AND_EXPR, uint64_type_node, old_llu,
41506 build_int_cst (uint64_type_node,
41507 update_exception_mask));
41509 tree new_llu_and = build2 (BIT_AND_EXPR, uint64_type_node, fenv_llu,
41510 build_int_cst (uint64_type_node,
41511 new_exception_mask));
41513 tree new_llu_mask = build2 (BIT_IOR_EXPR, uint64_type_node,
41514 old_llu_and, new_llu_and);
41516 tree fenv_update_mtfsf = build1 (VIEW_CONVERT_EXPR, double_type_node,
41517 new_llu_mask);
41519 tree update_mtfsf = build_call_expr (mtfsf, 2,
41520 build_int_cst (unsigned_type_node, 0xff),
41521 fenv_update_mtfsf);
41523 *update = build2 (COMPOUND_EXPR, void_type_node, update_mffs, update_mtfsf);
41526 /* Implement the TARGET_OPTAB_SUPPORTED_P hook. */
41528 static bool
41529 rs6000_optab_supported_p (int op, machine_mode mode1, machine_mode,
41530 optimization_type opt_type)
41532 switch (op)
41534 case rsqrt_optab:
41535 return (opt_type == OPTIMIZE_FOR_SPEED
41536 && RS6000_RECIP_AUTO_RSQRTE_P (mode1));
41538 default:
41539 return true;
41543 struct gcc_target targetm = TARGET_INITIALIZER;
41545 #include "gt-rs6000.h"