PowerPC bootstrap failure due to duplicate case value
[official-gcc.git] / gcc / config / rs6000 / rs6000.c
blob f1d5d9d1f9d7c85262b12ebfad6c68f10884415d
/* Subroutines used for code generation on IBM RS/6000.
   Copyright (C) 1991-2017 Free Software Foundation, Inc.
   Contributed by Richard Kenner (kenner@vlsi1.ultra.nyu.edu)

   This file is part of GCC.

   GCC is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published
   by the Free Software Foundation; either version 3, or (at your
   option) any later version.

   GCC is distributed in the hope that it will be useful, but WITHOUT
   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
   or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
   License for more details.

   You should have received a copy of the GNU General Public License
   along with GCC; see the file COPYING3.  If not see
   <http://www.gnu.org/licenses/>.  */
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "rtl.h"
#include "tree.h"
#include "memmodel.h"
#include "gimple.h"
#include "cfghooks.h"
#include "cfgloop.h"
#include "df.h"
#include "tm_p.h"
#include "stringpool.h"
#include "expmed.h"
#include "optabs.h"
#include "regs.h"
#include "ira.h"
#include "recog.h"
#include "cgraph.h"
#include "diagnostic-core.h"
#include "insn-attr.h"
#include "flags.h"
#include "alias.h"
#include "fold-const.h"
#include "stor-layout.h"
#include "calls.h"
#include "print-tree.h"
#include "varasm.h"
#include "explow.h"
#include "expr.h"
#include "output.h"
#include "dbxout.h"
#include "common/common-target.h"
#include "langhooks.h"
#include "reload.h"
#include "sched-int.h"
#include "gimplify.h"
#include "gimple-iterator.h"
#include "gimple-ssa.h"
#include "gimple-walk.h"
#include "intl.h"
#include "params.h"
#include "tm-constrs.h"
#include "tree-vectorizer.h"
#include "target-globals.h"
#include "builtins.h"
#include "context.h"
#include "tree-pass.h"
#if TARGET_XCOFF
#include "xcoffout.h"  /* get declarations of xcoff_*_section_name */
#endif
#if TARGET_MACHO
#include "gstab.h"  /* for N_SLINE */
#endif
#include "case-cfn-macros.h"
#include "ppc-auxv.h"

/* This file should be included last.  */
#include "target-def.h"

#ifndef TARGET_NO_PROTOTYPE
#define TARGET_NO_PROTOTYPE 0
#endif

#define min(A,B)  ((A) < (B) ? (A) : (B))
#define max(A,B)  ((A) > (B) ? (A) : (B))
/* Structure used to define the rs6000 stack.  */
typedef struct rs6000_stack {
  int reload_completed;		/* stack info won't change from here on */
  int first_gp_reg_save;	/* first callee saved GP register used */
  int first_fp_reg_save;	/* first callee saved FP register used */
  int first_altivec_reg_save;	/* first callee saved AltiVec register used */
  int lr_save_p;		/* true if the link reg needs to be saved */
  int cr_save_p;		/* true if the CR reg needs to be saved */
  unsigned int vrsave_mask;	/* mask of vec registers to save */
  int push_p;			/* true if we need to allocate stack space */
  int calls_p;			/* true if the function makes any calls */
  int world_save_p;		/* true if we're saving *everything*:
				   r13-r31, cr, f14-f31, vrsave, v20-v31 */
  enum rs6000_abi abi;		/* which ABI to use */
  int gp_save_offset;		/* offset to save GP regs from initial SP */
  int fp_save_offset;		/* offset to save FP regs from initial SP */
  int altivec_save_offset;	/* offset to save AltiVec regs from initial SP */
  int lr_save_offset;		/* offset to save LR from initial SP */
  int cr_save_offset;		/* offset to save CR from initial SP */
  int vrsave_save_offset;	/* offset to save VRSAVE from initial SP */
  int spe_gp_save_offset;	/* offset to save spe 64-bit gprs  */
  int varargs_save_offset;	/* offset to save the varargs registers */
  int ehrd_offset;		/* offset to EH return data */
  int ehcr_offset;		/* offset to EH CR field data */
  int reg_size;			/* register size (4 or 8) */
  HOST_WIDE_INT vars_size;	/* variable save area size */
  int parm_size;		/* outgoing parameter size */
  int save_size;		/* save area size */
  int fixed_size;		/* fixed size of stack frame */
  int gp_size;			/* size of saved GP registers */
  int fp_size;			/* size of saved FP registers */
  int altivec_size;		/* size of saved AltiVec registers */
  int cr_size;			/* size to hold CR if not in fixed area */
  int vrsave_size;		/* size to hold VRSAVE */
  int altivec_padding_size;	/* size of altivec alignment padding */
  int spe_gp_size;		/* size of 64-bit GPR save size for SPE */
  int spe_padding_size;
  HOST_WIDE_INT total_size;	/* total bytes allocated for stack */
  int spe_64bit_regs_used;
  int savres_strategy;
} rs6000_stack_t;
/* A C structure for machine-specific, per-function data.
   This is added to the cfun structure.  */
typedef struct GTY(()) machine_function
{
  /* Whether the instruction chain has been scanned already.  */
  int spe_insn_chain_scanned_p;
  /* Flags if __builtin_return_address (n) with n >= 1 was used.  */
  int ra_needs_full_frame;
  /* Flags if __builtin_return_address (0) was used.  */
  int ra_need_lr;
  /* Cache lr_save_p after expansion of builtin_eh_return.  */
  int lr_save_state;
  /* Whether we need to save the TOC to the reserved stack location in the
     function prologue.  */
  bool save_toc_in_prologue;
  /* Offset from virtual_stack_vars_rtx to the start of the ABI_V4
     varargs save area.  */
  HOST_WIDE_INT varargs_save_offset;
  /* Temporary stack slot to use for SDmode copies.  This slot is
     64-bits wide and is allocated early enough so that the offset
     does not overflow the 16-bit load/store offset field.  */
  rtx sdmode_stack_slot;
  /* Alternative internal arg pointer for -fsplit-stack.  */
  rtx split_stack_arg_pointer;
  bool split_stack_argp_used;
  /* Flag if r2 setup is needed with ELFv2 ABI.  */
  bool r2_setup_needed;
  /* The components already handled by separate shrink-wrapping, which should
     not be considered by the prologue and epilogue.  */
  bool gpr_is_wrapped_separately[32];
  bool lr_is_wrapped_separately;
} machine_function;
/* Support targetm.vectorize.builtin_mask_for_load.  */
static GTY(()) tree altivec_builtin_mask_for_load;

/* Set to nonzero once AIX common-mode calls have been defined.  */
static GTY(()) int common_mode_defined;

/* Label number of the label created for -mrelocatable, which we call
   to get the address of the GOT section.  */
static int rs6000_pic_labelno;

#ifdef USING_ELFOS_H
/* Counter for labels which are to be placed in .fixup.  */
int fixuplabelno = 0;
#endif

/* Whether to use variant of AIX ABI for PowerPC64 Linux.  */
int dot_symbols;
/* Specify the machine mode that pointers have.  After generation of rtl, the
   compiler makes no further distinction between pointers and any other objects
   of this machine mode.  The type is unsigned since not all things that
   include rs6000.h also include machmode.h.  */
unsigned rs6000_pmode;

/* Width in bits of a pointer.  */
unsigned rs6000_pointer_size;

#ifdef HAVE_AS_GNU_ATTRIBUTE
# ifndef HAVE_LD_PPC_GNU_ATTR_LONG_DOUBLE
# define HAVE_LD_PPC_GNU_ATTR_LONG_DOUBLE 0
# endif
/* Flag whether floating point values have been passed/returned.
   Note that this doesn't say whether fprs are used, since the
   Tag_GNU_Power_ABI_FP .gnu.attributes value this flag controls
   should be set for soft-float values passed in gprs and ieee128
   values passed in vsx registers.  */
static bool rs6000_passes_float;
static bool rs6000_passes_long_double;
/* Flag whether vector values have been passed/returned.  */
static bool rs6000_passes_vector;
/* Flag whether small (<= 8 byte) structures have been returned.  */
static bool rs6000_returns_struct;
#endif
/* Value is TRUE if register/mode pair is acceptable.  */
bool rs6000_hard_regno_mode_ok_p[NUM_MACHINE_MODES][FIRST_PSEUDO_REGISTER];

/* Maximum number of registers needed for a given register class and mode.  */
unsigned char rs6000_class_max_nregs[NUM_MACHINE_MODES][LIM_REG_CLASSES];

/* How many registers are needed for a given register and mode.  */
unsigned char rs6000_hard_regno_nregs[NUM_MACHINE_MODES][FIRST_PSEUDO_REGISTER];

/* Map register number to register class.  */
enum reg_class rs6000_regno_regclass[FIRST_PSEUDO_REGISTER];

static int dbg_cost_ctrl;

/* Built in types.  */
tree rs6000_builtin_types[RS6000_BTI_MAX];
tree rs6000_builtin_decls[RS6000_BUILTIN_COUNT];

/* Flag to say the TOC is initialized.  */
int toc_initialized, need_toc_init;
char toc_label_name[10];

/* Cached value of rs6000_variable_issue.  This is cached in
   rs6000_variable_issue hook and returned from rs6000_sched_reorder2.  */
static short cached_can_issue_more;

static GTY(()) section *read_only_data_section;
static GTY(()) section *private_data_section;
static GTY(()) section *tls_data_section;
static GTY(()) section *tls_private_data_section;
static GTY(()) section *read_only_private_data_section;
static GTY(()) section *sdata2_section;
static GTY(()) section *toc_section;
struct builtin_description
{
  const HOST_WIDE_INT mask;
  const enum insn_code icode;
  const char *const name;
  const enum rs6000_builtins code;
};

/* Describe the vector unit used for modes.  */
enum rs6000_vector rs6000_vector_unit[NUM_MACHINE_MODES];
enum rs6000_vector rs6000_vector_mem[NUM_MACHINE_MODES];

/* Register classes for various constraints that are based on the target
   switches.  */
enum reg_class rs6000_constraints[RS6000_CONSTRAINT_MAX];

/* Describe the alignment of a vector.  */
int rs6000_vector_align[NUM_MACHINE_MODES];

/* Map selected modes to types for builtins.  */
static GTY(()) tree builtin_mode_to_type[MAX_MACHINE_MODE][2];

/* What modes to automatically generate reciprocal divide estimate (fre) and
   reciprocal sqrt (frsqrte) for.  */
unsigned char rs6000_recip_bits[MAX_MACHINE_MODE];
/* Masks to determine which reciprocal estimate instructions to generate
   automatically.  */
enum rs6000_recip_mask {
  RECIP_SF_DIV		= 0x001,	/* Use divide estimate.  */
  RECIP_DF_DIV		= 0x002,
  RECIP_V4SF_DIV	= 0x004,
  RECIP_V2DF_DIV	= 0x008,

  RECIP_SF_RSQRT	= 0x010,	/* Use reciprocal sqrt estimate.  */
  RECIP_DF_RSQRT	= 0x020,
  RECIP_V4SF_RSQRT	= 0x040,
  RECIP_V2DF_RSQRT	= 0x080,

  /* Various combinations of flags for -mrecip=xxx.  */
  RECIP_NONE		= 0,
  RECIP_ALL		= (RECIP_SF_DIV | RECIP_DF_DIV | RECIP_V4SF_DIV
			   | RECIP_V2DF_DIV | RECIP_SF_RSQRT | RECIP_DF_RSQRT
			   | RECIP_V4SF_RSQRT | RECIP_V2DF_RSQRT),

  RECIP_HIGH_PRECISION	= RECIP_ALL,

  /* On low precision machines like the power5, don't enable double precision
     reciprocal square root estimate, since it isn't accurate enough.  */
  RECIP_LOW_PRECISION	= (RECIP_ALL & ~(RECIP_DF_RSQRT | RECIP_V2DF_RSQRT))
};
/* -mrecip options.  */
static struct
{
  const char *string;		/* option name.  */
  unsigned int mask;		/* mask bits to set.  */
} recip_options[] = {
  { "all",	 RECIP_ALL },
  { "none",	 RECIP_NONE },
  { "div",	 (RECIP_SF_DIV | RECIP_DF_DIV | RECIP_V4SF_DIV
		  | RECIP_V2DF_DIV) },
  { "divf",	 (RECIP_SF_DIV | RECIP_V4SF_DIV) },
  { "divd",	 (RECIP_DF_DIV | RECIP_V2DF_DIV) },
  { "rsqrt",	 (RECIP_SF_RSQRT | RECIP_DF_RSQRT | RECIP_V4SF_RSQRT
		  | RECIP_V2DF_RSQRT) },
  { "rsqrtf",	 (RECIP_SF_RSQRT | RECIP_V4SF_RSQRT) },
  { "rsqrtd",	 (RECIP_DF_RSQRT | RECIP_V2DF_RSQRT) },
};
/* Used by __builtin_cpu_is(), mapping from PLATFORM names to values.  */
static const struct
{
  const char *cpu;
  unsigned int cpuid;
} cpu_is_info[] = {
  { "power9",	   PPC_PLATFORM_POWER9 },
  { "power8",	   PPC_PLATFORM_POWER8 },
  { "power7",	   PPC_PLATFORM_POWER7 },
  { "power6x",	   PPC_PLATFORM_POWER6X },
  { "power6",	   PPC_PLATFORM_POWER6 },
  { "power5+",	   PPC_PLATFORM_POWER5_PLUS },
  { "power5",	   PPC_PLATFORM_POWER5 },
  { "ppc970",	   PPC_PLATFORM_PPC970 },
  { "power4",	   PPC_PLATFORM_POWER4 },
  { "ppca2",	   PPC_PLATFORM_PPCA2 },
  { "ppc476",	   PPC_PLATFORM_PPC476 },
  { "ppc464",	   PPC_PLATFORM_PPC464 },
  { "ppc440",	   PPC_PLATFORM_PPC440 },
  { "ppc405",	   PPC_PLATFORM_PPC405 },
  { "ppc-cell-be", PPC_PLATFORM_CELL_BE }
};
/* Used by __builtin_cpu_supports(), mapping from HWCAP names to masks.  */
static const struct
{
  const char *hwcap;
  int mask;
  unsigned int id;
} cpu_supports_info[] = {
  /* AT_HWCAP masks.  */
  { "4xxmac",		PPC_FEATURE_HAS_4xxMAC,		0 },
  { "altivec",		PPC_FEATURE_HAS_ALTIVEC,	0 },
  { "arch_2_05",	PPC_FEATURE_ARCH_2_05,		0 },
  { "arch_2_06",	PPC_FEATURE_ARCH_2_06,		0 },
  { "archpmu",		PPC_FEATURE_PERFMON_COMPAT,	0 },
  { "booke",		PPC_FEATURE_BOOKE,		0 },
  { "cellbe",		PPC_FEATURE_CELL_BE,		0 },
  { "dfp",		PPC_FEATURE_HAS_DFP,		0 },
  { "efpdouble",	PPC_FEATURE_HAS_EFP_DOUBLE,	0 },
  { "efpsingle",	PPC_FEATURE_HAS_EFP_SINGLE,	0 },
  { "fpu",		PPC_FEATURE_HAS_FPU,		0 },
  { "ic_snoop",		PPC_FEATURE_ICACHE_SNOOP,	0 },
  { "mmu",		PPC_FEATURE_HAS_MMU,		0 },
  { "notb",		PPC_FEATURE_NO_TB,		0 },
  { "pa6t",		PPC_FEATURE_PA6T,		0 },
  { "power4",		PPC_FEATURE_POWER4,		0 },
  { "power5",		PPC_FEATURE_POWER5,		0 },
  { "power5+",		PPC_FEATURE_POWER5_PLUS,	0 },
  { "power6x",		PPC_FEATURE_POWER6_EXT,		0 },
  { "ppc32",		PPC_FEATURE_32,			0 },
  { "ppc601",		PPC_FEATURE_601_INSTR,		0 },
  { "ppc64",		PPC_FEATURE_64,			0 },
  { "ppcle",		PPC_FEATURE_PPC_LE,		0 },
  { "smt",		PPC_FEATURE_SMT,		0 },
  { "spe",		PPC_FEATURE_HAS_SPE,		0 },
  { "true_le",		PPC_FEATURE_TRUE_LE,		0 },
  { "ucache",		PPC_FEATURE_UNIFIED_CACHE,	0 },
  { "vsx",		PPC_FEATURE_HAS_VSX,		0 },

  /* AT_HWCAP2 masks.  */
  { "arch_2_07",	PPC_FEATURE2_ARCH_2_07,		1 },
  { "dscr",		PPC_FEATURE2_HAS_DSCR,		1 },
  { "ebb",		PPC_FEATURE2_HAS_EBB,		1 },
  { "htm",		PPC_FEATURE2_HAS_HTM,		1 },
  { "htm-nosc",		PPC_FEATURE2_HTM_NOSC,		1 },
  { "isel",		PPC_FEATURE2_HAS_ISEL,		1 },
  { "tar",		PPC_FEATURE2_HAS_TAR,		1 },
  { "vcrypto",		PPC_FEATURE2_HAS_VEC_CRYPTO,	1 },
  { "arch_3_00",	PPC_FEATURE2_ARCH_3_00,		1 },
  { "ieee128",		PPC_FEATURE2_HAS_IEEE128,	1 }
};
/* Newer LIBCs explicitly export this symbol to declare that they provide
   the AT_PLATFORM and AT_HWCAP/AT_HWCAP2 values in the TCB.  We emit a
   reference to this symbol whenever we expand a CPU builtin, so that
   we never link against an old LIBC.  */
const char *tcb_verification_symbol = "__parse_hwcap_and_convert_at_platform";

/* True if we have expanded a CPU builtin.  */
bool cpu_builtin_p;

/* Pointer to function (in rs6000-c.c) that can define or undefine target
   macros that have changed.  Languages that don't support the preprocessor
   don't link in rs6000-c.c, so we can't call it directly.  */
void (*rs6000_target_modify_macros_ptr) (bool, HOST_WIDE_INT, HOST_WIDE_INT);
/* Simplify register classes into simpler classifications.  We assume
   GPR_REG_TYPE - FPR_REG_TYPE are ordered so that we can use a simple range
   check for standard register classes (gpr/floating/altivec/vsx) and
   floating/vector classes (float/altivec/vsx).  */

enum rs6000_reg_type {
  NO_REG_TYPE,
  PSEUDO_REG_TYPE,
  GPR_REG_TYPE,
  VSX_REG_TYPE,
  ALTIVEC_REG_TYPE,
  FPR_REG_TYPE,
  SPR_REG_TYPE,
  CR_REG_TYPE,
  SPE_ACC_TYPE,
  SPEFSCR_REG_TYPE
};

/* Map register class to register type.  */
static enum rs6000_reg_type reg_class_to_reg_type[N_REG_CLASSES];

/* First/last register type for the 'normal' register types (i.e. general
   purpose, floating point, altivec, and VSX registers).  */
#define IS_STD_REG_TYPE(RTYPE) IN_RANGE(RTYPE, GPR_REG_TYPE, FPR_REG_TYPE)

#define IS_FP_VECT_REG_TYPE(RTYPE) IN_RANGE(RTYPE, VSX_REG_TYPE, FPR_REG_TYPE)
/* Register classes we care about in secondary reload or when checking for
   legitimate addresses.  We only need to worry about GPR, FPR, and Altivec
   registers here, along with an ANY field that is the OR of the three
   register classes.  */

enum rs6000_reload_reg_type {
  RELOAD_REG_GPR,		/* General purpose registers.  */
  RELOAD_REG_FPR,		/* Traditional floating point regs.  */
  RELOAD_REG_VMX,		/* Altivec (VMX) registers.  */
  RELOAD_REG_ANY,		/* OR of GPR, FPR, Altivec masks.  */
  N_RELOAD_REG
};

/* For setting up register classes, loop through the 3 register classes mapping
   into real registers, and skip the ANY class, which is just an OR of the
   bits.  */
#define FIRST_RELOAD_REG_CLASS	RELOAD_REG_GPR
#define LAST_RELOAD_REG_CLASS	RELOAD_REG_VMX

/* Map reload register type to a register in the register class.  */
struct reload_reg_map_type {
  const char *name;	/* Register class name.  */
  int reg;		/* Register in the register class.  */
};

static const struct reload_reg_map_type reload_reg_map[N_RELOAD_REG] = {
  { "Gpr",	FIRST_GPR_REGNO },	/* RELOAD_REG_GPR.  */
  { "Fpr",	FIRST_FPR_REGNO },	/* RELOAD_REG_FPR.  */
  { "VMX",	FIRST_ALTIVEC_REGNO },	/* RELOAD_REG_VMX.  */
  { "Any",	-1 },			/* RELOAD_REG_ANY.  */
};
/* Mask bits for each register class, indexed per mode.  Historically the
   compiler has been more restrictive about which types can do PRE_MODIFY
   instead of PRE_INC and PRE_DEC, so keep track of separate bits for these
   two.  */
typedef unsigned char addr_mask_type;

#define RELOAD_REG_VALID	0x01	/* Mode valid in register.  */
#define RELOAD_REG_MULTIPLE	0x02	/* Mode takes multiple registers.  */
#define RELOAD_REG_INDEXED	0x04	/* Reg+reg addressing.  */
#define RELOAD_REG_OFFSET	0x08	/* Reg+offset addressing.  */
#define RELOAD_REG_PRE_INCDEC	0x10	/* PRE_INC/PRE_DEC valid.  */
#define RELOAD_REG_PRE_MODIFY	0x20	/* PRE_MODIFY valid.  */
#define RELOAD_REG_AND_M16	0x40	/* AND -16 addressing.  */
#define RELOAD_REG_QUAD_OFFSET	0x80	/* Quad offset is limited.  */
/* Masks of valid addressing modes, per register type.  */
struct rs6000_reg_addr {
  enum insn_code reload_load;	 /* INSN to reload for loading.  */
  enum insn_code reload_store;	 /* INSN to reload for storing.  */
  enum insn_code reload_fpr_gpr; /* INSN to move from FPR to GPR.  */
  enum insn_code reload_gpr_vsx; /* INSN to move from GPR to VSX.  */
  enum insn_code reload_vsx_gpr; /* INSN to move from VSX to GPR.  */
  enum insn_code fusion_gpr_ld;	 /* INSN for fusing gpr ADDIS/loads.  */
				 /* INSNs for fusing addi with loads
				    or stores for each reg. class.  */
  enum insn_code fusion_addi_ld[(int)N_RELOAD_REG];
  enum insn_code fusion_addi_st[(int)N_RELOAD_REG];
				 /* INSNs for fusing addis with loads
				    or stores for each reg. class.  */
  enum insn_code fusion_addis_ld[(int)N_RELOAD_REG];
  enum insn_code fusion_addis_st[(int)N_RELOAD_REG];
  addr_mask_type addr_mask[(int)N_RELOAD_REG]; /* Valid address masks.  */
  bool scalar_in_vmx_p;		 /* Scalar value can go in VMX.  */
  bool fused_toc;		 /* Mode supports TOC fusion.  */
};

static struct rs6000_reg_addr reg_addr[NUM_MACHINE_MODES];
/* Helper function to say whether a mode supports PRE_INC or PRE_DEC.  */
static inline bool
mode_supports_pre_incdec_p (machine_mode mode)
{
  return ((reg_addr[mode].addr_mask[RELOAD_REG_ANY] & RELOAD_REG_PRE_INCDEC)
	  != 0);
}

/* Helper function to say whether a mode supports PRE_MODIFY.  */
static inline bool
mode_supports_pre_modify_p (machine_mode mode)
{
  return ((reg_addr[mode].addr_mask[RELOAD_REG_ANY] & RELOAD_REG_PRE_MODIFY)
	  != 0);
}

/* Return true if we have D-form addressing in altivec registers.  */
static inline bool
mode_supports_vmx_dform (machine_mode mode)
{
  return ((reg_addr[mode].addr_mask[RELOAD_REG_VMX] & RELOAD_REG_OFFSET) != 0);
}

/* Return true if we have D-form addressing in VSX registers.  This addressing
   is more limited than normal d-form addressing in that the offset must be
   aligned on a 16-byte boundary.  */
static inline bool
mode_supports_vsx_dform_quad (machine_mode mode)
{
  return ((reg_addr[mode].addr_mask[RELOAD_REG_ANY] & RELOAD_REG_QUAD_OFFSET)
	  != 0);
}
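/* Illustrative sketch (not part of the original source): how predicates of
   this kind are typically consumed when validating an auto-increment
   address.  The fragment below is hypothetical:

     if (GET_CODE (addr) == PRE_INC || GET_CODE (addr) == PRE_DEC)
       return mode_supports_pre_incdec_p (mode);
     if (GET_CODE (addr) == PRE_MODIFY)
       return mode_supports_pre_modify_p (mode);  */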
/* Target cpu costs.  */

struct processor_costs {
  const int mulsi;	  /* cost of SImode multiplication.  */
  const int mulsi_const;  /* cost of SImode multiplication by constant.  */
  const int mulsi_const9; /* cost of SImode mult by short constant.  */
  const int muldi;	  /* cost of DImode multiplication.  */
  const int divsi;	  /* cost of SImode division.  */
  const int divdi;	  /* cost of DImode division.  */
  const int fp;		  /* cost of simple SFmode and DFmode insns.  */
  const int dmul;	  /* cost of DFmode multiplication (and fmadd).  */
  const int sdiv;	  /* cost of SFmode division (fdivs).  */
  const int ddiv;	  /* cost of DFmode division (fdiv).  */
  const int cache_line_size;	/* cache line size in bytes.  */
  const int l1_cache_size;	/* size of l1 cache, in kilobytes.  */
  const int l2_cache_size;	/* size of l2 cache, in kilobytes.  */
  const int simultaneous_prefetches; /* number of parallel prefetch
					operations.  */
  const int sfdf_convert;	/* cost of SF->DF conversion.  */
};

const struct processor_costs *rs6000_cost;
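/* COSTS_N_INSNS (N) comes from rtl.h and expands to (N) * 4, GCC's common
   unit for expressing the cost of N fast instructions.  So a divsi entry of
   COSTS_N_INSNS (19) in the tables below models a division roughly as
   expensive as 19 adds.  */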
/* Processor costs (relative to an add) */

/* Instruction size costs on 32bit processors.  */
static const
struct processor_costs size32_cost = {
  COSTS_N_INSNS (1),    /* mulsi */
  COSTS_N_INSNS (1),    /* mulsi_const */
  COSTS_N_INSNS (1),    /* mulsi_const9 */
  COSTS_N_INSNS (1),    /* muldi */
  COSTS_N_INSNS (1),    /* divsi */
  COSTS_N_INSNS (1),    /* divdi */
  COSTS_N_INSNS (1),    /* fp */
  COSTS_N_INSNS (1),    /* dmul */
  COSTS_N_INSNS (1),    /* sdiv */
  COSTS_N_INSNS (1),    /* ddiv */
  32,                   /* cache line size */
  0,                    /* l1 cache */
  0,                    /* l2 cache */
  0,                    /* streams */
  0,                    /* SF->DF convert */
};

/* Instruction size costs on 64bit processors.  */
static const
struct processor_costs size64_cost = {
  COSTS_N_INSNS (1),    /* mulsi */
  COSTS_N_INSNS (1),    /* mulsi_const */
  COSTS_N_INSNS (1),    /* mulsi_const9 */
  COSTS_N_INSNS (1),    /* muldi */
  COSTS_N_INSNS (1),    /* divsi */
  COSTS_N_INSNS (1),    /* divdi */
  COSTS_N_INSNS (1),    /* fp */
  COSTS_N_INSNS (1),    /* dmul */
  COSTS_N_INSNS (1),    /* sdiv */
  COSTS_N_INSNS (1),    /* ddiv */
  128,                  /* cache line size */
  0,                    /* l1 cache */
  0,                    /* l2 cache */
  0,                    /* streams */
  0,                    /* SF->DF convert */
};
/* Instruction costs on RS64A processors.  */
static const
struct processor_costs rs64a_cost = {
  COSTS_N_INSNS (20),   /* mulsi */
  COSTS_N_INSNS (12),   /* mulsi_const */
  COSTS_N_INSNS (8),    /* mulsi_const9 */
  COSTS_N_INSNS (34),   /* muldi */
  COSTS_N_INSNS (65),   /* divsi */
  COSTS_N_INSNS (67),   /* divdi */
  COSTS_N_INSNS (4),    /* fp */
  COSTS_N_INSNS (4),    /* dmul */
  COSTS_N_INSNS (31),   /* sdiv */
  COSTS_N_INSNS (31),   /* ddiv */
  128,                  /* cache line size */
  128,                  /* l1 cache */
  2048,                 /* l2 cache */
  1,                    /* streams */
  0,                    /* SF->DF convert */
};

/* Instruction costs on MPCCORE processors.  */
static const
struct processor_costs mpccore_cost = {
  COSTS_N_INSNS (2),    /* mulsi */
  COSTS_N_INSNS (2),    /* mulsi_const */
  COSTS_N_INSNS (2),    /* mulsi_const9 */
  COSTS_N_INSNS (2),    /* muldi */
  COSTS_N_INSNS (6),    /* divsi */
  COSTS_N_INSNS (6),    /* divdi */
  COSTS_N_INSNS (4),    /* fp */
  COSTS_N_INSNS (5),    /* dmul */
  COSTS_N_INSNS (10),   /* sdiv */
  COSTS_N_INSNS (17),   /* ddiv */
  32,                   /* cache line size */
  4,                    /* l1 cache */
  16,                   /* l2 cache */
  1,                    /* streams */
  0,                    /* SF->DF convert */
};

/* Instruction costs on PPC403 processors.  */
static const
struct processor_costs ppc403_cost = {
  COSTS_N_INSNS (4),    /* mulsi */
  COSTS_N_INSNS (4),    /* mulsi_const */
  COSTS_N_INSNS (4),    /* mulsi_const9 */
  COSTS_N_INSNS (4),    /* muldi */
  COSTS_N_INSNS (33),   /* divsi */
  COSTS_N_INSNS (33),   /* divdi */
  COSTS_N_INSNS (11),   /* fp */
  COSTS_N_INSNS (11),   /* dmul */
  COSTS_N_INSNS (11),   /* sdiv */
  COSTS_N_INSNS (11),   /* ddiv */
  32,                   /* cache line size */
  4,                    /* l1 cache */
  16,                   /* l2 cache */
  1,                    /* streams */
  0,                    /* SF->DF convert */
};

/* Instruction costs on PPC405 processors.  */
static const
struct processor_costs ppc405_cost = {
  COSTS_N_INSNS (5),    /* mulsi */
  COSTS_N_INSNS (4),    /* mulsi_const */
  COSTS_N_INSNS (3),    /* mulsi_const9 */
  COSTS_N_INSNS (5),    /* muldi */
  COSTS_N_INSNS (35),   /* divsi */
  COSTS_N_INSNS (35),   /* divdi */
  COSTS_N_INSNS (11),   /* fp */
  COSTS_N_INSNS (11),   /* dmul */
  COSTS_N_INSNS (11),   /* sdiv */
  COSTS_N_INSNS (11),   /* ddiv */
  32,                   /* cache line size */
  16,                   /* l1 cache */
  128,                  /* l2 cache */
  1,                    /* streams */
  0,                    /* SF->DF convert */
};

/* Instruction costs on PPC440 processors.  */
static const
struct processor_costs ppc440_cost = {
  COSTS_N_INSNS (3),    /* mulsi */
  COSTS_N_INSNS (2),    /* mulsi_const */
  COSTS_N_INSNS (2),    /* mulsi_const9 */
  COSTS_N_INSNS (3),    /* muldi */
  COSTS_N_INSNS (34),   /* divsi */
  COSTS_N_INSNS (34),   /* divdi */
  COSTS_N_INSNS (5),    /* fp */
  COSTS_N_INSNS (5),    /* dmul */
  COSTS_N_INSNS (19),   /* sdiv */
  COSTS_N_INSNS (33),   /* ddiv */
  32,                   /* cache line size */
  32,                   /* l1 cache */
  256,                  /* l2 cache */
  1,                    /* streams */
  0,                    /* SF->DF convert */
};

/* Instruction costs on PPC476 processors.  */
static const
struct processor_costs ppc476_cost = {
  COSTS_N_INSNS (4),    /* mulsi */
  COSTS_N_INSNS (4),    /* mulsi_const */
  COSTS_N_INSNS (4),    /* mulsi_const9 */
  COSTS_N_INSNS (4),    /* muldi */
  COSTS_N_INSNS (11),   /* divsi */
  COSTS_N_INSNS (11),   /* divdi */
  COSTS_N_INSNS (6),    /* fp */
  COSTS_N_INSNS (6),    /* dmul */
  COSTS_N_INSNS (19),   /* sdiv */
  COSTS_N_INSNS (33),   /* ddiv */
  32,                   /* cache line size */
  32,                   /* l1 cache */
  512,                  /* l2 cache */
  1,                    /* streams */
  0,                    /* SF->DF convert */
};
/* Instruction costs on PPC601 processors.  */
static const
struct processor_costs ppc601_cost = {
  COSTS_N_INSNS (5),    /* mulsi */
  COSTS_N_INSNS (5),    /* mulsi_const */
  COSTS_N_INSNS (5),    /* mulsi_const9 */
  COSTS_N_INSNS (5),    /* muldi */
  COSTS_N_INSNS (36),   /* divsi */
  COSTS_N_INSNS (36),   /* divdi */
  COSTS_N_INSNS (4),    /* fp */
  COSTS_N_INSNS (5),    /* dmul */
  COSTS_N_INSNS (17),   /* sdiv */
  COSTS_N_INSNS (31),   /* ddiv */
  32,                   /* cache line size */
  32,                   /* l1 cache */
  256,                  /* l2 cache */
  1,                    /* streams */
  0,                    /* SF->DF convert */
};

/* Instruction costs on PPC603 processors.  */
static const
struct processor_costs ppc603_cost = {
  COSTS_N_INSNS (5),    /* mulsi */
  COSTS_N_INSNS (3),    /* mulsi_const */
  COSTS_N_INSNS (2),    /* mulsi_const9 */
  COSTS_N_INSNS (5),    /* muldi */
  COSTS_N_INSNS (37),   /* divsi */
  COSTS_N_INSNS (37),   /* divdi */
  COSTS_N_INSNS (3),    /* fp */
  COSTS_N_INSNS (4),    /* dmul */
  COSTS_N_INSNS (18),   /* sdiv */
  COSTS_N_INSNS (33),   /* ddiv */
  32,                   /* cache line size */
  8,                    /* l1 cache */
  64,                   /* l2 cache */
  1,                    /* streams */
  0,                    /* SF->DF convert */
};

/* Instruction costs on PPC604 processors.  */
static const
struct processor_costs ppc604_cost = {
  COSTS_N_INSNS (4),    /* mulsi */
  COSTS_N_INSNS (4),    /* mulsi_const */
  COSTS_N_INSNS (4),    /* mulsi_const9 */
  COSTS_N_INSNS (4),    /* muldi */
  COSTS_N_INSNS (20),   /* divsi */
  COSTS_N_INSNS (20),   /* divdi */
  COSTS_N_INSNS (3),    /* fp */
  COSTS_N_INSNS (3),    /* dmul */
  COSTS_N_INSNS (18),   /* sdiv */
  COSTS_N_INSNS (32),   /* ddiv */
  32,                   /* cache line size */
  16,                   /* l1 cache */
  512,                  /* l2 cache */
  1,                    /* streams */
  0,                    /* SF->DF convert */
};

/* Instruction costs on PPC604e processors.  */
static const
struct processor_costs ppc604e_cost = {
  COSTS_N_INSNS (2),    /* mulsi */
  COSTS_N_INSNS (2),    /* mulsi_const */
  COSTS_N_INSNS (2),    /* mulsi_const9 */
  COSTS_N_INSNS (2),    /* muldi */
  COSTS_N_INSNS (20),   /* divsi */
  COSTS_N_INSNS (20),   /* divdi */
  COSTS_N_INSNS (3),    /* fp */
  COSTS_N_INSNS (3),    /* dmul */
  COSTS_N_INSNS (18),   /* sdiv */
  COSTS_N_INSNS (32),   /* ddiv */
  32,                   /* cache line size */
  32,                   /* l1 cache */
  1024,                 /* l2 cache */
  1,                    /* streams */
  0,                    /* SF->DF convert */
};

/* Instruction costs on PPC620 processors.  */
static const
struct processor_costs ppc620_cost = {
  COSTS_N_INSNS (5),    /* mulsi */
  COSTS_N_INSNS (4),    /* mulsi_const */
  COSTS_N_INSNS (3),    /* mulsi_const9 */
  COSTS_N_INSNS (7),    /* muldi */
  COSTS_N_INSNS (21),   /* divsi */
  COSTS_N_INSNS (37),   /* divdi */
  COSTS_N_INSNS (3),    /* fp */
  COSTS_N_INSNS (3),    /* dmul */
  COSTS_N_INSNS (18),   /* sdiv */
  COSTS_N_INSNS (32),   /* ddiv */
  128,                  /* cache line size */
  32,                   /* l1 cache */
  1024,                 /* l2 cache */
  1,                    /* streams */
  0,                    /* SF->DF convert */
};

/* Instruction costs on PPC630 processors.  */
static const
struct processor_costs ppc630_cost = {
  COSTS_N_INSNS (5),    /* mulsi */
  COSTS_N_INSNS (4),    /* mulsi_const */
  COSTS_N_INSNS (3),    /* mulsi_const9 */
  COSTS_N_INSNS (7),    /* muldi */
  COSTS_N_INSNS (21),   /* divsi */
  COSTS_N_INSNS (37),   /* divdi */
  COSTS_N_INSNS (3),    /* fp */
  COSTS_N_INSNS (3),    /* dmul */
  COSTS_N_INSNS (17),   /* sdiv */
  COSTS_N_INSNS (21),   /* ddiv */
  128,                  /* cache line size */
  64,                   /* l1 cache */
  1024,                 /* l2 cache */
  1,                    /* streams */
  0,                    /* SF->DF convert */
};
/* Instruction costs on Cell processor.  */
/* COSTS_N_INSNS (1) ~ one add.  */
static const
struct processor_costs ppccell_cost = {
  COSTS_N_INSNS (9/2)+2,    /* mulsi */
  COSTS_N_INSNS (6/2),      /* mulsi_const */
  COSTS_N_INSNS (6/2),      /* mulsi_const9 */
  COSTS_N_INSNS (15/2)+2,   /* muldi */
  COSTS_N_INSNS (38/2),     /* divsi */
  COSTS_N_INSNS (70/2),     /* divdi */
  COSTS_N_INSNS (10/2),     /* fp */
  COSTS_N_INSNS (10/2),     /* dmul */
  COSTS_N_INSNS (74/2),     /* sdiv */
  COSTS_N_INSNS (74/2),     /* ddiv */
  128,                      /* cache line size */
  32,                       /* l1 cache */
  512,                      /* l2 cache */
  6,                        /* streams */
  0,                        /* SF->DF convert */
};

/* Instruction costs on PPC750 and PPC7400 processors.  */
static const
struct processor_costs ppc750_cost = {
  COSTS_N_INSNS (5),    /* mulsi */
  COSTS_N_INSNS (3),    /* mulsi_const */
  COSTS_N_INSNS (2),    /* mulsi_const9 */
  COSTS_N_INSNS (5),    /* muldi */
  COSTS_N_INSNS (17),   /* divsi */
  COSTS_N_INSNS (17),   /* divdi */
  COSTS_N_INSNS (3),    /* fp */
  COSTS_N_INSNS (3),    /* dmul */
  COSTS_N_INSNS (17),   /* sdiv */
  COSTS_N_INSNS (31),   /* ddiv */
  32,                   /* cache line size */
  32,                   /* l1 cache */
  512,                  /* l2 cache */
  1,                    /* streams */
  0,                    /* SF->DF convert */
};

/* Instruction costs on PPC7450 processors.  */
static const
struct processor_costs ppc7450_cost = {
  COSTS_N_INSNS (4),    /* mulsi */
  COSTS_N_INSNS (3),    /* mulsi_const */
  COSTS_N_INSNS (3),    /* mulsi_const9 */
  COSTS_N_INSNS (4),    /* muldi */
  COSTS_N_INSNS (23),   /* divsi */
  COSTS_N_INSNS (23),   /* divdi */
  COSTS_N_INSNS (5),    /* fp */
  COSTS_N_INSNS (5),    /* dmul */
  COSTS_N_INSNS (21),   /* sdiv */
  COSTS_N_INSNS (35),   /* ddiv */
  32,                   /* cache line size */
  32,                   /* l1 cache */
  1024,                 /* l2 cache */
  1,                    /* streams */
  0,                    /* SF->DF convert */
};
/* Instruction costs on PPC8540 processors.  */
static const
struct processor_costs ppc8540_cost = {
  COSTS_N_INSNS (4),    /* mulsi */
  COSTS_N_INSNS (4),    /* mulsi_const */
  COSTS_N_INSNS (4),    /* mulsi_const9 */
  COSTS_N_INSNS (4),    /* muldi */
  COSTS_N_INSNS (19),   /* divsi */
  COSTS_N_INSNS (19),   /* divdi */
  COSTS_N_INSNS (4),    /* fp */
  COSTS_N_INSNS (4),    /* dmul */
  COSTS_N_INSNS (29),   /* sdiv */
  COSTS_N_INSNS (29),   /* ddiv */
  32,                   /* cache line size */
  32,                   /* l1 cache */
  256,                  /* l2 cache */
  1,                    /* prefetch streams */
  0,                    /* SF->DF convert */
};
/* Instruction costs on E300C2 and E300C3 cores.  */
static const
struct processor_costs ppce300c2c3_cost = {
  COSTS_N_INSNS (4),    /* mulsi */
  COSTS_N_INSNS (4),    /* mulsi_const */
  COSTS_N_INSNS (4),    /* mulsi_const9 */
  COSTS_N_INSNS (4),    /* muldi */
  COSTS_N_INSNS (19),   /* divsi */
  COSTS_N_INSNS (19),   /* divdi */
  COSTS_N_INSNS (3),    /* fp */
  COSTS_N_INSNS (4),    /* dmul */
  COSTS_N_INSNS (18),   /* sdiv */
  COSTS_N_INSNS (33),   /* ddiv */
  32,                   /* cache line size (assumed; this initializer was
			   missing, shifting the fields below) */
  16,                   /* l1 cache */
  16,                   /* l2 cache */
  1,                    /* prefetch streams */
  0,                    /* SF->DF convert */
};
/* Instruction costs on PPCE500MC processors.  */
static const
struct processor_costs ppce500mc_cost = {
  COSTS_N_INSNS (4),    /* mulsi */
  COSTS_N_INSNS (4),    /* mulsi_const */
  COSTS_N_INSNS (4),    /* mulsi_const9 */
  COSTS_N_INSNS (4),    /* muldi */
  COSTS_N_INSNS (14),   /* divsi */
  COSTS_N_INSNS (14),   /* divdi */
  COSTS_N_INSNS (8),    /* fp */
  COSTS_N_INSNS (10),   /* dmul */
  COSTS_N_INSNS (36),   /* sdiv */
  COSTS_N_INSNS (66),   /* ddiv */
  64,                   /* cache line size */
  32,                   /* l1 cache */
  128,                  /* l2 cache */
  1,                    /* prefetch streams */
  0,                    /* SF->DF convert */
};

/* Instruction costs on PPCE500MC64 processors.  */
static const
struct processor_costs ppce500mc64_cost = {
  COSTS_N_INSNS (4),    /* mulsi */
  COSTS_N_INSNS (4),    /* mulsi_const */
  COSTS_N_INSNS (4),    /* mulsi_const9 */
  COSTS_N_INSNS (4),    /* muldi */
  COSTS_N_INSNS (14),   /* divsi */
  COSTS_N_INSNS (14),   /* divdi */
  COSTS_N_INSNS (4),    /* fp */
  COSTS_N_INSNS (10),   /* dmul */
  COSTS_N_INSNS (36),   /* sdiv */
  COSTS_N_INSNS (66),   /* ddiv */
  64,                   /* cache line size */
  32,                   /* l1 cache */
  128,                  /* l2 cache */
  1,                    /* prefetch streams */
  0,                    /* SF->DF convert */
};

/* Instruction costs on PPCE5500 processors.  */
static const
struct processor_costs ppce5500_cost = {
  COSTS_N_INSNS (5),    /* mulsi */
  COSTS_N_INSNS (5),    /* mulsi_const */
  COSTS_N_INSNS (4),    /* mulsi_const9 */
  COSTS_N_INSNS (5),    /* muldi */
  COSTS_N_INSNS (14),   /* divsi */
  COSTS_N_INSNS (14),   /* divdi */
  COSTS_N_INSNS (7),    /* fp */
  COSTS_N_INSNS (10),   /* dmul */
  COSTS_N_INSNS (36),   /* sdiv */
  COSTS_N_INSNS (66),   /* ddiv */
  64,                   /* cache line size */
  32,                   /* l1 cache */
  128,                  /* l2 cache */
  1,                    /* prefetch streams */
  0,                    /* SF->DF convert */
};

/* Instruction costs on PPCE6500 processors.  */
static const
struct processor_costs ppce6500_cost = {
  COSTS_N_INSNS (5),    /* mulsi */
  COSTS_N_INSNS (5),    /* mulsi_const */
  COSTS_N_INSNS (4),    /* mulsi_const9 */
  COSTS_N_INSNS (5),    /* muldi */
  COSTS_N_INSNS (14),   /* divsi */
  COSTS_N_INSNS (14),   /* divdi */
  COSTS_N_INSNS (7),    /* fp */
  COSTS_N_INSNS (10),   /* dmul */
  COSTS_N_INSNS (36),   /* sdiv */
  COSTS_N_INSNS (66),   /* ddiv */
  64,                   /* cache line size */
  32,                   /* l1 cache */
  128,                  /* l2 cache */
  1,                    /* prefetch streams */
  0,                    /* SF->DF convert */
};

/* Instruction costs on AppliedMicro Titan processors.  */
static const
struct processor_costs titan_cost = {
  COSTS_N_INSNS (5),    /* mulsi */
  COSTS_N_INSNS (5),    /* mulsi_const */
  COSTS_N_INSNS (5),    /* mulsi_const9 */
  COSTS_N_INSNS (5),    /* muldi */
  COSTS_N_INSNS (18),   /* divsi */
  COSTS_N_INSNS (18),   /* divdi */
  COSTS_N_INSNS (10),   /* fp */
  COSTS_N_INSNS (10),   /* dmul */
  COSTS_N_INSNS (46),   /* sdiv */
  COSTS_N_INSNS (72),   /* ddiv */
  32,                   /* cache line size */
  32,                   /* l1 cache */
  512,                  /* l2 cache */
  1,                    /* prefetch streams */
  0,                    /* SF->DF convert */
};
/* Instruction costs on POWER4 and POWER5 processors.  */
static const
struct processor_costs power4_cost = {
  COSTS_N_INSNS (3),    /* mulsi */
  COSTS_N_INSNS (2),    /* mulsi_const */
  COSTS_N_INSNS (2),    /* mulsi_const9 */
  COSTS_N_INSNS (4),    /* muldi */
  COSTS_N_INSNS (18),   /* divsi */
  COSTS_N_INSNS (34),   /* divdi */
  COSTS_N_INSNS (3),    /* fp */
  COSTS_N_INSNS (3),    /* dmul */
  COSTS_N_INSNS (17),   /* sdiv */
  COSTS_N_INSNS (17),   /* ddiv */
  128,                  /* cache line size */
  32,                   /* l1 cache */
  1024,                 /* l2 cache */
  8,                    /* prefetch streams */
  0,                    /* SF->DF convert */
};

/* Instruction costs on POWER6 processors.  */
static const
struct processor_costs power6_cost = {
  COSTS_N_INSNS (8),    /* mulsi */
  COSTS_N_INSNS (8),    /* mulsi_const */
  COSTS_N_INSNS (8),    /* mulsi_const9 */
  COSTS_N_INSNS (8),    /* muldi */
  COSTS_N_INSNS (22),   /* divsi */
  COSTS_N_INSNS (28),   /* divdi */
  COSTS_N_INSNS (3),    /* fp */
  COSTS_N_INSNS (3),    /* dmul */
  COSTS_N_INSNS (13),   /* sdiv */
  COSTS_N_INSNS (16),   /* ddiv */
  128,                  /* cache line size */
  64,                   /* l1 cache */
  2048,                 /* l2 cache */
  16,                   /* prefetch streams */
  0,                    /* SF->DF convert */
};

/* Instruction costs on POWER7 processors.  */
static const
struct processor_costs power7_cost = {
  COSTS_N_INSNS (2),    /* mulsi */
  COSTS_N_INSNS (2),    /* mulsi_const */
  COSTS_N_INSNS (2),    /* mulsi_const9 */
  COSTS_N_INSNS (2),    /* muldi */
  COSTS_N_INSNS (18),   /* divsi */
  COSTS_N_INSNS (34),   /* divdi */
  COSTS_N_INSNS (3),    /* fp */
  COSTS_N_INSNS (3),    /* dmul */
  COSTS_N_INSNS (13),   /* sdiv */
  COSTS_N_INSNS (16),   /* ddiv */
  128,                  /* cache line size */
  32,                   /* l1 cache */
  256,                  /* l2 cache */
  12,                   /* prefetch streams */
  COSTS_N_INSNS (3),    /* SF->DF convert */
};

/* Instruction costs on POWER8 processors.  */
static const
struct processor_costs power8_cost = {
  COSTS_N_INSNS (3),    /* mulsi */
  COSTS_N_INSNS (3),    /* mulsi_const */
  COSTS_N_INSNS (3),    /* mulsi_const9 */
  COSTS_N_INSNS (3),    /* muldi */
  COSTS_N_INSNS (19),   /* divsi */
  COSTS_N_INSNS (35),   /* divdi */
  COSTS_N_INSNS (3),    /* fp */
  COSTS_N_INSNS (3),    /* dmul */
  COSTS_N_INSNS (14),   /* sdiv */
  COSTS_N_INSNS (17),   /* ddiv */
  128,                  /* cache line size */
  32,                   /* l1 cache */
  256,                  /* l2 cache */
  12,                   /* prefetch streams */
  COSTS_N_INSNS (3),    /* SF->DF convert */
};

/* Instruction costs on POWER9 processors.  */
static const
struct processor_costs power9_cost = {
  COSTS_N_INSNS (3),    /* mulsi */
  COSTS_N_INSNS (3),    /* mulsi_const */
  COSTS_N_INSNS (3),    /* mulsi_const9 */
  COSTS_N_INSNS (3),    /* muldi */
  COSTS_N_INSNS (8),    /* divsi */
  COSTS_N_INSNS (12),   /* divdi */
  COSTS_N_INSNS (3),    /* fp */
  COSTS_N_INSNS (3),    /* dmul */
  COSTS_N_INSNS (13),   /* sdiv */
  COSTS_N_INSNS (18),   /* ddiv */
  128,                  /* cache line size */
  32,                   /* l1 cache */
  512,                  /* l2 cache */
  8,                    /* prefetch streams */
  COSTS_N_INSNS (3),    /* SF->DF convert */
};
/* Instruction costs on POWER A2 processors.  */
static const
struct processor_costs ppca2_cost = {
  COSTS_N_INSNS (16),   /* mulsi */
  COSTS_N_INSNS (16),   /* mulsi_const */
  COSTS_N_INSNS (16),   /* mulsi_const9 */
  COSTS_N_INSNS (16),   /* muldi */
  COSTS_N_INSNS (22),   /* divsi */
  COSTS_N_INSNS (28),   /* divdi */
  COSTS_N_INSNS (3),    /* fp */
  COSTS_N_INSNS (3),    /* dmul */
  COSTS_N_INSNS (59),   /* sdiv */
  COSTS_N_INSNS (72),   /* ddiv */
  64,                   /* cache line size (assumed; this initializer was
			   missing, shifting the fields below) */
  16,                   /* l1 cache */
  2048,                 /* l2 cache */
  16,                   /* prefetch streams */
  0,                    /* SF->DF convert */
};
/* Table that classifies rs6000 builtin functions (pure, const, etc.).  */
#undef RS6000_BUILTIN_0
#undef RS6000_BUILTIN_1
#undef RS6000_BUILTIN_2
#undef RS6000_BUILTIN_3
#undef RS6000_BUILTIN_A
#undef RS6000_BUILTIN_D
#undef RS6000_BUILTIN_E
#undef RS6000_BUILTIN_H
#undef RS6000_BUILTIN_P
#undef RS6000_BUILTIN_Q
#undef RS6000_BUILTIN_S
#undef RS6000_BUILTIN_X

#define RS6000_BUILTIN_0(ENUM, NAME, MASK, ATTR, ICODE) \
  { NAME, ICODE, MASK, ATTR },

#define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE) \
  { NAME, ICODE, MASK, ATTR },

#define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE) \
  { NAME, ICODE, MASK, ATTR },

#define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE) \
  { NAME, ICODE, MASK, ATTR },

#define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE) \
  { NAME, ICODE, MASK, ATTR },

#define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE) \
  { NAME, ICODE, MASK, ATTR },

#define RS6000_BUILTIN_E(ENUM, NAME, MASK, ATTR, ICODE) \
  { NAME, ICODE, MASK, ATTR },

#define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE) \
  { NAME, ICODE, MASK, ATTR },

#define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE) \
  { NAME, ICODE, MASK, ATTR },

#define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE) \
  { NAME, ICODE, MASK, ATTR },

#define RS6000_BUILTIN_S(ENUM, NAME, MASK, ATTR, ICODE) \
  { NAME, ICODE, MASK, ATTR },

#define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE) \
  { NAME, ICODE, MASK, ATTR },

struct rs6000_builtin_info_type {
  const char *name;
  const enum insn_code icode;
  const HOST_WIDE_INT mask;
  const unsigned attr;
};

static const struct rs6000_builtin_info_type rs6000_builtin_info[] =
{
#include "rs6000-builtin.def"
};

#undef RS6000_BUILTIN_0
#undef RS6000_BUILTIN_1
#undef RS6000_BUILTIN_2
#undef RS6000_BUILTIN_3
#undef RS6000_BUILTIN_A
#undef RS6000_BUILTIN_D
#undef RS6000_BUILTIN_E
#undef RS6000_BUILTIN_H
#undef RS6000_BUILTIN_P
#undef RS6000_BUILTIN_Q
#undef RS6000_BUILTIN_S
#undef RS6000_BUILTIN_X
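/* Illustrative sketch (not part of the original source) of the X-macro
   expansion above.  A hypothetical rs6000-builtin.def entry such as

     RS6000_BUILTIN_2 (ALTIVEC_BUILTIN_VADDUBM, "__builtin_altivec_vaddubm",
		       RS6000_BTM_ALTIVEC, RS6000_BTC_CONST,
		       CODE_FOR_addv16qi3)

   expands, under the definitions above, to one initializer row of
   rs6000_builtin_info[]:

     { "__builtin_altivec_vaddubm", CODE_FOR_addv16qi3,
       RS6000_BTM_ALTIVEC, RS6000_BTC_CONST },

   The ENUM argument is dropped here; other expansions of the same macros
   elsewhere use it to build the rs6000_builtins enumeration.  */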
/* Support for -mveclibabi=<xxx> to control which vector library to use.  */
static tree (*rs6000_veclib_handler) (combined_fn, tree, tree);

static bool rs6000_debug_legitimate_address_p (machine_mode, rtx, bool);
static bool spe_func_has_64bit_regs_p (void);
static struct machine_function * rs6000_init_machine_status (void);
static int rs6000_ra_ever_killed (void);
static tree rs6000_handle_longcall_attribute (tree *, tree, tree, int, bool *);
static tree rs6000_handle_altivec_attribute (tree *, tree, tree, int, bool *);
static tree rs6000_handle_struct_attribute (tree *, tree, tree, int, bool *);
static tree rs6000_builtin_vectorized_libmass (combined_fn, tree, tree);
static void rs6000_emit_set_long_const (rtx, HOST_WIDE_INT);
static int rs6000_memory_move_cost (machine_mode, reg_class_t, bool);
static bool rs6000_debug_rtx_costs (rtx, machine_mode, int, int, int *, bool);
static int rs6000_debug_address_cost (rtx, machine_mode, addr_space_t,
				      bool);
static int rs6000_debug_adjust_cost (rtx_insn *, int, rtx_insn *, int,
				     unsigned int);
static bool is_microcoded_insn (rtx_insn *);
static bool is_nonpipeline_insn (rtx_insn *);
static bool is_cracked_insn (rtx_insn *);
static bool is_load_insn (rtx, rtx *);
static bool is_store_insn (rtx, rtx *);
static bool set_to_load_agen (rtx_insn *, rtx_insn *);
static bool insn_terminates_group_p (rtx_insn *, enum group_termination);
static bool insn_must_be_first_in_group (rtx_insn *);
static bool insn_must_be_last_in_group (rtx_insn *);
static void altivec_init_builtins (void);
static tree builtin_function_type (machine_mode, machine_mode,
				   machine_mode, machine_mode,
				   enum rs6000_builtins, const char *name);
static void rs6000_common_init_builtins (void);
static void paired_init_builtins (void);
static rtx paired_expand_predicate_builtin (enum insn_code, tree, rtx);
static void spe_init_builtins (void);
static void htm_init_builtins (void);
static rtx spe_expand_predicate_builtin (enum insn_code, tree, rtx);
static rtx spe_expand_evsel_builtin (enum insn_code, tree, rtx);
static int rs6000_emit_int_cmove (rtx, rtx, rtx, rtx);
static rs6000_stack_t *rs6000_stack_info (void);
static void is_altivec_return_reg (rtx, void *);
int easy_vector_constant (rtx, machine_mode);
static rtx rs6000_debug_legitimize_address (rtx, rtx, machine_mode);
static rtx rs6000_legitimize_tls_address (rtx, enum tls_model);
static rtx rs6000_darwin64_record_arg (CUMULATIVE_ARGS *, const_tree,
				       bool, bool);
#if TARGET_MACHO
static void macho_branch_islands (void);
#endif
static rtx rs6000_legitimize_reload_address (rtx, machine_mode, int, int,
					     int, int *);
static rtx rs6000_debug_legitimize_reload_address (rtx, machine_mode, int,
						   int, int, int *);
static bool rs6000_mode_dependent_address (const_rtx);
static bool rs6000_debug_mode_dependent_address (const_rtx);
static enum reg_class rs6000_secondary_reload_class (enum reg_class,
						     machine_mode, rtx);
static enum reg_class rs6000_debug_secondary_reload_class (enum reg_class,
							   machine_mode,
							   rtx);
static enum reg_class rs6000_preferred_reload_class (rtx, enum reg_class);
static enum reg_class rs6000_debug_preferred_reload_class (rtx,
							   enum reg_class);
static bool rs6000_secondary_memory_needed (enum reg_class, enum reg_class,
					    machine_mode);
static bool rs6000_debug_secondary_memory_needed (enum reg_class,
						  enum reg_class,
						  machine_mode);
static bool rs6000_cannot_change_mode_class (machine_mode,
					     machine_mode,
					     enum reg_class);
static bool rs6000_debug_cannot_change_mode_class (machine_mode,
						   machine_mode,
						   enum reg_class);
static bool rs6000_save_toc_in_prologue_p (void);
static rtx rs6000_internal_arg_pointer (void);

rtx (*rs6000_legitimize_reload_address_ptr) (rtx, machine_mode, int, int,
					     int, int *)
  = rs6000_legitimize_reload_address;

static bool (*rs6000_mode_dependent_address_ptr) (const_rtx)
  = rs6000_mode_dependent_address;

enum reg_class (*rs6000_secondary_reload_class_ptr) (enum reg_class,
						     machine_mode, rtx)
  = rs6000_secondary_reload_class;

enum reg_class (*rs6000_preferred_reload_class_ptr) (rtx, enum reg_class)
  = rs6000_preferred_reload_class;

bool (*rs6000_secondary_memory_needed_ptr) (enum reg_class, enum reg_class,
					    machine_mode)
  = rs6000_secondary_memory_needed;

bool (*rs6000_cannot_change_mode_class_ptr) (machine_mode,
					     machine_mode,
					     enum reg_class)
  = rs6000_cannot_change_mode_class;

const int INSN_NOT_AVAILABLE = -1;

static void rs6000_print_isa_options (FILE *, int, const char *,
				      HOST_WIDE_INT);
static void rs6000_print_builtin_options (FILE *, int, const char *,
					  HOST_WIDE_INT);

static enum rs6000_reg_type register_to_reg_type (rtx, bool *);
static bool rs6000_secondary_reload_move (enum rs6000_reg_type,
					  enum rs6000_reg_type,
					  machine_mode,
					  secondary_reload_info *,
					  bool);
rtl_opt_pass *make_pass_analyze_swaps (gcc::context*);
static bool rs6000_keep_leaf_when_profiled () __attribute__ ((unused));
static tree rs6000_fold_builtin (tree, int, tree *, bool);
/* Hash table stuff for keeping track of TOC entries.  */

struct GTY((for_user)) toc_hash_struct
{
  /* `key' will satisfy CONSTANT_P; in fact, it will satisfy
     ASM_OUTPUT_SPECIAL_POOL_ENTRY_P.  */
  rtx key;
  machine_mode key_mode;
  int labelno;
};

struct toc_hasher : ggc_ptr_hash<toc_hash_struct>
{
  static hashval_t hash (toc_hash_struct *);
  static bool equal (toc_hash_struct *, toc_hash_struct *);
};

static GTY (()) hash_table<toc_hasher> *toc_hash_table;

/* Hash table to keep track of the argument types for builtin functions.  */

struct GTY((for_user)) builtin_hash_struct
{
  tree type;
  machine_mode mode[4];	 /* return value + 3 arguments.  */
  unsigned char uns_p[4]; /* and whether the types are unsigned.  */
};

struct builtin_hasher : ggc_ptr_hash<builtin_hash_struct>
{
  static hashval_t hash (builtin_hash_struct *);
  static bool equal (builtin_hash_struct *, builtin_hash_struct *);
};

static GTY (()) hash_table<builtin_hasher> *builtin_hash_table;
/* Default register names.  */
char rs6000_reg_names[][8] =
{
      "0",  "1",  "2",  "3",  "4",  "5",  "6",  "7",
      "8",  "9", "10", "11", "12", "13", "14", "15",
     "16", "17", "18", "19", "20", "21", "22", "23",
     "24", "25", "26", "27", "28", "29", "30", "31",
      "0",  "1",  "2",  "3",  "4",  "5",  "6",  "7",
      "8",  "9", "10", "11", "12", "13", "14", "15",
     "16", "17", "18", "19", "20", "21", "22", "23",
     "24", "25", "26", "27", "28", "29", "30", "31",
     "mq", "lr", "ctr", "ap",
      "0",  "1",  "2",  "3",  "4",  "5",  "6",  "7",
      "ca",
      /* AltiVec registers.  */
      "0",  "1",  "2",  "3",  "4",  "5",  "6",  "7",
      "8",  "9", "10", "11", "12", "13", "14", "15",
     "16", "17", "18", "19", "20", "21", "22", "23",
     "24", "25", "26", "27", "28", "29", "30", "31",
      "vrsave", "vscr",
      /* SPE registers.  */
      "spe_acc", "spefscr",
      /* Soft frame pointer.  */
      "sfp",
      /* HTM SPR registers.  */
      "tfhar", "tfiar", "texasr",
      /* SPE High registers.  */
      "0",  "1",  "2",  "3",  "4",  "5",  "6",  "7",
      "8",  "9", "10", "11", "12", "13", "14", "15",
     "16", "17", "18", "19", "20", "21", "22", "23",
     "24", "25", "26", "27", "28", "29", "30", "31"
};

#ifdef TARGET_REGNAMES
static const char alt_reg_names[][8] =
{
   "%r0",  "%r1",  "%r2",  "%r3",  "%r4",  "%r5",  "%r6",  "%r7",
   "%r8",  "%r9", "%r10", "%r11", "%r12", "%r13", "%r14", "%r15",
  "%r16", "%r17", "%r18", "%r19", "%r20", "%r21", "%r22", "%r23",
  "%r24", "%r25", "%r26", "%r27", "%r28", "%r29", "%r30", "%r31",
   "%f0",  "%f1",  "%f2",  "%f3",  "%f4",  "%f5",  "%f6",  "%f7",
   "%f8",  "%f9", "%f10", "%f11", "%f12", "%f13", "%f14", "%f15",
  "%f16", "%f17", "%f18", "%f19", "%f20", "%f21", "%f22", "%f23",
  "%f24", "%f25", "%f26", "%f27", "%f28", "%f29", "%f30", "%f31",
    "mq",    "lr",  "ctr",   "ap",
  "%cr0",  "%cr1", "%cr2", "%cr3", "%cr4", "%cr5", "%cr6", "%cr7",
   "ca",
  /* AltiVec registers.  */
   "%v0",  "%v1",  "%v2",  "%v3",  "%v4",  "%v5",  "%v6",  "%v7",
   "%v8",  "%v9", "%v10", "%v11", "%v12", "%v13", "%v14", "%v15",
  "%v16", "%v17", "%v18", "%v19", "%v20", "%v21", "%v22", "%v23",
  "%v24", "%v25", "%v26", "%v27", "%v28", "%v29", "%v30", "%v31",
  "vrsave", "vscr",
  /* SPE registers.  */
  "spe_acc", "spefscr",
  /* Soft frame pointer.  */
  "sfp",
  /* HTM SPR registers.  */
  "tfhar", "tfiar", "texasr",
  /* SPE High registers.  */
  "%rh0",  "%rh1",  "%rh2",  "%rh3",  "%rh4",  "%rh5",  "%rh6",  "%rh7",
  "%rh8",  "%rh9", "%rh10", "%rh11", "%rh12", "%rh13", "%rh14", "%rh15",
  "%rh16", "%rh17", "%rh18", "%rh19", "%rh20", "%rh21", "%rh22", "%rh23",
  "%rh24", "%rh25", "%rh26", "%rh27", "%rh28", "%rh29", "%rh30", "%rh31"
};
#endif
/* Table of valid machine attributes.  */

static const struct attribute_spec rs6000_attribute_table[] =
{
  /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
       affects_type_identity } */
  { "altivec",   1, 1, false, true,  false, rs6000_handle_altivec_attribute,
    false },
  { "longcall",  0, 0, false, true,  true,  rs6000_handle_longcall_attribute,
    false },
  { "shortcall", 0, 0, false, true,  true,  rs6000_handle_longcall_attribute,
    false },
  { "ms_struct", 0, 0, false, false, false, rs6000_handle_struct_attribute,
    false },
  { "gcc_struct", 0, 0, false, false, false, rs6000_handle_struct_attribute,
    false },
#ifdef SUBTARGET_ATTRIBUTE_TABLE
  SUBTARGET_ATTRIBUTE_TABLE,
#endif
  { NULL,        0, 0, false, false, false, NULL, false }
};
#ifndef TARGET_PROFILE_KERNEL
#define TARGET_PROFILE_KERNEL 0
#endif

/* The VRSAVE bitmask puts bit %v0 as the most significant bit.  */
#define ALTIVEC_REG_BIT(REGNO) (0x80000000 >> ((REGNO) - FIRST_ALTIVEC_REGNO))
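/* For example, ALTIVEC_REG_BIT (FIRST_ALTIVEC_REGNO) is 0x80000000 (%v0 in
   the most significant bit) and ALTIVEC_REG_BIT (FIRST_ALTIVEC_REGNO + 31)
   is 0x00000001 (%v31 in the least significant bit).  */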
/* Initialize the GCC target structure.  */
#undef TARGET_ATTRIBUTE_TABLE
#define TARGET_ATTRIBUTE_TABLE rs6000_attribute_table
#undef TARGET_SET_DEFAULT_TYPE_ATTRIBUTES
#define TARGET_SET_DEFAULT_TYPE_ATTRIBUTES rs6000_set_default_type_attributes
#undef TARGET_ATTRIBUTE_TAKES_IDENTIFIER_P
#define TARGET_ATTRIBUTE_TAKES_IDENTIFIER_P rs6000_attribute_takes_identifier_p

#undef TARGET_ASM_ALIGNED_DI_OP
#define TARGET_ASM_ALIGNED_DI_OP DOUBLE_INT_ASM_OP

/* Default unaligned ops are only provided for ELF.  Find the ops needed
   for non-ELF systems.  */
#ifndef OBJECT_FORMAT_ELF
#if TARGET_XCOFF
/* For XCOFF.  rs6000_assemble_integer will handle unaligned DIs on
   64-bit targets.  */
#undef TARGET_ASM_UNALIGNED_HI_OP
#define TARGET_ASM_UNALIGNED_HI_OP "\t.vbyte\t2,"
#undef TARGET_ASM_UNALIGNED_SI_OP
#define TARGET_ASM_UNALIGNED_SI_OP "\t.vbyte\t4,"
#undef TARGET_ASM_UNALIGNED_DI_OP
#define TARGET_ASM_UNALIGNED_DI_OP "\t.vbyte\t8,"
#else
/* For Darwin.  */
#undef TARGET_ASM_UNALIGNED_HI_OP
#define TARGET_ASM_UNALIGNED_HI_OP "\t.short\t"
#undef TARGET_ASM_UNALIGNED_SI_OP
#define TARGET_ASM_UNALIGNED_SI_OP "\t.long\t"
#undef TARGET_ASM_UNALIGNED_DI_OP
#define TARGET_ASM_UNALIGNED_DI_OP "\t.quad\t"
#undef TARGET_ASM_ALIGNED_DI_OP
#define TARGET_ASM_ALIGNED_DI_OP "\t.quad\t"
#endif
#endif
/* This hook deals with fixups for relocatable code and DI-mode objects
   in 64-bit code.  */
#undef TARGET_ASM_INTEGER
#define TARGET_ASM_INTEGER rs6000_assemble_integer

#if defined (HAVE_GAS_HIDDEN) && !TARGET_MACHO
#undef TARGET_ASM_ASSEMBLE_VISIBILITY
#define TARGET_ASM_ASSEMBLE_VISIBILITY rs6000_assemble_visibility
#endif

#undef TARGET_SET_UP_BY_PROLOGUE
#define TARGET_SET_UP_BY_PROLOGUE rs6000_set_up_by_prologue

#undef TARGET_SHRINK_WRAP_GET_SEPARATE_COMPONENTS
#define TARGET_SHRINK_WRAP_GET_SEPARATE_COMPONENTS rs6000_get_separate_components
#undef TARGET_SHRINK_WRAP_COMPONENTS_FOR_BB
#define TARGET_SHRINK_WRAP_COMPONENTS_FOR_BB rs6000_components_for_bb
#undef TARGET_SHRINK_WRAP_DISQUALIFY_COMPONENTS
#define TARGET_SHRINK_WRAP_DISQUALIFY_COMPONENTS rs6000_disqualify_components
#undef TARGET_SHRINK_WRAP_EMIT_PROLOGUE_COMPONENTS
#define TARGET_SHRINK_WRAP_EMIT_PROLOGUE_COMPONENTS rs6000_emit_prologue_components
#undef TARGET_SHRINK_WRAP_EMIT_EPILOGUE_COMPONENTS
#define TARGET_SHRINK_WRAP_EMIT_EPILOGUE_COMPONENTS rs6000_emit_epilogue_components
#undef TARGET_SHRINK_WRAP_SET_HANDLED_COMPONENTS
#define TARGET_SHRINK_WRAP_SET_HANDLED_COMPONENTS rs6000_set_handled_components

#undef TARGET_EXTRA_LIVE_ON_ENTRY
#define TARGET_EXTRA_LIVE_ON_ENTRY rs6000_live_on_entry

#undef TARGET_INTERNAL_ARG_POINTER
#define TARGET_INTERNAL_ARG_POINTER rs6000_internal_arg_pointer

#undef TARGET_HAVE_TLS
#define TARGET_HAVE_TLS HAVE_AS_TLS

#undef TARGET_CANNOT_FORCE_CONST_MEM
#define TARGET_CANNOT_FORCE_CONST_MEM rs6000_cannot_force_const_mem

#undef TARGET_DELEGITIMIZE_ADDRESS
#define TARGET_DELEGITIMIZE_ADDRESS rs6000_delegitimize_address

#undef TARGET_CONST_NOT_OK_FOR_DEBUG_P
#define TARGET_CONST_NOT_OK_FOR_DEBUG_P rs6000_const_not_ok_for_debug_p

#undef TARGET_LEGITIMATE_COMBINED_INSN
#define TARGET_LEGITIMATE_COMBINED_INSN rs6000_legitimate_combined_insn

#undef TARGET_ASM_FUNCTION_PROLOGUE
#define TARGET_ASM_FUNCTION_PROLOGUE rs6000_output_function_prologue
#undef TARGET_ASM_FUNCTION_EPILOGUE
#define TARGET_ASM_FUNCTION_EPILOGUE rs6000_output_function_epilogue

#undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
#define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA rs6000_output_addr_const_extra

#undef TARGET_LEGITIMIZE_ADDRESS
#define TARGET_LEGITIMIZE_ADDRESS rs6000_legitimize_address

#undef TARGET_SCHED_VARIABLE_ISSUE
#define TARGET_SCHED_VARIABLE_ISSUE rs6000_variable_issue

#undef TARGET_SCHED_ISSUE_RATE
#define TARGET_SCHED_ISSUE_RATE rs6000_issue_rate
#undef TARGET_SCHED_ADJUST_COST
#define TARGET_SCHED_ADJUST_COST rs6000_adjust_cost
#undef TARGET_SCHED_ADJUST_PRIORITY
#define TARGET_SCHED_ADJUST_PRIORITY rs6000_adjust_priority
#undef TARGET_SCHED_IS_COSTLY_DEPENDENCE
#define TARGET_SCHED_IS_COSTLY_DEPENDENCE rs6000_is_costly_dependence
#undef TARGET_SCHED_INIT
#define TARGET_SCHED_INIT rs6000_sched_init
#undef TARGET_SCHED_FINISH
#define TARGET_SCHED_FINISH rs6000_sched_finish
#undef TARGET_SCHED_REORDER
#define TARGET_SCHED_REORDER rs6000_sched_reorder
#undef TARGET_SCHED_REORDER2
#define TARGET_SCHED_REORDER2 rs6000_sched_reorder2

#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD rs6000_use_sched_lookahead

#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD
#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD rs6000_use_sched_lookahead_guard

#undef TARGET_SCHED_ALLOC_SCHED_CONTEXT
#define TARGET_SCHED_ALLOC_SCHED_CONTEXT rs6000_alloc_sched_context
#undef TARGET_SCHED_INIT_SCHED_CONTEXT
#define TARGET_SCHED_INIT_SCHED_CONTEXT rs6000_init_sched_context
#undef TARGET_SCHED_SET_SCHED_CONTEXT
#define TARGET_SCHED_SET_SCHED_CONTEXT rs6000_set_sched_context
#undef TARGET_SCHED_FREE_SCHED_CONTEXT
#define TARGET_SCHED_FREE_SCHED_CONTEXT rs6000_free_sched_context

#undef TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD
#define TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD rs6000_builtin_mask_for_load
#undef TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT
#define TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT \
  rs6000_builtin_support_vector_misalignment
#undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
#define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE rs6000_vector_alignment_reachable
#undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
#define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
  rs6000_builtin_vectorization_cost
#undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
#define TARGET_VECTORIZE_PREFERRED_SIMD_MODE \
  rs6000_preferred_simd_mode
#undef TARGET_VECTORIZE_INIT_COST
#define TARGET_VECTORIZE_INIT_COST rs6000_init_cost
#undef TARGET_VECTORIZE_ADD_STMT_COST
#define TARGET_VECTORIZE_ADD_STMT_COST rs6000_add_stmt_cost
#undef TARGET_VECTORIZE_FINISH_COST
#define TARGET_VECTORIZE_FINISH_COST rs6000_finish_cost
#undef TARGET_VECTORIZE_DESTROY_COST_DATA
#define TARGET_VECTORIZE_DESTROY_COST_DATA rs6000_destroy_cost_data

#undef TARGET_INIT_BUILTINS
#define TARGET_INIT_BUILTINS rs6000_init_builtins
#undef TARGET_BUILTIN_DECL
#define TARGET_BUILTIN_DECL rs6000_builtin_decl

#undef TARGET_FOLD_BUILTIN
#define TARGET_FOLD_BUILTIN rs6000_fold_builtin
#undef TARGET_GIMPLE_FOLD_BUILTIN
#define TARGET_GIMPLE_FOLD_BUILTIN rs6000_gimple_fold_builtin

#undef TARGET_EXPAND_BUILTIN
#define TARGET_EXPAND_BUILTIN rs6000_expand_builtin

#undef TARGET_MANGLE_TYPE
#define TARGET_MANGLE_TYPE rs6000_mangle_type

#undef TARGET_INIT_LIBFUNCS
#define TARGET_INIT_LIBFUNCS rs6000_init_libfuncs

#if TARGET_MACHO
#undef TARGET_BINDS_LOCAL_P
#define TARGET_BINDS_LOCAL_P darwin_binds_local_p
#endif

#undef TARGET_MS_BITFIELD_LAYOUT_P
#define TARGET_MS_BITFIELD_LAYOUT_P rs6000_ms_bitfield_layout_p

#undef TARGET_ASM_OUTPUT_MI_THUNK
#define TARGET_ASM_OUTPUT_MI_THUNK rs6000_output_mi_thunk

#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
#define TARGET_ASM_CAN_OUTPUT_MI_THUNK hook_bool_const_tree_hwi_hwi_const_tree_true

#undef TARGET_FUNCTION_OK_FOR_SIBCALL
#define TARGET_FUNCTION_OK_FOR_SIBCALL rs6000_function_ok_for_sibcall

#undef TARGET_REGISTER_MOVE_COST
#define TARGET_REGISTER_MOVE_COST rs6000_register_move_cost
#undef TARGET_MEMORY_MOVE_COST
#define TARGET_MEMORY_MOVE_COST rs6000_memory_move_cost
1672 #undef TARGET_CANNOT_COPY_INSN_P
1673 #define TARGET_CANNOT_COPY_INSN_P rs6000_cannot_copy_insn_p
1674 #undef TARGET_RTX_COSTS
1675 #define TARGET_RTX_COSTS rs6000_rtx_costs
1676 #undef TARGET_ADDRESS_COST
1677 #define TARGET_ADDRESS_COST hook_int_rtx_mode_as_bool_0
1679 #undef TARGET_DWARF_REGISTER_SPAN
1680 #define TARGET_DWARF_REGISTER_SPAN rs6000_dwarf_register_span
1682 #undef TARGET_INIT_DWARF_REG_SIZES_EXTRA
1683 #define TARGET_INIT_DWARF_REG_SIZES_EXTRA rs6000_init_dwarf_reg_sizes_extra
1685 #undef TARGET_MEMBER_TYPE_FORCES_BLK
1686 #define TARGET_MEMBER_TYPE_FORCES_BLK rs6000_member_type_forces_blk
1688 #undef TARGET_PROMOTE_FUNCTION_MODE
1689 #define TARGET_PROMOTE_FUNCTION_MODE rs6000_promote_function_mode
1691 #undef TARGET_RETURN_IN_MEMORY
1692 #define TARGET_RETURN_IN_MEMORY rs6000_return_in_memory
1694 #undef TARGET_RETURN_IN_MSB
1695 #define TARGET_RETURN_IN_MSB rs6000_return_in_msb
1697 #undef TARGET_SETUP_INCOMING_VARARGS
1698 #define TARGET_SETUP_INCOMING_VARARGS setup_incoming_varargs
1700 /* Always strict argument naming on rs6000. */
1701 #undef TARGET_STRICT_ARGUMENT_NAMING
1702 #define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
1703 #undef TARGET_PRETEND_OUTGOING_VARARGS_NAMED
1704 #define TARGET_PRETEND_OUTGOING_VARARGS_NAMED hook_bool_CUMULATIVE_ARGS_true
1705 #undef TARGET_SPLIT_COMPLEX_ARG
1706 #define TARGET_SPLIT_COMPLEX_ARG hook_bool_const_tree_true
1707 #undef TARGET_MUST_PASS_IN_STACK
1708 #define TARGET_MUST_PASS_IN_STACK rs6000_must_pass_in_stack
1709 #undef TARGET_PASS_BY_REFERENCE
1710 #define TARGET_PASS_BY_REFERENCE rs6000_pass_by_reference
1711 #undef TARGET_ARG_PARTIAL_BYTES
1712 #define TARGET_ARG_PARTIAL_BYTES rs6000_arg_partial_bytes
1713 #undef TARGET_FUNCTION_ARG_ADVANCE
1714 #define TARGET_FUNCTION_ARG_ADVANCE rs6000_function_arg_advance
1715 #undef TARGET_FUNCTION_ARG
1716 #define TARGET_FUNCTION_ARG rs6000_function_arg
1717 #undef TARGET_FUNCTION_ARG_BOUNDARY
1718 #define TARGET_FUNCTION_ARG_BOUNDARY rs6000_function_arg_boundary
1720 #undef TARGET_BUILD_BUILTIN_VA_LIST
1721 #define TARGET_BUILD_BUILTIN_VA_LIST rs6000_build_builtin_va_list
1723 #undef TARGET_EXPAND_BUILTIN_VA_START
1724 #define TARGET_EXPAND_BUILTIN_VA_START rs6000_va_start
1726 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
1727 #define TARGET_GIMPLIFY_VA_ARG_EXPR rs6000_gimplify_va_arg
1729 #undef TARGET_EH_RETURN_FILTER_MODE
1730 #define TARGET_EH_RETURN_FILTER_MODE rs6000_eh_return_filter_mode
1732 #undef TARGET_SCALAR_MODE_SUPPORTED_P
1733 #define TARGET_SCALAR_MODE_SUPPORTED_P rs6000_scalar_mode_supported_p
1735 #undef TARGET_VECTOR_MODE_SUPPORTED_P
1736 #define TARGET_VECTOR_MODE_SUPPORTED_P rs6000_vector_mode_supported_p
1738 #undef TARGET_FLOATN_MODE
1739 #define TARGET_FLOATN_MODE rs6000_floatn_mode
1741 #undef TARGET_INVALID_ARG_FOR_UNPROTOTYPED_FN
1742 #define TARGET_INVALID_ARG_FOR_UNPROTOTYPED_FN invalid_arg_for_unprototyped_fn
1744 #undef TARGET_ASM_LOOP_ALIGN_MAX_SKIP
1745 #define TARGET_ASM_LOOP_ALIGN_MAX_SKIP rs6000_loop_align_max_skip
1747 #undef TARGET_MD_ASM_ADJUST
1748 #define TARGET_MD_ASM_ADJUST rs6000_md_asm_adjust
1750 #undef TARGET_OPTION_OVERRIDE
1751 #define TARGET_OPTION_OVERRIDE rs6000_option_override
1753 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
1754 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
1755 rs6000_builtin_vectorized_function
1757 #undef TARGET_VECTORIZE_BUILTIN_MD_VECTORIZED_FUNCTION
1758 #define TARGET_VECTORIZE_BUILTIN_MD_VECTORIZED_FUNCTION \
1759 rs6000_builtin_md_vectorized_function
1761 #ifdef TARGET_THREAD_SSP_OFFSET
1762 #undef TARGET_STACK_PROTECT_GUARD
1763 #define TARGET_STACK_PROTECT_GUARD hook_tree_void_null
1764 #endif
1766 #if !TARGET_MACHO
1767 #undef TARGET_STACK_PROTECT_FAIL
1768 #define TARGET_STACK_PROTECT_FAIL rs6000_stack_protect_fail
1769 #endif
1771 #ifdef HAVE_AS_TLS
1772 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
1773 #define TARGET_ASM_OUTPUT_DWARF_DTPREL rs6000_output_dwarf_dtprel
1774 #endif
1776 /* Use a 32-bit anchor range. This leads to sequences like:
1778 addis tmp,anchor,high
1779 add dest,tmp,low
1781 where tmp itself acts as an anchor, and can be shared between
1782 accesses to the same 64k page. */
1783 #undef TARGET_MIN_ANCHOR_OFFSET
1784 #define TARGET_MIN_ANCHOR_OFFSET -0x7fffffff - 1
1785 #undef TARGET_MAX_ANCHOR_OFFSET
1786 #define TARGET_MAX_ANCHOR_OFFSET 0x7fffffff
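/* For example (following the sequence above), two accesses whose anchor
   offsets share the same high part need only one addis:

	addis tmp,anchor,high
	lwz dest1,low1(tmp)
	lwz dest2,low2(tmp)

   The register names and offsets here are illustrative only.  */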
1787 #undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
1788 #define TARGET_USE_BLOCKS_FOR_CONSTANT_P rs6000_use_blocks_for_constant_p
1789 #undef TARGET_USE_BLOCKS_FOR_DECL_P
1790 #define TARGET_USE_BLOCKS_FOR_DECL_P rs6000_use_blocks_for_decl_p
1792 #undef TARGET_BUILTIN_RECIPROCAL
1793 #define TARGET_BUILTIN_RECIPROCAL rs6000_builtin_reciprocal
1795 #undef TARGET_EXPAND_TO_RTL_HOOK
1796 #define TARGET_EXPAND_TO_RTL_HOOK rs6000_alloc_sdmode_stack_slot
1798 #undef TARGET_INSTANTIATE_DECLS
1799 #define TARGET_INSTANTIATE_DECLS rs6000_instantiate_decls
1801 #undef TARGET_SECONDARY_RELOAD
1802 #define TARGET_SECONDARY_RELOAD rs6000_secondary_reload
1804 #undef TARGET_LEGITIMATE_ADDRESS_P
1805 #define TARGET_LEGITIMATE_ADDRESS_P rs6000_legitimate_address_p
1807 #undef TARGET_MODE_DEPENDENT_ADDRESS_P
1808 #define TARGET_MODE_DEPENDENT_ADDRESS_P rs6000_mode_dependent_address_p
1810 #undef TARGET_LRA_P
1811 #define TARGET_LRA_P rs6000_lra_p
1813 #undef TARGET_COMPUTE_PRESSURE_CLASSES
1814 #define TARGET_COMPUTE_PRESSURE_CLASSES rs6000_compute_pressure_classes
1816 #undef TARGET_CAN_ELIMINATE
1817 #define TARGET_CAN_ELIMINATE rs6000_can_eliminate
1819 #undef TARGET_CONDITIONAL_REGISTER_USAGE
1820 #define TARGET_CONDITIONAL_REGISTER_USAGE rs6000_conditional_register_usage
1822 #undef TARGET_SCHED_REASSOCIATION_WIDTH
1823 #define TARGET_SCHED_REASSOCIATION_WIDTH rs6000_reassociation_width
1825 #undef TARGET_TRAMPOLINE_INIT
1826 #define TARGET_TRAMPOLINE_INIT rs6000_trampoline_init
1828 #undef TARGET_FUNCTION_VALUE
1829 #define TARGET_FUNCTION_VALUE rs6000_function_value
1831 #undef TARGET_OPTION_VALID_ATTRIBUTE_P
1832 #define TARGET_OPTION_VALID_ATTRIBUTE_P rs6000_valid_attribute_p
1834 #undef TARGET_OPTION_SAVE
1835 #define TARGET_OPTION_SAVE rs6000_function_specific_save
1837 #undef TARGET_OPTION_RESTORE
1838 #define TARGET_OPTION_RESTORE rs6000_function_specific_restore
1840 #undef TARGET_OPTION_PRINT
1841 #define TARGET_OPTION_PRINT rs6000_function_specific_print
1843 #undef TARGET_CAN_INLINE_P
1844 #define TARGET_CAN_INLINE_P rs6000_can_inline_p
1846 #undef TARGET_SET_CURRENT_FUNCTION
1847 #define TARGET_SET_CURRENT_FUNCTION rs6000_set_current_function
1849 #undef TARGET_LEGITIMATE_CONSTANT_P
1850 #define TARGET_LEGITIMATE_CONSTANT_P rs6000_legitimate_constant_p
1852 #undef TARGET_VECTORIZE_VEC_PERM_CONST_OK
1853 #define TARGET_VECTORIZE_VEC_PERM_CONST_OK rs6000_vectorize_vec_perm_const_ok
1855 #undef TARGET_CAN_USE_DOLOOP_P
1856 #define TARGET_CAN_USE_DOLOOP_P can_use_doloop_if_innermost
1858 #undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
1859 #define TARGET_ATOMIC_ASSIGN_EXPAND_FENV rs6000_atomic_assign_expand_fenv
1861 #undef TARGET_LIBGCC_CMP_RETURN_MODE
1862 #define TARGET_LIBGCC_CMP_RETURN_MODE rs6000_abi_word_mode
1863 #undef TARGET_LIBGCC_SHIFT_COUNT_MODE
1864 #define TARGET_LIBGCC_SHIFT_COUNT_MODE rs6000_abi_word_mode
1865 #undef TARGET_UNWIND_WORD_MODE
1866 #define TARGET_UNWIND_WORD_MODE rs6000_abi_word_mode
1868 #undef TARGET_OFFLOAD_OPTIONS
1869 #define TARGET_OFFLOAD_OPTIONS rs6000_offload_options
1871 #undef TARGET_C_MODE_FOR_SUFFIX
1872 #define TARGET_C_MODE_FOR_SUFFIX rs6000_c_mode_for_suffix
1874 #undef TARGET_INVALID_BINARY_OP
1875 #define TARGET_INVALID_BINARY_OP rs6000_invalid_binary_op
1877 #undef TARGET_OPTAB_SUPPORTED_P
1878 #define TARGET_OPTAB_SUPPORTED_P rs6000_optab_supported_p
1880 #undef TARGET_CUSTOM_FUNCTION_DESCRIPTORS
1881 #define TARGET_CUSTOM_FUNCTION_DESCRIPTORS 1
1884 /* Processor table. */
1885 struct rs6000_ptt
1887 const char *const name; /* Canonical processor name. */
1888 const enum processor_type processor; /* Processor type enum value. */
1889 const HOST_WIDE_INT target_enable; /* Target flags to enable. */
1892 static struct rs6000_ptt const processor_target_table[] =
1894 #define RS6000_CPU(NAME, CPU, FLAGS) { NAME, CPU, FLAGS },
1895 #include "rs6000-cpus.def"
1896 #undef RS6000_CPU
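/* The table is filled in by the X-macro above; each entry in
   rs6000-cpus.def has the shape (the flags shown are illustrative):

	RS6000_CPU ("power8", PROCESSOR_POWER8, MASK_POWERPC64 | ...)

   and expands to one processor_target_table element.  */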
1899 /* Look up a processor name for -mcpu=xxx and -mtune=xxx. Return -1 if the
1900 name is invalid. */
1902 static int
1903 rs6000_cpu_name_lookup (const char *name)
1905 size_t i;
1907 if (name != NULL)
1909 for (i = 0; i < ARRAY_SIZE (processor_target_table); i++)
1910 if (! strcmp (name, processor_target_table[i].name))
1911 return (int)i;
1914 return -1;
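/* For instance, rs6000_cpu_name_lookup ("power8") returns the index of the
   "power8" entry in processor_target_table, while an unrecognized string
   returns -1 (the example name is illustrative).  */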
1918 /* Return number of consecutive hard regs needed starting at reg REGNO
1919 to hold something of mode MODE.
1920 This is ordinarily the length in words of a value of mode MODE
1921 but can be less for certain modes in special long registers.
1923 For the SPE, GPRs are 64 bits but only 32 bits are visible in
1924 scalar instructions. The upper 32 bits are only available to the
1925 SIMD instructions.
1927 POWER and PowerPC GPRs hold 32 bits worth;
1928    PowerPC64 GPRs and FPRs hold 64 bits worth. */
1930 static int
1931 rs6000_hard_regno_nregs_internal (int regno, machine_mode mode)
1933 unsigned HOST_WIDE_INT reg_size;
1935 /* 128-bit floating point usually takes 2 registers, unless it is IEEE
1936 128-bit floating point that can go in vector registers, which has VSX
1937 memory addressing. */
1938 if (FP_REGNO_P (regno))
1939 reg_size = (VECTOR_MEM_VSX_P (mode) || FLOAT128_VECTOR_P (mode)
1940 ? UNITS_PER_VSX_WORD
1941 : UNITS_PER_FP_WORD);
1943 else if (SPE_SIMD_REGNO_P (regno) && TARGET_SPE && SPE_VECTOR_MODE (mode))
1944 reg_size = UNITS_PER_SPE_WORD;
1946 else if (ALTIVEC_REGNO_P (regno))
1947 reg_size = UNITS_PER_ALTIVEC_WORD;
1949 /* The value returned for SCmode in the E500 double case is 2 for
1950 ABI compatibility; storing an SCmode value in a single register
1951 would require function_arg and rs6000_spe_function_arg to handle
1952 SCmode so as to pass the value correctly in a pair of
1953 registers. */
1954 else if (TARGET_E500_DOUBLE && FLOAT_MODE_P (mode) && mode != SCmode
1955 && !DECIMAL_FLOAT_MODE_P (mode) && SPE_SIMD_REGNO_P (regno))
1956 reg_size = UNITS_PER_FP_WORD;
1958 else
1959 reg_size = UNITS_PER_WORD;
1961 return (GET_MODE_SIZE (mode) + reg_size - 1) / reg_size;
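/* Worked example, assuming the usual register sizes: a 16-byte V4SImode
   value in an Altivec register (reg_size == UNITS_PER_ALTIVEC_WORD == 16)
   needs (16 + 16 - 1) / 16 == 1 register, while the same 16 bytes in
   32-bit GPRs (reg_size == 4) need (16 + 4 - 1) / 4 == 4 registers.  */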
1964 /* Value is 1 if hard register REGNO can hold a value of machine-mode
1965 MODE. */
1966 static int
1967 rs6000_hard_regno_mode_ok (int regno, machine_mode mode)
1969 int last_regno = regno + rs6000_hard_regno_nregs[mode][regno] - 1;
1971 if (COMPLEX_MODE_P (mode))
1972 mode = GET_MODE_INNER (mode);
1974   /* PTImode can only go in GPRs.  Quad word memory operations require even/odd
1975      register combinations, and we use PTImode where we need to deal with such
1976      operations.  Don't allow quad words in the argument or frame pointer
1977      registers, just registers 0..31.  */
1978 if (mode == PTImode)
1979 return (IN_RANGE (regno, FIRST_GPR_REGNO, LAST_GPR_REGNO)
1980 && IN_RANGE (last_regno, FIRST_GPR_REGNO, LAST_GPR_REGNO)
1981 && ((regno & 1) == 0));
1983   /* VSX registers that overlap the FPR registers are larger (128 bits) than on
1984      non-VSX implementations.  Don't allow an item to be split between a FP register
1985 and an Altivec register. Allow TImode in all VSX registers if the user
1986 asked for it. */
1987 if (TARGET_VSX && VSX_REGNO_P (regno)
1988 && (VECTOR_MEM_VSX_P (mode)
1989 || FLOAT128_VECTOR_P (mode)
1990 || reg_addr[mode].scalar_in_vmx_p
1991 || (TARGET_VSX_TIMODE && mode == TImode)
1992 || (TARGET_VADDUQM && mode == V1TImode)))
1994 if (FP_REGNO_P (regno))
1995 return FP_REGNO_P (last_regno);
1997 if (ALTIVEC_REGNO_P (regno))
1999 if (GET_MODE_SIZE (mode) != 16 && !reg_addr[mode].scalar_in_vmx_p)
2000 return 0;
2002 return ALTIVEC_REGNO_P (last_regno);
2006 /* The GPRs can hold any mode, but values bigger than one register
2007 cannot go past R31. */
2008 if (INT_REGNO_P (regno))
2009 return INT_REGNO_P (last_regno);
2011 /* The float registers (except for VSX vector modes) can only hold floating
2012 modes and DImode. */
2013 if (FP_REGNO_P (regno))
2015 if (FLOAT128_VECTOR_P (mode))
2016 return false;
2018 if (SCALAR_FLOAT_MODE_P (mode)
2019 && (mode != TDmode || (regno % 2) == 0)
2020 && FP_REGNO_P (last_regno))
2021 return 1;
2023 if (GET_MODE_CLASS (mode) == MODE_INT)
2025       if (GET_MODE_SIZE (mode) == UNITS_PER_FP_WORD)
2026 return 1;
2028 if (TARGET_VSX_SMALL_INTEGER)
2030 if (mode == SImode)
2031 return 1;
2033 if (TARGET_P9_VECTOR && (mode == HImode || mode == QImode))
2034 return 1;
2038 if (PAIRED_SIMD_REGNO_P (regno) && TARGET_PAIRED_FLOAT
2039 && PAIRED_VECTOR_MODE (mode))
2040 return 1;
2042 return 0;
2045 /* The CR register can only hold CC modes. */
2046 if (CR_REGNO_P (regno))
2047 return GET_MODE_CLASS (mode) == MODE_CC;
2049 if (CA_REGNO_P (regno))
2050 return mode == Pmode || mode == SImode;
2052   /* AltiVec only in AltiVec registers. */
2053 if (ALTIVEC_REGNO_P (regno))
2054 return (VECTOR_MEM_ALTIVEC_OR_VSX_P (mode)
2055 || mode == V1TImode);
2057 /* ...but GPRs can hold SIMD data on the SPE in one register. */
2058 if (SPE_SIMD_REGNO_P (regno) && TARGET_SPE && SPE_VECTOR_MODE (mode))
2059 return 1;
2061   /* We cannot put non-VSX TImode or PTImode anywhere except the general
2062      registers, and the value must fit within the register set. */
2064 return GET_MODE_SIZE (mode) <= UNITS_PER_WORD;
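/* To summarize with an example: under the rules above a TDmode value is
   accepted in FP registers only when the starting register number is even,
   and a PTImode value only in an even/odd GPR pair that lies entirely
   within r0..r31.  */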
2067 /* Print interesting facts about registers. */
2068 static void
2069 rs6000_debug_reg_print (int first_regno, int last_regno, const char *reg_name)
2071 int r, m;
2073 for (r = first_regno; r <= last_regno; ++r)
2075 const char *comma = "";
2076 int len;
2078 if (first_regno == last_regno)
2079 fprintf (stderr, "%s:\t", reg_name);
2080 else
2081 fprintf (stderr, "%s%d:\t", reg_name, r - first_regno);
2083 len = 8;
2084 for (m = 0; m < NUM_MACHINE_MODES; ++m)
2085 if (rs6000_hard_regno_mode_ok_p[m][r] && rs6000_hard_regno_nregs[m][r])
2087 if (len > 70)
2089 fprintf (stderr, ",\n\t");
2090 len = 8;
2091 comma = "";
2094 if (rs6000_hard_regno_nregs[m][r] > 1)
2095 len += fprintf (stderr, "%s%s/%d", comma, GET_MODE_NAME (m),
2096 rs6000_hard_regno_nregs[m][r]);
2097 else
2098 len += fprintf (stderr, "%s%s", comma, GET_MODE_NAME (m));
2100 comma = ", ";
2103 if (call_used_regs[r])
2105 if (len > 70)
2107 fprintf (stderr, ",\n\t");
2108 len = 8;
2109 comma = "";
2112 len += fprintf (stderr, "%s%s", comma, "call-used");
2113 comma = ", ";
2116 if (fixed_regs[r])
2118 if (len > 70)
2120 fprintf (stderr, ",\n\t");
2121 len = 8;
2122 comma = "";
2125 len += fprintf (stderr, "%s%s", comma, "fixed");
2126 comma = ", ";
2129 if (len > 70)
2131 fprintf (stderr, ",\n\t");
2132 comma = "";
2135 len += fprintf (stderr, "%sreg-class = %s", comma,
2136 reg_class_names[(int)rs6000_regno_regclass[r]]);
2137 comma = ", ";
2139 if (len > 70)
2141 fprintf (stderr, ",\n\t");
2142 comma = "";
2145 fprintf (stderr, "%sregno = %d\n", comma, r);
2149 static const char *
2150 rs6000_debug_vector_unit (enum rs6000_vector v)
2152 const char *ret;
2154 switch (v)
2156 case VECTOR_NONE: ret = "none"; break;
2157 case VECTOR_ALTIVEC: ret = "altivec"; break;
2158 case VECTOR_VSX: ret = "vsx"; break;
2159 case VECTOR_P8_VECTOR: ret = "p8_vector"; break;
2160 case VECTOR_PAIRED: ret = "paired"; break;
2161 case VECTOR_SPE: ret = "spe"; break;
2162 case VECTOR_OTHER: ret = "other"; break;
2163 default: ret = "unknown"; break;
2166 return ret;
2169 /* Inner function printing just the address mask for a particular reload
2170 register class. */
2171 DEBUG_FUNCTION char *
2172 rs6000_debug_addr_mask (addr_mask_type mask, bool keep_spaces)
2174 static char ret[8];
2175 char *p = ret;
2177 if ((mask & RELOAD_REG_VALID) != 0)
2178 *p++ = 'v';
2179 else if (keep_spaces)
2180 *p++ = ' ';
2182 if ((mask & RELOAD_REG_MULTIPLE) != 0)
2183 *p++ = 'm';
2184 else if (keep_spaces)
2185 *p++ = ' ';
2187 if ((mask & RELOAD_REG_INDEXED) != 0)
2188 *p++ = 'i';
2189 else if (keep_spaces)
2190 *p++ = ' ';
2192 if ((mask & RELOAD_REG_QUAD_OFFSET) != 0)
2193 *p++ = 'O';
2194 else if ((mask & RELOAD_REG_OFFSET) != 0)
2195 *p++ = 'o';
2196 else if (keep_spaces)
2197 *p++ = ' ';
2199 if ((mask & RELOAD_REG_PRE_INCDEC) != 0)
2200 *p++ = '+';
2201 else if (keep_spaces)
2202 *p++ = ' ';
2204 if ((mask & RELOAD_REG_PRE_MODIFY) != 0)
2205 *p++ = '+';
2206 else if (keep_spaces)
2207 *p++ = ' ';
2209 if ((mask & RELOAD_REG_AND_M16) != 0)
2210 *p++ = '&';
2211 else if (keep_spaces)
2212 *p++ = ' ';
2214 *p = '\0';
2216 return ret;
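/* For example, a mask with RELOAD_REG_VALID, RELOAD_REG_INDEXED,
   RELOAD_REG_OFFSET and RELOAD_REG_PRE_INCDEC set prints as "v io+  "
   when KEEP_SPACES is true and as "vio+" when it is false.  */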
2219 /* Print the address masks in a human readable fashion. */
2220 DEBUG_FUNCTION void
2221 rs6000_debug_print_mode (ssize_t m)
2223 ssize_t rc;
2224 int spaces = 0;
2225 bool fuse_extra_p;
2227 fprintf (stderr, "Mode: %-5s", GET_MODE_NAME (m));
2228 for (rc = 0; rc < N_RELOAD_REG; rc++)
2229 fprintf (stderr, " %s: %s", reload_reg_map[rc].name,
2230 rs6000_debug_addr_mask (reg_addr[m].addr_mask[rc], true));
2232 if ((reg_addr[m].reload_store != CODE_FOR_nothing)
2233 || (reg_addr[m].reload_load != CODE_FOR_nothing))
2234 fprintf (stderr, " Reload=%c%c",
2235 (reg_addr[m].reload_store != CODE_FOR_nothing) ? 's' : '*',
2236 (reg_addr[m].reload_load != CODE_FOR_nothing) ? 'l' : '*');
2237 else
2238 spaces += sizeof (" Reload=sl") - 1;
2240 if (reg_addr[m].scalar_in_vmx_p)
2242 fprintf (stderr, "%*s Upper=y", spaces, "");
2243 spaces = 0;
2245 else
2246 spaces += sizeof (" Upper=y") - 1;
2248 fuse_extra_p = ((reg_addr[m].fusion_gpr_ld != CODE_FOR_nothing)
2249 || reg_addr[m].fused_toc);
2250 if (!fuse_extra_p)
2252 for (rc = 0; rc < N_RELOAD_REG; rc++)
2254 if (rc != RELOAD_REG_ANY)
2256 	  if (reg_addr[m].fusion_addi_ld[rc] != CODE_FOR_nothing
2258 || reg_addr[m].fusion_addi_st[rc] != CODE_FOR_nothing
2259 || reg_addr[m].fusion_addis_ld[rc] != CODE_FOR_nothing
2260 || reg_addr[m].fusion_addis_st[rc] != CODE_FOR_nothing)
2262 fuse_extra_p = true;
2263 break;
2269 if (fuse_extra_p)
2271 fprintf (stderr, "%*s Fuse:", spaces, "");
2272 spaces = 0;
2274 for (rc = 0; rc < N_RELOAD_REG; rc++)
2276 if (rc != RELOAD_REG_ANY)
2278 char load, store;
2280 if (reg_addr[m].fusion_addis_ld[rc] != CODE_FOR_nothing)
2281 load = 'l';
2282 else if (reg_addr[m].fusion_addi_ld[rc] != CODE_FOR_nothing)
2283 load = 'L';
2284 else
2285 load = '-';
2287 if (reg_addr[m].fusion_addis_st[rc] != CODE_FOR_nothing)
2288 store = 's';
2289 else if (reg_addr[m].fusion_addi_st[rc] != CODE_FOR_nothing)
2290 store = 'S';
2291 else
2292 store = '-';
2294 if (load == '-' && store == '-')
2295 spaces += 5;
2296 else
2298 fprintf (stderr, "%*s%c=%c%c", (spaces + 1), "",
2299 reload_reg_map[rc].name[0], load, store);
2300 spaces = 0;
2305 if (reg_addr[m].fusion_gpr_ld != CODE_FOR_nothing)
2307 fprintf (stderr, "%*sP8gpr", (spaces + 1), "");
2308 spaces = 0;
2310 else
2311 spaces += sizeof (" P8gpr") - 1;
2313 if (reg_addr[m].fused_toc)
2315 fprintf (stderr, "%*sToc", (spaces + 1), "");
2316 spaces = 0;
2318 else
2319 spaces += sizeof (" Toc") - 1;
2321 else
2322 spaces += sizeof (" Fuse: G=ls F=ls v=ls P8gpr Toc") - 1;
2324 if (rs6000_vector_unit[m] != VECTOR_NONE
2325 || rs6000_vector_mem[m] != VECTOR_NONE)
2327 fprintf (stderr, "%*s vector: arith=%-10s mem=%s",
2328 spaces, "",
2329 rs6000_debug_vector_unit (rs6000_vector_unit[m]),
2330 rs6000_debug_vector_unit (rs6000_vector_mem[m]));
2333 fputs ("\n", stderr);
2336 #define DEBUG_FMT_ID "%-32s= "
2337 #define DEBUG_FMT_D DEBUG_FMT_ID "%d\n"
2338 #define DEBUG_FMT_WX DEBUG_FMT_ID "%#.12" HOST_WIDE_INT_PRINT "x: "
2339 #define DEBUG_FMT_S DEBUG_FMT_ID "%s\n"
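/* For example, fprintf (stderr, DEBUG_FMT_S, "abi", "aix") prints the key
   left-justified in a 32-column field, followed by "= aix" and a newline.  */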
2341 /* Print various interesting information with -mdebug=reg. */
2342 static void
2343 rs6000_debug_reg_global (void)
2345 static const char *const tf[2] = { "false", "true" };
2346 const char *nl = (const char *)0;
2347 int m;
2348 size_t m1, m2, v;
2349 char costly_num[20];
2350 char nop_num[20];
2351 char flags_buffer[40];
2352 const char *costly_str;
2353 const char *nop_str;
2354 const char *trace_str;
2355 const char *abi_str;
2356 const char *cmodel_str;
2357 struct cl_target_option cl_opts;
2359 /* Modes we want tieable information on. */
2360 static const machine_mode print_tieable_modes[] = {
2361 QImode,
2362 HImode,
2363 SImode,
2364 DImode,
2365 TImode,
2366 PTImode,
2367 SFmode,
2368 DFmode,
2369 TFmode,
2370 IFmode,
2371 KFmode,
2372 SDmode,
2373 DDmode,
2374 TDmode,
2375 V8QImode,
2376 V4HImode,
2377 V2SImode,
2378 V16QImode,
2379 V8HImode,
2380 V4SImode,
2381 V2DImode,
2382 V1TImode,
2383 V32QImode,
2384 V16HImode,
2385 V8SImode,
2386 V4DImode,
2387 V2TImode,
2388 V2SFmode,
2389 V4SFmode,
2390 V2DFmode,
2391 V8SFmode,
2392 V4DFmode,
2393 CCmode,
2394 CCUNSmode,
2395 CCEQmode,
2398 /* Virtual regs we are interested in. */
2399   static const struct {
2400 int regno; /* register number. */
2401 const char *name; /* register name. */
2402 } virtual_regs[] = {
2403 { STACK_POINTER_REGNUM, "stack pointer:" },
2404 { TOC_REGNUM, "toc: " },
2405 { STATIC_CHAIN_REGNUM, "static chain: " },
2406 { RS6000_PIC_OFFSET_TABLE_REGNUM, "pic offset: " },
2407 { HARD_FRAME_POINTER_REGNUM, "hard frame: " },
2408 { ARG_POINTER_REGNUM, "arg pointer: " },
2409 { FRAME_POINTER_REGNUM, "frame pointer:" },
2410 { FIRST_PSEUDO_REGISTER, "first pseudo: " },
2411 { FIRST_VIRTUAL_REGISTER, "first virtual:" },
2412 { VIRTUAL_INCOMING_ARGS_REGNUM, "incoming_args:" },
2413 { VIRTUAL_STACK_VARS_REGNUM, "stack_vars: " },
2414 { VIRTUAL_STACK_DYNAMIC_REGNUM, "stack_dynamic:" },
2415 { VIRTUAL_OUTGOING_ARGS_REGNUM, "outgoing_args:" },
2416 { VIRTUAL_CFA_REGNUM, "cfa (frame): " },
2417     { VIRTUAL_PREFERRED_STACK_BOUNDARY_REGNUM, "stack boundary:" },
2418 { LAST_VIRTUAL_REGISTER, "last virtual: " },
2421 fputs ("\nHard register information:\n", stderr);
2422 rs6000_debug_reg_print (FIRST_GPR_REGNO, LAST_GPR_REGNO, "gr");
2423 rs6000_debug_reg_print (FIRST_FPR_REGNO, LAST_FPR_REGNO, "fp");
2424 rs6000_debug_reg_print (FIRST_ALTIVEC_REGNO,
2425 LAST_ALTIVEC_REGNO,
2426 "vs");
2427 rs6000_debug_reg_print (LR_REGNO, LR_REGNO, "lr");
2428 rs6000_debug_reg_print (CTR_REGNO, CTR_REGNO, "ctr");
2429 rs6000_debug_reg_print (CR0_REGNO, CR7_REGNO, "cr");
2430 rs6000_debug_reg_print (CA_REGNO, CA_REGNO, "ca");
2431 rs6000_debug_reg_print (VRSAVE_REGNO, VRSAVE_REGNO, "vrsave");
2432 rs6000_debug_reg_print (VSCR_REGNO, VSCR_REGNO, "vscr");
2433 rs6000_debug_reg_print (SPE_ACC_REGNO, SPE_ACC_REGNO, "spe_a");
2434 rs6000_debug_reg_print (SPEFSCR_REGNO, SPEFSCR_REGNO, "spe_f");
2436 fputs ("\nVirtual/stack/frame registers:\n", stderr);
2437 for (v = 0; v < ARRAY_SIZE (virtual_regs); v++)
2438 fprintf (stderr, "%s regno = %3d\n", virtual_regs[v].name, virtual_regs[v].regno);
2440 fprintf (stderr,
2441 "\n"
2442 "d reg_class = %s\n"
2443 "f reg_class = %s\n"
2444 "v reg_class = %s\n"
2445 "wa reg_class = %s\n"
2446 "wb reg_class = %s\n"
2447 "wd reg_class = %s\n"
2448 "we reg_class = %s\n"
2449 "wf reg_class = %s\n"
2450 "wg reg_class = %s\n"
2451 "wh reg_class = %s\n"
2452 "wi reg_class = %s\n"
2453 "wj reg_class = %s\n"
2454 "wk reg_class = %s\n"
2455 "wl reg_class = %s\n"
2456 "wm reg_class = %s\n"
2457 "wo reg_class = %s\n"
2458 "wp reg_class = %s\n"
2459 "wq reg_class = %s\n"
2460 "wr reg_class = %s\n"
2461 "ws reg_class = %s\n"
2462 "wt reg_class = %s\n"
2463 "wu reg_class = %s\n"
2464 "wv reg_class = %s\n"
2465 "ww reg_class = %s\n"
2466 "wx reg_class = %s\n"
2467 "wy reg_class = %s\n"
2468 "wz reg_class = %s\n"
2469 "wH reg_class = %s\n"
2470 "wI reg_class = %s\n"
2471 "wJ reg_class = %s\n"
2472 "wK reg_class = %s\n"
2473 "\n",
2474 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_d]],
2475 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_f]],
2476 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_v]],
2477 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wa]],
2478 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wb]],
2479 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wd]],
2480 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_we]],
2481 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wf]],
2482 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wg]],
2483 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wh]],
2484 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wi]],
2485 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wj]],
2486 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wk]],
2487 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wl]],
2488 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wm]],
2489 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wo]],
2490 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wp]],
2491 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wq]],
2492 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wr]],
2493 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_ws]],
2494 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wt]],
2495 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wu]],
2496 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wv]],
2497 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_ww]],
2498 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wx]],
2499 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wy]],
2500 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wz]],
2501 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wH]],
2502 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wI]],
2503 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wJ]],
2504 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wK]]);
2506 nl = "\n";
2507 for (m = 0; m < NUM_MACHINE_MODES; ++m)
2508 rs6000_debug_print_mode (m);
2510 fputs ("\n", stderr);
2512 for (m1 = 0; m1 < ARRAY_SIZE (print_tieable_modes); m1++)
2514 machine_mode mode1 = print_tieable_modes[m1];
2515 bool first_time = true;
2517 nl = (const char *)0;
2518 for (m2 = 0; m2 < ARRAY_SIZE (print_tieable_modes); m2++)
2520 machine_mode mode2 = print_tieable_modes[m2];
2521 if (mode1 != mode2 && MODES_TIEABLE_P (mode1, mode2))
2523 if (first_time)
2525 fprintf (stderr, "Tieable modes %s:", GET_MODE_NAME (mode1));
2526 nl = "\n";
2527 first_time = false;
2530 fprintf (stderr, " %s", GET_MODE_NAME (mode2));
2534 if (!first_time)
2535 fputs ("\n", stderr);
2538 if (nl)
2539 fputs (nl, stderr);
2541 if (rs6000_recip_control)
2543 fprintf (stderr, "\nReciprocal mask = 0x%x\n", rs6000_recip_control);
2545 for (m = 0; m < NUM_MACHINE_MODES; ++m)
2546 if (rs6000_recip_bits[m])
2548 fprintf (stderr,
2549 "Reciprocal estimate mode: %-5s divide: %s rsqrt: %s\n",
2550 GET_MODE_NAME (m),
2551 (RS6000_RECIP_AUTO_RE_P (m)
2552 ? "auto"
2553 : (RS6000_RECIP_HAVE_RE_P (m) ? "have" : "none")),
2554 (RS6000_RECIP_AUTO_RSQRTE_P (m)
2555 ? "auto"
2556 : (RS6000_RECIP_HAVE_RSQRTE_P (m) ? "have" : "none")));
2559 fputs ("\n", stderr);
2562 if (rs6000_cpu_index >= 0)
2564 const char *name = processor_target_table[rs6000_cpu_index].name;
2565 HOST_WIDE_INT flags
2566 = processor_target_table[rs6000_cpu_index].target_enable;
2568 sprintf (flags_buffer, "-mcpu=%s flags", name);
2569 rs6000_print_isa_options (stderr, 0, flags_buffer, flags);
2571 else
2572 fprintf (stderr, DEBUG_FMT_S, "cpu", "<none>");
2574 if (rs6000_tune_index >= 0)
2576 const char *name = processor_target_table[rs6000_tune_index].name;
2577 HOST_WIDE_INT flags
2578 = processor_target_table[rs6000_tune_index].target_enable;
2580 sprintf (flags_buffer, "-mtune=%s flags", name);
2581 rs6000_print_isa_options (stderr, 0, flags_buffer, flags);
2583 else
2584 fprintf (stderr, DEBUG_FMT_S, "tune", "<none>");
2586 cl_target_option_save (&cl_opts, &global_options);
2587 rs6000_print_isa_options (stderr, 0, "rs6000_isa_flags",
2588 rs6000_isa_flags);
2590 rs6000_print_isa_options (stderr, 0, "rs6000_isa_flags_explicit",
2591 rs6000_isa_flags_explicit);
2593 rs6000_print_builtin_options (stderr, 0, "rs6000_builtin_mask",
2594 rs6000_builtin_mask);
2596 rs6000_print_isa_options (stderr, 0, "TARGET_DEFAULT", TARGET_DEFAULT);
2598 fprintf (stderr, DEBUG_FMT_S, "--with-cpu default",
2599 OPTION_TARGET_CPU_DEFAULT ? OPTION_TARGET_CPU_DEFAULT : "<none>");
2601 switch (rs6000_sched_costly_dep)
2603 case max_dep_latency:
2604 costly_str = "max_dep_latency";
2605 break;
2607 case no_dep_costly:
2608 costly_str = "no_dep_costly";
2609 break;
2611 case all_deps_costly:
2612 costly_str = "all_deps_costly";
2613 break;
2615 case true_store_to_load_dep_costly:
2616 costly_str = "true_store_to_load_dep_costly";
2617 break;
2619 case store_to_load_dep_costly:
2620 costly_str = "store_to_load_dep_costly";
2621 break;
2623 default:
2624 costly_str = costly_num;
2625 sprintf (costly_num, "%d", (int)rs6000_sched_costly_dep);
2626 break;
2629 fprintf (stderr, DEBUG_FMT_S, "sched_costly_dep", costly_str);
2631 switch (rs6000_sched_insert_nops)
2633 case sched_finish_regroup_exact:
2634 nop_str = "sched_finish_regroup_exact";
2635 break;
2637 case sched_finish_pad_groups:
2638 nop_str = "sched_finish_pad_groups";
2639 break;
2641 case sched_finish_none:
2642 nop_str = "sched_finish_none";
2643 break;
2645 default:
2646 nop_str = nop_num;
2647 sprintf (nop_num, "%d", (int)rs6000_sched_insert_nops);
2648 break;
2651 fprintf (stderr, DEBUG_FMT_S, "sched_insert_nops", nop_str);
2653 switch (rs6000_sdata)
2655 default:
2656 case SDATA_NONE:
2657 break;
2659 case SDATA_DATA:
2660 fprintf (stderr, DEBUG_FMT_S, "sdata", "data");
2661 break;
2663 case SDATA_SYSV:
2664 fprintf (stderr, DEBUG_FMT_S, "sdata", "sysv");
2665 break;
2667 case SDATA_EABI:
2668 fprintf (stderr, DEBUG_FMT_S, "sdata", "eabi");
2669 break;
2673 switch (rs6000_traceback)
2675 case traceback_default: trace_str = "default"; break;
2676 case traceback_none: trace_str = "none"; break;
2677 case traceback_part: trace_str = "part"; break;
2678 case traceback_full: trace_str = "full"; break;
2679 default: trace_str = "unknown"; break;
2682 fprintf (stderr, DEBUG_FMT_S, "traceback", trace_str);
2684 switch (rs6000_current_cmodel)
2686 case CMODEL_SMALL: cmodel_str = "small"; break;
2687 case CMODEL_MEDIUM: cmodel_str = "medium"; break;
2688 case CMODEL_LARGE: cmodel_str = "large"; break;
2689 default: cmodel_str = "unknown"; break;
2692 fprintf (stderr, DEBUG_FMT_S, "cmodel", cmodel_str);
2694 switch (rs6000_current_abi)
2696 case ABI_NONE: abi_str = "none"; break;
2697 case ABI_AIX: abi_str = "aix"; break;
2698 case ABI_ELFv2: abi_str = "ELFv2"; break;
2699 case ABI_V4: abi_str = "V4"; break;
2700 case ABI_DARWIN: abi_str = "darwin"; break;
2701 default: abi_str = "unknown"; break;
2704 fprintf (stderr, DEBUG_FMT_S, "abi", abi_str);
2706 if (rs6000_altivec_abi)
2707 fprintf (stderr, DEBUG_FMT_S, "altivec_abi", "true");
2709 if (rs6000_spe_abi)
2710 fprintf (stderr, DEBUG_FMT_S, "spe_abi", "true");
2712 if (rs6000_darwin64_abi)
2713 fprintf (stderr, DEBUG_FMT_S, "darwin64_abi", "true");
2715 if (rs6000_float_gprs)
2716 fprintf (stderr, DEBUG_FMT_S, "float_gprs", "true");
2718 fprintf (stderr, DEBUG_FMT_S, "fprs",
2719 (TARGET_FPRS ? "true" : "false"));
2721 fprintf (stderr, DEBUG_FMT_S, "single_float",
2722 (TARGET_SINGLE_FLOAT ? "true" : "false"));
2724 fprintf (stderr, DEBUG_FMT_S, "double_float",
2725 (TARGET_DOUBLE_FLOAT ? "true" : "false"));
2727 fprintf (stderr, DEBUG_FMT_S, "soft_float",
2728 (TARGET_SOFT_FLOAT ? "true" : "false"));
2730 fprintf (stderr, DEBUG_FMT_S, "e500_single",
2731 (TARGET_E500_SINGLE ? "true" : "false"));
2733 fprintf (stderr, DEBUG_FMT_S, "e500_double",
2734 (TARGET_E500_DOUBLE ? "true" : "false"));
2736 if (TARGET_LINK_STACK)
2737 fprintf (stderr, DEBUG_FMT_S, "link_stack", "true");
2739 fprintf (stderr, DEBUG_FMT_S, "lra", TARGET_LRA ? "true" : "false");
2741 if (TARGET_P8_FUSION)
2743 char options[80];
2745 strcpy (options, (TARGET_P9_FUSION) ? "power9" : "power8");
2746 if (TARGET_TOC_FUSION)
2747 strcat (options, ", toc");
2749 if (TARGET_P8_FUSION_SIGN)
2750 strcat (options, ", sign");
2752 fprintf (stderr, DEBUG_FMT_S, "fusion", options);
2755 fprintf (stderr, DEBUG_FMT_S, "plt-format",
2756 TARGET_SECURE_PLT ? "secure" : "bss");
2757 fprintf (stderr, DEBUG_FMT_S, "struct-return",
2758 aix_struct_return ? "aix" : "sysv");
2759 fprintf (stderr, DEBUG_FMT_S, "always_hint", tf[!!rs6000_always_hint]);
2760 fprintf (stderr, DEBUG_FMT_S, "sched_groups", tf[!!rs6000_sched_groups]);
2761 fprintf (stderr, DEBUG_FMT_S, "align_branch",
2762 tf[!!rs6000_align_branch_targets]);
2763 fprintf (stderr, DEBUG_FMT_D, "tls_size", rs6000_tls_size);
2764 fprintf (stderr, DEBUG_FMT_D, "long_double_size",
2765 rs6000_long_double_type_size);
2766 fprintf (stderr, DEBUG_FMT_D, "sched_restricted_insns_priority",
2767 (int)rs6000_sched_restricted_insns_priority);
2768 fprintf (stderr, DEBUG_FMT_D, "Number of standard builtins",
2769 (int)END_BUILTINS);
2770 fprintf (stderr, DEBUG_FMT_D, "Number of rs6000 builtins",
2771 (int)RS6000_BUILTIN_COUNT);
2773 fprintf (stderr, DEBUG_FMT_D, "Enable float128 on VSX",
2774 (int)TARGET_FLOAT128_ENABLE_TYPE);
2776 if (TARGET_VSX)
2777 fprintf (stderr, DEBUG_FMT_D, "VSX easy 64-bit scalar element",
2778 (int)VECTOR_ELEMENT_SCALAR_64BIT);
2780 if (TARGET_DIRECT_MOVE_128)
2781 fprintf (stderr, DEBUG_FMT_D, "VSX easy 64-bit mfvsrld element",
2782 (int)VECTOR_ELEMENT_MFVSRLD_64BIT);
2786 /* Update the addr mask bits in reg_addr to help secondary reload and the
2787    legitimate address support (GO_IF_LEGITIMATE_ADDRESS) figure out the
2788    appropriate addressing to use. */
2790 static void
2791 rs6000_setup_reg_addr_masks (void)
2793 ssize_t rc, reg, m, nregs;
2794 addr_mask_type any_addr_mask, addr_mask;
2796 for (m = 0; m < NUM_MACHINE_MODES; ++m)
2798 machine_mode m2 = (machine_mode) m;
2799 bool complex_p = false;
2800 bool small_int_p = (m2 == QImode || m2 == HImode || m2 == SImode);
2801 size_t msize;
2803 if (COMPLEX_MODE_P (m2))
2805 complex_p = true;
2806 m2 = GET_MODE_INNER (m2);
2809 msize = GET_MODE_SIZE (m2);
2811 /* SDmode is special in that we want to access it only via REG+REG
2812 addressing on power7 and above, since we want to use the LFIWZX and
2813 	 STFIWX instructions to load and store it.  */
2814 bool indexed_only_p = (m == SDmode && TARGET_NO_SDMODE_STACK);
2816 any_addr_mask = 0;
2817 for (rc = FIRST_RELOAD_REG_CLASS; rc <= LAST_RELOAD_REG_CLASS; rc++)
2819 addr_mask = 0;
2820 reg = reload_reg_map[rc].reg;
2822 /* Can mode values go in the GPR/FPR/Altivec registers? */
2823 if (reg >= 0 && rs6000_hard_regno_mode_ok_p[m][reg])
2825 bool small_int_vsx_p = (small_int_p
2826 && (rc == RELOAD_REG_FPR
2827 || rc == RELOAD_REG_VMX));
2829 nregs = rs6000_hard_regno_nregs[m][reg];
2830 addr_mask |= RELOAD_REG_VALID;
2832 /* Indicate if the mode takes more than 1 physical register. If
2833 it takes a single register, indicate it can do REG+REG
2834 addressing. Small integers in VSX registers can only do
2835 REG+REG addressing. */
2836 if (small_int_vsx_p)
2837 addr_mask |= RELOAD_REG_INDEXED;
2838 else if (nregs > 1 || m == BLKmode || complex_p)
2839 addr_mask |= RELOAD_REG_MULTIPLE;
2840 else
2841 addr_mask |= RELOAD_REG_INDEXED;
2843 /* Figure out if we can do PRE_INC, PRE_DEC, or PRE_MODIFY
2844 addressing. Restrict addressing on SPE for 64-bit types
2845 because of the SUBREG hackery used to address 64-bit floats in
2846 '32-bit' GPRs. If we allow scalars into Altivec registers,
2847 don't allow PRE_INC, PRE_DEC, or PRE_MODIFY. */
2849 if (TARGET_UPDATE
2850 && (rc == RELOAD_REG_GPR || rc == RELOAD_REG_FPR)
2851 && msize <= 8
2852 && !VECTOR_MODE_P (m2)
2853 && !FLOAT128_VECTOR_P (m2)
2854 && !complex_p
2855 && !small_int_vsx_p
2856 && (m2 != DFmode || !TARGET_UPPER_REGS_DF)
2857 && (m2 != SFmode || !TARGET_UPPER_REGS_SF)
2858 && !(TARGET_E500_DOUBLE && msize == 8))
2860 addr_mask |= RELOAD_REG_PRE_INCDEC;
2862 /* PRE_MODIFY is more restricted than PRE_INC/PRE_DEC in that
2863 we don't allow PRE_MODIFY for some multi-register
2864 operations. */
2865 switch (m)
2867 default:
2868 addr_mask |= RELOAD_REG_PRE_MODIFY;
2869 break;
2871 case DImode:
2872 if (TARGET_POWERPC64)
2873 addr_mask |= RELOAD_REG_PRE_MODIFY;
2874 break;
2876 case DFmode:
2877 case DDmode:
2878 if (TARGET_DF_INSN)
2879 addr_mask |= RELOAD_REG_PRE_MODIFY;
2880 break;
2885 /* GPR and FPR registers can do REG+OFFSET addressing, except
2886 possibly for SDmode. ISA 3.0 (i.e. power9) adds D-form addressing
2887 for 64-bit scalars and 32-bit SFmode to altivec registers. */
2888 if ((addr_mask != 0) && !indexed_only_p
2889 && msize <= 8
2890 && (rc == RELOAD_REG_GPR
2891 || ((msize == 8 || m2 == SFmode)
2892 && (rc == RELOAD_REG_FPR
2893 || (rc == RELOAD_REG_VMX
2894 && TARGET_P9_DFORM_SCALAR)))))
2895 addr_mask |= RELOAD_REG_OFFSET;
2897 	  /* VSX registers can do REG+OFFSET addressing if ISA 3.0
2898 	     instructions are enabled.  The offset for 128-bit VSX registers is
2899 	     only 12 bits.  While GPRs can handle the full offset range, VSX
2900 registers can only handle the restricted range. */
2901 else if ((addr_mask != 0) && !indexed_only_p
2902 && msize == 16 && TARGET_P9_DFORM_VECTOR
2903 && (ALTIVEC_OR_VSX_VECTOR_MODE (m2)
2904 || (m2 == TImode && TARGET_VSX_TIMODE)))
2906 addr_mask |= RELOAD_REG_OFFSET;
2907 if (rc == RELOAD_REG_FPR || rc == RELOAD_REG_VMX)
2908 addr_mask |= RELOAD_REG_QUAD_OFFSET;
2911 /* VMX registers can do (REG & -16) and ((REG+REG) & -16)
2912 addressing on 128-bit types. */
2913 if (rc == RELOAD_REG_VMX && msize == 16
2914 && (addr_mask & RELOAD_REG_VALID) != 0)
2915 addr_mask |= RELOAD_REG_AND_M16;
2917 reg_addr[m].addr_mask[rc] = addr_mask;
2918 any_addr_mask |= addr_mask;
2921 reg_addr[m].addr_mask[RELOAD_REG_ANY] = any_addr_mask;
2926 /* Initialize the various global tables that are based on register size. */
2927 static void
2928 rs6000_init_hard_regno_mode_ok (bool global_init_p)
2930 ssize_t r, m, c;
2931 int align64;
2932 int align32;
2934 /* Precalculate REGNO_REG_CLASS. */
2935 rs6000_regno_regclass[0] = GENERAL_REGS;
2936 for (r = 1; r < 32; ++r)
2937 rs6000_regno_regclass[r] = BASE_REGS;
2939 for (r = 32; r < 64; ++r)
2940 rs6000_regno_regclass[r] = FLOAT_REGS;
2942 for (r = 64; r < FIRST_PSEUDO_REGISTER; ++r)
2943 rs6000_regno_regclass[r] = NO_REGS;
2945 for (r = FIRST_ALTIVEC_REGNO; r <= LAST_ALTIVEC_REGNO; ++r)
2946 rs6000_regno_regclass[r] = ALTIVEC_REGS;
2948 rs6000_regno_regclass[CR0_REGNO] = CR0_REGS;
2949 for (r = CR1_REGNO; r <= CR7_REGNO; ++r)
2950 rs6000_regno_regclass[r] = CR_REGS;
2952 rs6000_regno_regclass[LR_REGNO] = LINK_REGS;
2953 rs6000_regno_regclass[CTR_REGNO] = CTR_REGS;
2954 rs6000_regno_regclass[CA_REGNO] = NO_REGS;
2955 rs6000_regno_regclass[VRSAVE_REGNO] = VRSAVE_REGS;
2956 rs6000_regno_regclass[VSCR_REGNO] = VRSAVE_REGS;
2957 rs6000_regno_regclass[SPE_ACC_REGNO] = SPE_ACC_REGS;
2958 rs6000_regno_regclass[SPEFSCR_REGNO] = SPEFSCR_REGS;
2959 rs6000_regno_regclass[TFHAR_REGNO] = SPR_REGS;
2960 rs6000_regno_regclass[TFIAR_REGNO] = SPR_REGS;
2961 rs6000_regno_regclass[TEXASR_REGNO] = SPR_REGS;
2962 rs6000_regno_regclass[ARG_POINTER_REGNUM] = BASE_REGS;
2963 rs6000_regno_regclass[FRAME_POINTER_REGNUM] = BASE_REGS;
2965 /* Precalculate register class to simpler reload register class. We don't
2966 need all of the register classes that are combinations of different
2967 classes, just the simple ones that have constraint letters. */
2968 for (c = 0; c < N_REG_CLASSES; c++)
2969 reg_class_to_reg_type[c] = NO_REG_TYPE;
2971 reg_class_to_reg_type[(int)GENERAL_REGS] = GPR_REG_TYPE;
2972 reg_class_to_reg_type[(int)BASE_REGS] = GPR_REG_TYPE;
2973 reg_class_to_reg_type[(int)VSX_REGS] = VSX_REG_TYPE;
2974 reg_class_to_reg_type[(int)VRSAVE_REGS] = SPR_REG_TYPE;
2975 reg_class_to_reg_type[(int)VSCR_REGS] = SPR_REG_TYPE;
2976 reg_class_to_reg_type[(int)LINK_REGS] = SPR_REG_TYPE;
2977 reg_class_to_reg_type[(int)CTR_REGS] = SPR_REG_TYPE;
2978 reg_class_to_reg_type[(int)LINK_OR_CTR_REGS] = SPR_REG_TYPE;
2979 reg_class_to_reg_type[(int)CR_REGS] = CR_REG_TYPE;
2980 reg_class_to_reg_type[(int)CR0_REGS] = CR_REG_TYPE;
2981 reg_class_to_reg_type[(int)SPE_ACC_REGS] = SPE_ACC_TYPE;
2982 reg_class_to_reg_type[(int)SPEFSCR_REGS] = SPEFSCR_REG_TYPE;
2984 if (TARGET_VSX)
2986 reg_class_to_reg_type[(int)FLOAT_REGS] = VSX_REG_TYPE;
2987 reg_class_to_reg_type[(int)ALTIVEC_REGS] = VSX_REG_TYPE;
2989 else
2991 reg_class_to_reg_type[(int)FLOAT_REGS] = FPR_REG_TYPE;
2992 reg_class_to_reg_type[(int)ALTIVEC_REGS] = ALTIVEC_REG_TYPE;
2995   /* Precalculate the valid memory formats as well as the vector information;
2996 this must be set up before the rs6000_hard_regno_nregs_internal calls
2997 below. */
2998 gcc_assert ((int)VECTOR_NONE == 0);
2999 memset ((void *) &rs6000_vector_unit[0], '\0', sizeof (rs6000_vector_unit));
3000   memset ((void *) &rs6000_vector_mem[0], '\0', sizeof (rs6000_vector_mem));
3002 gcc_assert ((int)CODE_FOR_nothing == 0);
3003 memset ((void *) &reg_addr[0], '\0', sizeof (reg_addr));
3005 gcc_assert ((int)NO_REGS == 0);
3006 memset ((void *) &rs6000_constraints[0], '\0', sizeof (rs6000_constraints));
3008   /* The VSX hardware allows native alignment for vectors; TARGET_VSX_ALIGN_128
3009      controls whether the compiler assumes that native alignment or still uses 128-bit alignment. */
3010 if (TARGET_VSX && !TARGET_VSX_ALIGN_128)
3012 align64 = 64;
3013 align32 = 32;
3015 else
3017 align64 = 128;
3018 align32 = 128;
3021 /* KF mode (IEEE 128-bit in VSX registers). We do not have arithmetic, so
3022 only set the memory modes. Include TFmode if -mabi=ieeelongdouble. */
3023 if (TARGET_FLOAT128_TYPE)
3025 rs6000_vector_mem[KFmode] = VECTOR_VSX;
3026 rs6000_vector_align[KFmode] = 128;
3028 if (FLOAT128_IEEE_P (TFmode))
3030 rs6000_vector_mem[TFmode] = VECTOR_VSX;
3031 rs6000_vector_align[TFmode] = 128;
3035 /* V2DF mode, VSX only. */
3036 if (TARGET_VSX)
3038 rs6000_vector_unit[V2DFmode] = VECTOR_VSX;
3039 rs6000_vector_mem[V2DFmode] = VECTOR_VSX;
3040 rs6000_vector_align[V2DFmode] = align64;
3043 /* V4SF mode, either VSX or Altivec. */
3044 if (TARGET_VSX)
3046 rs6000_vector_unit[V4SFmode] = VECTOR_VSX;
3047 rs6000_vector_mem[V4SFmode] = VECTOR_VSX;
3048 rs6000_vector_align[V4SFmode] = align32;
3050 else if (TARGET_ALTIVEC)
3052 rs6000_vector_unit[V4SFmode] = VECTOR_ALTIVEC;
3053 rs6000_vector_mem[V4SFmode] = VECTOR_ALTIVEC;
3054 rs6000_vector_align[V4SFmode] = align32;
3057   /* V16QImode, V8HImode, V4SImode have Altivec-only arithmetic, but possibly do
3058      VSX loads and stores. */
3059 if (TARGET_ALTIVEC)
3061 rs6000_vector_unit[V4SImode] = VECTOR_ALTIVEC;
3062 rs6000_vector_unit[V8HImode] = VECTOR_ALTIVEC;
3063 rs6000_vector_unit[V16QImode] = VECTOR_ALTIVEC;
3064 rs6000_vector_align[V4SImode] = align32;
3065 rs6000_vector_align[V8HImode] = align32;
3066 rs6000_vector_align[V16QImode] = align32;
3068 if (TARGET_VSX)
3070 rs6000_vector_mem[V4SImode] = VECTOR_VSX;
3071 rs6000_vector_mem[V8HImode] = VECTOR_VSX;
3072 rs6000_vector_mem[V16QImode] = VECTOR_VSX;
3074 else
3076 rs6000_vector_mem[V4SImode] = VECTOR_ALTIVEC;
3077 rs6000_vector_mem[V8HImode] = VECTOR_ALTIVEC;
3078 rs6000_vector_mem[V16QImode] = VECTOR_ALTIVEC;
3082   /* V2DImode: full arithmetic support depends on the ISA 2.07 vector unit.  Allow
3083      it under VSX to do insert/splat/extract.  Altivec doesn't have 64-bit integer support. */
3084 if (TARGET_VSX)
3086 rs6000_vector_mem[V2DImode] = VECTOR_VSX;
3087 rs6000_vector_unit[V2DImode]
3088 = (TARGET_P8_VECTOR) ? VECTOR_P8_VECTOR : VECTOR_NONE;
3089 rs6000_vector_align[V2DImode] = align64;
3091 rs6000_vector_mem[V1TImode] = VECTOR_VSX;
3092 rs6000_vector_unit[V1TImode]
3093 = (TARGET_P8_VECTOR) ? VECTOR_P8_VECTOR : VECTOR_NONE;
3094 rs6000_vector_align[V1TImode] = 128;
3097 /* DFmode, see if we want to use the VSX unit. Memory is handled
3098 differently, so don't set rs6000_vector_mem. */
3099 if (TARGET_VSX && TARGET_VSX_SCALAR_DOUBLE)
3101 rs6000_vector_unit[DFmode] = VECTOR_VSX;
3102 rs6000_vector_align[DFmode] = 64;
3105 /* SFmode, see if we want to use the VSX unit. */
3106 if (TARGET_P8_VECTOR && TARGET_VSX_SCALAR_FLOAT)
3108 rs6000_vector_unit[SFmode] = VECTOR_VSX;
3109 rs6000_vector_align[SFmode] = 32;
3112   /* Allow TImode in VSX registers and set the VSX memory macros. */
3113 if (TARGET_VSX && TARGET_VSX_TIMODE)
3115 rs6000_vector_mem[TImode] = VECTOR_VSX;
3116 rs6000_vector_align[TImode] = align64;
3119 /* TODO add SPE and paired floating point vector support. */
3121 /* Register class constraints for the constraints that depend on compile
3122 switches. When the VSX code was added, different constraints were added
3123 based on the type (DFmode, V2DFmode, V4SFmode). For the vector types, all
3124 of the VSX registers are used. The register classes for scalar floating
3125      point types are set based on whether we allow that type into the upper
3126 (Altivec) registers. GCC has register classes to target the Altivec
3127 registers for load/store operations, to select using a VSX memory
3128 operation instead of the traditional floating point operation. The
3129 constraints are:
3131 d - Register class to use with traditional DFmode instructions.
3132 f - Register class to use with traditional SFmode instructions.
3133 v - Altivec register.
3134 wa - Any VSX register.
3135 wc - Reserved to represent individual CR bits (used in LLVM).
3136 wd - Preferred register class for V2DFmode.
3137 wf - Preferred register class for V4SFmode.
3138 wg - Float register for power6x move insns.
3139 wh - FP register for direct move instructions.
3140 wi - FP or VSX register to hold 64-bit integers for VSX insns.
3141 wj - FP or VSX register to hold 64-bit integers for direct moves.
3142 wk - FP or VSX register to hold 64-bit doubles for direct moves.
3143 wl - Float register if we can do 32-bit signed int loads.
3144 wm - VSX register for ISA 2.07 direct move operations.
3145 wn - always NO_REGS.
3146 wr - GPR if 64-bit mode is permitted.
3147 ws - Register class to do ISA 2.06 DF operations.
3148 wt - VSX register for TImode in VSX registers.
3149 wu - Altivec register for ISA 2.07 VSX SF/SI load/stores.
3150 wv - Altivec register for ISA 2.06 VSX DF/DI load/stores.
3151 ww - Register class to do SF conversions in with VSX operations.
3152 wx - Float register if we can do 32-bit int stores.
3153 wy - Register class to do ISA 2.07 SF operations.
3154 wz - Float register if we can do 32-bit unsigned int loads.
3155 wH - Altivec register if SImode is allowed in VSX registers.
3156 wI - VSX register if SImode is allowed in VSX registers.
3157 wJ - VSX register if QImode/HImode are allowed in VSX registers.
3158 wK - Altivec register if QImode/HImode are allowed in VSX registers. */
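/* As a hypothetical illustration of how these letters are consumed, a
   machine-description alternative such as

	(match_operand:V2DF 0 "vsx_register_operand" "=wd")

   is only usable when rs6000_constraints[RS6000_CONSTRAINT_wd] has been
   set to a real register class below rather than left as NO_REGS.  */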
3160 if (TARGET_HARD_FLOAT && TARGET_FPRS)
3161 rs6000_constraints[RS6000_CONSTRAINT_f] = FLOAT_REGS; /* SFmode */
3163 if (TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT)
3164 rs6000_constraints[RS6000_CONSTRAINT_d] = FLOAT_REGS; /* DFmode */
3166 if (TARGET_VSX)
3168 rs6000_constraints[RS6000_CONSTRAINT_wa] = VSX_REGS;
3169 rs6000_constraints[RS6000_CONSTRAINT_wd] = VSX_REGS; /* V2DFmode */
3170 rs6000_constraints[RS6000_CONSTRAINT_wf] = VSX_REGS; /* V4SFmode */
3172 if (TARGET_VSX_TIMODE)
3173 rs6000_constraints[RS6000_CONSTRAINT_wt] = VSX_REGS; /* TImode */
3175 if (TARGET_UPPER_REGS_DF) /* DFmode */
3177 rs6000_constraints[RS6000_CONSTRAINT_ws] = VSX_REGS;
3178 rs6000_constraints[RS6000_CONSTRAINT_wv] = ALTIVEC_REGS;
3180 else
3181 rs6000_constraints[RS6000_CONSTRAINT_ws] = FLOAT_REGS;
3183 if (TARGET_UPPER_REGS_DF) /* DImode */
3184 rs6000_constraints[RS6000_CONSTRAINT_wi] = VSX_REGS;
3185 else
3186 rs6000_constraints[RS6000_CONSTRAINT_wi] = FLOAT_REGS;
3189 /* Add conditional constraints based on various options, to allow us to
3190 collapse multiple insn patterns. */
3191 if (TARGET_ALTIVEC)
3192 rs6000_constraints[RS6000_CONSTRAINT_v] = ALTIVEC_REGS;
3194 if (TARGET_MFPGPR) /* DFmode */
3195 rs6000_constraints[RS6000_CONSTRAINT_wg] = FLOAT_REGS;
3197 if (TARGET_LFIWAX)
3198 rs6000_constraints[RS6000_CONSTRAINT_wl] = FLOAT_REGS; /* DImode */
3200 if (TARGET_DIRECT_MOVE)
3202 rs6000_constraints[RS6000_CONSTRAINT_wh] = FLOAT_REGS;
3203 rs6000_constraints[RS6000_CONSTRAINT_wj] /* DImode */
3204 = rs6000_constraints[RS6000_CONSTRAINT_wi];
3205 rs6000_constraints[RS6000_CONSTRAINT_wk] /* DFmode */
3206 = rs6000_constraints[RS6000_CONSTRAINT_ws];
3207 rs6000_constraints[RS6000_CONSTRAINT_wm] = VSX_REGS;
3210 if (TARGET_POWERPC64)
3211 rs6000_constraints[RS6000_CONSTRAINT_wr] = GENERAL_REGS;
3213 if (TARGET_P8_VECTOR && TARGET_UPPER_REGS_SF) /* SFmode */
3215 rs6000_constraints[RS6000_CONSTRAINT_wu] = ALTIVEC_REGS;
3216 rs6000_constraints[RS6000_CONSTRAINT_wy] = VSX_REGS;
3217 rs6000_constraints[RS6000_CONSTRAINT_ww] = VSX_REGS;
3219 else if (TARGET_P8_VECTOR)
3221 rs6000_constraints[RS6000_CONSTRAINT_wy] = FLOAT_REGS;
3222 rs6000_constraints[RS6000_CONSTRAINT_ww] = FLOAT_REGS;
3224 else if (TARGET_VSX)
3225 rs6000_constraints[RS6000_CONSTRAINT_ww] = FLOAT_REGS;
3227 if (TARGET_STFIWX)
3228 rs6000_constraints[RS6000_CONSTRAINT_wx] = FLOAT_REGS; /* DImode */
3230 if (TARGET_LFIWZX)
3231 rs6000_constraints[RS6000_CONSTRAINT_wz] = FLOAT_REGS; /* DImode */
3233 if (TARGET_FLOAT128_TYPE)
3235 rs6000_constraints[RS6000_CONSTRAINT_wq] = VSX_REGS; /* KFmode */
3236 if (FLOAT128_IEEE_P (TFmode))
3237 rs6000_constraints[RS6000_CONSTRAINT_wp] = VSX_REGS; /* TFmode */
3240 /* Support for new D-form instructions. */
3241 if (TARGET_P9_DFORM_SCALAR)
3242 rs6000_constraints[RS6000_CONSTRAINT_wb] = ALTIVEC_REGS;
3244 /* Support for ISA 3.0 (power9) vectors. */
3245 if (TARGET_P9_VECTOR)
3246 rs6000_constraints[RS6000_CONSTRAINT_wo] = VSX_REGS;
3248 /* Support for new direct moves (ISA 3.0 + 64bit). */
3249 if (TARGET_DIRECT_MOVE_128)
3250 rs6000_constraints[RS6000_CONSTRAINT_we] = VSX_REGS;
3252 /* Support small integers in VSX registers. */
3253 if (TARGET_VSX_SMALL_INTEGER)
3255 rs6000_constraints[RS6000_CONSTRAINT_wH] = ALTIVEC_REGS;
3256 rs6000_constraints[RS6000_CONSTRAINT_wI] = FLOAT_REGS;
3257 if (TARGET_P9_VECTOR)
3259 rs6000_constraints[RS6000_CONSTRAINT_wJ] = FLOAT_REGS;
3260 rs6000_constraints[RS6000_CONSTRAINT_wK] = ALTIVEC_REGS;
3264 /* Set up the reload helper and direct move functions. */
3265 if (TARGET_VSX || TARGET_ALTIVEC)
3267 if (TARGET_64BIT)
3269 reg_addr[V16QImode].reload_store = CODE_FOR_reload_v16qi_di_store;
3270 reg_addr[V16QImode].reload_load = CODE_FOR_reload_v16qi_di_load;
3271 reg_addr[V8HImode].reload_store = CODE_FOR_reload_v8hi_di_store;
3272 reg_addr[V8HImode].reload_load = CODE_FOR_reload_v8hi_di_load;
3273 reg_addr[V4SImode].reload_store = CODE_FOR_reload_v4si_di_store;
3274 reg_addr[V4SImode].reload_load = CODE_FOR_reload_v4si_di_load;
3275 reg_addr[V2DImode].reload_store = CODE_FOR_reload_v2di_di_store;
3276 reg_addr[V2DImode].reload_load = CODE_FOR_reload_v2di_di_load;
3277 reg_addr[V1TImode].reload_store = CODE_FOR_reload_v1ti_di_store;
3278 reg_addr[V1TImode].reload_load = CODE_FOR_reload_v1ti_di_load;
3279 reg_addr[V4SFmode].reload_store = CODE_FOR_reload_v4sf_di_store;
3280 reg_addr[V4SFmode].reload_load = CODE_FOR_reload_v4sf_di_load;
3281 reg_addr[V2DFmode].reload_store = CODE_FOR_reload_v2df_di_store;
3282 reg_addr[V2DFmode].reload_load = CODE_FOR_reload_v2df_di_load;
3283 reg_addr[DFmode].reload_store = CODE_FOR_reload_df_di_store;
3284 reg_addr[DFmode].reload_load = CODE_FOR_reload_df_di_load;
3285 reg_addr[DDmode].reload_store = CODE_FOR_reload_dd_di_store;
3286 reg_addr[DDmode].reload_load = CODE_FOR_reload_dd_di_load;
3287 reg_addr[SFmode].reload_store = CODE_FOR_reload_sf_di_store;
3288 reg_addr[SFmode].reload_load = CODE_FOR_reload_sf_di_load;
3290 if (FLOAT128_VECTOR_P (KFmode))
3292 reg_addr[KFmode].reload_store = CODE_FOR_reload_kf_di_store;
3293 reg_addr[KFmode].reload_load = CODE_FOR_reload_kf_di_load;
3296 if (FLOAT128_VECTOR_P (TFmode))
3298 reg_addr[TFmode].reload_store = CODE_FOR_reload_tf_di_store;
3299 reg_addr[TFmode].reload_load = CODE_FOR_reload_tf_di_load;
3302 /* Only provide a reload handler for SDmode if lfiwzx/stfiwx are
3303 available. */
3304 if (TARGET_NO_SDMODE_STACK)
3306 reg_addr[SDmode].reload_store = CODE_FOR_reload_sd_di_store;
3307 reg_addr[SDmode].reload_load = CODE_FOR_reload_sd_di_load;
3310 if (TARGET_VSX_TIMODE)
3312 reg_addr[TImode].reload_store = CODE_FOR_reload_ti_di_store;
3313 reg_addr[TImode].reload_load = CODE_FOR_reload_ti_di_load;
3316 if (TARGET_DIRECT_MOVE && !TARGET_DIRECT_MOVE_128)
3318 reg_addr[TImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxti;
3319 reg_addr[V1TImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv1ti;
3320 reg_addr[V2DFmode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv2df;
3321 reg_addr[V2DImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv2di;
3322 reg_addr[V4SFmode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv4sf;
3323 reg_addr[V4SImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv4si;
3324 reg_addr[V8HImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv8hi;
3325 reg_addr[V16QImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv16qi;
3326 reg_addr[SFmode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxsf;
3328 reg_addr[TImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprti;
3329 reg_addr[V1TImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv1ti;
3330 reg_addr[V2DFmode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv2df;
3331 reg_addr[V2DImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv2di;
3332 reg_addr[V4SFmode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv4sf;
3333 reg_addr[V4SImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv4si;
3334 reg_addr[V8HImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv8hi;
3335 reg_addr[V16QImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv16qi;
3336 reg_addr[SFmode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprsf;
3338 if (FLOAT128_VECTOR_P (KFmode))
3340 reg_addr[KFmode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxkf;
3341 reg_addr[KFmode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprkf;
3344 if (FLOAT128_VECTOR_P (TFmode))
3346 reg_addr[TFmode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxtf;
3347 reg_addr[TFmode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprtf;
3351 else
3353 reg_addr[V16QImode].reload_store = CODE_FOR_reload_v16qi_si_store;
3354 reg_addr[V16QImode].reload_load = CODE_FOR_reload_v16qi_si_load;
3355 reg_addr[V8HImode].reload_store = CODE_FOR_reload_v8hi_si_store;
3356 reg_addr[V8HImode].reload_load = CODE_FOR_reload_v8hi_si_load;
3357 reg_addr[V4SImode].reload_store = CODE_FOR_reload_v4si_si_store;
3358 reg_addr[V4SImode].reload_load = CODE_FOR_reload_v4si_si_load;
3359 reg_addr[V2DImode].reload_store = CODE_FOR_reload_v2di_si_store;
3360 reg_addr[V2DImode].reload_load = CODE_FOR_reload_v2di_si_load;
3361 reg_addr[V1TImode].reload_store = CODE_FOR_reload_v1ti_si_store;
3362 reg_addr[V1TImode].reload_load = CODE_FOR_reload_v1ti_si_load;
3363 reg_addr[V4SFmode].reload_store = CODE_FOR_reload_v4sf_si_store;
3364 reg_addr[V4SFmode].reload_load = CODE_FOR_reload_v4sf_si_load;
3365 reg_addr[V2DFmode].reload_store = CODE_FOR_reload_v2df_si_store;
3366 reg_addr[V2DFmode].reload_load = CODE_FOR_reload_v2df_si_load;
3367 reg_addr[DFmode].reload_store = CODE_FOR_reload_df_si_store;
3368 reg_addr[DFmode].reload_load = CODE_FOR_reload_df_si_load;
3369 reg_addr[DDmode].reload_store = CODE_FOR_reload_dd_si_store;
3370 reg_addr[DDmode].reload_load = CODE_FOR_reload_dd_si_load;
3371 reg_addr[SFmode].reload_store = CODE_FOR_reload_sf_si_store;
3372 reg_addr[SFmode].reload_load = CODE_FOR_reload_sf_si_load;
3374 if (FLOAT128_VECTOR_P (KFmode))
3376 reg_addr[KFmode].reload_store = CODE_FOR_reload_kf_si_store;
3377 reg_addr[KFmode].reload_load = CODE_FOR_reload_kf_si_load;
3380 if (FLOAT128_IEEE_P (TFmode))
3382 reg_addr[TFmode].reload_store = CODE_FOR_reload_tf_si_store;
3383 reg_addr[TFmode].reload_load = CODE_FOR_reload_tf_si_load;
3386 /* Only provide a reload handler for SDmode if lfiwzx/stfiwx are
3387 available. */
3388 if (TARGET_NO_SDMODE_STACK)
3390 reg_addr[SDmode].reload_store = CODE_FOR_reload_sd_si_store;
3391 reg_addr[SDmode].reload_load = CODE_FOR_reload_sd_si_load;
3394 if (TARGET_VSX_TIMODE)
3396 reg_addr[TImode].reload_store = CODE_FOR_reload_ti_si_store;
3397 reg_addr[TImode].reload_load = CODE_FOR_reload_ti_si_load;
3400 if (TARGET_DIRECT_MOVE)
3402 reg_addr[DImode].reload_fpr_gpr = CODE_FOR_reload_fpr_from_gprdi;
3403 reg_addr[DDmode].reload_fpr_gpr = CODE_FOR_reload_fpr_from_gprdd;
3404 reg_addr[DFmode].reload_fpr_gpr = CODE_FOR_reload_fpr_from_gprdf;
3408 if (TARGET_UPPER_REGS_DF)
3409 reg_addr[DFmode].scalar_in_vmx_p = true;
3411 if (TARGET_UPPER_REGS_DI)
3412 reg_addr[DImode].scalar_in_vmx_p = true;
3414 if (TARGET_UPPER_REGS_SF)
3415 reg_addr[SFmode].scalar_in_vmx_p = true;
3417 if (TARGET_VSX_SMALL_INTEGER)
3419 reg_addr[SImode].scalar_in_vmx_p = true;
3420 if (TARGET_P9_VECTOR)
3422 reg_addr[HImode].scalar_in_vmx_p = true;
3423 reg_addr[QImode].scalar_in_vmx_p = true;
3428 /* Setup the fusion operations. */
3429 if (TARGET_P8_FUSION)
3431 reg_addr[QImode].fusion_gpr_ld = CODE_FOR_fusion_gpr_load_qi;
3432 reg_addr[HImode].fusion_gpr_ld = CODE_FOR_fusion_gpr_load_hi;
3433 reg_addr[SImode].fusion_gpr_ld = CODE_FOR_fusion_gpr_load_si;
3434 if (TARGET_64BIT)
3435 reg_addr[DImode].fusion_gpr_ld = CODE_FOR_fusion_gpr_load_di;
3438 if (TARGET_P9_FUSION)
3440 struct fuse_insns {
3441 enum machine_mode mode; /* mode of the fused type. */
3442 enum machine_mode pmode; /* pointer mode. */
3443 enum rs6000_reload_reg_type rtype; /* register type. */
3444 enum insn_code load; /* load insn. */
3445 enum insn_code store; /* store insn. */
3448 static const struct fuse_insns addis_insns[] = {
3449 { SFmode, DImode, RELOAD_REG_FPR,
3450 CODE_FOR_fusion_vsx_di_sf_load,
3451 CODE_FOR_fusion_vsx_di_sf_store },
3453 { SFmode, SImode, RELOAD_REG_FPR,
3454 CODE_FOR_fusion_vsx_si_sf_load,
3455 CODE_FOR_fusion_vsx_si_sf_store },
3457 { DFmode, DImode, RELOAD_REG_FPR,
3458 CODE_FOR_fusion_vsx_di_df_load,
3459 CODE_FOR_fusion_vsx_di_df_store },
3461 { DFmode, SImode, RELOAD_REG_FPR,
3462 CODE_FOR_fusion_vsx_si_df_load,
3463 CODE_FOR_fusion_vsx_si_df_store },
3465 { DImode, DImode, RELOAD_REG_FPR,
3466 CODE_FOR_fusion_vsx_di_di_load,
3467 CODE_FOR_fusion_vsx_di_di_store },
3469 { DImode, SImode, RELOAD_REG_FPR,
3470 CODE_FOR_fusion_vsx_si_di_load,
3471 CODE_FOR_fusion_vsx_si_di_store },
3473 { QImode, DImode, RELOAD_REG_GPR,
3474 CODE_FOR_fusion_gpr_di_qi_load,
3475 CODE_FOR_fusion_gpr_di_qi_store },
3477 { QImode, SImode, RELOAD_REG_GPR,
3478 CODE_FOR_fusion_gpr_si_qi_load,
3479 CODE_FOR_fusion_gpr_si_qi_store },
3481 { HImode, DImode, RELOAD_REG_GPR,
3482 CODE_FOR_fusion_gpr_di_hi_load,
3483 CODE_FOR_fusion_gpr_di_hi_store },
3485 { HImode, SImode, RELOAD_REG_GPR,
3486 CODE_FOR_fusion_gpr_si_hi_load,
3487 CODE_FOR_fusion_gpr_si_hi_store },
3489 { SImode, DImode, RELOAD_REG_GPR,
3490 CODE_FOR_fusion_gpr_di_si_load,
3491 CODE_FOR_fusion_gpr_di_si_store },
3493 { SImode, SImode, RELOAD_REG_GPR,
3494 CODE_FOR_fusion_gpr_si_si_load,
3495 CODE_FOR_fusion_gpr_si_si_store },
3497 { SFmode, DImode, RELOAD_REG_GPR,
3498 CODE_FOR_fusion_gpr_di_sf_load,
3499 CODE_FOR_fusion_gpr_di_sf_store },
3501 { SFmode, SImode, RELOAD_REG_GPR,
3502 CODE_FOR_fusion_gpr_si_sf_load,
3503 CODE_FOR_fusion_gpr_si_sf_store },
3505 { DImode, DImode, RELOAD_REG_GPR,
3506 CODE_FOR_fusion_gpr_di_di_load,
3507 CODE_FOR_fusion_gpr_di_di_store },
3509 { DFmode, DImode, RELOAD_REG_GPR,
3510 CODE_FOR_fusion_gpr_di_df_load,
3511 CODE_FOR_fusion_gpr_di_df_store },
3514 enum machine_mode cur_pmode = Pmode;
3515 size_t i;
3517 for (i = 0; i < ARRAY_SIZE (addis_insns); i++)
3519 enum machine_mode xmode = addis_insns[i].mode;
3520 enum rs6000_reload_reg_type rtype = addis_insns[i].rtype;
3522 if (addis_insns[i].pmode != cur_pmode)
3523 continue;
3525 if (rtype == RELOAD_REG_FPR
3526 && (!TARGET_HARD_FLOAT || !TARGET_FPRS))
3527 continue;
3529 reg_addr[xmode].fusion_addis_ld[rtype] = addis_insns[i].load;
3530 reg_addr[xmode].fusion_addis_st[rtype] = addis_insns[i].store;
3532 if (rtype == RELOAD_REG_FPR && TARGET_P9_DFORM_SCALAR)
3534 reg_addr[xmode].fusion_addis_ld[RELOAD_REG_VMX]
3535 = addis_insns[i].load;
3536 reg_addr[xmode].fusion_addis_st[RELOAD_REG_VMX]
3537 = addis_insns[i].store;
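/* A worked sketch of the table-driven registration above: on a 64-bit
   target Pmode is DImode, so only the DImode-pmode rows survive the
   cur_pmode filter.  The { SFmode, DImode, RELOAD_REG_FPR, ... } row then
   installs CODE_FOR_fusion_vsx_di_sf_load/_store in the FPR slots for
   SFmode, and with -mpower9-dform-scalar the same insns are mirrored into
   the RELOAD_REG_VMX slots.  */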
3542 /* Note which types support fusing a TOC setup with a memory insn. We only
3543 do fused TOCs for medium/large code models. */
3544 if (TARGET_P8_FUSION && TARGET_TOC_FUSION && TARGET_POWERPC64
3545 && (TARGET_CMODEL != CMODEL_SMALL))
3547 reg_addr[QImode].fused_toc = true;
3548 reg_addr[HImode].fused_toc = true;
3549 reg_addr[SImode].fused_toc = true;
3550 reg_addr[DImode].fused_toc = true;
3551 if (TARGET_HARD_FLOAT && TARGET_FPRS)
3553 if (TARGET_SINGLE_FLOAT)
3554 reg_addr[SFmode].fused_toc = true;
3555 if (TARGET_DOUBLE_FLOAT)
3556 reg_addr[DFmode].fused_toc = true;
3560 /* Precalculate HARD_REGNO_NREGS. */
3561 for (r = 0; r < FIRST_PSEUDO_REGISTER; ++r)
3562 for (m = 0; m < NUM_MACHINE_MODES; ++m)
3563 rs6000_hard_regno_nregs[m][r]
3564 = rs6000_hard_regno_nregs_internal (r, (machine_mode)m);
3566 /* Precalculate HARD_REGNO_MODE_OK. */
3567 for (r = 0; r < FIRST_PSEUDO_REGISTER; ++r)
3568 for (m = 0; m < NUM_MACHINE_MODES; ++m)
3569 if (rs6000_hard_regno_mode_ok (r, (machine_mode)m))
3570 rs6000_hard_regno_mode_ok_p[m][r] = true;
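/* These precalculated tables are presumably what the HARD_REGNO_NREGS and
   HARD_REGNO_MODE_OK macros consult, turning each per-query computation
   into a simple array lookup done once per (mode, register) pair here.  */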
3572 /* Precalculate CLASS_MAX_NREGS sizes. */
3573 for (c = 0; c < LIM_REG_CLASSES; ++c)
3575 int reg_size;
3577 if (TARGET_VSX && VSX_REG_CLASS_P (c))
3578 reg_size = UNITS_PER_VSX_WORD;
3580 else if (c == ALTIVEC_REGS)
3581 reg_size = UNITS_PER_ALTIVEC_WORD;
3583 else if (c == FLOAT_REGS)
3584 reg_size = UNITS_PER_FP_WORD;
3586 else
3587 reg_size = UNITS_PER_WORD;
3589 for (m = 0; m < NUM_MACHINE_MODES; ++m)
3591 machine_mode m2 = (machine_mode)m;
3592 int reg_size2 = reg_size;
3594 /* TDmode & IBM 128-bit floating point always take 2 registers, even
3595 in VSX. */
3596 if (TARGET_VSX && VSX_REG_CLASS_P (c) && FLOAT128_2REG_P (m))
3597 reg_size2 = UNITS_PER_FP_WORD;
3599 rs6000_class_max_nregs[m][c]
3600 = (GET_MODE_SIZE (m2) + reg_size2 - 1) / reg_size2;
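/* The expression above is a ceiling division.  As a worked example for a
   16-byte mode: with reg_size2 == 16 (a VSX class) it gives
   (16 + 15) / 16 == 1 register, while the FLOAT128_2REG_P override to
   UNITS_PER_FP_WORD == 8 gives (16 + 7) / 8 == 2 registers.  */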
3604 if (TARGET_E500_DOUBLE)
3605 rs6000_class_max_nregs[DFmode][GENERAL_REGS] = 1;
3607 /* Calculate which modes to automatically generate code to use the
3608 reciprocal divide and square root instructions. In the future, possibly
3609 automatically generate the instructions even if the user did not specify
3610 -mrecip. The older machines' double precision reciprocal sqrt estimate is
3611 not accurate enough. */
3612 memset (rs6000_recip_bits, 0, sizeof (rs6000_recip_bits));
3613 if (TARGET_FRES)
3614 rs6000_recip_bits[SFmode] = RS6000_RECIP_MASK_HAVE_RE;
3615 if (TARGET_FRE)
3616 rs6000_recip_bits[DFmode] = RS6000_RECIP_MASK_HAVE_RE;
3617 if (VECTOR_UNIT_ALTIVEC_OR_VSX_P (V4SFmode))
3618 rs6000_recip_bits[V4SFmode] = RS6000_RECIP_MASK_HAVE_RE;
3619 if (VECTOR_UNIT_VSX_P (V2DFmode))
3620 rs6000_recip_bits[V2DFmode] = RS6000_RECIP_MASK_HAVE_RE;
3622 if (TARGET_FRSQRTES)
3623 rs6000_recip_bits[SFmode] |= RS6000_RECIP_MASK_HAVE_RSQRTE;
3624 if (TARGET_FRSQRTE)
3625 rs6000_recip_bits[DFmode] |= RS6000_RECIP_MASK_HAVE_RSQRTE;
3626 if (VECTOR_UNIT_ALTIVEC_OR_VSX_P (V4SFmode))
3627 rs6000_recip_bits[V4SFmode] |= RS6000_RECIP_MASK_HAVE_RSQRTE;
3628 if (VECTOR_UNIT_VSX_P (V2DFmode))
3629 rs6000_recip_bits[V2DFmode] |= RS6000_RECIP_MASK_HAVE_RSQRTE;
3631 if (rs6000_recip_control)
3633 if (!flag_finite_math_only)
3634 warning (0, "-mrecip requires -ffinite-math or -ffast-math");
3635 if (flag_trapping_math)
3636 warning (0, "-mrecip requires -fno-trapping-math or -ffast-math");
3637 if (!flag_reciprocal_math)
3638 warning (0, "-mrecip requires -freciprocal-math or -ffast-math");
3639 if (flag_finite_math_only && !flag_trapping_math && flag_reciprocal_math)
3641 if (RS6000_RECIP_HAVE_RE_P (SFmode)
3642 && (rs6000_recip_control & RECIP_SF_DIV) != 0)
3643 rs6000_recip_bits[SFmode] |= RS6000_RECIP_MASK_AUTO_RE;
3645 if (RS6000_RECIP_HAVE_RE_P (DFmode)
3646 && (rs6000_recip_control & RECIP_DF_DIV) != 0)
3647 rs6000_recip_bits[DFmode] |= RS6000_RECIP_MASK_AUTO_RE;
3649 if (RS6000_RECIP_HAVE_RE_P (V4SFmode)
3650 && (rs6000_recip_control & RECIP_V4SF_DIV) != 0)
3651 rs6000_recip_bits[V4SFmode] |= RS6000_RECIP_MASK_AUTO_RE;
3653 if (RS6000_RECIP_HAVE_RE_P (V2DFmode)
3654 && (rs6000_recip_control & RECIP_V2DF_DIV) != 0)
3655 rs6000_recip_bits[V2DFmode] |= RS6000_RECIP_MASK_AUTO_RE;
3657 if (RS6000_RECIP_HAVE_RSQRTE_P (SFmode)
3658 && (rs6000_recip_control & RECIP_SF_RSQRT) != 0)
3659 rs6000_recip_bits[SFmode] |= RS6000_RECIP_MASK_AUTO_RSQRTE;
3661 if (RS6000_RECIP_HAVE_RSQRTE_P (DFmode)
3662 && (rs6000_recip_control & RECIP_DF_RSQRT) != 0)
3663 rs6000_recip_bits[DFmode] |= RS6000_RECIP_MASK_AUTO_RSQRTE;
3665 if (RS6000_RECIP_HAVE_RSQRTE_P (V4SFmode)
3666 && (rs6000_recip_control & RECIP_V4SF_RSQRT) != 0)
3667 rs6000_recip_bits[V4SFmode] |= RS6000_RECIP_MASK_AUTO_RSQRTE;
3669 if (RS6000_RECIP_HAVE_RSQRTE_P (V2DFmode)
3670 && (rs6000_recip_control & RECIP_V2DF_RSQRT) != 0)
3671 rs6000_recip_bits[V2DFmode] |= RS6000_RECIP_MASK_AUTO_RSQRTE;
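/* Summarizing the two-level scheme above: the RS6000_RECIP_MASK_HAVE_*
   bits record that the estimate instructions exist at all, while the
   RS6000_RECIP_MASK_AUTO_* bits are set only when -mrecip requested them
   and the finite-math / no-trapping-math / reciprocal-math prerequisites
   checked below all hold.  */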
3675 /* Update the addr mask bits in reg_addr to help secondary reload and the
3676 legitimate address support figure out the appropriate addressing to
3677 use. */
3678 rs6000_setup_reg_addr_masks ();
3680 if (global_init_p || TARGET_DEBUG_TARGET)
3682 if (TARGET_DEBUG_REG)
3683 rs6000_debug_reg_global ();
3685 if (TARGET_DEBUG_COST || TARGET_DEBUG_REG)
3686 fprintf (stderr,
3687 "SImode variable mult cost = %d\n"
3688 "SImode constant mult cost = %d\n"
3689 "SImode short constant mult cost = %d\n"
3690 "DImode multipliciation cost = %d\n"
3691 "SImode division cost = %d\n"
3692 "DImode division cost = %d\n"
3693 "Simple fp operation cost = %d\n"
3694 "DFmode multiplication cost = %d\n"
3695 "SFmode division cost = %d\n"
3696 "DFmode division cost = %d\n"
3697 "cache line size = %d\n"
3698 "l1 cache size = %d\n"
3699 "l2 cache size = %d\n"
3700 "simultaneous prefetches = %d\n"
3701 "\n",
3702 rs6000_cost->mulsi,
3703 rs6000_cost->mulsi_const,
3704 rs6000_cost->mulsi_const9,
3705 rs6000_cost->muldi,
3706 rs6000_cost->divsi,
3707 rs6000_cost->divdi,
3708 rs6000_cost->fp,
3709 rs6000_cost->dmul,
3710 rs6000_cost->sdiv,
3711 rs6000_cost->ddiv,
3712 rs6000_cost->cache_line_size,
3713 rs6000_cost->l1_cache_size,
3714 rs6000_cost->l2_cache_size,
3715 rs6000_cost->simultaneous_prefetches);
3719 #if TARGET_MACHO
3720 /* The Darwin version of SUBTARGET_OVERRIDE_OPTIONS. */
3722 static void
3723 darwin_rs6000_override_options (void)
3725 /* The Darwin ABI always includes AltiVec; it can't be (validly) turned
3726 off. */
3727 rs6000_altivec_abi = 1;
3728 TARGET_ALTIVEC_VRSAVE = 1;
3729 rs6000_current_abi = ABI_DARWIN;
3731 if (DEFAULT_ABI == ABI_DARWIN
3732 && TARGET_64BIT)
3733 darwin_one_byte_bool = 1;
3735 if (TARGET_64BIT && ! TARGET_POWERPC64)
3737 rs6000_isa_flags |= OPTION_MASK_POWERPC64;
3738 warning (0, "-m64 requires PowerPC64 architecture, enabling");
3740 if (flag_mkernel)
3742 rs6000_default_long_calls = 1;
3743 rs6000_isa_flags |= OPTION_MASK_SOFT_FLOAT;
3746 /* Make -m64 imply -maltivec. Darwin's 64-bit ABI includes
3747 AltiVec. */
3748 if (!flag_mkernel && !flag_apple_kext
3749 && TARGET_64BIT
3750 && ! (rs6000_isa_flags_explicit & OPTION_MASK_ALTIVEC))
3751 rs6000_isa_flags |= OPTION_MASK_ALTIVEC;
3753 /* Unless the user (not the configurer) has explicitly overridden
3754 it with -mcpu=G3 or -mno-altivec, 10.5+ targets default to
3755 G4 unless targeting the kernel. */
3756 if (!flag_mkernel
3757 && !flag_apple_kext
3758 && strverscmp (darwin_macosx_version_min, "10.5") >= 0
3759 && ! (rs6000_isa_flags_explicit & OPTION_MASK_ALTIVEC)
3760 && ! global_options_set.x_rs6000_cpu_index)
3762 rs6000_isa_flags |= OPTION_MASK_ALTIVEC;
3765 #endif
3767 /* If not otherwise specified by a target, make 'long double' equivalent to
3768 'double'. */
3770 #ifndef RS6000_DEFAULT_LONG_DOUBLE_SIZE
3771 #define RS6000_DEFAULT_LONG_DOUBLE_SIZE 64
3772 #endif
3774 /* Return the builtin mask for the various options that could affect which
3775 builtins are available. In the past we used target_flags, but we've run
3776 out of bits, and some options like SPE and PAIRED are no longer in
3777 target_flags. */
3779 HOST_WIDE_INT
3780 rs6000_builtin_mask_calculate (void)
3782 return (((TARGET_ALTIVEC) ? RS6000_BTM_ALTIVEC : 0)
3783 | ((TARGET_VSX) ? RS6000_BTM_VSX : 0)
3784 | ((TARGET_SPE) ? RS6000_BTM_SPE : 0)
3785 | ((TARGET_PAIRED_FLOAT) ? RS6000_BTM_PAIRED : 0)
3786 | ((TARGET_FRE) ? RS6000_BTM_FRE : 0)
3787 | ((TARGET_FRES) ? RS6000_BTM_FRES : 0)
3788 | ((TARGET_FRSQRTE) ? RS6000_BTM_FRSQRTE : 0)
3789 | ((TARGET_FRSQRTES) ? RS6000_BTM_FRSQRTES : 0)
3790 | ((TARGET_POPCNTD) ? RS6000_BTM_POPCNTD : 0)
3791 | ((rs6000_cpu == PROCESSOR_CELL) ? RS6000_BTM_CELL : 0)
3792 | ((TARGET_P8_VECTOR) ? RS6000_BTM_P8_VECTOR : 0)
3793 | ((TARGET_P9_VECTOR) ? RS6000_BTM_P9_VECTOR : 0)
3794 | ((TARGET_P9_MISC) ? RS6000_BTM_P9_MISC : 0)
3795 | ((TARGET_MODULO) ? RS6000_BTM_MODULO : 0)
3796 | ((TARGET_64BIT) ? RS6000_BTM_64BIT : 0)
3797 | ((TARGET_CRYPTO) ? RS6000_BTM_CRYPTO : 0)
3798 | ((TARGET_HTM) ? RS6000_BTM_HTM : 0)
3799 | ((TARGET_DFP) ? RS6000_BTM_DFP : 0)
3800 | ((TARGET_HARD_FLOAT) ? RS6000_BTM_HARD_FLOAT : 0)
3801 | ((TARGET_LONG_DOUBLE_128) ? RS6000_BTM_LDBL128 : 0)
3802 | ((TARGET_FLOAT128_TYPE) ? RS6000_BTM_FLOAT128 : 0));
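/* A hedged usage sketch, not code from this file: a consumer of the mask
   tests individual feature bits with a bitwise AND.  The helper name below
   is hypothetical.

     HOST_WIDE_INT mask = rs6000_builtin_mask_calculate ();
     if ((mask & RS6000_BTM_ALTIVEC) != 0)
       enable_altivec_builtins ();   <- hypothetical consumer

   The builtin machinery records the result in rs6000_builtin_mask and
   compares it against the mask stored with each builtin.  */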
3805 /* Implement TARGET_MD_ASM_ADJUST. All asm statements are considered
3806 to clobber the XER[CA] bit because clobbering that bit without telling
3807 the compiler worked just fine with versions of GCC before GCC 5, and
3808 breaking a lot of older code in ways that are hard to track down is
3809 not such a great idea. */
3811 static rtx_insn *
3812 rs6000_md_asm_adjust (vec<rtx> &/*outputs*/, vec<rtx> &/*inputs*/,
3813 vec<const char *> &/*constraints*/,
3814 vec<rtx> &clobbers, HARD_REG_SET &clobbered_regs)
3816 clobbers.safe_push (gen_rtx_REG (SImode, CA_REGNO));
3817 SET_HARD_REG_BIT (clobbered_regs, CA_REGNO);
3818 return NULL;
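/* The net effect of the hook above: every asm behaves as if it had listed
   the carry bit among its clobbers.  A minimal sketch of the explicit
   equivalent, assuming the "xer" register name is accepted in a clobber
   list on this target:

     asm ("addic %0,%1,1" : "=r" (out) : "r" (in) : "xer");

   The hook simply makes that clobber implicit for every asm.  */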
3821 /* Override command line options. Mostly we process the processor type and
3822 sometimes adjust other TARGET_ options. */
3824 static bool
3825 rs6000_option_override_internal (bool global_init_p)
3827 bool ret = true;
3828 bool have_cpu = false;
3830 /* The default cpu requested at configure time, if any. */
3831 const char *implicit_cpu = OPTION_TARGET_CPU_DEFAULT;
3833 HOST_WIDE_INT set_masks;
3834 int cpu_index;
3835 int tune_index;
3836 struct cl_target_option *main_target_opt
3837 = ((global_init_p || target_option_default_node == NULL)
3838 ? NULL : TREE_TARGET_OPTION (target_option_default_node));
3840 /* Print defaults. */
3841 if ((TARGET_DEBUG_REG || TARGET_DEBUG_TARGET) && global_init_p)
3842 rs6000_print_isa_options (stderr, 0, "TARGET_DEFAULT", TARGET_DEFAULT);
3844 /* Remember the explicit arguments. */
3845 if (global_init_p)
3846 rs6000_isa_flags_explicit = global_options_set.x_rs6000_isa_flags;
3848 /* On 64-bit Darwin, power alignment is ABI-incompatible with some C
3849 library functions, so warn about it. The flag may be useful for
3850 performance studies from time to time though, so don't disable it
3851 entirely. */
3852 if (global_options_set.x_rs6000_alignment_flags
3853 && rs6000_alignment_flags == MASK_ALIGN_POWER
3854 && DEFAULT_ABI == ABI_DARWIN
3855 && TARGET_64BIT)
3856 warning (0, "-malign-power is not supported for 64-bit Darwin;"
3857 " it is incompatible with the installed C and C++ libraries");
3859 /* Numerous experiments show that IRA-based loop pressure
3860 calculation works better for RTL loop invariant motion on targets
3861 with enough (>= 32) registers. It is an expensive optimization,
3862 so it is on only for peak performance. */
3863 if (optimize >= 3 && global_init_p
3864 && !global_options_set.x_flag_ira_loop_pressure)
3865 flag_ira_loop_pressure = 1;
3867 /* -fsanitize=address needs to turn on -fasynchronous-unwind-tables in order
3868 for tracebacks to be complete, but not if an -fasynchronous-unwind-tables
3869 option was already specified. */
3870 if (flag_sanitize & SANITIZE_USER_ADDRESS
3871 && !global_options_set.x_flag_asynchronous_unwind_tables)
3872 flag_asynchronous_unwind_tables = 1;
3874 /* Set the pointer size. */
3875 if (TARGET_64BIT)
3877 rs6000_pmode = (int)DImode;
3878 rs6000_pointer_size = 64;
3880 else
3882 rs6000_pmode = (int)SImode;
3883 rs6000_pointer_size = 32;
3886 /* Some OSs don't support saving the high part of 64-bit registers on context
3887 switch. Other OSs don't support saving Altivec registers. On those OSs,
3888 we don't touch the OPTION_MASK_POWERPC64 or OPTION_MASK_ALTIVEC settings;
3889 if the user wants either, the user must explicitly specify them and we
3890 won't interfere with the user's specification. */
3892 set_masks = POWERPC_MASKS;
3893 #ifdef OS_MISSING_POWERPC64
3894 if (OS_MISSING_POWERPC64)
3895 set_masks &= ~OPTION_MASK_POWERPC64;
3896 #endif
3897 #ifdef OS_MISSING_ALTIVEC
3898 if (OS_MISSING_ALTIVEC)
3899 set_masks &= ~(OPTION_MASK_ALTIVEC | OPTION_MASK_VSX);
3900 #endif
3902 /* Don't let the processor default override flags given explicitly. */
3903 set_masks &= ~rs6000_isa_flags_explicit;
3905 /* Process the -mcpu=<xxx> and -mtune=<xxx> arguments. If the user changed
3906 the cpu in a target attribute or pragma, but did not specify a tuning
3907 option, use the cpu for the tuning option rather than the option specified
3908 with -mtune on the command line. Process a '--with-cpu' configuration
3909 request as an implicit -mcpu. */
3910 if (rs6000_cpu_index >= 0)
3912 cpu_index = rs6000_cpu_index;
3913 have_cpu = true;
3915 else if (main_target_opt != NULL && main_target_opt->x_rs6000_cpu_index >= 0)
3917 rs6000_cpu_index = cpu_index = main_target_opt->x_rs6000_cpu_index;
3918 have_cpu = true;
3920 else if (implicit_cpu)
3922 rs6000_cpu_index = cpu_index = rs6000_cpu_name_lookup (implicit_cpu);
3923 have_cpu = true;
3925 else
3927 /* PowerPC 64-bit LE requires at least ISA 2.07. */
3928 const char *default_cpu = ((!TARGET_POWERPC64)
3929 ? "powerpc"
3930 : ((BYTES_BIG_ENDIAN)
3931 ? "powerpc64"
3932 : "powerpc64le"));
3934 rs6000_cpu_index = cpu_index = rs6000_cpu_name_lookup (default_cpu);
3935 have_cpu = false;
3938 gcc_assert (cpu_index >= 0);
3940 if (have_cpu)
3942 #ifndef HAVE_AS_POWER9
3943 if (processor_target_table[rs6000_cpu_index].processor
3944 == PROCESSOR_POWER9)
3946 have_cpu = false;
3947 warning (0, "will not generate power9 instructions because "
3948 "assembler lacks power9 support");
3950 #endif
3951 #ifndef HAVE_AS_POWER8
3952 if (processor_target_table[rs6000_cpu_index].processor
3953 == PROCESSOR_POWER8)
3955 have_cpu = false;
3956 warning (0, "will not generate power8 instructions because "
3957 "assembler lacks power8 support");
3959 #endif
3960 #ifndef HAVE_AS_POPCNTD
3961 if (processor_target_table[rs6000_cpu_index].processor
3962 == PROCESSOR_POWER7)
3964 have_cpu = false;
3965 warning (0, "will not generate power7 instructions because "
3966 "assembler lacks power7 support");
3968 #endif
3969 #ifndef HAVE_AS_DFP
3970 if (processor_target_table[rs6000_cpu_index].processor
3971 == PROCESSOR_POWER6)
3973 have_cpu = false;
3974 warning (0, "will not generate power6 instructions because "
3975 "assembler lacks power6 support");
3977 #endif
3978 #ifndef HAVE_AS_POPCNTB
3979 if (processor_target_table[rs6000_cpu_index].processor
3980 == PROCESSOR_POWER5)
3982 have_cpu = false;
3983 warning (0, "will not generate power5 instructions because "
3984 "assembler lacks power5 support");
3986 #endif
3988 if (!have_cpu)
3990 /* PowerPC 64-bit LE requires at least ISA 2.07. */
3991 const char *default_cpu = (!TARGET_POWERPC64
3992 ? "powerpc"
3993 : (BYTES_BIG_ENDIAN
3994 ? "powerpc64"
3995 : "powerpc64le"));
3997 rs6000_cpu_index = cpu_index = rs6000_cpu_name_lookup (default_cpu);
4001 /* If we have a cpu, either through an explicit -mcpu=<xxx> or if the
4002 compiler was configured with --with-cpu=<xxx>, replace all of the ISA bits
4003 with those from the cpu, except for options that were explicitly set. If
4004 we don't have a cpu, do not override the target bits set in
4005 TARGET_DEFAULT. */
4006 if (have_cpu)
4008 rs6000_isa_flags &= ~set_masks;
4009 rs6000_isa_flags |= (processor_target_table[cpu_index].target_enable
4010 & set_masks);
4012 else
4014 /* If no -mcpu=<xxx>, inherit any default options that were cleared via
4015 POWERPC_MASKS. Originally, TARGET_DEFAULT was used to initialize
4016 target_flags via the TARGET_DEFAULT_TARGET_FLAGS hook. When we switched
4017 to using rs6000_isa_flags, we need to do the initialization here.
4019 If there is a TARGET_DEFAULT, use that. Otherwise fall back to using
4020 -mcpu=powerpc, -mcpu=powerpc64, or -mcpu=powerpc64le defaults. */
4021 HOST_WIDE_INT flags = ((TARGET_DEFAULT) ? TARGET_DEFAULT
4022 : processor_target_table[cpu_index].target_enable);
4023 rs6000_isa_flags |= (flags & ~rs6000_isa_flags_explicit);
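/* The "& ~rs6000_isa_flags_explicit" idiom above recurs throughout this
   function: it applies default bits while leaving anything the user set
   explicitly (with -m<option> or -mno-<option>) untouched.  For example,
   if the cpu default would turn on OPTION_MASK_VSX but the user passed
   -mno-vsx, the explicit bit filters the default back out.  */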
4026 if (rs6000_tune_index >= 0)
4027 tune_index = rs6000_tune_index;
4028 else if (have_cpu)
4029 rs6000_tune_index = tune_index = cpu_index;
4030 else
4032 size_t i;
4033 enum processor_type tune_proc
4034 = (TARGET_POWERPC64 ? PROCESSOR_DEFAULT64 : PROCESSOR_DEFAULT);
4036 tune_index = -1;
4037 for (i = 0; i < ARRAY_SIZE (processor_target_table); i++)
4038 if (processor_target_table[i].processor == tune_proc)
4040 rs6000_tune_index = tune_index = i;
4041 break;
4045 gcc_assert (tune_index >= 0);
4046 rs6000_cpu = processor_target_table[tune_index].processor;
4048 /* Pick defaults for SPE related control flags. Do this early to make sure
4049 that the TARGET_ macros are representative ASAP. */
4051 int spe_capable_cpu =
4052 (rs6000_cpu == PROCESSOR_PPC8540
4053 || rs6000_cpu == PROCESSOR_PPC8548);
4055 if (!global_options_set.x_rs6000_spe_abi)
4056 rs6000_spe_abi = spe_capable_cpu;
4058 if (!global_options_set.x_rs6000_spe)
4059 rs6000_spe = spe_capable_cpu;
4061 if (!global_options_set.x_rs6000_float_gprs)
4062 rs6000_float_gprs =
4063 (rs6000_cpu == PROCESSOR_PPC8540 ? 1
4064 : rs6000_cpu == PROCESSOR_PPC8548 ? 2
4065 : 0);
4068 if (global_options_set.x_rs6000_spe_abi
4069 && rs6000_spe_abi
4070 && !TARGET_SPE_ABI)
4071 error ("not configured for SPE ABI");
4073 if (global_options_set.x_rs6000_spe
4074 && rs6000_spe
4075 && !TARGET_SPE)
4076 error ("not configured for SPE instruction set");
4078 if (main_target_opt != NULL
4079 && ((main_target_opt->x_rs6000_spe_abi != rs6000_spe_abi)
4080 || (main_target_opt->x_rs6000_spe != rs6000_spe)
4081 || (main_target_opt->x_rs6000_float_gprs != rs6000_float_gprs)))
4082 error ("target attribute or pragma changes SPE ABI");
4084 if (rs6000_cpu == PROCESSOR_PPCE300C2 || rs6000_cpu == PROCESSOR_PPCE300C3
4085 || rs6000_cpu == PROCESSOR_PPCE500MC || rs6000_cpu == PROCESSOR_PPCE500MC64
4086 || rs6000_cpu == PROCESSOR_PPCE5500)
4088 if (TARGET_ALTIVEC)
4089 error ("AltiVec not supported in this target");
4090 if (TARGET_SPE)
4091 error ("SPE not supported in this target");
4093 if (rs6000_cpu == PROCESSOR_PPCE6500)
4095 if (TARGET_SPE)
4096 error ("SPE not supported in this target");
4099 /* Disable Cell microcode if we are optimizing for the Cell
4100 and not optimizing for size. */
4101 if (rs6000_gen_cell_microcode == -1)
4102 rs6000_gen_cell_microcode = !(rs6000_cpu == PROCESSOR_CELL
4103 && !optimize_size);
4105 /* If we are optimizing big endian systems for space and it's OK to
4106 use instructions that would be microcoded on the Cell, use the
4107 load/store multiple and string instructions. */
4108 if (BYTES_BIG_ENDIAN && optimize_size && rs6000_gen_cell_microcode)
4109 rs6000_isa_flags |= ~rs6000_isa_flags_explicit & (OPTION_MASK_MULTIPLE
4110 | OPTION_MASK_STRING);
4112 /* Don't allow -mmultiple or -mstring on little endian systems
4113 unless the cpu is a 750, because the hardware doesn't support the
4114 instructions used in little endian mode and they cause an alignment
4115 trap. The 750 does not cause an alignment trap (except when the
4116 target is unaligned). */
4118 if (!BYTES_BIG_ENDIAN && rs6000_cpu != PROCESSOR_PPC750)
4120 if (TARGET_MULTIPLE)
4122 rs6000_isa_flags &= ~OPTION_MASK_MULTIPLE;
4123 if ((rs6000_isa_flags_explicit & OPTION_MASK_MULTIPLE) != 0)
4124 warning (0, "-mmultiple is not supported on little endian systems");
4127 if (TARGET_STRING)
4129 rs6000_isa_flags &= ~OPTION_MASK_STRING;
4130 if ((rs6000_isa_flags_explicit & OPTION_MASK_STRING) != 0)
4131 warning (0, "-mstring is not supported on little endian systems");
4135 /* If little-endian, default to -mstrict-align on older processors.
4136 Testing for htm matches power8 and later. */
4137 if (!BYTES_BIG_ENDIAN
4138 && !(processor_target_table[tune_index].target_enable & OPTION_MASK_HTM))
4139 rs6000_isa_flags |= ~rs6000_isa_flags_explicit & OPTION_MASK_STRICT_ALIGN;
4141 /* -maltivec={le,be} implies -maltivec. */
4142 if (rs6000_altivec_element_order != 0)
4143 rs6000_isa_flags |= OPTION_MASK_ALTIVEC;
4145 /* Disallow -maltivec=le in big endian mode for now. This is not
4146 known to be useful for anyone. */
4147 if (BYTES_BIG_ENDIAN && rs6000_altivec_element_order == 1)
4149 warning (0, N_("-maltivec=le not allowed for big-endian targets"));
4150 rs6000_altivec_element_order = 0;
4153 /* Add some warnings for VSX. */
4154 if (TARGET_VSX)
4156 const char *msg = NULL;
4157 if (!TARGET_HARD_FLOAT || !TARGET_FPRS
4158 || !TARGET_SINGLE_FLOAT || !TARGET_DOUBLE_FLOAT)
4160 if (rs6000_isa_flags_explicit & OPTION_MASK_VSX)
4161 msg = N_("-mvsx requires hardware floating point");
4162 else
4164 rs6000_isa_flags &= ~ OPTION_MASK_VSX;
4165 rs6000_isa_flags_explicit |= OPTION_MASK_VSX;
4168 else if (TARGET_PAIRED_FLOAT)
4169 msg = N_("-mvsx and -mpaired are incompatible");
4170 else if (TARGET_AVOID_XFORM > 0)
4171 msg = N_("-mvsx needs indexed addressing");
4172 else if (!TARGET_ALTIVEC && (rs6000_isa_flags_explicit
4173 & OPTION_MASK_ALTIVEC))
4175 if (rs6000_isa_flags_explicit & OPTION_MASK_VSX)
4176 msg = N_("-mvsx and -mno-altivec are incompatible");
4177 else
4178 msg = N_("-mno-altivec disables vsx");
4181 if (msg)
4183 warning (0, msg);
4184 rs6000_isa_flags &= ~ OPTION_MASK_VSX;
4185 rs6000_isa_flags_explicit |= OPTION_MASK_VSX;
4189 /* If hard-float/altivec/vsx were explicitly turned off then don't allow
4190 the -mcpu setting to enable options that conflict. */
4191 if ((!TARGET_HARD_FLOAT || !TARGET_ALTIVEC || !TARGET_VSX)
4192 && (rs6000_isa_flags_explicit & (OPTION_MASK_SOFT_FLOAT
4193 | OPTION_MASK_ALTIVEC
4194 | OPTION_MASK_VSX)) != 0)
4195 rs6000_isa_flags &= ~((OPTION_MASK_P8_VECTOR | OPTION_MASK_CRYPTO
4196 | OPTION_MASK_DIRECT_MOVE)
4197 & ~rs6000_isa_flags_explicit);
4199 if (TARGET_DEBUG_REG || TARGET_DEBUG_TARGET)
4200 rs6000_print_isa_options (stderr, 0, "before defaults", rs6000_isa_flags);
4202 /* For the newer switches (vsx, dfp, etc.) set some of the older options,
4203 unless the user explicitly used the -mno-<option> to disable the code. */
4204 if (TARGET_P9_VECTOR || TARGET_MODULO || TARGET_P9_DFORM_SCALAR
4205 || TARGET_P9_DFORM_VECTOR || TARGET_P9_DFORM_BOTH > 0 || TARGET_P9_MINMAX)
4206 rs6000_isa_flags |= (ISA_3_0_MASKS_SERVER & ~rs6000_isa_flags_explicit);
4207 else if (TARGET_P8_VECTOR || TARGET_DIRECT_MOVE || TARGET_CRYPTO)
4208 rs6000_isa_flags |= (ISA_2_7_MASKS_SERVER & ~rs6000_isa_flags_explicit);
4209 else if (TARGET_VSX)
4210 rs6000_isa_flags |= (ISA_2_6_MASKS_SERVER & ~rs6000_isa_flags_explicit);
4211 else if (TARGET_POPCNTD)
4212 rs6000_isa_flags |= (ISA_2_6_MASKS_EMBEDDED & ~rs6000_isa_flags_explicit);
4213 else if (TARGET_DFP)
4214 rs6000_isa_flags |= (ISA_2_5_MASKS_SERVER & ~rs6000_isa_flags_explicit);
4215 else if (TARGET_CMPB)
4216 rs6000_isa_flags |= (ISA_2_5_MASKS_EMBEDDED & ~rs6000_isa_flags_explicit);
4217 else if (TARGET_FPRND)
4218 rs6000_isa_flags |= (ISA_2_4_MASKS & ~rs6000_isa_flags_explicit);
4219 else if (TARGET_POPCNTB)
4220 rs6000_isa_flags |= (ISA_2_2_MASKS & ~rs6000_isa_flags_explicit);
4221 else if (TARGET_ALTIVEC)
4222 rs6000_isa_flags |= (OPTION_MASK_PPC_GFXOPT & ~rs6000_isa_flags_explicit);
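/* The else-if cascade above relies on each ISA_*_MASKS group being defined
   as a superset of the older groups, so matching only the newest enabled
   feature suffices: e.g. -mpower9-vector selects ISA_3_0_MASKS_SERVER,
   which already includes the ISA 2.07 and 2.06 server masks, again minus
   any explicitly set bits.  */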
4224 if (TARGET_CRYPTO && !TARGET_ALTIVEC)
4226 if (rs6000_isa_flags_explicit & OPTION_MASK_CRYPTO)
4227 error ("-mcrypto requires -maltivec");
4228 rs6000_isa_flags &= ~OPTION_MASK_CRYPTO;
4231 if (TARGET_DIRECT_MOVE && !TARGET_VSX)
4233 if (rs6000_isa_flags_explicit & OPTION_MASK_DIRECT_MOVE)
4234 error ("-mdirect-move requires -mvsx");
4235 rs6000_isa_flags &= ~OPTION_MASK_DIRECT_MOVE;
4238 if (TARGET_P8_VECTOR && !TARGET_ALTIVEC)
4240 if (rs6000_isa_flags_explicit & OPTION_MASK_P8_VECTOR)
4241 error ("-mpower8-vector requires -maltivec");
4242 rs6000_isa_flags &= ~OPTION_MASK_P8_VECTOR;
4245 if (TARGET_P8_VECTOR && !TARGET_VSX)
4247 if (rs6000_isa_flags_explicit & OPTION_MASK_P8_VECTOR)
4248 error ("-mpower8-vector requires -mvsx");
4249 rs6000_isa_flags &= ~OPTION_MASK_P8_VECTOR;
4252 if (TARGET_VSX_TIMODE && !TARGET_VSX)
4254 if (rs6000_isa_flags_explicit & OPTION_MASK_VSX_TIMODE)
4255 error ("-mvsx-timode requires -mvsx");
4256 rs6000_isa_flags &= ~OPTION_MASK_VSX_TIMODE;
4259 if (TARGET_DFP && !TARGET_HARD_FLOAT)
4261 if (rs6000_isa_flags_explicit & OPTION_MASK_DFP)
4262 error ("-mhard-dfp requires -mhard-float");
4263 rs6000_isa_flags &= ~OPTION_MASK_DFP;
4266 /* Allow an explicit -mupper-regs to set -mupper-regs-df, -mupper-regs-di,
4267 and -mupper-regs-sf, depending on the cpu, unless the user explicitly also
4268 set the individual option. */
4269 if (TARGET_UPPER_REGS > 0)
4271 if (TARGET_VSX
4272 && !(rs6000_isa_flags_explicit & OPTION_MASK_UPPER_REGS_DF))
4274 rs6000_isa_flags |= OPTION_MASK_UPPER_REGS_DF;
4275 rs6000_isa_flags_explicit |= OPTION_MASK_UPPER_REGS_DF;
4277 if (TARGET_VSX
4278 && !(rs6000_isa_flags_explicit & OPTION_MASK_UPPER_REGS_DI))
4280 rs6000_isa_flags |= OPTION_MASK_UPPER_REGS_DI;
4281 rs6000_isa_flags_explicit |= OPTION_MASK_UPPER_REGS_DI;
4283 if (TARGET_P8_VECTOR
4284 && !(rs6000_isa_flags_explicit & OPTION_MASK_UPPER_REGS_SF))
4286 rs6000_isa_flags |= OPTION_MASK_UPPER_REGS_SF;
4287 rs6000_isa_flags_explicit |= OPTION_MASK_UPPER_REGS_SF;
4290 else if (TARGET_UPPER_REGS == 0)
4292 if (TARGET_VSX
4293 && !(rs6000_isa_flags_explicit & OPTION_MASK_UPPER_REGS_DF))
4295 rs6000_isa_flags &= ~OPTION_MASK_UPPER_REGS_DF;
4296 rs6000_isa_flags_explicit |= OPTION_MASK_UPPER_REGS_DF;
4298 if (TARGET_VSX
4299 && !(rs6000_isa_flags_explicit & OPTION_MASK_UPPER_REGS_DI))
4301 rs6000_isa_flags &= ~OPTION_MASK_UPPER_REGS_DI;
4302 rs6000_isa_flags_explicit |= OPTION_MASK_UPPER_REGS_DI;
4304 if (TARGET_P8_VECTOR
4305 && !(rs6000_isa_flags_explicit & OPTION_MASK_UPPER_REGS_SF))
4307 rs6000_isa_flags &= ~OPTION_MASK_UPPER_REGS_SF;
4308 rs6000_isa_flags_explicit |= OPTION_MASK_UPPER_REGS_SF;
4312 if (TARGET_UPPER_REGS_DF && !TARGET_VSX)
4314 if (rs6000_isa_flags_explicit & OPTION_MASK_UPPER_REGS_DF)
4315 error ("-mupper-regs-df requires -mvsx");
4316 rs6000_isa_flags &= ~OPTION_MASK_UPPER_REGS_DF;
4319 if (TARGET_UPPER_REGS_DI && !TARGET_VSX)
4321 if (rs6000_isa_flags_explicit & OPTION_MASK_UPPER_REGS_DI)
4322 error ("-mupper-regs-di requires -mvsx");
4323 rs6000_isa_flags &= ~OPTION_MASK_UPPER_REGS_DI;
4326 if (TARGET_UPPER_REGS_SF && !TARGET_P8_VECTOR)
4328 if (rs6000_isa_flags_explicit & OPTION_MASK_UPPER_REGS_SF)
4329 error ("-mupper-regs-sf requires -mpower8-vector");
4330 rs6000_isa_flags &= ~OPTION_MASK_UPPER_REGS_SF;
4333 /* The quad memory instructions only work in 64-bit mode. In 32-bit mode,
4334 silently turn off quad memory mode. */
4335 if ((TARGET_QUAD_MEMORY || TARGET_QUAD_MEMORY_ATOMIC) && !TARGET_POWERPC64)
4337 if ((rs6000_isa_flags_explicit & OPTION_MASK_QUAD_MEMORY) != 0)
4338 warning (0, N_("-mquad-memory requires 64-bit mode"));
4340 if ((rs6000_isa_flags_explicit & OPTION_MASK_QUAD_MEMORY_ATOMIC) != 0)
4341 warning (0, N_("-mquad-memory-atomic requires 64-bit mode"));
4343 rs6000_isa_flags &= ~(OPTION_MASK_QUAD_MEMORY
4344 | OPTION_MASK_QUAD_MEMORY_ATOMIC);
4347 /* Non-atomic quad memory loads/stores are disabled for little endian, since
4348 the words are reversed, but atomic operations can still be done by
4349 swapping the words. */
4350 if (TARGET_QUAD_MEMORY && !WORDS_BIG_ENDIAN)
4352 if ((rs6000_isa_flags_explicit & OPTION_MASK_QUAD_MEMORY) != 0)
4353 warning (0, N_("-mquad-memory is not available in little endian mode"));
4355 rs6000_isa_flags &= ~OPTION_MASK_QUAD_MEMORY;
4358 /* Assume that if the user asked for normal quad memory instructions, they
4359 want the atomic versions as well, unless they explicitly told us not to use
4360 quad word atomic instructions. */
4361 if (TARGET_QUAD_MEMORY
4362 && !TARGET_QUAD_MEMORY_ATOMIC
4363 && ((rs6000_isa_flags_explicit & OPTION_MASK_QUAD_MEMORY_ATOMIC) == 0))
4364 rs6000_isa_flags |= OPTION_MASK_QUAD_MEMORY_ATOMIC;
4366 /* Enable power8 fusion if we are tuning for power8, even if we aren't
4367 generating power8 instructions. */
4368 if (!(rs6000_isa_flags_explicit & OPTION_MASK_P8_FUSION))
4369 rs6000_isa_flags |= (processor_target_table[tune_index].target_enable
4370 & OPTION_MASK_P8_FUSION);
4372 /* Setting additional fusion flags turns on base fusion. */
4373 if (!TARGET_P8_FUSION && (TARGET_P8_FUSION_SIGN || TARGET_TOC_FUSION))
4375 if (rs6000_isa_flags_explicit & OPTION_MASK_P8_FUSION)
4377 if (TARGET_P8_FUSION_SIGN)
4378 error ("-mpower8-fusion-sign requires -mpower8-fusion");
4380 if (TARGET_TOC_FUSION)
4381 error ("-mtoc-fusion requires -mpower8-fusion");
4383 rs6000_isa_flags &= ~OPTION_MASK_P8_FUSION;
4385 else
4386 rs6000_isa_flags |= OPTION_MASK_P8_FUSION;
4389 /* Power9 fusion is a superset over power8 fusion. */
4390 if (TARGET_P9_FUSION && !TARGET_P8_FUSION)
4392 if (rs6000_isa_flags_explicit & OPTION_MASK_P8_FUSION)
4394 /* We prefer to not mention undocumented options in
4395 error messages. However, if users have managed to select
4396 power9-fusion without selecting power8-fusion, they
4397 already know about undocumented flags. */
4398 error ("-mpower9-fusion requires -mpower8-fusion");
4399 rs6000_isa_flags &= ~OPTION_MASK_P9_FUSION;
4401 else
4402 rs6000_isa_flags |= OPTION_MASK_P8_FUSION;
4405 /* Enable power9 fusion if we are tuning for power9, even if we aren't
4406 generating power9 instructions. */
4407 if (!(rs6000_isa_flags_explicit & OPTION_MASK_P9_FUSION))
4408 rs6000_isa_flags |= (processor_target_table[tune_index].target_enable
4409 & OPTION_MASK_P9_FUSION);
4411 /* Power8 does not fuse sign extended loads with the addis. If we are
4412 optimizing at high levels for speed, convert a sign extended load into a
4413 zero extending load, and an explicit sign extension. */
4414 if (TARGET_P8_FUSION
4415 && !(rs6000_isa_flags_explicit & OPTION_MASK_P8_FUSION_SIGN)
4416 && optimize_function_for_speed_p (cfun)
4417 && optimize >= 3)
4418 rs6000_isa_flags |= OPTION_MASK_P8_FUSION_SIGN;
4420 /* TOC fusion requires 64-bit and medium/large code model. */
4421 if (TARGET_TOC_FUSION && !TARGET_POWERPC64)
4423 rs6000_isa_flags &= ~OPTION_MASK_TOC_FUSION;
4424 if ((rs6000_isa_flags_explicit & OPTION_MASK_TOC_FUSION) != 0)
4425 warning (0, N_("-mtoc-fusion requires 64-bit"));
4428 if (TARGET_TOC_FUSION && (TARGET_CMODEL == CMODEL_SMALL))
4430 rs6000_isa_flags &= ~OPTION_MASK_TOC_FUSION;
4431 if ((rs6000_isa_flags_explicit & OPTION_MASK_TOC_FUSION) != 0)
4432 warning (0, N_("-mtoc-fusion requires medium/large code model"));
4435 /* Turn on -mtoc-fusion by default if we have power8 fusion and a 64-bit
4436 medium/large code model. */
4437 if (TARGET_P8_FUSION && !TARGET_TOC_FUSION && TARGET_POWERPC64
4438 && (TARGET_CMODEL != CMODEL_SMALL)
4439 && !(rs6000_isa_flags_explicit & OPTION_MASK_TOC_FUSION))
4440 rs6000_isa_flags |= OPTION_MASK_TOC_FUSION;
4442 /* ISA 3.0 vector instructions include ISA 2.07. */
4443 if (TARGET_P9_VECTOR && !TARGET_P8_VECTOR)
4445 /* We prefer to not mention undocumented options in
4446 error messages. However, if users have managed to select
4447 power9-vector without selecting power8-vector, they
4448 already know about undocumented flags. */
4449 if (rs6000_isa_flags_explicit & OPTION_MASK_P8_VECTOR)
4450 error ("-mpower9-vector requires -mpower8-vector");
4451 rs6000_isa_flags &= ~OPTION_MASK_P9_VECTOR;
4454 /* -mpower9-dform turns on both -mpower9-dform-scalar and
4455 -mpower9-dform-vector. */
4456 if (TARGET_P9_DFORM_BOTH > 0)
4458 if (!(rs6000_isa_flags_explicit & OPTION_MASK_P9_DFORM_VECTOR))
4459 rs6000_isa_flags |= OPTION_MASK_P9_DFORM_VECTOR;
4461 if (!(rs6000_isa_flags_explicit & OPTION_MASK_P9_DFORM_SCALAR))
4462 rs6000_isa_flags |= OPTION_MASK_P9_DFORM_SCALAR;
4464 else if (TARGET_P9_DFORM_BOTH == 0)
4466 if (!(rs6000_isa_flags_explicit & OPTION_MASK_P9_DFORM_VECTOR))
4467 rs6000_isa_flags &= ~OPTION_MASK_P9_DFORM_VECTOR;
4469 if (!(rs6000_isa_flags_explicit & OPTION_MASK_P9_DFORM_SCALAR))
4470 rs6000_isa_flags &= ~OPTION_MASK_P9_DFORM_SCALAR;
4473 /* ISA 3.0 D-form instructions require p9-vector and upper-regs. */
4474 if ((TARGET_P9_DFORM_SCALAR || TARGET_P9_DFORM_VECTOR) && !TARGET_P9_VECTOR)
4476 /* We prefer to not mention undocumented options in
4477 error messages. However, if users have managed to select
4478 power9-dform without selecting power9-vector, they
4479 already know about undocumented flags. */
4480 if (rs6000_isa_flags_explicit & OPTION_MASK_P9_VECTOR)
4481 error ("-mpower9-dform requires -mpower9-vector");
4482 rs6000_isa_flags &= ~(OPTION_MASK_P9_DFORM_SCALAR
4483 | OPTION_MASK_P9_DFORM_VECTOR);
4486 if (TARGET_P9_DFORM_SCALAR && !TARGET_UPPER_REGS_DF)
4488 /* We prefer to not mention undocumented options in
4489 error messages. However, if users have managed to select
4490 power9-dform without selecting upper-regs-df, they
4491 already know about undocumented flags. */
4492 if (rs6000_isa_flags_explicit & OPTION_MASK_UPPER_REGS_DF)
4493 error ("-mpower9-dform requires -mupper-regs-df");
4494 rs6000_isa_flags &= ~OPTION_MASK_P9_DFORM_SCALAR;
4497 if (TARGET_P9_DFORM_SCALAR && !TARGET_UPPER_REGS_SF)
4499 if (rs6000_isa_flags_explicit & OPTION_MASK_UPPER_REGS_SF)
4500 error ("-mpower9-dform requires -mupper-regs-sf");
4501 rs6000_isa_flags &= ~OPTION_MASK_P9_DFORM_SCALAR;
4504 /* Enable LRA by default. */
4505 if ((rs6000_isa_flags_explicit & OPTION_MASK_LRA) == 0)
4506 rs6000_isa_flags |= OPTION_MASK_LRA;
4508 /* There have been bugs with -mvsx-timode that don't show up with -mlra,
4509 but do show up with -mno-lra. Given that -mlra will become the default once
4510 PR 69847 is fixed, turn off the options with problems by default if
4511 -mno-lra was used, and warn if the user explicitly asked for the option.
4513 Enable -mpower9-dform-vector by default if LRA and the other power9 options
4514 are enabled. Enable -mvsx-timode by default if LRA and VSX are enabled. */
4515 if (!TARGET_LRA)
4517 if (TARGET_VSX_TIMODE)
4519 if ((rs6000_isa_flags_explicit & OPTION_MASK_VSX_TIMODE) != 0)
4520 warning (0, "-mvsx-timode might need -mlra");
4522 else
4523 rs6000_isa_flags &= ~OPTION_MASK_VSX_TIMODE;
4527 else
4529 if (TARGET_VSX && !TARGET_VSX_TIMODE
4530 && (rs6000_isa_flags_explicit & OPTION_MASK_VSX_TIMODE) == 0)
4531 rs6000_isa_flags |= OPTION_MASK_VSX_TIMODE;
4534 /* Explicitly set -mallow-movmisalign on if we have full ISA 2.07
4535 support. If we only have ISA 2.06 support, and the user did not specify
4536 the switch, leave it set to -1 so the movmisalign patterns are enabled,
4537 but we don't enable the full vectorization support. */
4538 if (TARGET_ALLOW_MOVMISALIGN == -1 && TARGET_P8_VECTOR && TARGET_DIRECT_MOVE)
4539 TARGET_ALLOW_MOVMISALIGN = 1;
4541 else if (TARGET_ALLOW_MOVMISALIGN && !TARGET_VSX)
4543 if (TARGET_ALLOW_MOVMISALIGN > 0
4544 && global_options_set.x_TARGET_ALLOW_MOVMISALIGN)
4545 error ("-mallow-movmisalign requires -mvsx");
4547 TARGET_ALLOW_MOVMISALIGN = 0;
4550 /* Determine when unaligned vector accesses are permitted, and when
4551 they are preferred over masked Altivec loads. Note that if
4552 TARGET_ALLOW_MOVMISALIGN has been disabled by the user, then
4553 TARGET_EFFICIENT_UNALIGNED_VSX must be as well. The converse is
4554 not true. */
4555 if (TARGET_EFFICIENT_UNALIGNED_VSX)
4557 if (!TARGET_VSX)
4559 if (rs6000_isa_flags_explicit & OPTION_MASK_EFFICIENT_UNALIGNED_VSX)
4560 error ("-mefficient-unaligned-vsx requires -mvsx");
4562 rs6000_isa_flags &= ~OPTION_MASK_EFFICIENT_UNALIGNED_VSX;
4565 else if (!TARGET_ALLOW_MOVMISALIGN)
4567 if (rs6000_isa_flags_explicit & OPTION_MASK_EFFICIENT_UNALIGNED_VSX)
4568 error ("-mefficient-unaligned-vsx requires -mallow-movmisalign");
4570 rs6000_isa_flags &= ~OPTION_MASK_EFFICIENT_UNALIGNED_VSX;
4574 /* Check whether we should allow small integers into VSX registers. We
4575 require direct move to prevent the register allocator from having to move
4576 variables through memory to do moves. SImode can be used on ISA 2.07,
4577 while HImode and QImode require ISA 3.0. */
4578 if (TARGET_VSX_SMALL_INTEGER
4579 && (!TARGET_DIRECT_MOVE || !TARGET_P8_VECTOR || !TARGET_UPPER_REGS_DI))
4581 if (rs6000_isa_flags_explicit & OPTION_MASK_VSX_SMALL_INTEGER)
4582 error ("-mvsx-small-integer requires -mpower8-vector, "
4583 "-mupper-regs-di, and -mdirect-move");
4585 rs6000_isa_flags &= ~OPTION_MASK_VSX_SMALL_INTEGER;
4588 /* Set long double size before the IEEE 128-bit tests. */
4589 if (!global_options_set.x_rs6000_long_double_type_size)
4591 if (main_target_opt != NULL
4592 && (main_target_opt->x_rs6000_long_double_type_size
4593 != RS6000_DEFAULT_LONG_DOUBLE_SIZE))
4594 error ("target attribute or pragma changes long double size");
4595 else
4596 rs6000_long_double_type_size = RS6000_DEFAULT_LONG_DOUBLE_SIZE;
4599 /* Set -mabi=ieeelongdouble on some old targets. Note, AIX and Darwin
4600 explicitly redefine TARGET_IEEEQUAD to 0, so those systems will not
4601 pick up this default. */
4602 #if !defined (POWERPC_LINUX) && !defined (POWERPC_FREEBSD)
4603 if (!global_options_set.x_rs6000_ieeequad)
4604 rs6000_ieeequad = 1;
4605 #endif
4607 /* Enable the default support for IEEE 128-bit floating point on Linux VSX
4608 systems, but don't enable the __float128 keyword. */
4609 if (TARGET_VSX && TARGET_LONG_DOUBLE_128
4610 && (TARGET_FLOAT128_ENABLE_TYPE || TARGET_IEEEQUAD)
4611 && ((rs6000_isa_flags_explicit & OPTION_MASK_FLOAT128_TYPE) == 0))
4612 rs6000_isa_flags |= OPTION_MASK_FLOAT128_TYPE;
4614 /* IEEE 128-bit floating point requires VSX support. */
4615 if (!TARGET_VSX)
4617 if (TARGET_FLOAT128_KEYWORD)
4619 if ((rs6000_isa_flags_explicit & OPTION_MASK_FLOAT128_KEYWORD) != 0)
4620 error ("-mfloat128 requires VSX support");
4622 rs6000_isa_flags &= ~(OPTION_MASK_FLOAT128_TYPE
4623 | OPTION_MASK_FLOAT128_KEYWORD
4624 | OPTION_MASK_FLOAT128_HW);
4627 else if (TARGET_FLOAT128_TYPE)
4629 if ((rs6000_isa_flags_explicit & OPTION_MASK_FLOAT128_TYPE) != 0)
4630 error ("-mfloat128-type requires VSX support");
4632 rs6000_isa_flags &= ~(OPTION_MASK_FLOAT128_TYPE
4633 | OPTION_MASK_FLOAT128_KEYWORD
4634 | OPTION_MASK_FLOAT128_HW);
4638 /* -mfloat128 and -mfloat128-hardware internally require the underlying IEEE
4639 128-bit floating point support to be enabled. */
4640 if (!TARGET_FLOAT128_TYPE)
4642 if (TARGET_FLOAT128_KEYWORD)
4644 if ((rs6000_isa_flags_explicit & OPTION_MASK_FLOAT128_KEYWORD) != 0)
4646 error ("-mfloat128 requires -mfloat128-type");
4647 rs6000_isa_flags &= ~(OPTION_MASK_FLOAT128_TYPE
4648 | OPTION_MASK_FLOAT128_KEYWORD
4649 | OPTION_MASK_FLOAT128_HW);
4651 else
4652 rs6000_isa_flags |= OPTION_MASK_FLOAT128_TYPE;
4655 if (TARGET_FLOAT128_HW)
4657 if ((rs6000_isa_flags_explicit & OPTION_MASK_FLOAT128_HW) != 0)
4659 error ("-mfloat128-hardware requires -mfloat128-type");
4660 rs6000_isa_flags &= ~OPTION_MASK_FLOAT128_HW;
4662 else
4663 rs6000_isa_flags &= ~(OPTION_MASK_FLOAT128_TYPE
4664 | OPTION_MASK_FLOAT128_KEYWORD
4665 | OPTION_MASK_FLOAT128_HW);
4669 /* If we have -mfloat128-type and full ISA 3.0 support, enable
4670 -mfloat128-hardware by default. However, don't enable the __float128
4671 keyword. If the user explicitly turned on -mfloat128-hardware, enable the
4672 -mfloat128 option as well if it was not already set. */
4673 if (TARGET_FLOAT128_TYPE && !TARGET_FLOAT128_HW
4674 && (rs6000_isa_flags & ISA_3_0_MASKS_IEEE) == ISA_3_0_MASKS_IEEE
4675 && !(rs6000_isa_flags_explicit & OPTION_MASK_FLOAT128_HW))
4676 rs6000_isa_flags |= OPTION_MASK_FLOAT128_HW;
4678 if (TARGET_FLOAT128_HW
4679 && (rs6000_isa_flags & ISA_3_0_MASKS_IEEE) != ISA_3_0_MASKS_IEEE)
4681 if ((rs6000_isa_flags_explicit & OPTION_MASK_FLOAT128_HW) != 0)
4682 error ("-mfloat128-hardware requires full ISA 3.0 support");
4684 rs6000_isa_flags &= ~OPTION_MASK_FLOAT128_HW;
4687 if (TARGET_FLOAT128_HW && !TARGET_FLOAT128_KEYWORD
4688 && (rs6000_isa_flags_explicit & OPTION_MASK_FLOAT128_HW) != 0
4689 && (rs6000_isa_flags_explicit & OPTION_MASK_FLOAT128_KEYWORD) == 0)
4690 rs6000_isa_flags |= OPTION_MASK_FLOAT128_KEYWORD;
4692 /* Print the options after updating the defaults. */
4693 if (TARGET_DEBUG_REG || TARGET_DEBUG_TARGET)
4694 rs6000_print_isa_options (stderr, 0, "after defaults", rs6000_isa_flags);
4696 /* E500mc does "better" if we inline more aggressively. Respect the
4697 user's opinion, though. */
4698 if (rs6000_block_move_inline_limit == 0
4699 && (rs6000_cpu == PROCESSOR_PPCE500MC
4700 || rs6000_cpu == PROCESSOR_PPCE500MC64
4701 || rs6000_cpu == PROCESSOR_PPCE5500
4702 || rs6000_cpu == PROCESSOR_PPCE6500))
4703 rs6000_block_move_inline_limit = 128;
4705 /* store_one_arg depends on expand_block_move to handle at least the
4706 size of reg_parm_stack_space. */
4707 if (rs6000_block_move_inline_limit < (TARGET_POWERPC64 ? 64 : 32))
4708 rs6000_block_move_inline_limit = (TARGET_POWERPC64 ? 64 : 32);
4710 if (global_init_p)
4712 /* If the appropriate debug option is enabled, replace the target hooks
4713 with debug versions that call the real version and then prints
4714 debugging information. */
4715 if (TARGET_DEBUG_COST)
4717 targetm.rtx_costs = rs6000_debug_rtx_costs;
4718 targetm.address_cost = rs6000_debug_address_cost;
4719 targetm.sched.adjust_cost = rs6000_debug_adjust_cost;
4722 if (TARGET_DEBUG_ADDR)
4724 targetm.legitimate_address_p = rs6000_debug_legitimate_address_p;
4725 targetm.legitimize_address = rs6000_debug_legitimize_address;
4726 rs6000_secondary_reload_class_ptr
4727 = rs6000_debug_secondary_reload_class;
4728 rs6000_secondary_memory_needed_ptr
4729 = rs6000_debug_secondary_memory_needed;
4730 rs6000_cannot_change_mode_class_ptr
4731 = rs6000_debug_cannot_change_mode_class;
4732 rs6000_preferred_reload_class_ptr
4733 = rs6000_debug_preferred_reload_class;
4734 rs6000_legitimize_reload_address_ptr
4735 = rs6000_debug_legitimize_reload_address;
4736 rs6000_mode_dependent_address_ptr
4737 = rs6000_debug_mode_dependent_address;
4740 if (rs6000_veclibabi_name)
4742 if (strcmp (rs6000_veclibabi_name, "mass") == 0)
4743 rs6000_veclib_handler = rs6000_builtin_vectorized_libmass;
4744 else
4746 error ("unknown vectorization library ABI type (%s) for "
4747 "-mveclibabi= switch", rs6000_veclibabi_name);
4748 ret = false;
4753 /* Disable VSX and Altivec silently if the user switched cpus to power7 in a
4754 target attribute or pragma which automatically enables both options,
4755 unless the altivec ABI was set. This is set by default for 64-bit, but
4756 not for 32-bit. */
4757 if (main_target_opt != NULL && !main_target_opt->x_rs6000_altivec_abi)
4758 rs6000_isa_flags &= ~((OPTION_MASK_VSX | OPTION_MASK_ALTIVEC
4759 | OPTION_MASK_FLOAT128_TYPE
4760 | OPTION_MASK_FLOAT128_KEYWORD)
4761 & ~rs6000_isa_flags_explicit);
4763 /* Enable Altivec ABI for AIX -maltivec. */
4764 if (TARGET_XCOFF && (TARGET_ALTIVEC || TARGET_VSX))
4766 if (main_target_opt != NULL && !main_target_opt->x_rs6000_altivec_abi)
4767 error ("target attribute or pragma changes AltiVec ABI");
4768 else
4769 rs6000_altivec_abi = 1;
4772 /* The AltiVec ABI is the default for PowerPC-64 GNU/Linux. For
4773 PowerPC-32 GNU/Linux, -maltivec implies the AltiVec ABI. It can
4774 be explicitly overridden in either case. */
4775 if (TARGET_ELF)
4777 if (!global_options_set.x_rs6000_altivec_abi
4778 && (TARGET_64BIT || TARGET_ALTIVEC || TARGET_VSX))
4780 if (main_target_opt != NULL &&
4781 !main_target_opt->x_rs6000_altivec_abi)
4782 error ("target attribute or pragma changes AltiVec ABI");
4783 else
4784 rs6000_altivec_abi = 1;
4788 /* Set the Darwin64 ABI as default for 64-bit Darwin.
4789 So far, the only darwin64 targets are also Mach-O. */
4790 if (TARGET_MACHO
4791 && DEFAULT_ABI == ABI_DARWIN
4792 && TARGET_64BIT)
4794 if (main_target_opt != NULL && !main_target_opt->x_rs6000_darwin64_abi)
4795 error ("target attribute or pragma changes darwin64 ABI");
4796 else
4798 rs6000_darwin64_abi = 1;
4799 /* Default to natural alignment, for better performance. */
4800 rs6000_alignment_flags = MASK_ALIGN_NATURAL;
4804 /* Place FP constants in the constant pool instead of TOC
4805 if section anchors enabled. */
4806 if (flag_section_anchors
4807 && !global_options_set.x_TARGET_NO_FP_IN_TOC)
4808 TARGET_NO_FP_IN_TOC = 1;
4810 if (TARGET_DEBUG_REG || TARGET_DEBUG_TARGET)
4811 rs6000_print_isa_options (stderr, 0, "before subtarget", rs6000_isa_flags);
4813 #ifdef SUBTARGET_OVERRIDE_OPTIONS
4814 SUBTARGET_OVERRIDE_OPTIONS;
4815 #endif
4816 #ifdef SUBSUBTARGET_OVERRIDE_OPTIONS
4817 SUBSUBTARGET_OVERRIDE_OPTIONS;
4818 #endif
4819 #ifdef SUB3TARGET_OVERRIDE_OPTIONS
4820 SUB3TARGET_OVERRIDE_OPTIONS;
4821 #endif
4823 if (TARGET_DEBUG_REG || TARGET_DEBUG_TARGET)
4824 rs6000_print_isa_options (stderr, 0, "after subtarget", rs6000_isa_flags);
4826 /* For the E500 family of cores, reset the single/double FP flags to let us
4827 check that they remain constant across attributes or pragmas. Also,
4828 clear a possible request for string instructions, which are not supported
4829 and which we might have silently enabled above for -Os.
4831 For other families, clear ISEL in case it was set implicitly. */
4834 switch (rs6000_cpu)
4836 case PROCESSOR_PPC8540:
4837 case PROCESSOR_PPC8548:
4838 case PROCESSOR_PPCE500MC:
4839 case PROCESSOR_PPCE500MC64:
4840 case PROCESSOR_PPCE5500:
4841 case PROCESSOR_PPCE6500:
4843 rs6000_single_float = TARGET_E500_SINGLE || TARGET_E500_DOUBLE;
4844 rs6000_double_float = TARGET_E500_DOUBLE;
4846 rs6000_isa_flags &= ~OPTION_MASK_STRING;
4848 break;
4850 default:
4852 if (have_cpu && !(rs6000_isa_flags_explicit & OPTION_MASK_ISEL))
4853 rs6000_isa_flags &= ~OPTION_MASK_ISEL;
4855 break;
4858 if (main_target_opt)
4860 if (main_target_opt->x_rs6000_single_float != rs6000_single_float)
4861 error ("target attribute or pragma changes single precision floating "
4862 "point");
4863 if (main_target_opt->x_rs6000_double_float != rs6000_double_float)
4864 error ("target attribute or pragma changes double precision floating "
4865 "point");
4868 /* Detect invalid option combinations with E500. */
4869 CHECK_E500_OPTIONS;
4871 rs6000_always_hint = (rs6000_cpu != PROCESSOR_POWER4
4872 && rs6000_cpu != PROCESSOR_POWER5
4873 && rs6000_cpu != PROCESSOR_POWER6
4874 && rs6000_cpu != PROCESSOR_POWER7
4875 && rs6000_cpu != PROCESSOR_POWER8
4876 && rs6000_cpu != PROCESSOR_POWER9
4877 && rs6000_cpu != PROCESSOR_PPCA2
4878 && rs6000_cpu != PROCESSOR_CELL
4879 && rs6000_cpu != PROCESSOR_PPC476);
4880 rs6000_sched_groups = (rs6000_cpu == PROCESSOR_POWER4
4881 || rs6000_cpu == PROCESSOR_POWER5
4882 || rs6000_cpu == PROCESSOR_POWER7
4883 || rs6000_cpu == PROCESSOR_POWER8);
4884 rs6000_align_branch_targets = (rs6000_cpu == PROCESSOR_POWER4
4885 || rs6000_cpu == PROCESSOR_POWER5
4886 || rs6000_cpu == PROCESSOR_POWER6
4887 || rs6000_cpu == PROCESSOR_POWER7
4888 || rs6000_cpu == PROCESSOR_POWER8
4889 || rs6000_cpu == PROCESSOR_POWER9
4890 || rs6000_cpu == PROCESSOR_PPCE500MC
4891 || rs6000_cpu == PROCESSOR_PPCE500MC64
4892 || rs6000_cpu == PROCESSOR_PPCE5500
4893 || rs6000_cpu == PROCESSOR_PPCE6500);
4895 /* Allow debug switches to override the above settings. These are set to -1
4896 in rs6000.opt to indicate the user hasn't directly set the switch. */
4897 if (TARGET_ALWAYS_HINT >= 0)
4898 rs6000_always_hint = TARGET_ALWAYS_HINT;
4900 if (TARGET_SCHED_GROUPS >= 0)
4901 rs6000_sched_groups = TARGET_SCHED_GROUPS;
4903 if (TARGET_ALIGN_BRANCH_TARGETS >= 0)
4904 rs6000_align_branch_targets = TARGET_ALIGN_BRANCH_TARGETS;
4906 rs6000_sched_restricted_insns_priority
4907 = (rs6000_sched_groups ? 1 : 0);
4909 /* Handle -msched-costly-dep option. */
4910 rs6000_sched_costly_dep
4911 = (rs6000_sched_groups ? true_store_to_load_dep_costly : no_dep_costly);
4913 if (rs6000_sched_costly_dep_str)
4915 if (! strcmp (rs6000_sched_costly_dep_str, "no"))
4916 rs6000_sched_costly_dep = no_dep_costly;
4917 else if (! strcmp (rs6000_sched_costly_dep_str, "all"))
4918 rs6000_sched_costly_dep = all_deps_costly;
4919 else if (! strcmp (rs6000_sched_costly_dep_str, "true_store_to_load"))
4920 rs6000_sched_costly_dep = true_store_to_load_dep_costly;
4921 else if (! strcmp (rs6000_sched_costly_dep_str, "store_to_load"))
4922 rs6000_sched_costly_dep = store_to_load_dep_costly;
4923 else
4924 rs6000_sched_costly_dep = ((enum rs6000_dependence_cost)
4925 atoi (rs6000_sched_costly_dep_str));
4928 /* Handle -minsert-sched-nops option. */
4929 rs6000_sched_insert_nops
4930 = (rs6000_sched_groups ? sched_finish_regroup_exact : sched_finish_none);
4932 if (rs6000_sched_insert_nops_str)
4934 if (! strcmp (rs6000_sched_insert_nops_str, "no"))
4935 rs6000_sched_insert_nops = sched_finish_none;
4936 else if (! strcmp (rs6000_sched_insert_nops_str, "pad"))
4937 rs6000_sched_insert_nops = sched_finish_pad_groups;
4938 else if (! strcmp (rs6000_sched_insert_nops_str, "regroup_exact"))
4939 rs6000_sched_insert_nops = sched_finish_regroup_exact;
4940 else
4941 rs6000_sched_insert_nops = ((enum rs6000_nop_insertion)
4942 atoi (rs6000_sched_insert_nops_str));
4945 if (global_init_p)
4947 #ifdef TARGET_REGNAMES
4948 /* If the user desires alternate register names, copy in the
4949 alternate names now. */
4950 if (TARGET_REGNAMES)
4951 memcpy (rs6000_reg_names, alt_reg_names, sizeof (rs6000_reg_names));
4952 #endif
4954 /* Set aix_struct_return last, after the ABI is determined.
4955 If -maix-struct-return or -msvr4-struct-return was explicitly
4956 used, don't override with the ABI default. */
4957 if (!global_options_set.x_aix_struct_return)
4958 aix_struct_return = (DEFAULT_ABI != ABI_V4 || DRAFT_V4_STRUCT_RET);
4960 #if 0
4961 /* IBM XL compiler defaults to unsigned bitfields. */
4962 if (TARGET_XL_COMPAT)
4963 flag_signed_bitfields = 0;
4964 #endif
4966 if (TARGET_LONG_DOUBLE_128 && !TARGET_IEEEQUAD)
4967 REAL_MODE_FORMAT (TFmode) = &ibm_extended_format;
4969 ASM_GENERATE_INTERNAL_LABEL (toc_label_name, "LCTOC", 1);
4971 /* We can only guarantee the availability of DI pseudo-ops when
4972 assembling for 64-bit targets. */
4973 if (!TARGET_64BIT)
4975 targetm.asm_out.aligned_op.di = NULL;
4976 targetm.asm_out.unaligned_op.di = NULL;
4980 /* Set branch target alignment, if not optimizing for size. */
4981 if (!optimize_size)
4983 /* Cell wants to be aligned 8-byte for dual issue.  Titan wants to be
4984 aligned 8-byte to avoid misprediction by the branch predictor.  */
4985 if (rs6000_cpu == PROCESSOR_TITAN
4986 || rs6000_cpu == PROCESSOR_CELL)
4988 if (align_functions <= 0)
4989 align_functions = 8;
4990 if (align_jumps <= 0)
4991 align_jumps = 8;
4992 if (align_loops <= 0)
4993 align_loops = 8;
4995 if (rs6000_align_branch_targets)
4997 if (align_functions <= 0)
4998 align_functions = 16;
4999 if (align_jumps <= 0)
5000 align_jumps = 16;
5001 if (align_loops <= 0)
5003 can_override_loop_align = 1;
5004 align_loops = 16;
5007 if (align_jumps_max_skip <= 0)
5008 align_jumps_max_skip = 15;
5009 if (align_loops_max_skip <= 0)
5010 align_loops_max_skip = 15;
5013 /* Arrange to save and restore machine status around nested functions. */
5014 init_machine_status = rs6000_init_machine_status;
5016 /* We should always be splitting complex arguments, but we can't break
5017 Linux and Darwin ABIs at the moment. For now, only AIX is fixed. */
5018 if (DEFAULT_ABI == ABI_V4 || DEFAULT_ABI == ABI_DARWIN)
5019 targetm.calls.split_complex_arg = NULL;
5021 /* The AIX and ELFv1 ABIs define standard function descriptors. */
5022 if (DEFAULT_ABI == ABI_AIX)
5023 targetm.calls.custom_function_descriptors = 0;
5026 /* Initialize rs6000_cost with the appropriate target costs. */
5027 if (optimize_size)
5028 rs6000_cost = TARGET_POWERPC64 ? &size64_cost : &size32_cost;
5029 else
5030 switch (rs6000_cpu)
5032 case PROCESSOR_RS64A:
5033 rs6000_cost = &rs64a_cost;
5034 break;
5036 case PROCESSOR_MPCCORE:
5037 rs6000_cost = &mpccore_cost;
5038 break;
5040 case PROCESSOR_PPC403:
5041 rs6000_cost = &ppc403_cost;
5042 break;
5044 case PROCESSOR_PPC405:
5045 rs6000_cost = &ppc405_cost;
5046 break;
5048 case PROCESSOR_PPC440:
5049 rs6000_cost = &ppc440_cost;
5050 break;
5052 case PROCESSOR_PPC476:
5053 rs6000_cost = &ppc476_cost;
5054 break;
5056 case PROCESSOR_PPC601:
5057 rs6000_cost = &ppc601_cost;
5058 break;
5060 case PROCESSOR_PPC603:
5061 rs6000_cost = &ppc603_cost;
5062 break;
5064 case PROCESSOR_PPC604:
5065 rs6000_cost = &ppc604_cost;
5066 break;
5068 case PROCESSOR_PPC604e:
5069 rs6000_cost = &ppc604e_cost;
5070 break;
5072 case PROCESSOR_PPC620:
5073 rs6000_cost = &ppc620_cost;
5074 break;
5076 case PROCESSOR_PPC630:
5077 rs6000_cost = &ppc630_cost;
5078 break;
5080 case PROCESSOR_CELL:
5081 rs6000_cost = &ppccell_cost;
5082 break;
5084 case PROCESSOR_PPC750:
5085 case PROCESSOR_PPC7400:
5086 rs6000_cost = &ppc750_cost;
5087 break;
5089 case PROCESSOR_PPC7450:
5090 rs6000_cost = &ppc7450_cost;
5091 break;
5093 case PROCESSOR_PPC8540:
5094 case PROCESSOR_PPC8548:
5095 rs6000_cost = &ppc8540_cost;
5096 break;
5098 case PROCESSOR_PPCE300C2:
5099 case PROCESSOR_PPCE300C3:
5100 rs6000_cost = &ppce300c2c3_cost;
5101 break;
5103 case PROCESSOR_PPCE500MC:
5104 rs6000_cost = &ppce500mc_cost;
5105 break;
5107 case PROCESSOR_PPCE500MC64:
5108 rs6000_cost = &ppce500mc64_cost;
5109 break;
5111 case PROCESSOR_PPCE5500:
5112 rs6000_cost = &ppce5500_cost;
5113 break;
5115 case PROCESSOR_PPCE6500:
5116 rs6000_cost = &ppce6500_cost;
5117 break;
5119 case PROCESSOR_TITAN:
5120 rs6000_cost = &titan_cost;
5121 break;
5123 case PROCESSOR_POWER4:
5124 case PROCESSOR_POWER5:
5125 rs6000_cost = &power4_cost;
5126 break;
5128 case PROCESSOR_POWER6:
5129 rs6000_cost = &power6_cost;
5130 break;
5132 case PROCESSOR_POWER7:
5133 rs6000_cost = &power7_cost;
5134 break;
5136 case PROCESSOR_POWER8:
5137 rs6000_cost = &power8_cost;
5138 break;
5140 case PROCESSOR_POWER9:
5141 rs6000_cost = &power9_cost;
5142 break;
5144 case PROCESSOR_PPCA2:
5145 rs6000_cost = &ppca2_cost;
5146 break;
5148 default:
5149 gcc_unreachable ();
5152 if (global_init_p)
5154 maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES,
5155 rs6000_cost->simultaneous_prefetches,
5156 global_options.x_param_values,
5157 global_options_set.x_param_values);
5158 maybe_set_param_value (PARAM_L1_CACHE_SIZE, rs6000_cost->l1_cache_size,
5159 global_options.x_param_values,
5160 global_options_set.x_param_values);
5161 maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE,
5162 rs6000_cost->cache_line_size,
5163 global_options.x_param_values,
5164 global_options_set.x_param_values);
5165 maybe_set_param_value (PARAM_L2_CACHE_SIZE, rs6000_cost->l2_cache_size,
5166 global_options.x_param_values,
5167 global_options_set.x_param_values);
5169 /* Increase loop peeling limits based on performance analysis. */
5170 maybe_set_param_value (PARAM_MAX_PEELED_INSNS, 400,
5171 global_options.x_param_values,
5172 global_options_set.x_param_values);
5173 maybe_set_param_value (PARAM_MAX_COMPLETELY_PEELED_INSNS, 400,
5174 global_options.x_param_values,
5175 global_options_set.x_param_values);
5177 /* Use the 'model' -fsched-pressure algorithm by default. */
5178 maybe_set_param_value (PARAM_SCHED_PRESSURE_ALGORITHM,
5179 SCHED_PRESSURE_MODEL,
5180 global_options.x_param_values,
5181 global_options_set.x_param_values);
5183 /* If using typedef char *va_list, signal that
5184 __builtin_va_start (&ap, 0) can be optimized to
5185 ap = __builtin_next_arg (0). */
5186 if (DEFAULT_ABI != ABI_V4)
5187 targetm.expand_builtin_va_start = NULL;
5190 /* Set up single/double float flags.
5191 If TARGET_HARD_FLOAT is set, but neither single nor double is set,
5192 then set both flags. */
5193 if (TARGET_HARD_FLOAT && TARGET_FPRS
5194 && rs6000_single_float == 0 && rs6000_double_float == 0)
5195 rs6000_single_float = rs6000_double_float = 1;
5197 /* If not explicitly specified via option, decide whether to generate indexed
5198 load/store instructions. */
5199 if (TARGET_AVOID_XFORM == -1)
5200 /* Avoid indexed addressing when targeting Power6 in order to avoid the
5201 DERAT mispredict penalty. However the LVE and STVE altivec instructions
5202 need indexed accesses and the type used is the scalar type of the element
5203 being loaded or stored. */
5204 TARGET_AVOID_XFORM = (rs6000_cpu == PROCESSOR_POWER6 && TARGET_CMPB
5205 && !TARGET_ALTIVEC);
5207 /* Set the -mrecip options. */
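/* The argument is a comma-separated list of reciprocal estimate names; a
   leading '!' disables the named estimate, and "default" selects a mask
   that depends on whether -mrecip-precision is in effect.  For example,
   -mrecip=all,!rsqrtd enables every estimate except the double-precision
   reciprocal square root.  */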
5208 if (rs6000_recip_name)
5210 char *p = ASTRDUP (rs6000_recip_name);
5211 char *q;
5212 unsigned int mask, i;
5213 bool invert;
5215 while ((q = strtok (p, ",")) != NULL)
5217 p = NULL;
5218 if (*q == '!')
5220 invert = true;
5221 q++;
5223 else
5224 invert = false;
5226 if (!strcmp (q, "default"))
5227 mask = ((TARGET_RECIP_PRECISION)
5228 ? RECIP_HIGH_PRECISION : RECIP_LOW_PRECISION);
5229 else
5231 for (i = 0; i < ARRAY_SIZE (recip_options); i++)
5232 if (!strcmp (q, recip_options[i].string))
5234 mask = recip_options[i].mask;
5235 break;
5238 if (i == ARRAY_SIZE (recip_options))
5240 error ("unknown option for -mrecip=%s", q);
5241 invert = false;
5242 mask = 0;
5243 ret = false;
5247 if (invert)
5248 rs6000_recip_control &= ~mask;
5249 else
5250 rs6000_recip_control |= mask;
5254 /* Set the builtin mask of the various options used that could affect which
5255 builtins were used. In the past we used target_flags, but we've run out
5256 of bits, and some options like SPE and PAIRED are no longer in
5257 target_flags. */
5258 rs6000_builtin_mask = rs6000_builtin_mask_calculate ();
5259 if (TARGET_DEBUG_BUILTIN || TARGET_DEBUG_TARGET)
5260 rs6000_print_builtin_options (stderr, 0, "builtin mask",
5261 rs6000_builtin_mask);
5263 /* Initialize all of the registers. */
5264 rs6000_init_hard_regno_mode_ok (global_init_p);
5266 /* Save the initial options in case the user uses function-specific options.  */
5267 if (global_init_p)
5268 target_option_default_node = target_option_current_node
5269 = build_target_option_node (&global_options);
5271 /* If not explicitly specified via option, decide whether to generate the
5272 extra blr's required to preserve the link stack on some cpus (eg, 476). */
5273 if (TARGET_LINK_STACK == -1)
5274 SET_TARGET_LINK_STACK (rs6000_cpu == PROCESSOR_PPC476 && flag_pic);
5276 return ret;
5279 /* Implement TARGET_OPTION_OVERRIDE. On the RS/6000 this is used to
5280 define the target cpu type. */
5282 static void
5283 rs6000_option_override (void)
5285 (void) rs6000_option_override_internal (true);
5289 /* Implement targetm.vectorize.builtin_mask_for_load. */
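/* The vectorizer uses this mask with lvsl/vperm to realign vectors loaded
   from unaligned addresses.  Returning 0 means no mask builtin is
   available, so plain unaligned loads are used instead, which is what we
   want on VSX machines with efficient unaligned accesses.  */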
5290 static tree
5291 rs6000_builtin_mask_for_load (void)
5293 /* Don't use lvsl/vperm for P8 and similarly efficient machines. */
5294 if ((TARGET_ALTIVEC && !TARGET_VSX)
5295 || (TARGET_VSX && !TARGET_EFFICIENT_UNALIGNED_VSX))
5296 return altivec_builtin_mask_for_load;
5297 else
5298 return 0;
5301 /* Implement LOOP_ALIGN. */
5302 int
5303 rs6000_loop_align (rtx label)
5305 basic_block bb;
5306 int ninsns;
5308 /* Don't override loop alignment if -falign-loops was specified. */
5309 if (!can_override_loop_align)
5310 return align_loops_log;
5312 bb = BLOCK_FOR_INSN (label);
5313 ninsns = num_loop_insns(bb->loop_father);
5315 /* Align small loops to 32 bytes (2**5, hence the return value of 5 below) to fit in an icache sector; otherwise return the default. */
5316 if (ninsns > 4 && ninsns <= 8
5317 && (rs6000_cpu == PROCESSOR_POWER4
5318 || rs6000_cpu == PROCESSOR_POWER5
5319 || rs6000_cpu == PROCESSOR_POWER6
5320 || rs6000_cpu == PROCESSOR_POWER7
5321 || rs6000_cpu == PROCESSOR_POWER8
5322 || rs6000_cpu == PROCESSOR_POWER9))
5323 return 5;
5324 else
5325 return align_loops_log;
5328 /* Implement TARGET_LOOP_ALIGN_MAX_SKIP. */
5329 static int
5330 rs6000_loop_align_max_skip (rtx_insn *label)
5332 return (1 << rs6000_loop_align (label)) - 1;
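/* Allowing a skip of up to one byte less than the alignment itself means
   the alignment request is never suppressed for needing too much padding.  */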
5335 /* Return true iff a data reference of TYPE can reach vector alignment (16)
5336 after applying N iterations.  This routine does not determine
5337 how many iterations are required to reach the desired alignment.  */
5339 static bool
5340 rs6000_vector_alignment_reachable (const_tree type ATTRIBUTE_UNUSED, bool is_packed)
5342 if (is_packed)
5343 return false;
5345 if (TARGET_32BIT)
5347 if (rs6000_alignment_flags == MASK_ALIGN_NATURAL)
5348 return true;
5350 if (rs6000_alignment_flags == MASK_ALIGN_POWER)
5351 return true;
5353 return false;
5355 else
5357 if (TARGET_MACHO)
5358 return false;
5360 /* Assuming that all other types are naturally aligned. CHECKME! */
5361 return true;
5365 /* Return true if the vector misalignment factor is supported by the
5366 target. */
5367 static bool
5368 rs6000_builtin_support_vector_misalignment (machine_mode mode,
5369 const_tree type,
5370 int misalignment,
5371 bool is_packed)
5373 if (TARGET_VSX)
5375 if (TARGET_EFFICIENT_UNALIGNED_VSX)
5376 return true;
5378 /* Return false if the movmisalign pattern is not supported for this mode.  */
5379 if (optab_handler (movmisalign_optab, mode) == CODE_FOR_nothing)
5380 return false;
5382 if (misalignment == -1)
5384 /* Misalignment factor is unknown at compile time but we know
5385 it's word aligned. */
5386 if (rs6000_vector_alignment_reachable (type, is_packed))
5388 int element_size = TREE_INT_CST_LOW (TYPE_SIZE (type));
5390 if (element_size == 64 || element_size == 32)
5391 return true;
5394 return false;
5398 /* VSX supports word-aligned vectors.  */
5398 if (misalignment % 4 == 0)
5399 return true;
5401 return false;
5404 /* Implement targetm.vectorize.builtin_vectorization_cost. */
5405 static int
5406 rs6000_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
5407 tree vectype, int misalign)
5409 unsigned elements;
5410 tree elem_type;
5412 switch (type_of_cost)
5414 case scalar_stmt:
5415 case scalar_load:
5416 case scalar_store:
5417 case vector_stmt:
5418 case vector_load:
5419 case vector_store:
5420 case vec_to_scalar:
5421 case scalar_to_vec:
5422 case cond_branch_not_taken:
5423 return 1;
5425 case vec_perm:
5426 if (TARGET_VSX)
5427 return 3;
5428 else
5429 return 1;
5431 case vec_promote_demote:
5432 if (TARGET_VSX)
5433 return 4;
5434 else
5435 return 1;
5437 case cond_branch_taken:
5438 return 3;
5440 case unaligned_load:
5441 if (TARGET_P9_VECTOR)
5442 return 3;
5444 if (TARGET_EFFICIENT_UNALIGNED_VSX)
5445 return 1;
5447 if (TARGET_VSX && TARGET_ALLOW_MOVMISALIGN)
5449 elements = TYPE_VECTOR_SUBPARTS (vectype);
5450 if (elements == 2)
5451 /* Double word aligned. */
5452 return 2;
5454 if (elements == 4)
5456 switch (misalign)
5458 case 8:
5459 /* Double word aligned. */
5460 return 2;
5462 case -1:
5463 /* Unknown misalignment. */
5464 case 4:
5465 case 12:
5466 /* Word aligned. */
5467 return 22;
5469 default:
5470 gcc_unreachable ();
5475 if (TARGET_ALTIVEC)
5476 /* Misaligned loads are not supported. */
5477 gcc_unreachable ();
5479 return 2;
5481 case unaligned_store:
5482 if (TARGET_EFFICIENT_UNALIGNED_VSX)
5483 return 1;
5485 if (TARGET_VSX && TARGET_ALLOW_MOVMISALIGN)
5487 elements = TYPE_VECTOR_SUBPARTS (vectype);
5488 if (elements == 2)
5489 /* Double word aligned. */
5490 return 2;
5492 if (elements == 4)
5494 switch (misalign)
5496 case 8:
5497 /* Double word aligned. */
5498 return 2;
5500 case -1:
5501 /* Unknown misalignment. */
5502 case 4:
5503 case 12:
5504 /* Word aligned. */
5505 return 23;
5507 default:
5508 gcc_unreachable ();
5513 if (TARGET_ALTIVEC)
5514 /* Misaligned stores are not supported. */
5515 gcc_unreachable ();
5517 return 2;
5519 case vec_construct:
5520 /* This is a rough approximation assuming non-constant elements
5521 constructed into a vector via element insertion. FIXME:
5522 vec_construct is not granular enough for uniformly good
5523 decisions. If the initialization is a splat, this is
5524 cheaper than we estimate. Improve this someday. */
5525 elem_type = TREE_TYPE (vectype);
5526 /* 32-bit vectors loaded into registers are stored as double
5527 precision, so we need 2 permutes, 2 converts, and 1 merge
5528 to construct a vector of short floats from them. */
5529 if (SCALAR_FLOAT_TYPE_P (elem_type)
5530 && TYPE_PRECISION (elem_type) == 32)
5531 return 5;
5532 else
5533 return max (2, TYPE_VECTOR_SUBPARTS (vectype) - 1);
5535 default:
5536 gcc_unreachable ();
5540 /* Implement targetm.vectorize.preferred_simd_mode. */
5542 static machine_mode
5543 rs6000_preferred_simd_mode (machine_mode mode)
5545 if (TARGET_VSX)
5546 switch (mode)
5548 case DFmode:
5549 return V2DFmode;
5550 default:;
5552 if (TARGET_ALTIVEC || TARGET_VSX)
5553 switch (mode)
5555 case SFmode:
5556 return V4SFmode;
5557 case TImode:
5558 return V1TImode;
5559 case DImode:
5560 return V2DImode;
5561 case SImode:
5562 return V4SImode;
5563 case HImode:
5564 return V8HImode;
5565 case QImode:
5566 return V16QImode;
5567 default:;
5569 if (TARGET_SPE)
5570 switch (mode)
5572 case SFmode:
5573 return V2SFmode;
5574 case SImode:
5575 return V2SImode;
5576 default:;
5578 if (TARGET_PAIRED_FLOAT
5579 && mode == SFmode)
5580 return V2SFmode;
5581 return word_mode;
5584 typedef struct _rs6000_cost_data
5586 struct loop *loop_info;
5587 unsigned cost[3];
5588 } rs6000_cost_data;
5590 /* Test for likely overcommitment of vector hardware resources. If a
5591 loop iteration is relatively large, and too large a percentage of
5592 instructions in the loop are vectorized, the cost model may not
5593 adequately reflect delays from unavailable vector resources.
5594 Penalize the loop body cost for this case. */
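/* For example, a loop body with vectorized cost 180 and non-vectorized
   cost 20 has a density of 90% and a total size of 200, exceeding both
   thresholds below, so its body cost would be raised by 10% to 198.  */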
5596 static void
5597 rs6000_density_test (rs6000_cost_data *data)
5599 const int DENSITY_PCT_THRESHOLD = 85;
5600 const int DENSITY_SIZE_THRESHOLD = 70;
5601 const int DENSITY_PENALTY = 10;
5602 struct loop *loop = data->loop_info;
5603 basic_block *bbs = get_loop_body (loop);
5604 int nbbs = loop->num_nodes;
5605 int vec_cost = data->cost[vect_body], not_vec_cost = 0;
5606 int i, density_pct;
5608 for (i = 0; i < nbbs; i++)
5610 basic_block bb = bbs[i];
5611 gimple_stmt_iterator gsi;
5613 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
5615 gimple *stmt = gsi_stmt (gsi);
5616 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
5618 if (!STMT_VINFO_RELEVANT_P (stmt_info)
5619 && !STMT_VINFO_IN_PATTERN_P (stmt_info))
5620 not_vec_cost++;
5624 free (bbs);
5625 density_pct = (vec_cost * 100) / (vec_cost + not_vec_cost);
5627 if (density_pct > DENSITY_PCT_THRESHOLD
5628 && vec_cost + not_vec_cost > DENSITY_SIZE_THRESHOLD)
5630 data->cost[vect_body] = vec_cost * (100 + DENSITY_PENALTY) / 100;
5631 if (dump_enabled_p ())
5632 dump_printf_loc (MSG_NOTE, vect_location,
5633 "density %d%%, cost %d exceeds threshold, penalizing "
5634 "loop body cost by %d%%", density_pct,
5635 vec_cost + not_vec_cost, DENSITY_PENALTY);
5639 /* Implement targetm.vectorize.init_cost. */
5641 static void *
5642 rs6000_init_cost (struct loop *loop_info)
5644 rs6000_cost_data *data = XNEW (struct _rs6000_cost_data);
5645 data->loop_info = loop_info;
5646 data->cost[vect_prologue] = 0;
5647 data->cost[vect_body] = 0;
5648 data->cost[vect_epilogue] = 0;
5649 return data;
5652 /* Implement targetm.vectorize.add_stmt_cost. */
5654 static unsigned
5655 rs6000_add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind,
5656 struct _stmt_vec_info *stmt_info, int misalign,
5657 enum vect_cost_model_location where)
5659 rs6000_cost_data *cost_data = (rs6000_cost_data*) data;
5660 unsigned retval = 0;
5662 if (flag_vect_cost_model)
5664 tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
5665 int stmt_cost = rs6000_builtin_vectorization_cost (kind, vectype,
5666 misalign);
5667 /* Statements in an inner loop relative to the loop being
5668 vectorized are weighted more heavily. The value here is
5669 arbitrary and could potentially be improved with analysis. */
5670 if (where == vect_body && stmt_info && stmt_in_inner_loop_p (stmt_info))
5671 count *= 50; /* FIXME. */
5673 retval = (unsigned) (count * stmt_cost);
5674 cost_data->cost[where] += retval;
5677 return retval;
5680 /* Implement targetm.vectorize.finish_cost. */
5682 static void
5683 rs6000_finish_cost (void *data, unsigned *prologue_cost,
5684 unsigned *body_cost, unsigned *epilogue_cost)
5686 rs6000_cost_data *cost_data = (rs6000_cost_data*) data;
5688 if (cost_data->loop_info)
5689 rs6000_density_test (cost_data);
5691 *prologue_cost = cost_data->cost[vect_prologue];
5692 *body_cost = cost_data->cost[vect_body];
5693 *epilogue_cost = cost_data->cost[vect_epilogue];
5696 /* Implement targetm.vectorize.destroy_cost_data. */
5698 static void
5699 rs6000_destroy_cost_data (void *data)
5701 free (data);
5704 /* Handler for the Mathematical Acceleration Subsystem (mass) interface to a
5705 library with vectorized intrinsics. */
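/* For example, a V2DFmode sin is mapped to the MASS routine sind2, and a
   V4SFmode sinf to sinf4, following the naming scheme built below.  */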
5707 static tree
5708 rs6000_builtin_vectorized_libmass (combined_fn fn, tree type_out,
5709 tree type_in)
5711 char name[32];
5712 const char *suffix = NULL;
5713 tree fntype, new_fndecl, bdecl = NULL_TREE;
5714 int n_args = 1;
5715 const char *bname;
5716 machine_mode el_mode, in_mode;
5717 int n, in_n;
5719 /* Libmass is suitable for unsafe math only as it does not correctly support
5720 parts of IEEE with the required precision such as denormals. Only support
5721 it if we have VSX to use the simd d2 or f4 functions.
5722 XXX: Add variable length support. */
5723 if (!flag_unsafe_math_optimizations || !TARGET_VSX)
5724 return NULL_TREE;
5726 el_mode = TYPE_MODE (TREE_TYPE (type_out));
5727 n = TYPE_VECTOR_SUBPARTS (type_out);
5728 in_mode = TYPE_MODE (TREE_TYPE (type_in));
5729 in_n = TYPE_VECTOR_SUBPARTS (type_in);
5730 if (el_mode != in_mode
5731 || n != in_n)
5732 return NULL_TREE;
5734 switch (fn)
5736 CASE_CFN_ATAN2:
5737 CASE_CFN_HYPOT:
5738 CASE_CFN_POW:
5739 n_args = 2;
5740 gcc_fallthrough ();
5742 CASE_CFN_ACOS:
5743 CASE_CFN_ACOSH:
5744 CASE_CFN_ASIN:
5745 CASE_CFN_ASINH:
5746 CASE_CFN_ATAN:
5747 CASE_CFN_ATANH:
5748 CASE_CFN_CBRT:
5749 CASE_CFN_COS:
5750 CASE_CFN_COSH:
5751 CASE_CFN_ERF:
5752 CASE_CFN_ERFC:
5753 CASE_CFN_EXP2:
5754 CASE_CFN_EXP:
5755 CASE_CFN_EXPM1:
5756 CASE_CFN_LGAMMA:
5757 CASE_CFN_LOG10:
5758 CASE_CFN_LOG1P:
5759 CASE_CFN_LOG2:
5760 CASE_CFN_LOG:
5761 CASE_CFN_SIN:
5762 CASE_CFN_SINH:
5763 CASE_CFN_SQRT:
5764 CASE_CFN_TAN:
5765 CASE_CFN_TANH:
5766 if (el_mode == DFmode && n == 2)
5768 bdecl = mathfn_built_in (double_type_node, fn);
5769 suffix = "d2"; /* pow -> powd2 */
5771 else if (el_mode == SFmode && n == 4)
5773 bdecl = mathfn_built_in (float_type_node, fn);
5774 suffix = "4"; /* powf -> powf4 */
5776 else
5777 return NULL_TREE;
5778 if (!bdecl)
5779 return NULL_TREE;
5780 break;
5782 default:
5783 return NULL_TREE;
5786 gcc_assert (suffix != NULL);
5787 bname = IDENTIFIER_POINTER (DECL_NAME (bdecl));
5788 if (!bname)
5789 return NULL_TREE;
5791 strcpy (name, bname + sizeof ("__builtin_") - 1);
5792 strcat (name, suffix);
5794 if (n_args == 1)
5795 fntype = build_function_type_list (type_out, type_in, NULL);
5796 else if (n_args == 2)
5797 fntype = build_function_type_list (type_out, type_in, type_in, NULL);
5798 else
5799 gcc_unreachable ();
5801 /* Build a function declaration for the vectorized function. */
5802 new_fndecl = build_decl (BUILTINS_LOCATION,
5803 FUNCTION_DECL, get_identifier (name), fntype);
5804 TREE_PUBLIC (new_fndecl) = 1;
5805 DECL_EXTERNAL (new_fndecl) = 1;
5806 DECL_IS_NOVOPS (new_fndecl) = 1;
5807 TREE_READONLY (new_fndecl) = 1;
5809 return new_fndecl;
5812 /* Returns a function decl for a vectorized version of the builtin function
5813 with builtin function code FN and the result vector type TYPE, or NULL_TREE
5814 if it is not available. */
5816 static tree
5817 rs6000_builtin_vectorized_function (unsigned int fn, tree type_out,
5818 tree type_in)
5820 machine_mode in_mode, out_mode;
5821 int in_n, out_n;
5823 if (TARGET_DEBUG_BUILTIN)
5824 fprintf (stderr, "rs6000_builtin_vectorized_function (%s, %s, %s)\n",
5825 combined_fn_name (combined_fn (fn)),
5826 GET_MODE_NAME (TYPE_MODE (type_out)),
5827 GET_MODE_NAME (TYPE_MODE (type_in)));
5829 if (TREE_CODE (type_out) != VECTOR_TYPE
5830 || TREE_CODE (type_in) != VECTOR_TYPE
5831 || !TARGET_VECTORIZE_BUILTINS)
5832 return NULL_TREE;
5834 out_mode = TYPE_MODE (TREE_TYPE (type_out));
5835 out_n = TYPE_VECTOR_SUBPARTS (type_out);
5836 in_mode = TYPE_MODE (TREE_TYPE (type_in));
5837 in_n = TYPE_VECTOR_SUBPARTS (type_in);
5839 switch (fn)
5841 CASE_CFN_COPYSIGN:
5842 if (VECTOR_UNIT_VSX_P (V2DFmode)
5843 && out_mode == DFmode && out_n == 2
5844 && in_mode == DFmode && in_n == 2)
5845 return rs6000_builtin_decls[VSX_BUILTIN_CPSGNDP];
5846 if (VECTOR_UNIT_VSX_P (V4SFmode)
5847 && out_mode == SFmode && out_n == 4
5848 && in_mode == SFmode && in_n == 4)
5849 return rs6000_builtin_decls[VSX_BUILTIN_CPSGNSP];
5850 if (VECTOR_UNIT_ALTIVEC_P (V4SFmode)
5851 && out_mode == SFmode && out_n == 4
5852 && in_mode == SFmode && in_n == 4)
5853 return rs6000_builtin_decls[ALTIVEC_BUILTIN_COPYSIGN_V4SF];
5854 break;
5855 CASE_CFN_CEIL:
5856 if (VECTOR_UNIT_VSX_P (V2DFmode)
5857 && out_mode == DFmode && out_n == 2
5858 && in_mode == DFmode && in_n == 2)
5859 return rs6000_builtin_decls[VSX_BUILTIN_XVRDPIP];
5860 if (VECTOR_UNIT_VSX_P (V4SFmode)
5861 && out_mode == SFmode && out_n == 4
5862 && in_mode == SFmode && in_n == 4)
5863 return rs6000_builtin_decls[VSX_BUILTIN_XVRSPIP];
5864 if (VECTOR_UNIT_ALTIVEC_P (V4SFmode)
5865 && out_mode == SFmode && out_n == 4
5866 && in_mode == SFmode && in_n == 4)
5867 return rs6000_builtin_decls[ALTIVEC_BUILTIN_VRFIP];
5868 break;
5869 CASE_CFN_FLOOR:
5870 if (VECTOR_UNIT_VSX_P (V2DFmode)
5871 && out_mode == DFmode && out_n == 2
5872 && in_mode == DFmode && in_n == 2)
5873 return rs6000_builtin_decls[VSX_BUILTIN_XVRDPIM];
5874 if (VECTOR_UNIT_VSX_P (V4SFmode)
5875 && out_mode == SFmode && out_n == 4
5876 && in_mode == SFmode && in_n == 4)
5877 return rs6000_builtin_decls[VSX_BUILTIN_XVRSPIM];
5878 if (VECTOR_UNIT_ALTIVEC_P (V4SFmode)
5879 && out_mode == SFmode && out_n == 4
5880 && in_mode == SFmode && in_n == 4)
5881 return rs6000_builtin_decls[ALTIVEC_BUILTIN_VRFIM];
5882 break;
5883 CASE_CFN_FMA:
5884 if (VECTOR_UNIT_VSX_P (V2DFmode)
5885 && out_mode == DFmode && out_n == 2
5886 && in_mode == DFmode && in_n == 2)
5887 return rs6000_builtin_decls[VSX_BUILTIN_XVMADDDP];
5888 if (VECTOR_UNIT_VSX_P (V4SFmode)
5889 && out_mode == SFmode && out_n == 4
5890 && in_mode == SFmode && in_n == 4)
5891 return rs6000_builtin_decls[VSX_BUILTIN_XVMADDSP];
5892 if (VECTOR_UNIT_ALTIVEC_P (V4SFmode)
5893 && out_mode == SFmode && out_n == 4
5894 && in_mode == SFmode && in_n == 4)
5895 return rs6000_builtin_decls[ALTIVEC_BUILTIN_VMADDFP];
5896 break;
5897 CASE_CFN_TRUNC:
5898 if (VECTOR_UNIT_VSX_P (V2DFmode)
5899 && out_mode == DFmode && out_n == 2
5900 && in_mode == DFmode && in_n == 2)
5901 return rs6000_builtin_decls[VSX_BUILTIN_XVRDPIZ];
5902 if (VECTOR_UNIT_VSX_P (V4SFmode)
5903 && out_mode == SFmode && out_n == 4
5904 && in_mode == SFmode && in_n == 4)
5905 return rs6000_builtin_decls[VSX_BUILTIN_XVRSPIZ];
5906 if (VECTOR_UNIT_ALTIVEC_P (V4SFmode)
5907 && out_mode == SFmode && out_n == 4
5908 && in_mode == SFmode && in_n == 4)
5909 return rs6000_builtin_decls[ALTIVEC_BUILTIN_VRFIZ];
5910 break;
5911 CASE_CFN_NEARBYINT:
5912 if (VECTOR_UNIT_VSX_P (V2DFmode)
5913 && flag_unsafe_math_optimizations
5914 && out_mode == DFmode && out_n == 2
5915 && in_mode == DFmode && in_n == 2)
5916 return rs6000_builtin_decls[VSX_BUILTIN_XVRDPI];
5917 if (VECTOR_UNIT_VSX_P (V4SFmode)
5918 && flag_unsafe_math_optimizations
5919 && out_mode == SFmode && out_n == 4
5920 && in_mode == SFmode && in_n == 4)
5921 return rs6000_builtin_decls[VSX_BUILTIN_XVRSPI];
5922 break;
5923 CASE_CFN_RINT:
5924 if (VECTOR_UNIT_VSX_P (V2DFmode)
5925 && !flag_trapping_math
5926 && out_mode == DFmode && out_n == 2
5927 && in_mode == DFmode && in_n == 2)
5928 return rs6000_builtin_decls[VSX_BUILTIN_XVRDPIC];
5929 if (VECTOR_UNIT_VSX_P (V4SFmode)
5930 && !flag_trapping_math
5931 && out_mode == SFmode && out_n == 4
5932 && in_mode == SFmode && in_n == 4)
5933 return rs6000_builtin_decls[VSX_BUILTIN_XVRSPIC];
5934 break;
5935 default:
5936 break;
5939 /* Generate calls to libmass if appropriate. */
5940 if (rs6000_veclib_handler)
5941 return rs6000_veclib_handler (combined_fn (fn), type_out, type_in);
5943 return NULL_TREE;
5946 /* Implement TARGET_VECTORIZE_BUILTIN_MD_VECTORIZED_FUNCTION. */
5948 static tree
5949 rs6000_builtin_md_vectorized_function (tree fndecl, tree type_out,
5950 tree type_in)
5952 machine_mode in_mode, out_mode;
5953 int in_n, out_n;
5955 if (TARGET_DEBUG_BUILTIN)
5956 fprintf (stderr, "rs6000_builtin_md_vectorized_function (%s, %s, %s)\n",
5957 IDENTIFIER_POINTER (DECL_NAME (fndecl)),
5958 GET_MODE_NAME (TYPE_MODE (type_out)),
5959 GET_MODE_NAME (TYPE_MODE (type_in)));
5961 if (TREE_CODE (type_out) != VECTOR_TYPE
5962 || TREE_CODE (type_in) != VECTOR_TYPE
5963 || !TARGET_VECTORIZE_BUILTINS)
5964 return NULL_TREE;
5966 out_mode = TYPE_MODE (TREE_TYPE (type_out));
5967 out_n = TYPE_VECTOR_SUBPARTS (type_out);
5968 in_mode = TYPE_MODE (TREE_TYPE (type_in));
5969 in_n = TYPE_VECTOR_SUBPARTS (type_in);
5971 enum rs6000_builtins fn
5972 = (enum rs6000_builtins) DECL_FUNCTION_CODE (fndecl);
5973 switch (fn)
5975 case RS6000_BUILTIN_RSQRTF:
5976 if (VECTOR_UNIT_ALTIVEC_OR_VSX_P (V4SFmode)
5977 && out_mode == SFmode && out_n == 4
5978 && in_mode == SFmode && in_n == 4)
5979 return rs6000_builtin_decls[ALTIVEC_BUILTIN_VRSQRTFP];
5980 break;
5981 case RS6000_BUILTIN_RSQRT:
5982 if (VECTOR_UNIT_VSX_P (V2DFmode)
5983 && out_mode == DFmode && out_n == 2
5984 && in_mode == DFmode && in_n == 2)
5985 return rs6000_builtin_decls[VSX_BUILTIN_RSQRT_2DF];
5986 break;
5987 case RS6000_BUILTIN_RECIPF:
5988 if (VECTOR_UNIT_ALTIVEC_OR_VSX_P (V4SFmode)
5989 && out_mode == SFmode && out_n == 4
5990 && in_mode == SFmode && in_n == 4)
5991 return rs6000_builtin_decls[ALTIVEC_BUILTIN_VRECIPFP];
5992 break;
5993 case RS6000_BUILTIN_RECIP:
5994 if (VECTOR_UNIT_VSX_P (V2DFmode)
5995 && out_mode == DFmode && out_n == 2
5996 && in_mode == DFmode && in_n == 2)
5997 return rs6000_builtin_decls[VSX_BUILTIN_RECIP_V2DF];
5998 break;
5999 default:
6000 break;
6002 return NULL_TREE;
6005 /* Default CPU string for rs6000*_file_start functions. */
6006 static const char *rs6000_default_cpu;
6008 /* Do anything needed at the start of the asm file. */
6010 static void
6011 rs6000_file_start (void)
6013 char buffer[80];
6014 const char *start = buffer;
6015 FILE *file = asm_out_file;
6017 rs6000_default_cpu = TARGET_CPU_DEFAULT;
6019 default_file_start ();
6021 if (flag_verbose_asm)
6023 sprintf (buffer, "\n%s rs6000/powerpc options:", ASM_COMMENT_START);
6025 if (rs6000_default_cpu != 0 && rs6000_default_cpu[0] != '\0')
6027 fprintf (file, "%s --with-cpu=%s", start, rs6000_default_cpu);
6028 start = "";
6031 if (global_options_set.x_rs6000_cpu_index)
6033 fprintf (file, "%s -mcpu=%s", start,
6034 processor_target_table[rs6000_cpu_index].name);
6035 start = "";
6038 if (global_options_set.x_rs6000_tune_index)
6040 fprintf (file, "%s -mtune=%s", start,
6041 processor_target_table[rs6000_tune_index].name);
6042 start = "";
6045 if (PPC405_ERRATUM77)
6047 fprintf (file, "%s PPC405CR_ERRATUM77", start);
6048 start = "";
6051 #ifdef USING_ELFOS_H
6052 switch (rs6000_sdata)
6054 case SDATA_NONE: fprintf (file, "%s -msdata=none", start); start = ""; break;
6055 case SDATA_DATA: fprintf (file, "%s -msdata=data", start); start = ""; break;
6056 case SDATA_SYSV: fprintf (file, "%s -msdata=sysv", start); start = ""; break;
6057 case SDATA_EABI: fprintf (file, "%s -msdata=eabi", start); start = ""; break;
6060 if (rs6000_sdata && g_switch_value)
6062 fprintf (file, "%s -G %d", start,
6063 g_switch_value);
6064 start = "";
6066 #endif
6068 if (*start == '\0')
6069 putc ('\n', file);
6072 #ifdef USING_ELFOS_H
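/* When no -mcpu and no configured default cpu is given, derive a .machine
   directive from the ISA flags: each flag tested below first appeared with
   the named processor (e.g. OPTION_MASK_MODULO with power9, 
   OPTION_MASK_DIRECT_MOVE with power8), so the first hit selects the
   newest applicable machine.  */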
6073 if (!(rs6000_default_cpu && rs6000_default_cpu[0])
6074 && !global_options_set.x_rs6000_cpu_index)
6076 fputs ("\t.machine ", asm_out_file);
6077 if ((rs6000_isa_flags & OPTION_MASK_MODULO) != 0)
6078 fputs ("power9\n", asm_out_file);
6079 else if ((rs6000_isa_flags & OPTION_MASK_DIRECT_MOVE) != 0)
6080 fputs ("power8\n", asm_out_file);
6081 else if ((rs6000_isa_flags & OPTION_MASK_POPCNTD) != 0)
6082 fputs ("power7\n", asm_out_file);
6083 else if ((rs6000_isa_flags & OPTION_MASK_CMPB) != 0)
6084 fputs ("power6\n", asm_out_file);
6085 else if ((rs6000_isa_flags & OPTION_MASK_POPCNTB) != 0)
6086 fputs ("power5\n", asm_out_file);
6087 else if ((rs6000_isa_flags & OPTION_MASK_MFCRF) != 0)
6088 fputs ("power4\n", asm_out_file);
6089 else if ((rs6000_isa_flags & OPTION_MASK_POWERPC64) != 0)
6090 fputs ("ppc64\n", asm_out_file);
6091 else
6092 fputs ("ppc\n", asm_out_file);
6094 #endif
6096 if (DEFAULT_ABI == ABI_ELFv2)
6097 fprintf (file, "\t.abiversion 2\n");
6101 /* Return nonzero if this function is known to have a null epilogue. */
6103 int
6104 direct_return (void)
6106 if (reload_completed)
6108 rs6000_stack_t *info = rs6000_stack_info ();
6110 if (info->first_gp_reg_save == 32
6111 && info->first_fp_reg_save == 64
6112 && info->first_altivec_reg_save == LAST_ALTIVEC_REGNO + 1
6113 && ! info->lr_save_p
6114 && ! info->cr_save_p
6115 && info->vrsave_size == 0
6116 && ! info->push_p)
6117 return 1;
6120 return 0;
6123 /* Return the number of instructions it takes to form a constant in an
6124 integer register. */
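/* For example, 0x7fff takes a single addi, 0x12345678 takes lis+ori (2),
   and a 64-bit constant such as 0x1234567800000000 takes lis+ori to build
   the high part plus a shift (3).  */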
6126 int
6127 num_insns_constant_wide (HOST_WIDE_INT value)
6129 /* signed constant loadable with addi */
6130 if (((unsigned HOST_WIDE_INT) value + 0x8000) < 0x10000)
6131 return 1;
6133 /* constant loadable with addis */
6134 else if ((value & 0xffff) == 0
6135 && (value >> 31 == -1 || value >> 31 == 0))
6136 return 1;
6138 else if (TARGET_POWERPC64)
6140 HOST_WIDE_INT low = ((value & 0xffffffff) ^ 0x80000000) - 0x80000000;
6141 HOST_WIDE_INT high = value >> 31;
6143 if (high == 0 || high == -1)
6144 return 2;
6146 high >>= 1;
6148 if (low == 0)
6149 return num_insns_constant_wide (high) + 1;
6150 else if (high == 0)
6151 return num_insns_constant_wide (low) + 1;
6152 else
6153 return (num_insns_constant_wide (high)
6154 + num_insns_constant_wide (low) + 1);
6157 else
6158 return 2;
6161 int
6162 num_insns_constant (rtx op, machine_mode mode)
6164 HOST_WIDE_INT low, high;
6166 switch (GET_CODE (op))
6168 case CONST_INT:
6169 if ((INTVAL (op) >> 31) != 0 && (INTVAL (op) >> 31) != -1
6170 && rs6000_is_valid_and_mask (op, mode))
6171 return 2;
6172 else
6173 return num_insns_constant_wide (INTVAL (op));
6175 case CONST_WIDE_INT:
6177 int i;
6178 int ins = CONST_WIDE_INT_NUNITS (op) - 1;
6179 for (i = 0; i < CONST_WIDE_INT_NUNITS (op); i++)
6180 ins += num_insns_constant_wide (CONST_WIDE_INT_ELT (op, i));
6181 return ins;
6184 case CONST_DOUBLE:
6185 if (mode == SFmode || mode == SDmode)
6187 long l;
6189 if (DECIMAL_FLOAT_MODE_P (mode))
6190 REAL_VALUE_TO_TARGET_DECIMAL32
6191 (*CONST_DOUBLE_REAL_VALUE (op), l);
6192 else
6193 REAL_VALUE_TO_TARGET_SINGLE (*CONST_DOUBLE_REAL_VALUE (op), l);
6194 return num_insns_constant_wide ((HOST_WIDE_INT) l);
6197 long l[2];
6198 if (DECIMAL_FLOAT_MODE_P (mode))
6199 REAL_VALUE_TO_TARGET_DECIMAL64 (*CONST_DOUBLE_REAL_VALUE (op), l);
6200 else
6201 REAL_VALUE_TO_TARGET_DOUBLE (*CONST_DOUBLE_REAL_VALUE (op), l);
6202 high = l[WORDS_BIG_ENDIAN == 0];
6203 low = l[WORDS_BIG_ENDIAN != 0];
6205 if (TARGET_32BIT)
6206 return (num_insns_constant_wide (low)
6207 + num_insns_constant_wide (high));
6208 else
6210 if ((high == 0 && low >= 0)
6211 || (high == -1 && low < 0))
6212 return num_insns_constant_wide (low);
6214 else if (rs6000_is_valid_and_mask (op, mode))
6215 return 2;
6217 else if (low == 0)
6218 return num_insns_constant_wide (high) + 1;
6220 else
6221 return (num_insns_constant_wide (high)
6222 + num_insns_constant_wide (low) + 1);
6225 default:
6226 gcc_unreachable ();
6230 /* Interpret element ELT of the CONST_VECTOR OP as an integer value.
6231 If the mode of OP is MODE_VECTOR_INT, this simply returns the
6232 corresponding element of the vector, but for V4SFmode and V2SFmode,
6233 the corresponding "float" is interpreted as an SImode integer. */
6235 HOST_WIDE_INT
6236 const_vector_elt_as_int (rtx op, unsigned int elt)
6238 rtx tmp;
6240 /* We can't handle V2DImode and V2DFmode vector constants here yet. */
6241 gcc_assert (GET_MODE (op) != V2DImode
6242 && GET_MODE (op) != V2DFmode);
6244 tmp = CONST_VECTOR_ELT (op, elt);
6245 if (GET_MODE (op) == V4SFmode
6246 || GET_MODE (op) == V2SFmode)
6247 tmp = gen_lowpart (SImode, tmp);
6248 return INTVAL (tmp);
6251 /* Return true if OP can be synthesized with a particular vspltisb, vspltish
6252 or vspltisw instruction. OP is a CONST_VECTOR. Which instruction is used
6253 depends on STEP and COPIES, one of which will be 1. If COPIES > 1,
6254 all items are set to the same value and contain COPIES replicas of the
6255 vsplt's operand; if STEP > 1, one in STEP elements is set to the vsplt's
6256 operand and the others are set to the value of the operand's msb. */
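/* For example, the V4SImode constant { 5, 5, 5, 5 } is matched with STEP 1
   and COPIES 1 (a plain vspltisw 5), while the V8HImode constant
   { 0, 5, 0, 5, 0, 5, 0, 5 } on a big-endian target is matched with
   STEP 2, since it is vspltisw 5 viewed as eight halfwords: every second
   element is 5 and the rest hold its msb (0).  */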
6258 static bool
6259 vspltis_constant (rtx op, unsigned step, unsigned copies)
6261 machine_mode mode = GET_MODE (op);
6262 machine_mode inner = GET_MODE_INNER (mode);
6264 unsigned i;
6265 unsigned nunits;
6266 unsigned bitsize;
6267 unsigned mask;
6269 HOST_WIDE_INT val;
6270 HOST_WIDE_INT splat_val;
6271 HOST_WIDE_INT msb_val;
6273 if (mode == V2DImode || mode == V2DFmode || mode == V1TImode)
6274 return false;
6276 nunits = GET_MODE_NUNITS (mode);
6277 bitsize = GET_MODE_BITSIZE (inner);
6278 mask = GET_MODE_MASK (inner);
6280 val = const_vector_elt_as_int (op, BYTES_BIG_ENDIAN ? nunits - 1 : 0);
6281 splat_val = val;
6282 msb_val = val >= 0 ? 0 : -1;
6284 /* Construct the value to be splatted, if possible.  If not, return false.  */
6285 for (i = 2; i <= copies; i *= 2)
6287 HOST_WIDE_INT small_val;
6288 bitsize /= 2;
6289 small_val = splat_val >> bitsize;
6290 mask >>= bitsize;
6291 if (splat_val != ((HOST_WIDE_INT)
6292 ((unsigned HOST_WIDE_INT) small_val << bitsize)
6293 | (small_val & mask)))
6294 return false;
6295 splat_val = small_val;
6298 /* Check if SPLAT_VAL can really be the operand of a vspltis[bhw]. */
6299 if (EASY_VECTOR_15 (splat_val))
6302 /* Also check if we can splat, and then add the result to itself. Do so if
6303 the value is positive, or if the splat instruction is using OP's mode;
6304 for splat_val < 0, the splat and the add should use the same mode. */
6305 else if (EASY_VECTOR_15_ADD_SELF (splat_val)
6306 && (splat_val >= 0 || (step == 1 && copies == 1)))
6309 /* Also check if we are loading up the most significant bit, which can be
6310 done by loading up -1 and shifting the value left by -1.  */
6311 else if (EASY_VECTOR_MSB (splat_val, inner))
6314 else
6315 return false;
6317 /* Check if VAL is present in every STEP-th element, and the
6318 other elements are filled with its most significant bit. */
6319 for (i = 1; i < nunits; ++i)
6321 HOST_WIDE_INT desired_val;
6322 unsigned elt = BYTES_BIG_ENDIAN ? nunits - 1 - i : i;
6323 if ((i & (step - 1)) == 0)
6324 desired_val = val;
6325 else
6326 desired_val = msb_val;
6328 if (desired_val != const_vector_elt_as_int (op, elt))
6329 return false;
6332 return true;
6335 /* Like vspltis_constant, but allow the value to be shifted left with a VSLDOI
6336 instruction, filling in the bottom elements with 0 or -1.
6338 Return 0 if the constant cannot be generated with VSLDOI. Return positive
6339 for the number of zeroes to shift in, or negative for the number of 0xff
6340 bytes to shift in.
6342 OP is a CONST_VECTOR. */
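/* For example, on a big-endian target the V4SImode constant { 5, 0, 0, 0 }
   yields 12 (vspltisw 5, then shift in 12 zero bytes), while
   { 7, 7, -1, -1 } yields -8 (shift in 8 bytes of 0xff).  */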
6344 int
6345 vspltis_shifted (rtx op)
6347 machine_mode mode = GET_MODE (op);
6348 machine_mode inner = GET_MODE_INNER (mode);
6350 unsigned i, j;
6351 unsigned nunits;
6352 unsigned mask;
6354 HOST_WIDE_INT val;
6356 if (mode != V16QImode && mode != V8HImode && mode != V4SImode)
6357 return 0;
6359 /* We need to create pseudo registers to do the shift, so don't recognize
6360 shift vector constants after reload. */
6361 if (!can_create_pseudo_p ())
6362 return 0;
6364 nunits = GET_MODE_NUNITS (mode);
6365 mask = GET_MODE_MASK (inner);
6367 val = const_vector_elt_as_int (op, BYTES_BIG_ENDIAN ? 0 : nunits - 1);
6369 /* Check if the value can really be the operand of a vspltis[bhw]. */
6370 if (EASY_VECTOR_15 (val))
6373 /* Also check if we are loading up the most significant bit which can be done
6374 by loading up -1 and shifting the value left by -1. */
6375 else if (EASY_VECTOR_MSB (val, inner))
6378 else
6379 return 0;
6381 /* Check if VAL is present in every STEP-th element until we find elements
6382 that are 0 or all 1 bits. */
6383 for (i = 1; i < nunits; ++i)
6385 unsigned elt = BYTES_BIG_ENDIAN ? i : nunits - 1 - i;
6386 HOST_WIDE_INT elt_val = const_vector_elt_as_int (op, elt);
6388 /* If the value isn't the splat value, check for the remaining elements
6389 being 0/-1. */
6390 if (val != elt_val)
6392 if (elt_val == 0)
6394 for (j = i+1; j < nunits; ++j)
6396 unsigned elt2 = BYTES_BIG_ENDIAN ? j : nunits - 1 - j;
6397 if (const_vector_elt_as_int (op, elt2) != 0)
6398 return 0;
6401 return (nunits - i) * GET_MODE_SIZE (inner);
6404 else if ((elt_val & mask) == mask)
6406 for (j = i+1; j < nunits; ++j)
6408 unsigned elt2 = BYTES_BIG_ENDIAN ? j : nunits - 1 - j;
6409 if ((const_vector_elt_as_int (op, elt2) & mask) != mask)
6410 return 0;
6413 return -((nunits - i) * GET_MODE_SIZE (inner));
6416 else
6417 return 0;
6421 /* If all elements are equal, we don't need to do VSLDOI.  */
6422 return 0;
6426 /* Return true if OP is of the given MODE and can be synthesized
6427 with a vspltisb, vspltish or vspltisw. */
6429 bool
6430 easy_altivec_constant (rtx op, machine_mode mode)
6432 unsigned step, copies;
6434 if (mode == VOIDmode)
6435 mode = GET_MODE (op);
6436 else if (mode != GET_MODE (op))
6437 return false;
6439 /* V2DI/V2DF was added with VSX. Only allow 0 and all 1's as easy
6440 constants. */
6441 if (mode == V2DFmode)
6442 return zero_constant (op, mode);
6444 else if (mode == V2DImode)
6446 if (GET_CODE (CONST_VECTOR_ELT (op, 0)) != CONST_INT
6447 || GET_CODE (CONST_VECTOR_ELT (op, 1)) != CONST_INT)
6448 return false;
6450 if (zero_constant (op, mode))
6451 return true;
6453 if (INTVAL (CONST_VECTOR_ELT (op, 0)) == -1
6454 && INTVAL (CONST_VECTOR_ELT (op, 1)) == -1)
6455 return true;
6457 return false;
6460 /* V1TImode is a special container for TImode. Ignore for now. */
6461 else if (mode == V1TImode)
6462 return false;
6464 /* Start with a vspltisw. */
6465 step = GET_MODE_NUNITS (mode) / 4;
6466 copies = 1;
6468 if (vspltis_constant (op, step, copies))
6469 return true;
6471 /* Then try with a vspltish. */
6472 if (step == 1)
6473 copies <<= 1;
6474 else
6475 step >>= 1;
6477 if (vspltis_constant (op, step, copies))
6478 return true;
6480 /* And finally a vspltisb. */
6481 if (step == 1)
6482 copies <<= 1;
6483 else
6484 step >>= 1;
6486 if (vspltis_constant (op, step, copies))
6487 return true;
6489 if (vspltis_shifted (op) != 0)
6490 return true;
6492 return false;
6495 /* Generate a VEC_DUPLICATE representing a vspltis[bhw] instruction whose
6496 result is OP. Abort if it is not possible. */
6498 rtx
6499 gen_easy_altivec_constant (rtx op)
6501 machine_mode mode = GET_MODE (op);
6502 int nunits = GET_MODE_NUNITS (mode);
6503 rtx val = CONST_VECTOR_ELT (op, BYTES_BIG_ENDIAN ? nunits - 1 : 0);
6504 unsigned step = nunits / 4;
6505 unsigned copies = 1;
6507 /* Start with a vspltisw. */
6508 if (vspltis_constant (op, step, copies))
6509 return gen_rtx_VEC_DUPLICATE (V4SImode, gen_lowpart (SImode, val));
6511 /* Then try with a vspltish. */
6512 if (step == 1)
6513 copies <<= 1;
6514 else
6515 step >>= 1;
6517 if (vspltis_constant (op, step, copies))
6518 return gen_rtx_VEC_DUPLICATE (V8HImode, gen_lowpart (HImode, val));
6520 /* And finally a vspltisb. */
6521 if (step == 1)
6522 copies <<= 1;
6523 else
6524 step >>= 1;
6526 if (vspltis_constant (op, step, copies))
6527 return gen_rtx_VEC_DUPLICATE (V16QImode, gen_lowpart (QImode, val));
6529 gcc_unreachable ();
6532 /* Return true if OP is of the given MODE and can be synthesized with ISA 3.0
6533 instructions (xxspltib, vupkhsb/vextsb2w/vextb2d).
6535 Return the number of instructions needed (1 or 2) into the address pointed
6536 via NUM_INSNS_PTR.
6538 Return the constant that is being split via CONSTANT_PTR. */
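/* For example, a V8HImode splat of 100 returns true with *NUM_INSNS_PTR = 2
   (an xxspltib followed by a vupkhsb sign extension) and *CONSTANT_PTR =
   100, since 100 fits in a signed byte but is out of vspltish range; the
   same splat in V16QImode needs only the xxspltib itself.  */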
6540 bool
6541 xxspltib_constant_p (rtx op,
6542 machine_mode mode,
6543 int *num_insns_ptr,
6544 int *constant_ptr)
6546 size_t nunits = GET_MODE_NUNITS (mode);
6547 size_t i;
6548 HOST_WIDE_INT value;
6549 rtx element;
6551 /* Set the returned values to out-of-bounds values.  */
6552 *num_insns_ptr = -1;
6553 *constant_ptr = 256;
6555 if (!TARGET_P9_VECTOR)
6556 return false;
6558 if (mode == VOIDmode)
6559 mode = GET_MODE (op);
6561 else if (mode != GET_MODE (op) && GET_MODE (op) != VOIDmode)
6562 return false;
6564 /* Handle (vec_duplicate <constant>). */
6565 if (GET_CODE (op) == VEC_DUPLICATE)
6567 if (mode != V16QImode && mode != V8HImode && mode != V4SImode
6568 && mode != V2DImode)
6569 return false;
6571 element = XEXP (op, 0);
6572 if (!CONST_INT_P (element))
6573 return false;
6575 value = INTVAL (element);
6576 if (!IN_RANGE (value, -128, 127))
6577 return false;
6580 /* Handle (const_vector [...]). */
6581 else if (GET_CODE (op) == CONST_VECTOR)
6583 if (mode != V16QImode && mode != V8HImode && mode != V4SImode
6584 && mode != V2DImode)
6585 return false;
6587 element = CONST_VECTOR_ELT (op, 0);
6588 if (!CONST_INT_P (element))
6589 return false;
6591 value = INTVAL (element);
6592 if (!IN_RANGE (value, -128, 127))
6593 return false;
6595 for (i = 1; i < nunits; i++)
6597 element = CONST_VECTOR_ELT (op, i);
6598 if (!CONST_INT_P (element))
6599 return false;
6601 if (value != INTVAL (element))
6602 return false;
6606 /* Handle integer constants being loaded into the upper part of the VSX
6607 register as a scalar. If the value isn't 0/-1, only allow it if the mode
6608 can go in Altivec registers.  Prefer VSPLTISW/VUPKHSW over XXSPLTIB.  */
6609 else if (CONST_INT_P (op))
6611 if (!SCALAR_INT_MODE_P (mode))
6612 return false;
6614 value = INTVAL (op);
6615 if (!IN_RANGE (value, -128, 127))
6616 return false;
6618 if (!IN_RANGE (value, -1, 0))
6620 if (!(reg_addr[mode].addr_mask[RELOAD_REG_VMX] & RELOAD_REG_VALID))
6621 return false;
6623 if (EASY_VECTOR_15 (value))
6624 return false;
6628 else
6629 return false;
6631 /* See if we could generate vspltisw/vspltish directly instead of xxspltib +
6632 sign extend. Special case 0/-1 to allow getting any VSX register instead
6633 of an Altivec register. */
6634 if ((mode == V4SImode || mode == V8HImode) && !IN_RANGE (value, -1, 0)
6635 && EASY_VECTOR_15 (value))
6636 return false;
6638 /* Return # of instructions and the constant byte for XXSPLTIB. */
6639 if (mode == V16QImode)
6640 *num_insns_ptr = 1;
6642 else if (IN_RANGE (value, -1, 0))
6643 *num_insns_ptr = 1;
6645 else
6646 *num_insns_ptr = 2;
6648 *constant_ptr = (int) value;
6649 return true;
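/* Output the assembly for moving the easy vector constant OPERANDS[1] into
   vector register OPERANDS[0].  Returning "#" makes final split the insn
   via the matching post-reload splitter instead of emitting it directly.  */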
6652 const char *
6653 output_vec_const_move (rtx *operands)
6655 int cst, cst2, shift;
6656 machine_mode mode;
6657 rtx dest, vec;
6659 dest = operands[0];
6660 vec = operands[1];
6661 mode = GET_MODE (dest);
6663 if (TARGET_VSX)
6665 bool dest_vmx_p = ALTIVEC_REGNO_P (REGNO (dest));
6666 int xxspltib_value = 256;
6667 int num_insns = -1;
6669 if (zero_constant (vec, mode))
6671 if (TARGET_P9_VECTOR)
6672 return "xxspltib %x0,0";
6674 else if (dest_vmx_p)
6675 return "vspltisw %0,0";
6677 else
6678 return "xxlxor %x0,%x0,%x0";
6681 if (all_ones_constant (vec, mode))
6683 if (TARGET_P9_VECTOR)
6684 return "xxspltib %x0,255";
6686 else if (dest_vmx_p)
6687 return "vspltisw %0,-1";
6689 else if (TARGET_P8_VECTOR)
6690 return "xxlorc %x0,%x0,%x0";
6692 else
6693 gcc_unreachable ();
6696 if (TARGET_P9_VECTOR
6697 && xxspltib_constant_p (vec, mode, &num_insns, &xxspltib_value))
6699 if (num_insns == 1)
6701 operands[2] = GEN_INT (xxspltib_value & 0xff);
6702 return "xxspltib %x0,%2";
6705 return "#";
6709 if (TARGET_ALTIVEC)
6711 rtx splat_vec;
6713 gcc_assert (ALTIVEC_REGNO_P (REGNO (dest)));
6714 if (zero_constant (vec, mode))
6715 return "vspltisw %0,0";
6717 if (all_ones_constant (vec, mode))
6718 return "vspltisw %0,-1";
6720 /* Do we need to construct a value using VSLDOI? */
6721 shift = vspltis_shifted (vec);
6722 if (shift != 0)
6723 return "#";
6725 splat_vec = gen_easy_altivec_constant (vec);
6726 gcc_assert (GET_CODE (splat_vec) == VEC_DUPLICATE);
6727 operands[1] = XEXP (splat_vec, 0);
6728 if (!EASY_VECTOR_15 (INTVAL (operands[1])))
6729 return "#";
6731 switch (GET_MODE (splat_vec))
6733 case V4SImode:
6734 return "vspltisw %0,%1";
6736 case V8HImode:
6737 return "vspltish %0,%1";
6739 case V16QImode:
6740 return "vspltisb %0,%1";
6742 default:
6743 gcc_unreachable ();
6747 gcc_assert (TARGET_SPE);
6749 /* Vector constant 0 is handled as a splitter of V2SI, and in the
6750 pattern of V1DI, V4HI, and V2SF.
6752 FIXME: We should probably return # and add post reload
6753 splitters for these, but this way is so easy ;-). */
6754 cst = INTVAL (CONST_VECTOR_ELT (vec, 0));
6755 cst2 = INTVAL (CONST_VECTOR_ELT (vec, 1));
6756 operands[1] = CONST_VECTOR_ELT (vec, 0);
6757 operands[2] = CONST_VECTOR_ELT (vec, 1);
6758 if (cst == cst2)
6759 return "li %0,%1\n\tevmergelo %0,%0,%0";
6760 else if (WORDS_BIG_ENDIAN)
6761 return "li %0,%1\n\tevmergelo %0,%0,%0\n\tli %0,%2";
6762 else
6763 return "li %0,%2\n\tevmergelo %0,%0,%0\n\tli %0,%1";
6766 /* Initialize TARGET of vector PAIRED to VALS. */
6768 void
6769 paired_expand_vector_init (rtx target, rtx vals)
6771 machine_mode mode = GET_MODE (target);
6772 int n_elts = GET_MODE_NUNITS (mode);
6773 int n_var = 0;
6774 rtx x, new_rtx, tmp, constant_op, op1, op2;
6775 int i;
6777 for (i = 0; i < n_elts; ++i)
6779 x = XVECEXP (vals, 0, i);
6780 if (!(CONST_SCALAR_INT_P (x) || CONST_DOUBLE_P (x) || CONST_FIXED_P (x)))
6781 ++n_var;
6783 if (n_var == 0)
6785 /* Load from constant pool. */
6786 emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
6787 return;
6790 if (n_var == 2)
6792 /* The vector is initialized only with non-constants. */
6793 new_rtx = gen_rtx_VEC_CONCAT (V2SFmode, XVECEXP (vals, 0, 0),
6794 XVECEXP (vals, 0, 1));
6796 emit_move_insn (target, new_rtx);
6797 return;
6800 /* One field is non-constant and the other one is a constant. Load the
6801 constant from the constant pool and use the ps_merge instruction to
6802 construct the whole vector. */
6803 op1 = XVECEXP (vals, 0, 0);
6804 op2 = XVECEXP (vals, 0, 1);
6806 constant_op = (CONSTANT_P (op1)) ? op1 : op2;
6808 tmp = gen_reg_rtx (GET_MODE (constant_op));
6809 emit_move_insn (tmp, constant_op);
6811 if (CONSTANT_P (op1))
6812 new_rtx = gen_rtx_VEC_CONCAT (V2SFmode, tmp, op2);
6813 else
6814 new_rtx = gen_rtx_VEC_CONCAT (V2SFmode, op1, tmp);
6816 emit_move_insn (target, new_rtx);
6819 void
6820 paired_expand_vector_move (rtx operands[])
6822 rtx op0 = operands[0], op1 = operands[1];
6824 emit_move_insn (op0, op1);
6827 /* Emit vector compare for code RCODE. DEST is destination, OP1 and
6828 OP2 are two VEC_COND_EXPR operands, CC_OP0 and CC_OP1 are the two
6829 operands for the relation operation COND. This is a recursive
6830 function. */
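/* Most codes are lowered to the GE form, which subtracts the two compare
   operands and selects on the sign of the difference; the other codes
   recurse with the selection and/or compare operands swapped, and EQ is
   special-cased below.  */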
6832 static void
6833 paired_emit_vector_compare (enum rtx_code rcode,
6834 rtx dest, rtx op0, rtx op1,
6835 rtx cc_op0, rtx cc_op1)
6837 rtx tmp = gen_reg_rtx (V2SFmode);
6838 rtx tmp1, max, min;
6840 gcc_assert (TARGET_PAIRED_FLOAT);
6841 gcc_assert (GET_MODE (op0) == GET_MODE (op1));
6843 switch (rcode)
6845 case LT:
6846 case LTU:
6847 paired_emit_vector_compare (GE, dest, op1, op0, cc_op0, cc_op1);
6848 return;
6849 case GE:
6850 case GEU:
6851 emit_insn (gen_subv2sf3 (tmp, cc_op0, cc_op1));
6852 emit_insn (gen_selv2sf4 (dest, tmp, op0, op1, CONST0_RTX (SFmode)));
6853 return;
6854 case LE:
6855 case LEU:
6856 paired_emit_vector_compare (GE, dest, op0, op1, cc_op1, cc_op0);
6857 return;
6858 case GT:
6859 paired_emit_vector_compare (LE, dest, op1, op0, cc_op0, cc_op1);
6860 return;
6861 case EQ:
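/* EQ is computed as min (a, b) - max (a, b) >= 0: the difference is
   zero exactly when the operands compare equal, negative otherwise.  */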
6862 tmp1 = gen_reg_rtx (V2SFmode);
6863 max = gen_reg_rtx (V2SFmode);
6864 min = gen_reg_rtx (V2SFmode);
6867 emit_insn (gen_subv2sf3 (tmp, cc_op0, cc_op1));
6868 emit_insn (gen_selv2sf4
6869 (max, tmp, cc_op0, cc_op1, CONST0_RTX (SFmode)));
6870 emit_insn (gen_subv2sf3 (tmp, cc_op1, cc_op0));
6871 emit_insn (gen_selv2sf4
6872 (min, tmp, cc_op0, cc_op1, CONST0_RTX (SFmode)));
6873 emit_insn (gen_subv2sf3 (tmp1, min, max));
6874 emit_insn (gen_selv2sf4 (dest, tmp1, op0, op1, CONST0_RTX (SFmode)));
6875 return;
6876 case NE:
6877 paired_emit_vector_compare (EQ, dest, op1, op0, cc_op0, cc_op1);
6878 return;
6879 case UNLE:
6880 paired_emit_vector_compare (LE, dest, op1, op0, cc_op0, cc_op1);
6881 return;
6882 case UNLT:
6883 paired_emit_vector_compare (LT, dest, op1, op0, cc_op0, cc_op1);
6884 return;
6885 case UNGE:
6886 paired_emit_vector_compare (GE, dest, op1, op0, cc_op0, cc_op1);
6887 return;
6888 case UNGT:
6889 paired_emit_vector_compare (GT, dest, op1, op0, cc_op0, cc_op1);
6890 return;
6891 default:
6892 gcc_unreachable ();
6895 return;
6898 /* Emit vector conditional expression.
6899 DEST is destination. OP1 and OP2 are two VEC_COND_EXPR operands.
6900 CC_OP0 and CC_OP1 are the two operands for the relation operation COND. */
6902 int
6903 paired_emit_vector_cond_expr (rtx dest, rtx op1, rtx op2,
6904 rtx cond, rtx cc_op0, rtx cc_op1)
6906 enum rtx_code rcode = GET_CODE (cond);
6908 if (!TARGET_PAIRED_FLOAT)
6909 return 0;
6911 paired_emit_vector_compare (rcode, dest, op1, op2, cc_op0, cc_op1);
6913 return 1;
6916 /* Initialize vector TARGET to VALS. */
6918 void
6919 rs6000_expand_vector_init (rtx target, rtx vals)
6921 machine_mode mode = GET_MODE (target);
6922 machine_mode inner_mode = GET_MODE_INNER (mode);
6923 int n_elts = GET_MODE_NUNITS (mode);
6924 int n_var = 0, one_var = -1;
6925 bool all_same = true, all_const_zero = true;
6926 rtx x, mem;
6927 int i;
6929 for (i = 0; i < n_elts; ++i)
6931 x = XVECEXP (vals, 0, i);
6932 if (!(CONST_SCALAR_INT_P (x) || CONST_DOUBLE_P (x) || CONST_FIXED_P (x)))
6933 ++n_var, one_var = i;
6934 else if (x != CONST0_RTX (inner_mode))
6935 all_const_zero = false;
6937 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
6938 all_same = false;
6941 if (n_var == 0)
6943 rtx const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0));
6944 bool int_vector_p = (GET_MODE_CLASS (mode) == MODE_VECTOR_INT);
6945 if ((int_vector_p || TARGET_VSX) && all_const_zero)
6947 /* Zero register. */
6948 emit_move_insn (target, CONST0_RTX (mode));
6949 return;
6951 else if (int_vector_p && easy_vector_constant (const_vec, mode))
6953 /* Splat immediate. */
6954 emit_insn (gen_rtx_SET (target, const_vec));
6955 return;
6957 else
6959 /* Load from constant pool. */
6960 emit_move_insn (target, const_vec);
6961 return;
6965 /* Double word values on VSX can use xxpermdi or lxvdsx. */
6966 if (VECTOR_MEM_VSX_P (mode) && (mode == V2DFmode || mode == V2DImode))
6968 rtx op[2];
6969 size_t i;
6970 size_t num_elements = all_same ? 1 : 2;
6971 for (i = 0; i < num_elements; i++)
6973 op[i] = XVECEXP (vals, 0, i);
6974 /* Just in case there is a SUBREG with a smaller mode, do a
6975 conversion. */
6976 if (GET_MODE (op[i]) != inner_mode)
6978 rtx tmp = gen_reg_rtx (inner_mode);
6979 convert_move (tmp, op[i], 0);
6980 op[i] = tmp;
6982 /* Allow load with splat double word. */
6983 else if (MEM_P (op[i]))
6985 if (!all_same)
6986 op[i] = force_reg (inner_mode, op[i]);
6988 else if (!REG_P (op[i]))
6989 op[i] = force_reg (inner_mode, op[i]);
6992 if (all_same)
6994 if (mode == V2DFmode)
6995 emit_insn (gen_vsx_splat_v2df (target, op[0]));
6996 else
6997 emit_insn (gen_vsx_splat_v2di (target, op[0]));
6999 else
7001 if (mode == V2DFmode)
7002 emit_insn (gen_vsx_concat_v2df (target, op[0], op[1]));
7003 else
7004 emit_insn (gen_vsx_concat_v2di (target, op[0], op[1]));
7006 return;
7009 /* Special case initializing vector int if we are on 64-bit systems with
7010 direct move or we have the ISA 3.0 instructions. */
7011 if (mode == V4SImode && VECTOR_MEM_VSX_P (V4SImode)
7012 && TARGET_DIRECT_MOVE_64BIT)
7014 if (all_same)
7016 rtx element0 = XVECEXP (vals, 0, 0);
7017 if (MEM_P (element0))
7018 element0 = rs6000_address_for_fpconvert (element0);
7019 else
7020 element0 = force_reg (SImode, element0);
7022 if (TARGET_P9_VECTOR)
7023 emit_insn (gen_vsx_splat_v4si (target, element0));
7024 else
7026 rtx tmp = gen_reg_rtx (DImode);
7027 emit_insn (gen_zero_extendsidi2 (tmp, element0));
7028 emit_insn (gen_vsx_splat_v4si_di (target, tmp));
7030 return;
7032 else
7034 rtx elements[4];
7035 size_t i;
7037 for (i = 0; i < 4; i++)
7039 elements[i] = XVECEXP (vals, 0, i);
7040 if (!CONST_INT_P (elements[i]) && !REG_P (elements[i]))
7041 elements[i] = copy_to_mode_reg (SImode, elements[i]);
7044 emit_insn (gen_vsx_init_v4si (target, elements[0], elements[1],
7045 elements[2], elements[3]));
7046 return;
7050 /* With single-precision floating point on VSX, we know that internally single
7051 precision is actually represented as a double, and either make 2 V2DF
7052 vectors, and convert these vectors to single precision, or do one
7053 conversion, and splat the result to the other elements. */
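/* In the non-splat ISA 2.07 path below, the two V2DF halves are built from
   elements { 0, 2 } and { 1, 3 }; xvcvdpsp leaves each converted float in
   an even word slot, so vmrgew interleaves the two results straight back
   into element order.  */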
7054 if (mode == V4SFmode && VECTOR_MEM_VSX_P (V4SFmode))
7056 if (all_same)
7058 rtx element0 = XVECEXP (vals, 0, 0);
7060 if (TARGET_P9_VECTOR)
7062 if (MEM_P (element0))
7063 element0 = rs6000_address_for_fpconvert (element0);
7065 emit_insn (gen_vsx_splat_v4sf (target, element0));
7068 else
7070 rtx freg = gen_reg_rtx (V4SFmode);
7071 rtx sreg = force_reg (SFmode, element0);
7072 rtx cvt = (TARGET_XSCVDPSPN
7073 ? gen_vsx_xscvdpspn_scalar (freg, sreg)
7074 : gen_vsx_xscvdpsp_scalar (freg, sreg));
7076 emit_insn (cvt);
7077 emit_insn (gen_vsx_xxspltw_v4sf_direct (target, freg,
7078 const0_rtx));
7081 else
7083 rtx dbl_even = gen_reg_rtx (V2DFmode);
7084 rtx dbl_odd = gen_reg_rtx (V2DFmode);
7085 rtx flt_even = gen_reg_rtx (V4SFmode);
7086 rtx flt_odd = gen_reg_rtx (V4SFmode);
7087 rtx op0 = force_reg (SFmode, XVECEXP (vals, 0, 0));
7088 rtx op1 = force_reg (SFmode, XVECEXP (vals, 0, 1));
7089 rtx op2 = force_reg (SFmode, XVECEXP (vals, 0, 2));
7090 rtx op3 = force_reg (SFmode, XVECEXP (vals, 0, 3));
7092 /* Use VMRGEW if we can instead of doing a permute. */
7093 if (TARGET_P8_VECTOR)
7095 emit_insn (gen_vsx_concat_v2sf (dbl_even, op0, op2));
7096 emit_insn (gen_vsx_concat_v2sf (dbl_odd, op1, op3));
7097 emit_insn (gen_vsx_xvcvdpsp (flt_even, dbl_even));
7098 emit_insn (gen_vsx_xvcvdpsp (flt_odd, dbl_odd));
7099 if (BYTES_BIG_ENDIAN)
7100 emit_insn (gen_p8_vmrgew_v4sf_direct (target, flt_even, flt_odd));
7101 else
7102 emit_insn (gen_p8_vmrgew_v4sf_direct (target, flt_odd, flt_even));
7104 else
7106 emit_insn (gen_vsx_concat_v2sf (dbl_even, op0, op1));
7107 emit_insn (gen_vsx_concat_v2sf (dbl_odd, op2, op3));
7108 emit_insn (gen_vsx_xvcvdpsp (flt_even, dbl_even));
7109 emit_insn (gen_vsx_xvcvdpsp (flt_odd, dbl_odd));
7110 rs6000_expand_extract_even (target, flt_even, flt_odd);
7113 return;
7116 /* Special case initializing vector short/char that are splats if we are on
7117 64-bit systems with direct move. */
7118 if (all_same && TARGET_DIRECT_MOVE_64BIT
7119 && (mode == V16QImode || mode == V8HImode))
7121 rtx op0 = XVECEXP (vals, 0, 0);
7122 rtx di_tmp = gen_reg_rtx (DImode);
7124 if (!REG_P (op0))
7125 op0 = force_reg (GET_MODE_INNER (mode), op0);
7127 if (mode == V16QImode)
7129 emit_insn (gen_zero_extendqidi2 (di_tmp, op0));
7130 emit_insn (gen_vsx_vspltb_di (target, di_tmp));
7131 return;
7134 if (mode == V8HImode)
7136 emit_insn (gen_zero_extendhidi2 (di_tmp, op0));
7137 emit_insn (gen_vsx_vsplth_di (target, di_tmp));
7138 return;
7142 /* Store value to stack temp. Load vector element. Splat. However, splat
7143 of 64-bit items is not supported on Altivec. */
7144 if (all_same && GET_MODE_SIZE (inner_mode) <= 4)
7146 mem = assign_stack_temp (mode, GET_MODE_SIZE (inner_mode));
7147 emit_move_insn (adjust_address_nv (mem, inner_mode, 0),
7148 XVECEXP (vals, 0, 0));
7149 x = gen_rtx_UNSPEC (VOIDmode,
7150 gen_rtvec (1, const0_rtx), UNSPEC_LVE);
7151 emit_insn (gen_rtx_PARALLEL (VOIDmode,
7152 gen_rtvec (2,
7153 gen_rtx_SET (target, mem),
7154 x)));
7155 x = gen_rtx_VEC_SELECT (inner_mode, target,
7156 gen_rtx_PARALLEL (VOIDmode,
7157 gen_rtvec (1, const0_rtx)));
7158 emit_insn (gen_rtx_SET (target, gen_rtx_VEC_DUPLICATE (mode, x)));
7159 return;
7162 /* One field is non-constant. Load constant then overwrite
7163 varying field. */
7164 if (n_var == 1)
7166 rtx copy = copy_rtx (vals);
7168 /* Load constant part of vector, substitute neighboring value for
7169 varying element. */
7170 XVECEXP (copy, 0, one_var) = XVECEXP (vals, 0, (one_var + 1) % n_elts);
7171 rs6000_expand_vector_init (target, copy);
7173 /* Insert variable. */
7174 rs6000_expand_vector_set (target, XVECEXP (vals, 0, one_var), one_var);
7175 return;
7178 /* Construct the vector in memory one field at a time
7179 and load the whole vector. */
7180 mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
7181 for (i = 0; i < n_elts; i++)
7182 emit_move_insn (adjust_address_nv (mem, inner_mode,
7183 i * GET_MODE_SIZE (inner_mode)),
7184 XVECEXP (vals, 0, i));
7185 emit_move_insn (target, mem);
7188 /* Set field ELT of TARGET to VAL. */
7190 void
7191 rs6000_expand_vector_set (rtx target, rtx val, int elt)
7193 machine_mode mode = GET_MODE (target);
7194 machine_mode inner_mode = GET_MODE_INNER (mode);
7195 rtx reg = gen_reg_rtx (mode);
7196 rtx mask, mem, x;
7197 int width = GET_MODE_SIZE (inner_mode);
7198 int i;
7200 val = force_reg (GET_MODE (val), val);
7202 if (VECTOR_MEM_VSX_P (mode))
7204 rtx insn = NULL_RTX;
7205 rtx elt_rtx = GEN_INT (elt);
7207 if (mode == V2DFmode)
7208 insn = gen_vsx_set_v2df (target, target, val, elt_rtx);
7210 else if (mode == V2DImode)
7211 insn = gen_vsx_set_v2di (target, target, val, elt_rtx);
7213 else if (TARGET_P9_VECTOR && TARGET_VSX_SMALL_INTEGER
7214 && TARGET_UPPER_REGS_DI && TARGET_POWERPC64)
7216 if (mode == V4SImode)
7217 insn = gen_vsx_set_v4si_p9 (target, target, val, elt_rtx);
7218 else if (mode == V8HImode)
7219 insn = gen_vsx_set_v8hi_p9 (target, target, val, elt_rtx);
7220 else if (mode == V16QImode)
7221 insn = gen_vsx_set_v16qi_p9 (target, target, val, elt_rtx);
7224 if (insn)
7226 emit_insn (insn);
7227 return;
7231 /* Simplify setting single element vectors like V1TImode. */
7232 if (GET_MODE_SIZE (mode) == GET_MODE_SIZE (inner_mode) && elt == 0)
7234 emit_move_insn (target, gen_lowpart (mode, val));
7235 return;
7238 /* Load single variable value. */
7239 mem = assign_stack_temp (mode, GET_MODE_SIZE (inner_mode));
7240 emit_move_insn (adjust_address_nv (mem, inner_mode, 0), val);
7241 x = gen_rtx_UNSPEC (VOIDmode,
7242 gen_rtvec (1, const0_rtx), UNSPEC_LVE);
7243 emit_insn (gen_rtx_PARALLEL (VOIDmode,
7244 gen_rtvec (2,
7245 gen_rtx_SET (reg, mem),
7246 x)));
7248 /* Linear sequence. */
7249 mask = gen_rtx_PARALLEL (V16QImode, rtvec_alloc (16));
7250 for (i = 0; i < 16; ++i)
7251 XVECEXP (mask, 0, i) = GEN_INT (i);
7253 /* Set permute mask to insert element into target. */
7254 for (i = 0; i < width; ++i)
7255 XVECEXP (mask, 0, elt*width + i)
7256 = GEN_INT (i + 0x10);
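/* As an illustration, inserting element 2 of a V4SImode vector (WIDTH == 4)
   produces the selector bytes { 0..7, 0x10..0x13, 12..15 }, so in the
   big-endian permute below bytes 8-11 of the result come from REG. */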
7257 x = gen_rtx_CONST_VECTOR (V16QImode, XVEC (mask, 0));
7259 if (BYTES_BIG_ENDIAN)
7260 x = gen_rtx_UNSPEC (mode,
7261 gen_rtvec (3, target, reg,
7262 force_reg (V16QImode, x)),
7263 UNSPEC_VPERM);
7264 else
7266 if (TARGET_P9_VECTOR)
7267 x = gen_rtx_UNSPEC (mode,
7268 gen_rtvec (3, target, reg,
7269 force_reg (V16QImode, x)),
7270 UNSPEC_VPERMR);
7271 else
7273 /* Invert selector. We prefer to generate VNAND on P8 so
7274 that future fusion opportunities can kick in, but must
7275 generate VNOR elsewhere. */
7276 rtx notx = gen_rtx_NOT (V16QImode, force_reg (V16QImode, x));
7277 rtx iorx = (TARGET_P8_VECTOR
7278 ? gen_rtx_IOR (V16QImode, notx, notx)
7279 : gen_rtx_AND (V16QImode, notx, notx));
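/* Both forms compute ~x: the (~x | ~x) form matches the vnand pattern,
   while (~x & ~x) matches vnor. */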
7280 rtx tmp = gen_reg_rtx (V16QImode);
7281 emit_insn (gen_rtx_SET (tmp, iorx));
7283 /* Permute with operands reversed and adjusted selector. */
7284 x = gen_rtx_UNSPEC (mode, gen_rtvec (3, reg, target, tmp),
7285 UNSPEC_VPERM);
7289 emit_insn (gen_rtx_SET (target, x));
7292 /* Extract field ELT from VEC into TARGET. */
7294 void
7295 rs6000_expand_vector_extract (rtx target, rtx vec, rtx elt)
7297 machine_mode mode = GET_MODE (vec);
7298 machine_mode inner_mode = GET_MODE_INNER (mode);
7299 rtx mem;
7301 if (VECTOR_MEM_VSX_P (mode) && CONST_INT_P (elt))
7303 switch (mode)
7305 default:
7306 break;
7307 case V1TImode:
7308 gcc_assert (INTVAL (elt) == 0 && inner_mode == TImode);
7309 emit_move_insn (target, gen_lowpart (TImode, vec));
7310 break;
7311 case V2DFmode:
7312 emit_insn (gen_vsx_extract_v2df (target, vec, elt));
7313 return;
7314 case V2DImode:
7315 emit_insn (gen_vsx_extract_v2di (target, vec, elt));
7316 return;
7317 case V4SFmode:
7318 emit_insn (gen_vsx_extract_v4sf (target, vec, elt));
7319 return;
7320 case V16QImode:
7321 if (TARGET_DIRECT_MOVE_64BIT)
7323 emit_insn (gen_vsx_extract_v16qi (target, vec, elt));
7324 return;
7326 else
7327 break;
7328 case V8HImode:
7329 if (TARGET_DIRECT_MOVE_64BIT)
7331 emit_insn (gen_vsx_extract_v8hi (target, vec, elt));
7332 return;
7334 else
7335 break;
7336 case V4SImode:
7337 if (TARGET_DIRECT_MOVE_64BIT)
7339 emit_insn (gen_vsx_extract_v4si (target, vec, elt));
7340 return;
7342 break;
7345 else if (VECTOR_MEM_VSX_P (mode) && !CONST_INT_P (elt)
7346 && TARGET_DIRECT_MOVE_64BIT)
7348 if (GET_MODE (elt) != DImode)
7350 rtx tmp = gen_reg_rtx (DImode);
7351 convert_move (tmp, elt, 0);
7352 elt = tmp;
7354 else if (!REG_P (elt))
7355 elt = force_reg (DImode, elt);
7357 switch (mode)
7359 case V2DFmode:
7360 emit_insn (gen_vsx_extract_v2df_var (target, vec, elt));
7361 return;
7363 case V2DImode:
7364 emit_insn (gen_vsx_extract_v2di_var (target, vec, elt));
7365 return;
7367 case V4SFmode:
7368 if (TARGET_UPPER_REGS_SF)
7370 emit_insn (gen_vsx_extract_v4sf_var (target, vec, elt));
7371 return;
7373 break;
7375 case V4SImode:
7376 emit_insn (gen_vsx_extract_v4si_var (target, vec, elt));
7377 return;
7379 case V8HImode:
7380 emit_insn (gen_vsx_extract_v8hi_var (target, vec, elt));
7381 return;
7383 case V16QImode:
7384 emit_insn (gen_vsx_extract_v16qi_var (target, vec, elt));
7385 return;
7387 default:
7388 gcc_unreachable ();
7392 gcc_assert (CONST_INT_P (elt));
7394 /* Allocate mode-sized buffer. */
7395 mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
7397 emit_move_insn (mem, vec);
7399 /* Add offset to field within buffer matching vector element. */
7400 mem = adjust_address_nv (mem, inner_mode,
7401 INTVAL (elt) * GET_MODE_SIZE (inner_mode));
7403 emit_move_insn (target, adjust_address_nv (mem, inner_mode, 0));
7406 /* Helper function to return the register number of an RTX. */
7407 static inline int
7408 regno_or_subregno (rtx op)
7410 if (REG_P (op))
7411 return REGNO (op);
7412 else if (SUBREG_P (op))
7413 return subreg_regno (op);
7414 else
7415 gcc_unreachable ();
7418 /* Adjust a memory address (MEM) of a vector type to point to a scalar field
7419 within the vector (ELEMENT) with a mode (SCALAR_MODE). Use a base register
7420 temporary (BASE_TMP) to fix up the address. Return the new memory address
7421 that is valid for reads or writes to a given register (SCALAR_REG). */
7423 static rtx
7424 rs6000_adjust_vec_address (rtx scalar_reg,
7425 rtx mem,
7426 rtx element,
7427 rtx base_tmp,
7428 machine_mode scalar_mode)
7430 unsigned scalar_size = GET_MODE_SIZE (scalar_mode);
7431 rtx addr = XEXP (mem, 0);
7432 rtx element_offset;
7433 rtx new_addr;
7434 bool valid_addr_p;
7436 /* Vector addresses should not have PRE_INC, PRE_DEC, or PRE_MODIFY. */
7437 gcc_assert (GET_RTX_CLASS (GET_CODE (addr)) != RTX_AUTOINC);
7439 /* Calculate what we need to add to the address to get the element
7440 address. */
7441 if (CONST_INT_P (element))
7442 element_offset = GEN_INT (INTVAL (element) * scalar_size);
7443 else
7445 int byte_shift = exact_log2 (scalar_size);
7446 gcc_assert (byte_shift >= 0);
7448 if (byte_shift == 0)
7449 element_offset = element;
7451 else
7453 if (TARGET_POWERPC64)
7454 emit_insn (gen_ashldi3 (base_tmp, element, GEN_INT (byte_shift)));
7455 else
7456 emit_insn (gen_ashlsi3 (base_tmp, element, GEN_INT (byte_shift)));
7458 element_offset = base_tmp;
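/* For instance, with DImode elements (scalar_size == 8, byte_shift == 3),
   a variable element number N becomes the byte offset N << 3. */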
7462 /* Create the new address pointing to the element within the vector. If we
7463 are adding 0, we don't have to change the address. */
7464 if (element_offset == const0_rtx)
7465 new_addr = addr;
7467 /* A simple indirect address can be converted into a reg + offset
7468 address. */
7469 else if (REG_P (addr) || SUBREG_P (addr))
7470 new_addr = gen_rtx_PLUS (Pmode, addr, element_offset);
7472 /* Optimize D-FORM addresses that have both a constant offset and a constant
7473 element number, folding the element offset directly into the address. */
7474 else if (GET_CODE (addr) == PLUS)
7476 rtx op0 = XEXP (addr, 0);
7477 rtx op1 = XEXP (addr, 1);
7478 rtx insn;
7480 gcc_assert (REG_P (op0) || SUBREG_P (op0));
7481 if (CONST_INT_P (op1) && CONST_INT_P (element_offset))
7483 HOST_WIDE_INT offset = INTVAL (op1) + INTVAL (element_offset);
7484 rtx offset_rtx = GEN_INT (offset);
7486 if (IN_RANGE (offset, -32768, 32767)
7487 && (scalar_size < 8 || (offset & 0x3) == 0))
7488 new_addr = gen_rtx_PLUS (Pmode, op0, offset_rtx);
7489 else
7491 emit_move_insn (base_tmp, offset_rtx);
7492 new_addr = gen_rtx_PLUS (Pmode, op0, base_tmp);
7495 else
7497 bool op1_reg_p = (REG_P (op1) || SUBREG_P (op1));
7498 bool ele_reg_p = (REG_P (element_offset) || SUBREG_P (element_offset));
7500 /* Note, ADDI requires the register being added to be a base
7501 register. If the register was R0, load it up into the temporary
7502 and do the add. */
7503 if (op1_reg_p
7504 && (ele_reg_p || reg_or_subregno (op1) != FIRST_GPR_REGNO))
7506 insn = gen_add3_insn (base_tmp, op1, element_offset);
7507 gcc_assert (insn != NULL_RTX);
7508 emit_insn (insn);
7511 else if (ele_reg_p
7512 && reg_or_subregno (element_offset) != FIRST_GPR_REGNO)
7514 insn = gen_add3_insn (base_tmp, element_offset, op1);
7515 gcc_assert (insn != NULL_RTX);
7516 emit_insn (insn);
7519 else
7521 emit_move_insn (base_tmp, op1);
7522 emit_insn (gen_add2_insn (base_tmp, element_offset));
7525 new_addr = gen_rtx_PLUS (Pmode, op0, base_tmp);
7529 else
7531 emit_move_insn (base_tmp, addr);
7532 new_addr = gen_rtx_PLUS (Pmode, base_tmp, element_offset);
7535 /* If we have a PLUS, we need to see whether the particular register class
7536 allows for D-FORM or X-FORM addressing. */
7537 if (GET_CODE (new_addr) == PLUS)
7539 rtx op1 = XEXP (new_addr, 1);
7540 addr_mask_type addr_mask;
7541 int scalar_regno = regno_or_subregno (scalar_reg);
7543 gcc_assert (scalar_regno < FIRST_PSEUDO_REGISTER);
7544 if (INT_REGNO_P (scalar_regno))
7545 addr_mask = reg_addr[scalar_mode].addr_mask[RELOAD_REG_GPR];
7547 else if (FP_REGNO_P (scalar_regno))
7548 addr_mask = reg_addr[scalar_mode].addr_mask[RELOAD_REG_FPR];
7550 else if (ALTIVEC_REGNO_P (scalar_regno))
7551 addr_mask = reg_addr[scalar_mode].addr_mask[RELOAD_REG_VMX];
7553 else
7554 gcc_unreachable ();
7556 if (REG_P (op1) || SUBREG_P (op1))
7557 valid_addr_p = (addr_mask & RELOAD_REG_INDEXED) != 0;
7558 else
7559 valid_addr_p = (addr_mask & RELOAD_REG_OFFSET) != 0;
7562 else if (REG_P (new_addr) || SUBREG_P (new_addr))
7563 valid_addr_p = true;
7565 else
7566 valid_addr_p = false;
7568 if (!valid_addr_p)
7570 emit_move_insn (base_tmp, new_addr);
7571 new_addr = base_tmp;
7574 return change_address (mem, scalar_mode, new_addr);
7577 /* Split a variable vec_extract operation into the component instructions. */
7579 void
7580 rs6000_split_vec_extract_var (rtx dest, rtx src, rtx element, rtx tmp_gpr,
7581 rtx tmp_altivec)
7583 machine_mode mode = GET_MODE (src);
7584 machine_mode scalar_mode = GET_MODE (dest);
7585 unsigned scalar_size = GET_MODE_SIZE (scalar_mode);
7586 int byte_shift = exact_log2 (scalar_size);
7588 gcc_assert (byte_shift >= 0);
7590 /* If we are given a memory address, optimize to load just the element. We
7591 don't have to adjust the vector element number on little endian
7592 systems. */
7593 if (MEM_P (src))
7595 gcc_assert (REG_P (tmp_gpr));
7596 emit_move_insn (dest, rs6000_adjust_vec_address (dest, src, element,
7597 tmp_gpr, scalar_mode));
7598 return;
7601 else if (REG_P (src) || SUBREG_P (src))
7603 int bit_shift = byte_shift + 3;
7604 rtx element2;
7605 int dest_regno = regno_or_subregno (dest);
7606 int src_regno = regno_or_subregno (src);
7607 int element_regno = regno_or_subregno (element);
7609 gcc_assert (REG_P (tmp_gpr));
7611 /* See if we want to generate VEXTU{B,H,W}{L,R}X if the destination is in
7612 a general purpose register. */
7613 if (TARGET_P9_VECTOR
7614 && (mode == V16QImode || mode == V8HImode || mode == V4SImode)
7615 && INT_REGNO_P (dest_regno)
7616 && ALTIVEC_REGNO_P (src_regno)
7617 && INT_REGNO_P (element_regno))
7619 rtx dest_si = gen_rtx_REG (SImode, dest_regno);
7620 rtx element_si = gen_rtx_REG (SImode, element_regno);
7622 if (mode == V16QImode)
7623 emit_insn (VECTOR_ELT_ORDER_BIG
7624 ? gen_vextublx (dest_si, element_si, src)
7625 : gen_vextubrx (dest_si, element_si, src));
7627 else if (mode == V8HImode)
7629 rtx tmp_gpr_si = gen_rtx_REG (SImode, REGNO (tmp_gpr));
7630 emit_insn (gen_ashlsi3 (tmp_gpr_si, element_si, const1_rtx));
7631 emit_insn (VECTOR_ELT_ORDER_BIG
7632 ? gen_vextuhlx (dest_si, tmp_gpr_si, src)
7633 : gen_vextuhrx (dest_si, tmp_gpr_si, src));
7637 else
7639 rtx tmp_gpr_si = gen_rtx_REG (SImode, REGNO (tmp_gpr));
7640 emit_insn (gen_ashlsi3 (tmp_gpr_si, element_si, const2_rtx));
7641 emit_insn (VECTOR_ELT_ORDER_BIG
7642 ? gen_vextuwlx (dest_si, tmp_gpr_si, src)
7643 : gen_vextuwrx (dest_si, tmp_gpr_si, src));
7646 return;
7650 gcc_assert (REG_P (tmp_altivec));
7652 /* For little endian, adjust element ordering. For V2DI/V2DF, we can use
7653 an XOR, otherwise we need to subtract. The shift amount is chosen so
7654 that VSLO will shift the element into the upper position (adding 3
7655 converts a byte shift into a bit shift). */
7656 if (scalar_size == 8)
7658 if (!VECTOR_ELT_ORDER_BIG)
7660 emit_insn (gen_xordi3 (tmp_gpr, element, const1_rtx));
7661 element2 = tmp_gpr;
7663 else
7664 element2 = element;
7666 /* Generate RLDIC directly to shift left 6 bits and retrieve 1
7667 bit. */
7668 emit_insn (gen_rtx_SET (tmp_gpr,
7669 gen_rtx_AND (DImode,
7670 gen_rtx_ASHIFT (DImode,
7671 element2,
7672 GEN_INT (6)),
7673 GEN_INT (64))));
7675 else
7677 if (!VECTOR_ELT_ORDER_BIG)
7679 rtx num_ele_m1 = GEN_INT (GET_MODE_NUNITS (mode) - 1);
7681 emit_insn (gen_anddi3 (tmp_gpr, element, num_ele_m1));
7682 emit_insn (gen_subdi3 (tmp_gpr, num_ele_m1, tmp_gpr));
7683 element2 = tmp_gpr;
7685 else
7686 element2 = element;
7688 emit_insn (gen_ashldi3 (tmp_gpr, element2, GEN_INT (bit_shift)));
7691 /* Get the value into the lower byte of the Altivec register where VSLO
7692 expects it. */
7693 if (TARGET_P9_VECTOR)
7694 emit_insn (gen_vsx_splat_v2di (tmp_altivec, tmp_gpr));
7695 else if (can_create_pseudo_p ())
7696 emit_insn (gen_vsx_concat_v2di (tmp_altivec, tmp_gpr, tmp_gpr));
7697 else
7699 rtx tmp_di = gen_rtx_REG (DImode, REGNO (tmp_altivec));
7700 emit_move_insn (tmp_di, tmp_gpr);
7701 emit_insn (gen_vsx_concat_v2di (tmp_altivec, tmp_di, tmp_di));
7704 /* Do the VSLO to get the value into the final location. */
7705 switch (mode)
7707 case V2DFmode:
7708 emit_insn (gen_vsx_vslo_v2df (dest, src, tmp_altivec));
7709 return;
7711 case V2DImode:
7712 emit_insn (gen_vsx_vslo_v2di (dest, src, tmp_altivec));
7713 return;
7715 case V4SFmode:
7717 rtx tmp_altivec_di = gen_rtx_REG (DImode, REGNO (tmp_altivec));
7718 rtx tmp_altivec_v4sf = gen_rtx_REG (V4SFmode, REGNO (tmp_altivec));
7719 rtx src_v2di = gen_rtx_REG (V2DImode, REGNO (src));
7720 emit_insn (gen_vsx_vslo_v2di (tmp_altivec_di, src_v2di,
7721 tmp_altivec));
7723 emit_insn (gen_vsx_xscvspdp_scalar2 (dest, tmp_altivec_v4sf));
7724 return;
7727 case V4SImode:
7728 case V8HImode:
7729 case V16QImode:
7731 rtx tmp_altivec_di = gen_rtx_REG (DImode, REGNO (tmp_altivec));
7732 rtx src_v2di = gen_rtx_REG (V2DImode, REGNO (src));
7733 rtx tmp_gpr_di = gen_rtx_REG (DImode, REGNO (dest));
7734 emit_insn (gen_vsx_vslo_v2di (tmp_altivec_di, src_v2di,
7735 tmp_altivec));
7736 emit_move_insn (tmp_gpr_di, tmp_altivec_di);
7737 emit_insn (gen_ashrdi3 (tmp_gpr_di, tmp_gpr_di,
7738 GEN_INT (64 - (8 * scalar_size))));
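/* For example, a V8HImode element (scalar_size == 2) lands in the upper
   16 bits of the doubleword, so the arithmetic shift right is by 48. */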
7739 return;
7742 default:
7743 gcc_unreachable ();
7746 return;
7748 else
7749 gcc_unreachable ();
7752 /* Helper function for rs6000_split_v4si_init to build up a DImode value from
7753 two SImode values. */
7755 static void
7756 rs6000_split_v4si_init_di_reg (rtx dest, rtx si1, rtx si2, rtx tmp)
7758 const unsigned HOST_WIDE_INT mask_32bit = HOST_WIDE_INT_C (0xffffffff);
7760 if (CONST_INT_P (si1) && CONST_INT_P (si2))
7762 unsigned HOST_WIDE_INT const1 = (UINTVAL (si1) & mask_32bit) << 32;
7763 unsigned HOST_WIDE_INT const2 = UINTVAL (si2) & mask_32bit;
7765 emit_move_insn (dest, GEN_INT (const1 | const2));
7766 return;
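/* For example, si1 = 0x11111111 and si2 = 0x22222222 combine into the
   single DImode constant 0x1111111122222222. */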
7769 /* Put si1 into upper 32-bits of dest. */
7770 if (CONST_INT_P (si1))
7771 emit_move_insn (dest, GEN_INT ((UINTVAL (si1) & mask_32bit) << 32));
7772 else
7774 /* Generate RLDIC. */
7775 rtx si1_di = gen_rtx_REG (DImode, regno_or_subregno (si1));
7776 rtx shift_rtx = gen_rtx_ASHIFT (DImode, si1_di, GEN_INT (32));
7777 rtx mask_rtx = GEN_INT (mask_32bit << 32);
7778 rtx and_rtx = gen_rtx_AND (DImode, shift_rtx, mask_rtx);
7779 gcc_assert (!reg_overlap_mentioned_p (dest, si1));
7780 emit_insn (gen_rtx_SET (dest, and_rtx));
7783 /* Put si2 into the temporary. */
7784 gcc_assert (!reg_overlap_mentioned_p (dest, tmp));
7785 if (CONST_INT_P (si2))
7786 emit_move_insn (tmp, GEN_INT (UINTVAL (si2) & mask_32bit));
7787 else
7788 emit_insn (gen_zero_extendsidi2 (tmp, si2));
7790 /* Combine the two parts. */
7791 emit_insn (gen_iordi3 (dest, dest, tmp));
7792 return;
7795 /* Split a V4SI initialization. */
7797 void
7798 rs6000_split_v4si_init (rtx operands[])
7800 rtx dest = operands[0];
7802 /* Destination is a GPR, build up the two DImode parts in place. */
7803 if (REG_P (dest) || SUBREG_P (dest))
7805 int d_regno = regno_or_subregno (dest);
7806 rtx scalar1 = operands[1];
7807 rtx scalar2 = operands[2];
7808 rtx scalar3 = operands[3];
7809 rtx scalar4 = operands[4];
7810 rtx tmp1 = operands[5];
7811 rtx tmp2 = operands[6];
7813 /* Even though we only need one temporary (plus the destination, which
7814 has an early clobber constraint), try to use two temporaries, one for
7815 each double word created. That way the second insn scheduling pass can
7816 rearrange things so the two parts are done in parallel. */
7817 if (BYTES_BIG_ENDIAN)
7819 rtx di_lo = gen_rtx_REG (DImode, d_regno);
7820 rtx di_hi = gen_rtx_REG (DImode, d_regno + 1);
7821 rs6000_split_v4si_init_di_reg (di_lo, scalar1, scalar2, tmp1);
7822 rs6000_split_v4si_init_di_reg (di_hi, scalar3, scalar4, tmp2);
7824 else
7826 rtx di_lo = gen_rtx_REG (DImode, d_regno + 1);
7827 rtx di_hi = gen_rtx_REG (DImode, d_regno);
7828 gcc_assert (!VECTOR_ELT_ORDER_BIG);
7829 rs6000_split_v4si_init_di_reg (di_lo, scalar4, scalar3, tmp1);
7830 rs6000_split_v4si_init_di_reg (di_hi, scalar2, scalar1, tmp2);
7832 return;
7835 else
7836 gcc_unreachable ();
7839 /* Return TRUE if OP is an invalid SUBREG operation on the e500. */
7841 bool
7842 invalid_e500_subreg (rtx op, machine_mode mode)
7844 if (TARGET_E500_DOUBLE)
7846 /* Reject (subreg:SI (reg:DF)); likewise with subreg:DI or
7847 subreg:TI and reg:TF. Decimal float modes are like integer
7848 modes (only low part of each register used) for this
7849 purpose. */
7850 if (GET_CODE (op) == SUBREG
7851 && (mode == SImode || mode == DImode || mode == TImode
7852 || mode == DDmode || mode == TDmode || mode == PTImode)
7853 && REG_P (SUBREG_REG (op))
7854 && (GET_MODE (SUBREG_REG (op)) == DFmode
7855 || GET_MODE (SUBREG_REG (op)) == TFmode
7856 || GET_MODE (SUBREG_REG (op)) == IFmode
7857 || GET_MODE (SUBREG_REG (op)) == KFmode))
7858 return true;
7860 /* Reject (subreg:DF (reg:DI)); likewise with subreg:TF and
7861 reg:TI. */
7862 if (GET_CODE (op) == SUBREG
7863 && (mode == DFmode || mode == TFmode || mode == IFmode
7864 || mode == KFmode)
7865 && REG_P (SUBREG_REG (op))
7866 && (GET_MODE (SUBREG_REG (op)) == DImode
7867 || GET_MODE (SUBREG_REG (op)) == TImode
7868 || GET_MODE (SUBREG_REG (op)) == PTImode
7869 || GET_MODE (SUBREG_REG (op)) == DDmode
7870 || GET_MODE (SUBREG_REG (op)) == TDmode))
7871 return true;
7874 if (TARGET_SPE
7875 && GET_CODE (op) == SUBREG
7876 && mode == SImode
7877 && REG_P (SUBREG_REG (op))
7878 && SPE_VECTOR_MODE (GET_MODE (SUBREG_REG (op))))
7879 return true;
7881 return false;
7884 /* Return alignment of TYPE. Existing alignment is ALIGN. HOW
7885 selects whether the alignment is ABI-mandated, optional, or
7886 both ABI-mandated and optional alignment. */
7888 unsigned int
7889 rs6000_data_alignment (tree type, unsigned int align, enum data_align how)
7891 if (how != align_opt)
7893 if (TREE_CODE (type) == VECTOR_TYPE)
7895 if ((TARGET_SPE && SPE_VECTOR_MODE (TYPE_MODE (type)))
7896 || (TARGET_PAIRED_FLOAT && PAIRED_VECTOR_MODE (TYPE_MODE (type))))
7898 if (align < 64)
7899 align = 64;
7901 else if (align < 128)
7902 align = 128;
7904 else if (TARGET_E500_DOUBLE
7905 && TREE_CODE (type) == REAL_TYPE
7906 && TYPE_MODE (type) == DFmode)
7908 if (align < 64)
7909 align = 64;
7913 if (how != align_abi)
7915 if (TREE_CODE (type) == ARRAY_TYPE
7916 && TYPE_MODE (TREE_TYPE (type)) == QImode)
7918 if (align < BITS_PER_WORD)
7919 align = BITS_PER_WORD;
7923 return align;
7926 /* Previous GCC releases forced all vector types to have 16-byte alignment. */
7928 bool
7929 rs6000_special_adjust_field_align_p (tree field, unsigned int computed)
7931 if (TARGET_ALTIVEC && TREE_CODE (TREE_TYPE (field)) == VECTOR_TYPE)
7933 if (computed != 128)
7935 static bool warned;
7936 if (!warned && warn_psabi)
7938 warned = true;
7939 inform (input_location,
7940 "the layout of aggregates containing vectors with"
7941 " %d-byte alignment has changed in GCC 5",
7942 computed / BITS_PER_UNIT);
7945 /* In current GCC there is no special case. */
7946 return false;
7949 return false;
7952 /* AIX increases natural record alignment to doubleword if the first
7953 field is an FP double while the FP fields remain word aligned. */
7955 unsigned int
7956 rs6000_special_round_type_align (tree type, unsigned int computed,
7957 unsigned int specified)
7959 unsigned int align = MAX (computed, specified);
7960 tree field = TYPE_FIELDS (type);
7962 /* Skip all non-field decls. */
7963 while (field != NULL && TREE_CODE (field) != FIELD_DECL)
7964 field = DECL_CHAIN (field);
7966 if (field != NULL && field != type)
7968 type = TREE_TYPE (field);
7969 while (TREE_CODE (type) == ARRAY_TYPE)
7970 type = TREE_TYPE (type);
7972 if (type != error_mark_node && TYPE_MODE (type) == DFmode)
7973 align = MAX (align, 64);
7976 return align;
7979 /* Darwin increases record alignment to the natural alignment of
7980 the first field. */
7982 unsigned int
7983 darwin_rs6000_special_round_type_align (tree type, unsigned int computed,
7984 unsigned int specified)
7986 unsigned int align = MAX (computed, specified);
7988 if (TYPE_PACKED (type))
7989 return align;
7991 /* Find the first field, looking down into aggregates. */
7992 do {
7993 tree field = TYPE_FIELDS (type);
7994 /* Skip all non-field decls. */
7995 while (field != NULL && TREE_CODE (field) != FIELD_DECL)
7996 field = DECL_CHAIN (field);
7997 if (! field)
7998 break;
7999 /* A packed field does not contribute any extra alignment. */
8000 if (DECL_PACKED (field))
8001 return align;
8002 type = TREE_TYPE (field);
8003 while (TREE_CODE (type) == ARRAY_TYPE)
8004 type = TREE_TYPE (type);
8005 } while (AGGREGATE_TYPE_P (type));
8007 if (! AGGREGATE_TYPE_P (type) && type != error_mark_node)
8008 align = MAX (align, TYPE_ALIGN (type));
8010 return align;
8013 /* Return 1 for an operand in small memory on V.4/eabi. */
8015 int
8016 small_data_operand (rtx op ATTRIBUTE_UNUSED,
8017 machine_mode mode ATTRIBUTE_UNUSED)
8019 #if TARGET_ELF
8020 rtx sym_ref;
8022 if (rs6000_sdata == SDATA_NONE || rs6000_sdata == SDATA_DATA)
8023 return 0;
8025 if (DEFAULT_ABI != ABI_V4)
8026 return 0;
8028 /* Vector and float memory instructions have a limited offset on the
8029 SPE, so using a vector or float variable directly as an operand is
8030 not useful. */
8031 if (TARGET_SPE
8032 && (SPE_VECTOR_MODE (mode) || FLOAT_MODE_P (mode)))
8033 return 0;
8035 if (GET_CODE (op) == SYMBOL_REF)
8036 sym_ref = op;
8038 else if (GET_CODE (op) != CONST
8039 || GET_CODE (XEXP (op, 0)) != PLUS
8040 || GET_CODE (XEXP (XEXP (op, 0), 0)) != SYMBOL_REF
8041 || GET_CODE (XEXP (XEXP (op, 0), 1)) != CONST_INT)
8042 return 0;
8044 else
8046 rtx sum = XEXP (op, 0);
8047 HOST_WIDE_INT summand;
8049 /* We have to be careful here, because it is the referenced address
8050 that must be within 32k of _SDA_BASE_, not just the symbol. */
8051 summand = INTVAL (XEXP (sum, 1));
8052 if (summand < 0 || summand > g_switch_value)
8053 return 0;
8055 sym_ref = XEXP (sum, 0);
8058 return SYMBOL_REF_SMALL_P (sym_ref);
8059 #else
8060 return 0;
8061 #endif
8064 /* Return true if either operand is a general purpose register. */
8066 bool
8067 gpr_or_gpr_p (rtx op0, rtx op1)
8069 return ((REG_P (op0) && INT_REGNO_P (REGNO (op0)))
8070 || (REG_P (op1) && INT_REGNO_P (REGNO (op1))));
8073 /* Return true if this is a move direct operation between GPR registers and
8074 floating point/VSX registers. */
8076 bool
8077 direct_move_p (rtx op0, rtx op1)
8079 int regno0, regno1;
8081 if (!REG_P (op0) || !REG_P (op1))
8082 return false;
8084 if (!TARGET_DIRECT_MOVE && !TARGET_MFPGPR)
8085 return false;
8087 regno0 = REGNO (op0);
8088 regno1 = REGNO (op1);
8089 if (regno0 >= FIRST_PSEUDO_REGISTER || regno1 >= FIRST_PSEUDO_REGISTER)
8090 return false;
8092 if (INT_REGNO_P (regno0))
8093 return (TARGET_DIRECT_MOVE) ? VSX_REGNO_P (regno1) : FP_REGNO_P (regno1);
8095 else if (INT_REGNO_P (regno1))
8097 if (TARGET_MFPGPR && FP_REGNO_P (regno0))
8098 return true;
8100 else if (TARGET_DIRECT_MOVE && VSX_REGNO_P (regno0))
8101 return true;
8104 return false;
8107 /* Return true if the OFFSET is valid for the quad address instructions that
8108 use d-form (register + offset) addressing. */
8110 static inline bool
8111 quad_address_offset_p (HOST_WIDE_INT offset)
8113 return (IN_RANGE (offset, -32768, 32767) && ((offset) & 0xf) == 0);
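/* For example, offsets 0, 16, and -32768 satisfy this test, while 8 (low
   four bits nonzero) and 32768 (out of range) do not. */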
8116 /* Return true if ADDR is an acceptable address for a quad memory
8117 operation of mode MODE (either LQ/STQ for general purpose registers, or
8118 LXV/STXV for vector registers under ISA 3.0). If STRICT, the base
8119 register must satisfy the strict base-register checks. */
8122 bool
8123 quad_address_p (rtx addr, machine_mode mode, bool strict)
8125 rtx op0, op1;
8127 if (GET_MODE_SIZE (mode) != 16)
8128 return false;
8130 if (legitimate_indirect_address_p (addr, strict))
8131 return true;
8133 if (VECTOR_MODE_P (mode) && !mode_supports_vsx_dform_quad (mode))
8134 return false;
8136 if (GET_CODE (addr) != PLUS)
8137 return false;
8139 op0 = XEXP (addr, 0);
8140 if (!REG_P (op0) || !INT_REG_OK_FOR_BASE_P (op0, strict))
8141 return false;
8143 op1 = XEXP (addr, 1);
8144 if (!CONST_INT_P (op1))
8145 return false;
8147 return quad_address_offset_p (INTVAL (op1));
8150 /* Return true if this is a load or store quad operation. This function does
8151 not handle the atomic quad memory instructions. */
8153 bool
8154 quad_load_store_p (rtx op0, rtx op1)
8156 bool ret;
8158 if (!TARGET_QUAD_MEMORY)
8159 ret = false;
8161 else if (REG_P (op0) && MEM_P (op1))
8162 ret = (quad_int_reg_operand (op0, GET_MODE (op0))
8163 && quad_memory_operand (op1, GET_MODE (op1))
8164 && !reg_overlap_mentioned_p (op0, op1));
8166 else if (MEM_P (op0) && REG_P (op1))
8167 ret = (quad_memory_operand (op0, GET_MODE (op0))
8168 && quad_int_reg_operand (op1, GET_MODE (op1)));
8170 else
8171 ret = false;
8173 if (TARGET_DEBUG_ADDR)
8175 fprintf (stderr, "\n========== quad_load_store, return %s\n",
8176 ret ? "true" : "false");
8177 debug_rtx (gen_rtx_SET (op0, op1));
8180 return ret;
8183 /* Given an address, return a constant offset term if one exists. */
8185 static rtx
8186 address_offset (rtx op)
8188 if (GET_CODE (op) == PRE_INC
8189 || GET_CODE (op) == PRE_DEC)
8190 op = XEXP (op, 0);
8191 else if (GET_CODE (op) == PRE_MODIFY
8192 || GET_CODE (op) == LO_SUM)
8193 op = XEXP (op, 1);
8195 if (GET_CODE (op) == CONST)
8196 op = XEXP (op, 0);
8198 if (GET_CODE (op) == PLUS)
8199 op = XEXP (op, 1);
8201 if (CONST_INT_P (op))
8202 return op;
8204 return NULL_RTX;
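/* For example, (plus (reg) (const_int 8)) yields (const_int 8), while a
   bare (reg) yields NULL_RTX. */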
8207 /* Return true if the MEM operand is a memory operand suitable for use
8208 with a (full width, possibly multiple) gpr load/store. On
8209 powerpc64 this means the offset must be divisible by 4.
8210 Implements 'Y' constraint.
8212 Accept direct, indexed, offset, lo_sum and tocref. Since this is
8213 a constraint function we know the operand has satisfied a suitable
8214 memory predicate. Also accept some odd rtl generated by reload
8215 (see rs6000_legitimize_reload_address for various forms). It is
8216 important that reload rtl be accepted by appropriate constraints
8217 but not by the operand predicate.
8219 Offsetting a lo_sum should not be allowed, except where we know by
8220 alignment that a 32k boundary is not crossed, but see the ???
8221 comment in rs6000_legitimize_reload_address. Note that by
8222 "offsetting" here we mean a further offset to access parts of the
8223 MEM. It's fine to have a lo_sum where the inner address is offset
8224 from a sym, since the same sym+offset will appear in the high part
8225 of the address calculation. */
8227 bool
8228 mem_operand_gpr (rtx op, machine_mode mode)
8230 unsigned HOST_WIDE_INT offset;
8231 int extra;
8232 rtx addr = XEXP (op, 0);
8234 op = address_offset (addr);
8235 if (op == NULL_RTX)
8236 return true;
8238 offset = INTVAL (op);
8239 if (TARGET_POWERPC64 && (offset & 3) != 0)
8240 return false;
8242 extra = GET_MODE_SIZE (mode) - UNITS_PER_WORD;
8243 if (extra < 0)
8244 extra = 0;
8246 if (GET_CODE (addr) == LO_SUM)
8247 /* For lo_sum addresses, we must allow any offset except one that
8248 causes a wrap, so test only the low 16 bits. */
8249 offset = ((offset & 0xffff) ^ 0x8000) - 0x8000;
8251 return offset + 0x8000 < 0x10000u - extra;
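/* As a worked example of the lo_sum case, a low-part offset of 0x9000
   sign-extends to -0x7000, and -0x7000 + 0x8000 = 0x1000 passes the
   range check above. */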
8254 /* As above, but for DS-FORM VSX insns. Unlike mem_operand_gpr,
8255 enforce an offset divisible by 4 even for 32-bit. */
8257 bool
8258 mem_operand_ds_form (rtx op, machine_mode mode)
8260 unsigned HOST_WIDE_INT offset;
8261 int extra;
8262 rtx addr = XEXP (op, 0);
8264 if (!offsettable_address_p (false, mode, addr))
8265 return false;
8267 op = address_offset (addr);
8268 if (op == NULL_RTX)
8269 return true;
8271 offset = INTVAL (op);
8272 if ((offset & 3) != 0)
8273 return false;
8275 extra = GET_MODE_SIZE (mode) - UNITS_PER_WORD;
8276 if (extra < 0)
8277 extra = 0;
8279 if (GET_CODE (addr) == LO_SUM)
8280 /* For lo_sum addresses, we must allow any offset except one that
8281 causes a wrap, so test only the low 16 bits. */
8282 offset = ((offset & 0xffff) ^ 0x8000) - 0x8000;
8284 return offset + 0x8000 < 0x10000u - extra;
8287 /* Subroutines of rs6000_legitimize_address and rs6000_legitimate_address_p. */
8289 static bool
8290 reg_offset_addressing_ok_p (machine_mode mode)
8292 switch (mode)
8294 case V16QImode:
8295 case V8HImode:
8296 case V4SFmode:
8297 case V4SImode:
8298 case V2DFmode:
8299 case V2DImode:
8300 case V1TImode:
8301 case TImode:
8302 case TFmode:
8303 case KFmode:
8304 /* AltiVec/VSX vector modes. Only reg+reg addressing was valid until the
8305 ISA 3.0 vector d-form addressing mode was added. While TImode is not
8306 a vector mode, if we want to use the VSX registers to move it around,
8307 we need to restrict ourselves to reg+reg addressing. Similarly for
8308 IEEE 128-bit floating point that is passed in a single vector
8309 register. */
8310 if (VECTOR_MEM_ALTIVEC_OR_VSX_P (mode))
8311 return mode_supports_vsx_dform_quad (mode);
8312 break;
8314 case V4HImode:
8315 case V2SImode:
8316 case V1DImode:
8317 case V2SFmode:
8318 /* Paired vector modes. Only reg+reg addressing is valid. */
8319 if (TARGET_PAIRED_FLOAT)
8320 return false;
8321 break;
8323 case SDmode:
8324 /* If we can do direct load/stores of SDmode, restrict it to reg+reg
8325 addressing for the LFIWZX and STFIWX instructions. */
8326 if (TARGET_NO_SDMODE_STACK)
8327 return false;
8328 break;
8330 default:
8331 break;
8334 return true;
8337 static bool
8338 virtual_stack_registers_memory_p (rtx op)
8340 int regnum;
8342 if (GET_CODE (op) == REG)
8343 regnum = REGNO (op);
8345 else if (GET_CODE (op) == PLUS
8346 && GET_CODE (XEXP (op, 0)) == REG
8347 && GET_CODE (XEXP (op, 1)) == CONST_INT)
8348 regnum = REGNO (XEXP (op, 0));
8350 else
8351 return false;
8353 return (regnum >= FIRST_VIRTUAL_REGISTER
8354 && regnum <= LAST_VIRTUAL_POINTER_REGISTER);
8357 /* Return true if a MODE-sized memory access to OP plus OFFSET
8358 is known not to straddle a 32k boundary. This function is used
8359 to determine whether -mcmodel=medium code can use TOC pointer
8360 relative addressing for OP. This means the alignment of the TOC
8361 pointer must also be taken into account, and unfortunately that is
8362 only 8 bytes. */
8364 #ifndef POWERPC64_TOC_POINTER_ALIGNMENT
8365 #define POWERPC64_TOC_POINTER_ALIGNMENT 8
8366 #endif
8368 static bool
8369 offsettable_ok_by_alignment (rtx op, HOST_WIDE_INT offset,
8370 machine_mode mode)
8372 tree decl;
8373 unsigned HOST_WIDE_INT dsize, dalign, lsb, mask;
8375 if (GET_CODE (op) != SYMBOL_REF)
8376 return false;
8378 /* ISA 3.0 vector d-form addressing is restricted, don't allow
8379 SYMBOL_REF. */
8380 if (mode_supports_vsx_dform_quad (mode))
8381 return false;
8383 dsize = GET_MODE_SIZE (mode);
8384 decl = SYMBOL_REF_DECL (op);
8385 if (!decl)
8387 if (dsize == 0)
8388 return false;
8390 /* -fsection-anchors loses the original SYMBOL_REF_DECL when
8391 replacing memory addresses with an anchor plus offset. We
8392 could find the decl by rummaging around in the block->objects
8393 VEC for the given offset but that seems like too much work. */
8394 dalign = BITS_PER_UNIT;
8395 if (SYMBOL_REF_HAS_BLOCK_INFO_P (op)
8396 && SYMBOL_REF_ANCHOR_P (op)
8397 && SYMBOL_REF_BLOCK (op) != NULL)
8399 struct object_block *block = SYMBOL_REF_BLOCK (op);
8401 dalign = block->alignment;
8402 offset += SYMBOL_REF_BLOCK_OFFSET (op);
8404 else if (CONSTANT_POOL_ADDRESS_P (op))
8406 /* It would be nice to have get_pool_align ()... */
8407 machine_mode cmode = get_pool_mode (op);
8409 dalign = GET_MODE_ALIGNMENT (cmode);
8412 else if (DECL_P (decl))
8414 dalign = DECL_ALIGN (decl);
8416 if (dsize == 0)
8418 /* Allow BLKmode when the entire object is known to not
8419 cross a 32k boundary. */
8420 if (!DECL_SIZE_UNIT (decl))
8421 return false;
8423 if (!tree_fits_uhwi_p (DECL_SIZE_UNIT (decl)))
8424 return false;
8426 dsize = tree_to_uhwi (DECL_SIZE_UNIT (decl));
8427 if (dsize > 32768)
8428 return false;
8430 dalign /= BITS_PER_UNIT;
8431 if (dalign > POWERPC64_TOC_POINTER_ALIGNMENT)
8432 dalign = POWERPC64_TOC_POINTER_ALIGNMENT;
8433 return dalign >= dsize;
8436 else
8437 gcc_unreachable ();
8439 /* Find how many bits of the alignment we know for this access. */
8440 dalign /= BITS_PER_UNIT;
8441 if (dalign > POWERPC64_TOC_POINTER_ALIGNMENT)
8442 dalign = POWERPC64_TOC_POINTER_ALIGNMENT;
8443 mask = dalign - 1;
8444 lsb = offset & -offset;
8445 mask &= lsb - 1;
8446 dalign = mask + 1;
8448 return dalign >= dsize;
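/* For example, with a declared alignment of 8 bytes and OFFSET == 4, the
   low set bit of the offset limits the known alignment to 4 bytes, so an
   8-byte access fails this check. */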
8451 static bool
8452 constant_pool_expr_p (rtx op)
8454 rtx base, offset;
8456 split_const (op, &base, &offset);
8457 return (GET_CODE (base) == SYMBOL_REF
8458 && CONSTANT_POOL_ADDRESS_P (base)
8459 && ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (get_pool_constant (base), Pmode));
8462 static const_rtx tocrel_base, tocrel_offset;
8464 /* Return true if OP is a toc pointer relative address (the output
8465 of create_TOC_reference). If STRICT, do not match non-split
8466 -mcmodel=large/medium toc pointer relative addresses. */
8468 bool
8469 toc_relative_expr_p (const_rtx op, bool strict)
8471 if (!TARGET_TOC)
8472 return false;
8474 if (TARGET_CMODEL != CMODEL_SMALL)
8476 /* When strict, ensure we have everything tidy. */
8477 if (strict
8478 && !(GET_CODE (op) == LO_SUM
8479 && REG_P (XEXP (op, 0))
8480 && INT_REG_OK_FOR_BASE_P (XEXP (op, 0), strict)))
8481 return false;
8483 /* When not strict, allow non-split TOC addresses and also allow
8484 (lo_sum (high ..)) TOC addresses created during reload. */
8485 if (GET_CODE (op) == LO_SUM)
8486 op = XEXP (op, 1);
8489 tocrel_base = op;
8490 tocrel_offset = const0_rtx;
8491 if (GET_CODE (op) == PLUS && add_cint_operand (XEXP (op, 1), GET_MODE (op)))
8493 tocrel_base = XEXP (op, 0);
8494 tocrel_offset = XEXP (op, 1);
8497 return (GET_CODE (tocrel_base) == UNSPEC
8498 && XINT (tocrel_base, 1) == UNSPEC_TOCREL);
8501 /* Return true if X is a constant pool address, and also for cmodel=medium
8502 if X is a toc-relative address known to be offsettable within MODE. */
8504 bool
8505 legitimate_constant_pool_address_p (const_rtx x, machine_mode mode,
8506 bool strict)
8508 return (toc_relative_expr_p (x, strict)
8509 && (TARGET_CMODEL != CMODEL_MEDIUM
8510 || constant_pool_expr_p (XVECEXP (tocrel_base, 0, 0))
8511 || mode == QImode
8512 || offsettable_ok_by_alignment (XVECEXP (tocrel_base, 0, 0),
8513 INTVAL (tocrel_offset), mode)));
8516 static bool
8517 legitimate_small_data_p (machine_mode mode, rtx x)
8519 return (DEFAULT_ABI == ABI_V4
8520 && !flag_pic && !TARGET_TOC
8521 && (GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == CONST)
8522 && small_data_operand (x, mode));
8525 /* SPE offset addressing is limited to 5 bits' worth of doublewords. */
8526 #define SPE_CONST_OFFSET_OK(x) (((x) & ~0xf8) == 0)
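/* That is, a valid SPE offset is a multiple of 8 in the range 0..248. */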
8528 bool
8529 rs6000_legitimate_offset_address_p (machine_mode mode, rtx x,
8530 bool strict, bool worst_case)
8532 unsigned HOST_WIDE_INT offset;
8533 unsigned int extra;
8535 if (GET_CODE (x) != PLUS)
8536 return false;
8537 if (!REG_P (XEXP (x, 0)))
8538 return false;
8539 if (!INT_REG_OK_FOR_BASE_P (XEXP (x, 0), strict))
8540 return false;
8541 if (mode_supports_vsx_dform_quad (mode))
8542 return quad_address_p (x, mode, strict);
8543 if (!reg_offset_addressing_ok_p (mode))
8544 return virtual_stack_registers_memory_p (x);
8545 if (legitimate_constant_pool_address_p (x, mode, strict || lra_in_progress))
8546 return true;
8547 if (GET_CODE (XEXP (x, 1)) != CONST_INT)
8548 return false;
8550 offset = INTVAL (XEXP (x, 1));
8551 extra = 0;
8552 switch (mode)
8554 case V4HImode:
8555 case V2SImode:
8556 case V1DImode:
8557 case V2SFmode:
8558 /* SPE vector modes. */
8559 return SPE_CONST_OFFSET_OK (offset);
8561 case DFmode:
8562 case DDmode:
8563 case DImode:
8564 /* On e500v2, we may have:
8566 (subreg:DF (mem:DI (plus (reg) (const_int))) 0).
8568 Which gets addressed with evldd instructions. */
8569 if (TARGET_E500_DOUBLE)
8570 return SPE_CONST_OFFSET_OK (offset);
8572 /* If we are using VSX scalar loads, restrict ourselves to reg+reg
8573 addressing. */
8574 if (VECTOR_MEM_VSX_P (mode))
8575 return false;
8577 if (!worst_case)
8578 break;
8579 if (!TARGET_POWERPC64)
8580 extra = 4;
8581 else if (offset & 3)
8582 return false;
8583 break;
8585 case TFmode:
8586 case IFmode:
8587 case KFmode:
8588 case TDmode:
8589 case TImode:
8590 case PTImode:
8591 if (TARGET_E500_DOUBLE)
8592 return (SPE_CONST_OFFSET_OK (offset)
8593 && SPE_CONST_OFFSET_OK (offset + 8));
8595 extra = 8;
8596 if (!worst_case)
8597 break;
8598 if (!TARGET_POWERPC64)
8599 extra = 12;
8600 else if (offset & 3)
8601 return false;
8602 break;
8604 default:
8605 break;
8608 offset += 0x8000;
8609 return offset < 0x10000 - extra;
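/* The bias-and-compare above accepts offsets in [-0x8000, 0x7fff - extra]. */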
8612 bool
8613 legitimate_indexed_address_p (rtx x, int strict)
8615 rtx op0, op1;
8617 if (GET_CODE (x) != PLUS)
8618 return false;
8620 op0 = XEXP (x, 0);
8621 op1 = XEXP (x, 1);
8623 /* Recognize the rtl generated by reload which we know will later be
8624 replaced with proper base and index regs. */
8625 if (!strict
8626 && reload_in_progress
8627 && (REG_P (op0) || GET_CODE (op0) == PLUS)
8628 && REG_P (op1))
8629 return true;
8631 return (REG_P (op0) && REG_P (op1)
8632 && ((INT_REG_OK_FOR_BASE_P (op0, strict)
8633 && INT_REG_OK_FOR_INDEX_P (op1, strict))
8634 || (INT_REG_OK_FOR_BASE_P (op1, strict)
8635 && INT_REG_OK_FOR_INDEX_P (op0, strict))));
8638 bool
8639 avoiding_indexed_address_p (machine_mode mode)
8641 /* Avoid indexed addressing for modes that have non-indexed
8642 load/store instruction forms. */
8643 return (TARGET_AVOID_XFORM && VECTOR_MEM_NONE_P (mode));
8646 bool
8647 legitimate_indirect_address_p (rtx x, int strict)
8649 return GET_CODE (x) == REG && INT_REG_OK_FOR_BASE_P (x, strict);
8652 bool
8653 macho_lo_sum_memory_operand (rtx x, machine_mode mode)
8655 if (!TARGET_MACHO || !flag_pic
8656 || mode != SImode || GET_CODE (x) != MEM)
8657 return false;
8658 x = XEXP (x, 0);
8660 if (GET_CODE (x) != LO_SUM)
8661 return false;
8662 if (GET_CODE (XEXP (x, 0)) != REG)
8663 return false;
8664 if (!INT_REG_OK_FOR_BASE_P (XEXP (x, 0), 0))
8665 return false;
8666 x = XEXP (x, 1);
8668 return CONSTANT_P (x);
8671 static bool
8672 legitimate_lo_sum_address_p (machine_mode mode, rtx x, int strict)
8674 if (GET_CODE (x) != LO_SUM)
8675 return false;
8676 if (GET_CODE (XEXP (x, 0)) != REG)
8677 return false;
8678 if (!INT_REG_OK_FOR_BASE_P (XEXP (x, 0), strict))
8679 return false;
8680 /* Quad word addresses are restricted, and we can't use LO_SUM. */
8681 if (mode_supports_vsx_dform_quad (mode))
8682 return false;
8683 /* Restrict addressing for DI because of our SUBREG hackery. */
8684 if (TARGET_E500_DOUBLE && GET_MODE_SIZE (mode) > UNITS_PER_WORD)
8685 return false;
8686 x = XEXP (x, 1);
8688 if (TARGET_ELF || TARGET_MACHO)
8690 bool large_toc_ok;
8692 if (DEFAULT_ABI == ABI_V4 && flag_pic)
8693 return false;
8694 /* LRA doesn't use LEGITIMIZE_RELOAD_ADDRESS, as it usually calls
8695 push_reload from the reload pass code. LEGITIMIZE_RELOAD_ADDRESS
8696 recognizes some LO_SUM addresses as valid although this
8697 function says the opposite. In most cases LRA can generate
8698 correct code for address reloads through its own transformations,
8699 but it cannot manage a few LO_SUM cases. So we need to add
8700 code analogous to that in rs6000_legitimize_reload_address for
8701 LO_SUM here, saying that some addresses are still valid. */
8702 large_toc_ok = (lra_in_progress && TARGET_CMODEL != CMODEL_SMALL
8703 && small_toc_ref (x, VOIDmode));
8704 if (TARGET_TOC && ! large_toc_ok)
8705 return false;
8706 if (GET_MODE_NUNITS (mode) != 1)
8707 return false;
8708 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD
8709 && !(/* ??? Assume floating point reg based on mode? */
8710 TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT
8711 && (mode == DFmode || mode == DDmode)))
8712 return false;
8714 return CONSTANT_P (x) || large_toc_ok;
8717 return false;
8721 /* Try machine-dependent ways of modifying an illegitimate address
8722 to be legitimate. If we find one, return the new, valid address.
8723 This is used from only one place: `memory_address' in explow.c.
8725 OLDX is the address as it was before break_out_memory_refs was
8726 called. In some cases it is useful to look at this to decide what
8727 needs to be done.
8729 It is always safe for this function to do nothing. It exists to
8730 recognize opportunities to optimize the output.
8732 On RS/6000, first check for the sum of a register with a constant
8733 integer that is out of range. If so, generate code to add the
8734 constant with the low-order 16 bits masked to the register and force
8735 this result into another register (this can be done with `cau').
8736 Then generate an address of REG+(CONST&0xffff), allowing for the
8737 possibility of bit 16 being a one.
8739 Then check for the sum of a register and something not constant, try to
8740 load the other things into a register and return the sum. */
8742 static rtx
8743 rs6000_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
8744 machine_mode mode)
8746 unsigned int extra;
8748 if (!reg_offset_addressing_ok_p (mode)
8749 || mode_supports_vsx_dform_quad (mode))
8751 if (virtual_stack_registers_memory_p (x))
8752 return x;
8754 /* In theory we should not be seeing addresses of the form reg+0,
8755 but just in case it is generated, optimize it away. */
8756 if (GET_CODE (x) == PLUS && XEXP (x, 1) == const0_rtx)
8757 return force_reg (Pmode, XEXP (x, 0));
8759 /* For TImode with load/store quad, restrict addresses to just a single
8760 pointer, so it works with both GPRs and VSX registers. */
8761 /* Make sure both operands are registers. */
8762 else if (GET_CODE (x) == PLUS
8763 && (mode != TImode || !TARGET_VSX_TIMODE))
8764 return gen_rtx_PLUS (Pmode,
8765 force_reg (Pmode, XEXP (x, 0)),
8766 force_reg (Pmode, XEXP (x, 1)));
8767 else
8768 return force_reg (Pmode, x);
8770 if (GET_CODE (x) == SYMBOL_REF)
8772 enum tls_model model = SYMBOL_REF_TLS_MODEL (x);
8773 if (model != 0)
8774 return rs6000_legitimize_tls_address (x, model);
8777 extra = 0;
8778 switch (mode)
8780 case TFmode:
8781 case TDmode:
8782 case TImode:
8783 case PTImode:
8784 case IFmode:
8785 case KFmode:
8786 /* As in legitimate_offset_address_p we do not assume
8787 worst-case. The mode here is just a hint as to the registers
8788 used. A TImode is usually in gprs, but may actually be in
8789 fprs. Leave worst-case scenario for reload to handle via
8790 insn constraints. PTImode is only GPRs. */
8791 extra = 8;
8792 break;
8793 default:
8794 break;
8797 if (GET_CODE (x) == PLUS
8798 && GET_CODE (XEXP (x, 0)) == REG
8799 && GET_CODE (XEXP (x, 1)) == CONST_INT
8800 && ((unsigned HOST_WIDE_INT) (INTVAL (XEXP (x, 1)) + 0x8000)
8801 >= 0x10000 - extra)
8802 && !(SPE_VECTOR_MODE (mode)
8803 || (TARGET_E500_DOUBLE && GET_MODE_SIZE (mode) > UNITS_PER_WORD)))
8805 HOST_WIDE_INT high_int, low_int;
8806 rtx sum;
8807 low_int = ((INTVAL (XEXP (x, 1)) & 0xffff) ^ 0x8000) - 0x8000;
8808 if (low_int >= 0x8000 - extra)
8809 low_int = 0;
8810 high_int = INTVAL (XEXP (x, 1)) - low_int;
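/* For example, an offset of 0x12345 splits into low_int = 0x2345 and
   high_int = 0x10000, while 0x1ffff splits into low_int = -1 and
   high_int = 0x20000. */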
8811 sum = force_operand (gen_rtx_PLUS (Pmode, XEXP (x, 0),
8812 GEN_INT (high_int)), 0);
8813 return plus_constant (Pmode, sum, low_int);
8815 else if (GET_CODE (x) == PLUS
8816 && GET_CODE (XEXP (x, 0)) == REG
8817 && GET_CODE (XEXP (x, 1)) != CONST_INT
8818 && GET_MODE_NUNITS (mode) == 1
8819 && (GET_MODE_SIZE (mode) <= UNITS_PER_WORD
8820 || (/* ??? Assume floating point reg based on mode? */
8821 (TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT)
8822 && (mode == DFmode || mode == DDmode)))
8823 && !avoiding_indexed_address_p (mode))
8825 return gen_rtx_PLUS (Pmode, XEXP (x, 0),
8826 force_reg (Pmode, force_operand (XEXP (x, 1), 0)));
8828 else if (SPE_VECTOR_MODE (mode)
8829 || (TARGET_E500_DOUBLE && GET_MODE_SIZE (mode) > UNITS_PER_WORD))
8831 if (mode == DImode)
8832 return x;
8833 /* We accept [reg + reg] and [reg + OFFSET]. */
8835 if (GET_CODE (x) == PLUS)
8837 rtx op1 = XEXP (x, 0);
8838 rtx op2 = XEXP (x, 1);
8839 rtx y;
8841 op1 = force_reg (Pmode, op1);
8843 if (GET_CODE (op2) != REG
8844 && (GET_CODE (op2) != CONST_INT
8845 || !SPE_CONST_OFFSET_OK (INTVAL (op2))
8846 || (GET_MODE_SIZE (mode) > 8
8847 && !SPE_CONST_OFFSET_OK (INTVAL (op2) + 8))))
8848 op2 = force_reg (Pmode, op2);
8850 /* We can't always do [reg + reg] for these, because [reg +
8851 reg + offset] is not a legitimate addressing mode. */
8852 y = gen_rtx_PLUS (Pmode, op1, op2);
8854 if ((GET_MODE_SIZE (mode) > 8 || mode == DDmode) && REG_P (op2))
8855 return force_reg (Pmode, y);
8856 else
8857 return y;
8860 return force_reg (Pmode, x);
8862 else if ((TARGET_ELF
8863 #if TARGET_MACHO
8864 || !MACHO_DYNAMIC_NO_PIC_P
8865 #endif
8867 && TARGET_32BIT
8868 && TARGET_NO_TOC
8869 && ! flag_pic
8870 && GET_CODE (x) != CONST_INT
8871 && GET_CODE (x) != CONST_WIDE_INT
8872 && GET_CODE (x) != CONST_DOUBLE
8873 && CONSTANT_P (x)
8874 && GET_MODE_NUNITS (mode) == 1
8875 && (GET_MODE_SIZE (mode) <= UNITS_PER_WORD
8876 || (/* ??? Assume floating point reg based on mode? */
8877 (TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT)
8878 && (mode == DFmode || mode == DDmode))))
8880 rtx reg = gen_reg_rtx (Pmode);
8881 if (TARGET_ELF)
8882 emit_insn (gen_elf_high (reg, x));
8883 else
8884 emit_insn (gen_macho_high (reg, x));
8885 return gen_rtx_LO_SUM (Pmode, reg, x);
8887 else if (TARGET_TOC
8888 && GET_CODE (x) == SYMBOL_REF
8889 && constant_pool_expr_p (x)
8890 && ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (get_pool_constant (x), Pmode))
8891 return create_TOC_reference (x, NULL_RTX);
8892 else
8893 return x;
8896 /* Debug version of rs6000_legitimize_address. */
8897 static rtx
8898 rs6000_debug_legitimize_address (rtx x, rtx oldx, machine_mode mode)
8900 rtx ret;
8901 rtx_insn *insns;
8903 start_sequence ();
8904 ret = rs6000_legitimize_address (x, oldx, mode);
8905 insns = get_insns ();
8906 end_sequence ();
8908 if (ret != x)
8910 fprintf (stderr,
8911 "\nrs6000_legitimize_address: mode %s, old code %s, "
8912 "new code %s, modified\n",
8913 GET_MODE_NAME (mode), GET_RTX_NAME (GET_CODE (x)),
8914 GET_RTX_NAME (GET_CODE (ret)));
8916 fprintf (stderr, "Original address:\n");
8917 debug_rtx (x);
8919 fprintf (stderr, "oldx:\n");
8920 debug_rtx (oldx);
8922 fprintf (stderr, "New address:\n");
8923 debug_rtx (ret);
8925 if (insns)
8927 fprintf (stderr, "Insns added:\n");
8928 debug_rtx_list (insns, 20);
8931 else
8933 fprintf (stderr,
8934 "\nrs6000_legitimize_address: mode %s, code %s, no change:\n",
8935 GET_MODE_NAME (mode), GET_RTX_NAME (GET_CODE (x)));
8937 debug_rtx (x);
8940 if (insns)
8941 emit_insn (insns);
8943 return ret;
8946 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
8947 We need to emit DTP-relative relocations. */
8949 static void rs6000_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
8950 static void
8951 rs6000_output_dwarf_dtprel (FILE *file, int size, rtx x)
8953 switch (size)
8955 case 4:
8956 fputs ("\t.long\t", file);
8957 break;
8958 case 8:
8959 fputs (DOUBLE_INT_ASM_OP, file);
8960 break;
8961 default:
8962 gcc_unreachable ();
8964 output_addr_const (file, x);
8965 if (TARGET_ELF)
8966 fputs ("@dtprel+0x8000", file);
8967 else if (TARGET_XCOFF && GET_CODE (x) == SYMBOL_REF)
8969 switch (SYMBOL_REF_TLS_MODEL (x))
8971 case 0:
8972 break;
8973 case TLS_MODEL_LOCAL_EXEC:
8974 fputs ("@le", file);
8975 break;
8976 case TLS_MODEL_INITIAL_EXEC:
8977 fputs ("@ie", file);
8978 break;
8979 case TLS_MODEL_GLOBAL_DYNAMIC:
8980 case TLS_MODEL_LOCAL_DYNAMIC:
8981 fputs ("@m", file);
8982 break;
8983 default:
8984 gcc_unreachable ();
8989 /* Return true if X is a symbol that refers to real (rather than emulated)
8990 TLS. */
8992 static bool
8993 rs6000_real_tls_symbol_ref_p (rtx x)
8995 return (GET_CODE (x) == SYMBOL_REF
8996 && SYMBOL_REF_TLS_MODEL (x) >= TLS_MODEL_REAL);
8999 /* In the name of slightly smaller debug output, and to cater to
9000 general assembler lossage, recognize various UNSPEC sequences
9001 and turn them back into a direct symbol reference. */
9003 static rtx
9004 rs6000_delegitimize_address (rtx orig_x)
9006 rtx x, y, offset;
9008 orig_x = delegitimize_mem_from_attrs (orig_x);
9009 x = orig_x;
9010 if (MEM_P (x))
9011 x = XEXP (x, 0);
9013 y = x;
9014 if (TARGET_CMODEL != CMODEL_SMALL
9015 && GET_CODE (y) == LO_SUM)
9016 y = XEXP (y, 1);
9018 offset = NULL_RTX;
9019 if (GET_CODE (y) == PLUS
9020 && GET_MODE (y) == Pmode
9021 && CONST_INT_P (XEXP (y, 1)))
9023 offset = XEXP (y, 1);
9024 y = XEXP (y, 0);
9027 if (GET_CODE (y) == UNSPEC
9028 && XINT (y, 1) == UNSPEC_TOCREL)
9030 y = XVECEXP (y, 0, 0);
9032 #ifdef HAVE_AS_TLS
9033 /* Do not associate thread-local symbols with the original
9034 constant pool symbol. */
9035 if (TARGET_XCOFF
9036 && GET_CODE (y) == SYMBOL_REF
9037 && CONSTANT_POOL_ADDRESS_P (y)
9038 && rs6000_real_tls_symbol_ref_p (get_pool_constant (y)))
9039 return orig_x;
9040 #endif
9042 if (offset != NULL_RTX)
9043 y = gen_rtx_PLUS (Pmode, y, offset);
9044 if (!MEM_P (orig_x))
9045 return y;
9046 else
9047 return replace_equiv_address_nv (orig_x, y);
9050 if (TARGET_MACHO
9051 && GET_CODE (orig_x) == LO_SUM
9052 && GET_CODE (XEXP (orig_x, 1)) == CONST)
9054 y = XEXP (XEXP (orig_x, 1), 0);
9055 if (GET_CODE (y) == UNSPEC
9056 && XINT (y, 1) == UNSPEC_MACHOPIC_OFFSET)
9057 return XVECEXP (y, 0, 0);
9060 return orig_x;
9063 /* Return true if X shouldn't be emitted into the debug info.
9064 The linker doesn't like .toc section references from
9065 .debug_* sections, so reject .toc section symbols. */
9067 static bool
9068 rs6000_const_not_ok_for_debug_p (rtx x)
9070 if (GET_CODE (x) == SYMBOL_REF
9071 && CONSTANT_POOL_ADDRESS_P (x))
9073 rtx c = get_pool_constant (x);
9074 machine_mode cmode = get_pool_mode (x);
9075 if (ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (c, cmode))
9076 return true;
9079 return false;
9083 /* Implement the TARGET_LEGITIMATE_COMBINED_INSN hook. */
9085 static bool
9086 rs6000_legitimate_combined_insn (rtx_insn *insn)
9088 int icode = INSN_CODE (insn);
9090 /* Reject creating doloop insns. Combine should not be allowed
9091 to create these for a number of reasons:
9092 1) In a nested loop, if combine creates one of these in an
9093 outer loop and the register allocator happens to allocate ctr
9094 to the outer loop insn, then the inner loop can't use ctr.
9095 Inner loops ought to be more highly optimized.
9096 2) Combine often wants to create one of these from what was
9097 originally a three insn sequence, first combining the three
9098 insns to two, then to ctrsi/ctrdi. When ctrsi/ctrdi is not
9099 allocated ctr, the splitter takes us back to the three insn
9100 sequence. It's better to stop combine at the two insn
9101 sequence.
9102 3) Faced with not being able to allocate ctr for ctrsi/ctrdi
9103 insns, the register allocator sometimes uses floating point
9104 or vector registers for the pseudo. Since ctrsi/ctrdi is a
9105 jump insn and output reloads are not implemented for jumps,
9106 the ctrsi/ctrdi splitters need to handle all possible cases.
9107 That's a pain, and it gets to be seriously difficult when a
9108 splitter that runs after reload needs memory to transfer from
9109 a gpr to fpr. See PR70098 and PR71763 which are not fixed
9110 for the difficult case. It's better to not create problems
9111 in the first place. */
9112 if (icode != CODE_FOR_nothing
9113 && (icode == CODE_FOR_ctrsi_internal1
9114 || icode == CODE_FOR_ctrdi_internal1
9115 || icode == CODE_FOR_ctrsi_internal2
9116 || icode == CODE_FOR_ctrdi_internal2
9117 || icode == CODE_FOR_ctrsi_internal3
9118 || icode == CODE_FOR_ctrdi_internal3
9119 || icode == CODE_FOR_ctrsi_internal4
9120 || icode == CODE_FOR_ctrdi_internal4))
9121 return false;
9123 return true;
9126 /* Construct the SYMBOL_REF for the tls_get_addr function. */
9128 static GTY(()) rtx rs6000_tls_symbol;
9129 static rtx
9130 rs6000_tls_get_addr (void)
9132 if (!rs6000_tls_symbol)
9133 rs6000_tls_symbol = init_one_libfunc ("__tls_get_addr");
9135 return rs6000_tls_symbol;
9138 /* Construct the SYMBOL_REF for TLS GOT references. */
9140 static GTY(()) rtx rs6000_got_symbol;
9141 static rtx
9142 rs6000_got_sym (void)
9144 if (!rs6000_got_symbol)
9146 rs6000_got_symbol = gen_rtx_SYMBOL_REF (Pmode, "_GLOBAL_OFFSET_TABLE_");
9147 SYMBOL_REF_FLAGS (rs6000_got_symbol) |= SYMBOL_FLAG_LOCAL;
9148 SYMBOL_REF_FLAGS (rs6000_got_symbol) |= SYMBOL_FLAG_EXTERNAL;
9151 return rs6000_got_symbol;
9154 /* AIX Thread-Local Address support. */
9156 static rtx
9157 rs6000_legitimize_tls_address_aix (rtx addr, enum tls_model model)
9159 rtx sym, mem, tocref, tlsreg, tmpreg, dest, tlsaddr;
9160 const char *name;
9161 char *tlsname;
9163 name = XSTR (addr, 0);
9164 /* Append TLS CSECT qualifier, unless the symbol already is qualified
9165 or the symbol will be in TLS private data section. */
9166 if (name[strlen (name) - 1] != ']'
9167 && (TREE_PUBLIC (SYMBOL_REF_DECL (addr))
9168 || bss_initializer_p (SYMBOL_REF_DECL (addr))))
9170 tlsname = XALLOCAVEC (char, strlen (name) + 5); /* name + "[TL]" + NUL. */
9171 strcpy (tlsname, name);
9172 strcat (tlsname,
9173 bss_initializer_p (SYMBOL_REF_DECL (addr)) ? "[UL]" : "[TL]");
9174 tlsaddr = copy_rtx (addr);
9175 XSTR (tlsaddr, 0) = ggc_strdup (tlsname);
9177 else
9178 tlsaddr = addr;
9180 /* Place addr into TOC constant pool. */
9181 sym = force_const_mem (GET_MODE (tlsaddr), tlsaddr);
9183 /* Output the TOC entry and create the MEM referencing the value. */
9184 if (constant_pool_expr_p (XEXP (sym, 0))
9185 && ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (get_pool_constant (XEXP (sym, 0)), Pmode))
9187 tocref = create_TOC_reference (XEXP (sym, 0), NULL_RTX);
9188 mem = gen_const_mem (Pmode, tocref);
9189 set_mem_alias_set (mem, get_TOC_alias_set ());
9191 else
9192 return sym;
9194 /* Use global-dynamic for local-dynamic. */
9195 if (model == TLS_MODEL_GLOBAL_DYNAMIC
9196 || model == TLS_MODEL_LOCAL_DYNAMIC)
9198 /* Create new TOC reference for @m symbol. */
9199 name = XSTR (XVECEXP (XEXP (mem, 0), 0, 0), 0);
9200 tlsname = XALLOCAVEC (char, strlen (name) + 2); /* "*LCM" is one char longer than the "*LC" it replaces. */
9201 strcpy (tlsname, "*LCM");
9202 strcat (tlsname, name + 3);
9203 rtx modaddr = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (tlsname));
9204 SYMBOL_REF_FLAGS (modaddr) |= SYMBOL_FLAG_LOCAL;
9205 tocref = create_TOC_reference (modaddr, NULL_RTX);
9206 rtx modmem = gen_const_mem (Pmode, tocref);
9207 set_mem_alias_set (modmem, get_TOC_alias_set ());
9209 rtx modreg = gen_reg_rtx (Pmode);
9210 emit_insn (gen_rtx_SET (modreg, modmem));
9212 tmpreg = gen_reg_rtx (Pmode);
9213 emit_insn (gen_rtx_SET (tmpreg, mem));
9215 dest = gen_reg_rtx (Pmode);
9216 if (TARGET_32BIT)
9217 emit_insn (gen_tls_get_addrsi (dest, modreg, tmpreg));
9218 else
9219 emit_insn (gen_tls_get_addrdi (dest, modreg, tmpreg));
9220 return dest;
9222 /* Obtain TLS pointer: 32 bit call or 64 bit GPR 13. */
9223 else if (TARGET_32BIT)
9225 tlsreg = gen_reg_rtx (SImode);
9226 emit_insn (gen_tls_get_tpointer (tlsreg));
9228 else
9229 tlsreg = gen_rtx_REG (DImode, 13);
9231 /* Load the TOC value into temporary register. */
9232 tmpreg = gen_reg_rtx (Pmode);
9233 emit_insn (gen_rtx_SET (tmpreg, mem));
9234 set_unique_reg_note (get_last_insn (), REG_EQUAL,
9235 gen_rtx_MINUS (Pmode, addr, tlsreg));
9237 /* Add TOC symbol value to TLS pointer. */
9238 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, tmpreg, tlsreg));
9240 return dest;
9243 /* ADDR contains a thread-local SYMBOL_REF. Generate code to compute
9244 this (thread-local) address. */
9246 static rtx
9247 rs6000_legitimize_tls_address (rtx addr, enum tls_model model)
9249 rtx dest, insn;
9251 if (TARGET_XCOFF)
9252 return rs6000_legitimize_tls_address_aix (addr, model);
9254 dest = gen_reg_rtx (Pmode);
9255 if (model == TLS_MODEL_LOCAL_EXEC && rs6000_tls_size == 16)
9257 rtx tlsreg;
9259 if (TARGET_64BIT)
9261 tlsreg = gen_rtx_REG (Pmode, 13);
9262 insn = gen_tls_tprel_64 (dest, tlsreg, addr);
9264 else
9266 tlsreg = gen_rtx_REG (Pmode, 2);
9267 insn = gen_tls_tprel_32 (dest, tlsreg, addr);
9269 emit_insn (insn);
9271 else if (model == TLS_MODEL_LOCAL_EXEC && rs6000_tls_size == 32)
9273 rtx tlsreg, tmp;
9275 tmp = gen_reg_rtx (Pmode);
9276 if (TARGET_64BIT)
9278 tlsreg = gen_rtx_REG (Pmode, 13);
9279 insn = gen_tls_tprel_ha_64 (tmp, tlsreg, addr);
9281 else
9283 tlsreg = gen_rtx_REG (Pmode, 2);
9284 insn = gen_tls_tprel_ha_32 (tmp, tlsreg, addr);
9286 emit_insn (insn);
9287 if (TARGET_64BIT)
9288 insn = gen_tls_tprel_lo_64 (dest, tmp, addr);
9289 else
9290 insn = gen_tls_tprel_lo_32 (dest, tmp, addr);
9291 emit_insn (insn);
9293 else
9295 rtx r3, got, tga, tmp1, tmp2, call_insn;
9297 /* We currently use relocations like @got@tlsgd for tls, which
9298 means the linker will handle allocation of tls entries, placing
9299 them in the .got section. So use a pointer to the .got section,
9300 not one to secondary TOC sections used by 64-bit -mminimal-toc,
9301 or to secondary GOT sections used by 32-bit -fPIC. */
9302 if (TARGET_64BIT)
9303 got = gen_rtx_REG (Pmode, 2);
9304 else
9306 if (flag_pic == 1)
9307 got = gen_rtx_REG (Pmode, RS6000_PIC_OFFSET_TABLE_REGNUM);
9308 else
9310 rtx gsym = rs6000_got_sym ();
9311 got = gen_reg_rtx (Pmode);
9312 if (flag_pic == 0)
9313 rs6000_emit_move (got, gsym, Pmode);
9314 else
9316 rtx mem, lab;
9318 tmp1 = gen_reg_rtx (Pmode);
9319 tmp2 = gen_reg_rtx (Pmode);
9320 mem = gen_const_mem (Pmode, tmp1);
9321 lab = gen_label_rtx ();
9322 emit_insn (gen_load_toc_v4_PIC_1b (gsym, lab));
9323 emit_move_insn (tmp1, gen_rtx_REG (Pmode, LR_REGNO));
9324 if (TARGET_LINK_STACK)
9325 emit_insn (gen_addsi3 (tmp1, tmp1, GEN_INT (4)));
9326 emit_move_insn (tmp2, mem);
9327 rtx_insn *last = emit_insn (gen_addsi3 (got, tmp1, tmp2));
9328 set_unique_reg_note (last, REG_EQUAL, gsym);
9333 if (model == TLS_MODEL_GLOBAL_DYNAMIC)
9335 tga = rs6000_tls_get_addr ();
9336 emit_library_call_value (tga, dest, LCT_CONST, Pmode,
9337 1, const0_rtx, Pmode);
9339 r3 = gen_rtx_REG (Pmode, 3);
9340 if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
9342 if (TARGET_64BIT)
9343 insn = gen_tls_gd_aix64 (r3, got, addr, tga, const0_rtx);
9344 else
9345 insn = gen_tls_gd_aix32 (r3, got, addr, tga, const0_rtx);
9347 else if (DEFAULT_ABI == ABI_V4)
9348 insn = gen_tls_gd_sysvsi (r3, got, addr, tga, const0_rtx);
9349 else
9350 gcc_unreachable ();
9351 call_insn = last_call_insn ();
9352 PATTERN (call_insn) = insn;
9353 if (DEFAULT_ABI == ABI_V4 && TARGET_SECURE_PLT && flag_pic)
9354 use_reg (&CALL_INSN_FUNCTION_USAGE (call_insn),
9355 pic_offset_table_rtx);
9357 else if (model == TLS_MODEL_LOCAL_DYNAMIC)
9359 tga = rs6000_tls_get_addr ();
9360 tmp1 = gen_reg_rtx (Pmode);
9361 emit_library_call_value (tga, tmp1, LCT_CONST, Pmode,
9362 1, const0_rtx, Pmode);
9364 r3 = gen_rtx_REG (Pmode, 3);
9365 if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
9367 if (TARGET_64BIT)
9368 insn = gen_tls_ld_aix64 (r3, got, tga, const0_rtx);
9369 else
9370 insn = gen_tls_ld_aix32 (r3, got, tga, const0_rtx);
9372 else if (DEFAULT_ABI == ABI_V4)
9373 insn = gen_tls_ld_sysvsi (r3, got, tga, const0_rtx);
9374 else
9375 gcc_unreachable ();
9376 call_insn = last_call_insn ();
9377 PATTERN (call_insn) = insn;
9378 if (DEFAULT_ABI == ABI_V4 && TARGET_SECURE_PLT && flag_pic)
9379 use_reg (&CALL_INSN_FUNCTION_USAGE (call_insn),
9380 pic_offset_table_rtx);
9382 if (rs6000_tls_size == 16)
9384 if (TARGET_64BIT)
9385 insn = gen_tls_dtprel_64 (dest, tmp1, addr);
9386 else
9387 insn = gen_tls_dtprel_32 (dest, tmp1, addr);
9389 else if (rs6000_tls_size == 32)
9391 tmp2 = gen_reg_rtx (Pmode);
9392 if (TARGET_64BIT)
9393 insn = gen_tls_dtprel_ha_64 (tmp2, tmp1, addr);
9394 else
9395 insn = gen_tls_dtprel_ha_32 (tmp2, tmp1, addr);
9396 emit_insn (insn);
9397 if (TARGET_64BIT)
9398 insn = gen_tls_dtprel_lo_64 (dest, tmp2, addr);
9399 else
9400 insn = gen_tls_dtprel_lo_32 (dest, tmp2, addr);
9402 else
9404 tmp2 = gen_reg_rtx (Pmode);
9405 if (TARGET_64BIT)
9406 insn = gen_tls_got_dtprel_64 (tmp2, got, addr);
9407 else
9408 insn = gen_tls_got_dtprel_32 (tmp2, got, addr);
9409 emit_insn (insn);
9410 insn = gen_rtx_SET (dest, gen_rtx_PLUS (Pmode, tmp2, tmp1));
9412 emit_insn (insn);
9414 else
9416 /* IE, or 64-bit offset LE. */
9417 tmp2 = gen_reg_rtx (Pmode);
9418 if (TARGET_64BIT)
9419 insn = gen_tls_got_tprel_64 (tmp2, got, addr);
9420 else
9421 insn = gen_tls_got_tprel_32 (tmp2, got, addr);
9422 emit_insn (insn);
9423 if (TARGET_64BIT)
9424 insn = gen_tls_tls_64 (dest, tmp2, addr);
9425 else
9426 insn = gen_tls_tls_32 (dest, tmp2, addr);
9427 emit_insn (insn);
9431 return dest;
9434 /* Implement TARGET_CANNOT_FORCE_CONST_MEM. */
9436 static bool
9437 rs6000_cannot_force_const_mem (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
9439 if (GET_CODE (x) == HIGH
9440 && GET_CODE (XEXP (x, 0)) == UNSPEC)
9441 return true;
9443 /* A TLS symbol in the TOC cannot contain a sum. */
9444 if (GET_CODE (x) == CONST
9445 && GET_CODE (XEXP (x, 0)) == PLUS
9446 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
9447 && SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x, 0), 0)) != 0)
9448 return true;
9450 /* Do not place an ELF TLS symbol in the constant pool. */
9451 return TARGET_ELF && tls_referenced_p (x);
9454 /* Return true iff the given SYMBOL_REF refers to a constant pool entry
9455 that we have put in the TOC, or for cmodel=medium, if the SYMBOL_REF
9456 can be addressed relative to the toc pointer. */
9458 static bool
9459 use_toc_relative_ref (rtx sym, machine_mode mode)
9461 return ((constant_pool_expr_p (sym)
9462 && ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (get_pool_constant (sym),
9463 get_pool_mode (sym)))
9464 || (TARGET_CMODEL == CMODEL_MEDIUM
9465 && SYMBOL_REF_LOCAL_P (sym)
9466 && GET_MODE_SIZE (mode) <= POWERPC64_TOC_POINTER_ALIGNMENT));
9469 /* Our implementation of LEGITIMIZE_RELOAD_ADDRESS. Returns a value to
9470 replace the input X, or the original X if no replacement is called for.
9471 The output parameter *WIN is 1 if the calling macro should goto WIN,
9472 0 if it should not.
9474 For RS/6000, we wish to handle large displacements off a base
9475 register by splitting the addend across an addi/addis and the mem insn.
9476 This cuts the number of extra insns needed from 3 to 1.
9478 On Darwin, we use this to generate code for floating point constants.
9479 A movsf_low is generated so we wind up with 2 instructions rather than 3.
9480 The Darwin code is inside #if TARGET_MACHO because only then are the
9481 machopic_* functions defined. */
9482 static rtx
9483 rs6000_legitimize_reload_address (rtx x, machine_mode mode,
9484 int opnum, int type,
9485 int ind_levels ATTRIBUTE_UNUSED, int *win)
9487 bool reg_offset_p = reg_offset_addressing_ok_p (mode);
9488 bool quad_offset_p = mode_supports_vsx_dform_quad (mode);
9490 /* Nasty hack for vsx_splat_v2df/v2di load from mem, which takes a
9491 DFmode/DImode MEM. Ditto for ISA 3.0 vsx_splat_v4sf/v4si. */
9492 if (reg_offset_p
9493 && opnum == 1
9494 && ((mode == DFmode && recog_data.operand_mode[0] == V2DFmode)
9495 || (mode == DImode && recog_data.operand_mode[0] == V2DImode)
9496 || (mode == SFmode && recog_data.operand_mode[0] == V4SFmode
9497 && TARGET_P9_VECTOR)
9498 || (mode == SImode && recog_data.operand_mode[0] == V4SImode
9499 && TARGET_P9_VECTOR)))
9500 reg_offset_p = false;
9502 /* We must recognize output that we have already generated ourselves. */
9503 if (GET_CODE (x) == PLUS
9504 && GET_CODE (XEXP (x, 0)) == PLUS
9505 && GET_CODE (XEXP (XEXP (x, 0), 0)) == REG
9506 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
9507 && GET_CODE (XEXP (x, 1)) == CONST_INT)
9509 if (TARGET_DEBUG_ADDR)
9511 fprintf (stderr, "\nlegitimize_reload_address push_reload #1:\n");
9512 debug_rtx (x);
9514 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
9515 BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
9516 opnum, (enum reload_type) type);
9517 *win = 1;
9518 return x;
9521 /* Likewise for (lo_sum (high ...) ...) output we have generated. */
9522 if (GET_CODE (x) == LO_SUM
9523 && GET_CODE (XEXP (x, 0)) == HIGH)
9525 if (TARGET_DEBUG_ADDR)
9527 fprintf (stderr, "\nlegitimize_reload_address push_reload #2:\n");
9528 debug_rtx (x);
9530 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
9531 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0,
9532 opnum, (enum reload_type) type);
9533 *win = 1;
9534 return x;
9537 #if TARGET_MACHO
9538 if (DEFAULT_ABI == ABI_DARWIN && flag_pic
9539 && GET_CODE (x) == LO_SUM
9540 && GET_CODE (XEXP (x, 0)) == PLUS
9541 && XEXP (XEXP (x, 0), 0) == pic_offset_table_rtx
9542 && GET_CODE (XEXP (XEXP (x, 0), 1)) == HIGH
9543 && XEXP (XEXP (XEXP (x, 0), 1), 0) == XEXP (x, 1)
9544 && machopic_operand_p (XEXP (x, 1)))
9546 /* Result of previous invocation of this function on Darwin
9547 floating point constant. */
9548 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
9549 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0,
9550 opnum, (enum reload_type) type);
9551 *win = 1;
9552 return x;
9554 #endif
9556 if (TARGET_CMODEL != CMODEL_SMALL
9557 && reg_offset_p
9558 && !quad_offset_p
9559 && small_toc_ref (x, VOIDmode))
9561 rtx hi = gen_rtx_HIGH (Pmode, copy_rtx (x));
9562 x = gen_rtx_LO_SUM (Pmode, hi, x);
9563 if (TARGET_DEBUG_ADDR)
9565 fprintf (stderr, "\nlegitimize_reload_address push_reload #3:\n");
9566 debug_rtx (x);
9568 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
9569 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0,
9570 opnum, (enum reload_type) type);
9571 *win = 1;
9572 return x;
9575 if (GET_CODE (x) == PLUS
9576 && REG_P (XEXP (x, 0))
9577 && REGNO (XEXP (x, 0)) < FIRST_PSEUDO_REGISTER
9578 && INT_REG_OK_FOR_BASE_P (XEXP (x, 0), 1)
9579 && CONST_INT_P (XEXP (x, 1))
9580 && reg_offset_p
9581 && !SPE_VECTOR_MODE (mode)
9582 && !(TARGET_E500_DOUBLE && GET_MODE_SIZE (mode) > UNITS_PER_WORD)
9583 && (quad_offset_p || !VECTOR_MODE_P (mode) || VECTOR_MEM_NONE_P (mode)))
9585 HOST_WIDE_INT val = INTVAL (XEXP (x, 1));
9586 HOST_WIDE_INT low = ((val & 0xffff) ^ 0x8000) - 0x8000;
9587 HOST_WIDE_INT high
9588 = (((val - low) & 0xffffffff) ^ 0x80000000) - 0x80000000;
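/* For example, val = 0x18000 splits into low = -0x8000 and high = 0x20000:
   the xor/subtract pairs sign-extend the 16-bit and 32-bit halves, so
   "high" absorbs the carry introduced by a negative "low" and
   high + low == val still holds.  */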
9590 /* Check for 32-bit overflow or quad addresses with one of the
9591 four least significant bits set. */
9592 if (high + low != val
9593 || (quad_offset_p && (low & 0xf)))
9595 *win = 0;
9596 return x;
9599 /* Reload the high part into a base reg; leave the low part
9600 in the mem directly. */
9602 x = gen_rtx_PLUS (GET_MODE (x),
9603 gen_rtx_PLUS (GET_MODE (x), XEXP (x, 0),
9604 GEN_INT (high)),
9605 GEN_INT (low));
9607 if (TARGET_DEBUG_ADDR)
9609 fprintf (stderr, "\nlegitimize_reload_address push_reload #4:\n");
9610 debug_rtx (x);
9612 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
9613 BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
9614 opnum, (enum reload_type) type);
9615 *win = 1;
9616 return x;
9619 if (GET_CODE (x) == SYMBOL_REF
9620 && reg_offset_p
9621 && !quad_offset_p
9622 && (!VECTOR_MODE_P (mode) || VECTOR_MEM_NONE_P (mode))
9623 && !SPE_VECTOR_MODE (mode)
9624 #if TARGET_MACHO
9625 && DEFAULT_ABI == ABI_DARWIN
9626 && (flag_pic || MACHO_DYNAMIC_NO_PIC_P)
9627 && machopic_symbol_defined_p (x)
9628 #else
9629 && DEFAULT_ABI == ABI_V4
9630 && !flag_pic
9631 #endif
9632 /* Don't do this for TFmode or TDmode, since the result isn't offsettable.
9633 The same goes for DImode without 64-bit gprs and DFmode and DDmode
9634 without fprs.
9635 ??? Assume floating point reg based on mode? This assumption is
9636 violated by e.g. a powerpc-linux -m32 compile of gcc.dg/pr28796-2.c
9637 where reload ends up doing a DFmode load of a constant from
9638 mem using two gprs. Unfortunately, at this point reload
9639 hasn't yet selected regs so poking around in reload data
9640 won't help and even if we could figure out the regs reliably,
9641 we'd still want to allow this transformation when the mem is
9642 naturally aligned. Since we say the address is good here, we
9643 can't disable offsets from LO_SUMs in mem_operand_gpr.
9644 FIXME: Allow offset from lo_sum for other modes too, when
9645 mem is sufficiently aligned.
9647 Also disallow this if the type can go in VMX/Altivec registers, since
9648 those registers do not have d-form (reg+offset) address modes. */
9649 && !reg_addr[mode].scalar_in_vmx_p
9650 && mode != TFmode
9651 && mode != TDmode
9652 && mode != IFmode
9653 && mode != KFmode
9654 && (mode != TImode || !TARGET_VSX_TIMODE)
9655 && mode != PTImode
9656 && (mode != DImode || TARGET_POWERPC64)
9657 && ((mode != DFmode && mode != DDmode) || TARGET_POWERPC64
9658 || (TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT)))
9660 #if TARGET_MACHO
9661 if (flag_pic)
9663 rtx offset = machopic_gen_offset (x);
9664 x = gen_rtx_LO_SUM (GET_MODE (x),
9665 gen_rtx_PLUS (Pmode, pic_offset_table_rtx,
9666 gen_rtx_HIGH (Pmode, offset)), offset);
9668 else
9669 #endif
9670 x = gen_rtx_LO_SUM (GET_MODE (x),
9671 gen_rtx_HIGH (Pmode, x), x);
9673 if (TARGET_DEBUG_ADDR)
9675 fprintf (stderr, "\nlegitimize_reload_address push_reload #5:\n");
9676 debug_rtx (x);
9678 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
9679 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0,
9680 opnum, (enum reload_type) type);
9681 *win = 1;
9682 return x;
9685 /* Reload an offset address wrapped by an AND that represents the
9686 masking of the lower bits. Strip the outer AND and let reload
9687 convert the offset address into an indirect address. For VSX,
9688 force reload to create the address with an AND in a separate
9689 register, because we can't guarantee an altivec register will
9690 be used. */
9691 if (VECTOR_MEM_ALTIVEC_P (mode)
9692 && GET_CODE (x) == AND
9693 && GET_CODE (XEXP (x, 0)) == PLUS
9694 && GET_CODE (XEXP (XEXP (x, 0), 0)) == REG
9695 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
9696 && GET_CODE (XEXP (x, 1)) == CONST_INT
9697 && INTVAL (XEXP (x, 1)) == -16)
9699 x = XEXP (x, 0);
9700 *win = 1;
9701 return x;
9704 if (TARGET_TOC
9705 && reg_offset_p
9706 && !quad_offset_p
9707 && GET_CODE (x) == SYMBOL_REF
9708 && use_toc_relative_ref (x, mode))
9710 x = create_TOC_reference (x, NULL_RTX);
9711 if (TARGET_CMODEL != CMODEL_SMALL)
9713 if (TARGET_DEBUG_ADDR)
9715 fprintf (stderr, "\nlegitimize_reload_address push_reload #6:\n");
9716 debug_rtx (x);
9718 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
9719 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0,
9720 opnum, (enum reload_type) type);
9722 *win = 1;
9723 return x;
9725 *win = 0;
9726 return x;
9729 /* Debug version of rs6000_legitimize_reload_address. */
9730 static rtx
9731 rs6000_debug_legitimize_reload_address (rtx x, machine_mode mode,
9732 int opnum, int type,
9733 int ind_levels, int *win)
9735 rtx ret = rs6000_legitimize_reload_address (x, mode, opnum, type,
9736 ind_levels, win);
9737 fprintf (stderr,
9738 "\nrs6000_legitimize_reload_address: mode = %s, opnum = %d, "
9739 "type = %d, ind_levels = %d, win = %d, original addr:\n",
9740 GET_MODE_NAME (mode), opnum, type, ind_levels, *win);
9741 debug_rtx (x);
9743 if (x == ret)
9744 fprintf (stderr, "Same address returned\n");
9745 else if (!ret)
9746 fprintf (stderr, "NULL returned\n");
9747 else
9749 fprintf (stderr, "New address:\n");
9750 debug_rtx (ret);
9753 return ret;
9756 /* TARGET_LEGITIMATE_ADDRESS_P recognizes an RTL expression
9757 that is a valid memory address for an instruction.
9758 The MODE argument is the machine mode for the MEM expression
9759 that wants to use this address.
9761 On the RS/6000, there are four valid addresses: a SYMBOL_REF that
9762 refers to a constant pool entry of an address (or the sum of it
9763 plus a constant), a short (16-bit signed) constant plus a register,
9764 the sum of two registers, or a register indirect, possibly with an
9765 auto-increment. For DFmode, DDmode and DImode with a constant plus
9766 register, we must ensure that both words are addressable, or on
9767 PowerPC64 that the offset is word aligned.
9769 For modes spanning multiple registers (DFmode and DDmode in 32-bit GPRs,
9770 32-bit DImode, TImode, TFmode, TDmode), indexed addressing cannot be used
9771 because adjacent memory cells are accessed by adding word-sized offsets
9772 during assembly output. */
9773 static bool
9774 rs6000_legitimate_address_p (machine_mode mode, rtx x, bool reg_ok_strict)
9776 bool reg_offset_p = reg_offset_addressing_ok_p (mode);
9777 bool quad_offset_p = mode_supports_vsx_dform_quad (mode);
9779 /* If this is an unaligned stvx/lvx type address, discard the outer AND. */
9780 if (VECTOR_MEM_ALTIVEC_P (mode)
9781 && GET_CODE (x) == AND
9782 && GET_CODE (XEXP (x, 1)) == CONST_INT
9783 && INTVAL (XEXP (x, 1)) == -16)
9784 x = XEXP (x, 0);
9786 if (TARGET_ELF && RS6000_SYMBOL_REF_TLS_P (x))
9787 return 0;
9788 if (legitimate_indirect_address_p (x, reg_ok_strict))
9789 return 1;
9790 if (TARGET_UPDATE
9791 && (GET_CODE (x) == PRE_INC || GET_CODE (x) == PRE_DEC)
9792 && mode_supports_pre_incdec_p (mode)
9793 && legitimate_indirect_address_p (XEXP (x, 0), reg_ok_strict))
9794 return 1;
9795 /* Handle restricted vector d-form offsets in ISA 3.0. */
9796 if (quad_offset_p)
9798 if (quad_address_p (x, mode, reg_ok_strict))
9799 return 1;
9801 else if (virtual_stack_registers_memory_p (x))
9802 return 1;
9804 else if (reg_offset_p)
9806 if (legitimate_small_data_p (mode, x))
9807 return 1;
9808 if (legitimate_constant_pool_address_p (x, mode,
9809 reg_ok_strict || lra_in_progress))
9810 return 1;
9811 if (reg_addr[mode].fused_toc && GET_CODE (x) == UNSPEC
9812 && XINT (x, 1) == UNSPEC_FUSION_ADDIS)
9813 return 1;
9816 /* For TImode, if we have TImode in VSX registers, only allow register
9817 indirect addresses. This will allow the values to go in either GPRs
9818 or VSX registers without reloading. The vector types would tend to
9819 go into VSX registers, so we allow REG+REG, while TImode seems
9820 somewhat split, in that some uses are GPR based, and some VSX based. */
9821 /* FIXME: We could loosen this by changing the following to
9822 if (mode == TImode && TARGET_QUAD_MEMORY && TARGET_VSX_TIMODE)
9823 but currently we cannot allow REG+REG addressing for TImode. See
9824 PR72827 for complete details on how this ends up hoodwinking DSE. */
9825 if (mode == TImode && TARGET_VSX_TIMODE)
9826 return 0;
9827 /* If not REG_OK_STRICT (before reload) let pass any stack offset. */
9828 if (! reg_ok_strict
9829 && reg_offset_p
9830 && GET_CODE (x) == PLUS
9831 && GET_CODE (XEXP (x, 0)) == REG
9832 && (XEXP (x, 0) == virtual_stack_vars_rtx
9833 || XEXP (x, 0) == arg_pointer_rtx)
9834 && GET_CODE (XEXP (x, 1)) == CONST_INT)
9835 return 1;
9836 if (rs6000_legitimate_offset_address_p (mode, x, reg_ok_strict, false))
9837 return 1;
9838 if (!FLOAT128_2REG_P (mode)
9839 && ((TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT)
9840 || TARGET_POWERPC64
9841 || (mode != DFmode && mode != DDmode)
9842 || (TARGET_E500_DOUBLE && mode != DDmode))
9843 && (TARGET_POWERPC64 || mode != DImode)
9844 && (mode != TImode || VECTOR_MEM_VSX_P (TImode))
9845 && mode != PTImode
9846 && !avoiding_indexed_address_p (mode)
9847 && legitimate_indexed_address_p (x, reg_ok_strict))
9848 return 1;
9849 if (TARGET_UPDATE && GET_CODE (x) == PRE_MODIFY
9850 && mode_supports_pre_modify_p (mode)
9851 && legitimate_indirect_address_p (XEXP (x, 0), reg_ok_strict)
9852 && (rs6000_legitimate_offset_address_p (mode, XEXP (x, 1),
9853 reg_ok_strict, false)
9854 || (!avoiding_indexed_address_p (mode)
9855 && legitimate_indexed_address_p (XEXP (x, 1), reg_ok_strict)))
9856 && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
9857 return 1;
9858 if (reg_offset_p && !quad_offset_p
9859 && legitimate_lo_sum_address_p (mode, x, reg_ok_strict))
9860 return 1;
9861 return 0;
9864 /* Debug version of rs6000_legitimate_address_p. */
9865 static bool
9866 rs6000_debug_legitimate_address_p (machine_mode mode, rtx x,
9867 bool reg_ok_strict)
9869 bool ret = rs6000_legitimate_address_p (mode, x, reg_ok_strict);
9870 fprintf (stderr,
9871 "\nrs6000_legitimate_address_p: return = %s, mode = %s, "
9872 "strict = %d, reload = %s, code = %s\n",
9873 ret ? "true" : "false",
9874 GET_MODE_NAME (mode),
9875 reg_ok_strict,
9876 (reload_completed
9877 ? "after"
9878 : (reload_in_progress ? "progress" : "before")),
9879 GET_RTX_NAME (GET_CODE (x)));
9880 debug_rtx (x);
9882 return ret;
9885 /* Implement TARGET_MODE_DEPENDENT_ADDRESS_P. */
9887 static bool
9888 rs6000_mode_dependent_address_p (const_rtx addr,
9889 addr_space_t as ATTRIBUTE_UNUSED)
9891 return rs6000_mode_dependent_address_ptr (addr);
9894 /* Go to LABEL if ADDR (a legitimate address expression)
9895 has an effect that depends on the machine mode it is used for.
9897 On the RS/6000 this is true of all integral offsets (since AltiVec
9898 and VSX modes don't allow them) and of pre-increment or decrement addresses.
9900 ??? Except that due to conceptual problems in offsettable_address_p
9901 we can't really report the problems of integral offsets. So leave
9902 this assuming that the adjustable offset must be valid for the
9903 sub-words of a TFmode operand, which is what we had before. */
9905 static bool
9906 rs6000_mode_dependent_address (const_rtx addr)
9908 switch (GET_CODE (addr))
9910 case PLUS:
9911 /* Any offset from virtual_stack_vars_rtx and arg_pointer_rtx
9912 is considered a legitimate address before reload, so there
9913 are no offset restrictions in that case. Note that this
9914 condition is safe in strict mode because any address involving
9915 virtual_stack_vars_rtx or arg_pointer_rtx would already have
9916 been rejected as illegitimate. */
9917 if (XEXP (addr, 0) != virtual_stack_vars_rtx
9918 && XEXP (addr, 0) != arg_pointer_rtx
9919 && GET_CODE (XEXP (addr, 1)) == CONST_INT)
9921 unsigned HOST_WIDE_INT val = INTVAL (XEXP (addr, 1));
9922 return val + 0x8000 >= 0x10000 - (TARGET_POWERPC64 ? 8 : 12);
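/* I.e., the offset is mode-dependent unless both VAL and VAL plus the
   largest word adjustment applied to a multi-register operand (8 bytes
   on PowerPC64, else 12) fit in a signed 16-bit displacement; biasing
   by 0x8000 lets one unsigned compare test both bounds.  */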
9924 break;
9926 case LO_SUM:
9927 /* Anything in the constant pool is sufficiently aligned that
9928 all bytes have the same high part address. */
9929 return !legitimate_constant_pool_address_p (addr, QImode, false);
9931 /* Auto-increment cases are now treated generically in recog.c. */
9932 case PRE_MODIFY:
9933 return TARGET_UPDATE;
9935 /* AND is only allowed in Altivec loads. */
9936 case AND:
9937 return true;
9939 default:
9940 break;
9943 return false;
9946 /* Debug version of rs6000_mode_dependent_address. */
9947 static bool
9948 rs6000_debug_mode_dependent_address (const_rtx addr)
9950 bool ret = rs6000_mode_dependent_address (addr);
9952 fprintf (stderr, "\nrs6000_mode_dependent_address: ret = %s\n",
9953 ret ? "true" : "false");
9954 debug_rtx (addr);
9956 return ret;
9959 /* Implement FIND_BASE_TERM. */
9961 rtx
9962 rs6000_find_base_term (rtx op)
9964 rtx base;
9966 base = op;
9967 if (GET_CODE (base) == CONST)
9968 base = XEXP (base, 0);
9969 if (GET_CODE (base) == PLUS)
9970 base = XEXP (base, 0);
9971 if (GET_CODE (base) == UNSPEC)
9972 switch (XINT (base, 1))
9974 case UNSPEC_TOCREL:
9975 case UNSPEC_MACHOPIC_OFFSET:
9976 /* OP represents SYM [+ OFFSET] - ANCHOR. SYM is the base term
9977 for aliasing purposes. */
9978 return XVECEXP (base, 0, 0);
9981 return op;
9984 /* More elaborate version of recog's offsettable_memref_p predicate
9985 that works around the ??? note of rs6000_mode_dependent_address.
9986 In particular it accepts
9988 (mem:DI (plus:SI (reg/f:SI 31 31) (const_int 32760 [0x7ff8])))
9990 in 32-bit mode, which the recog predicate rejects.
9992 static bool
9993 rs6000_offsettable_memref_p (rtx op, machine_mode reg_mode)
9995 bool worst_case;
9997 if (!MEM_P (op))
9998 return false;
10000 /* First mimic offsettable_memref_p. */
10001 if (offsettable_address_p (true, GET_MODE (op), XEXP (op, 0)))
10002 return true;
10004 /* offsettable_address_p invokes rs6000_mode_dependent_address, but
10005 the latter predicate knows nothing about the mode of the memory
10006 reference and, therefore, assumes that it is the largest supported
10007 mode (TFmode). As a consequence, legitimate offsettable memory
10008 references are rejected. rs6000_legitimate_offset_address_p contains
10009 the correct logic for the PLUS case of rs6000_mode_dependent_address,
10010 at least with a little bit of help here given that we know the
10011 actual registers used. */
10012 worst_case = ((TARGET_POWERPC64 && GET_MODE_CLASS (reg_mode) == MODE_INT)
10013 || GET_MODE_SIZE (reg_mode) == 4);
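/* True means the value is moved through word-sized integer registers,
   so the offset of the operand's last word must be validated too.  */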
10014 return rs6000_legitimate_offset_address_p (GET_MODE (op), XEXP (op, 0),
10015 true, worst_case);
10018 /* Determine the reassociation width to be used in reassociate_bb.
10019 This takes into account how many parallel operations we
10020 can actually do of a given type, and also the latency.
10022 int add/sub 6/cycle
10023 mul 2/cycle
10024 vect add/sub/mul 2/cycle
10025 fp add/sub/mul 2/cycle
10026 dfp 1/cycle
10029 static int
10030 rs6000_reassociation_width (unsigned int opc ATTRIBUTE_UNUSED,
10031 enum machine_mode mode)
10033 switch (rs6000_cpu)
10035 case PROCESSOR_POWER8:
10036 case PROCESSOR_POWER9:
10037 if (DECIMAL_FLOAT_MODE_P (mode))
10038 return 1;
10039 if (VECTOR_MODE_P (mode))
10040 return 4;
10041 if (INTEGRAL_MODE_P (mode))
10042 return opc == MULT_EXPR ? 4 : 6;
10043 if (FLOAT_MODE_P (mode))
10044 return 4;
10045 break;
10046 default:
10047 break;
10049 return 1;
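/* E.g. on POWER8/POWER9 this returns 4 for an SImode MULT_EXPR and 6 for
   an SImode PLUS_EXPR, allowing reassociate_bb to keep that many
   independent chains in flight.  */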
10052 /* Change register usage conditional on target flags. */
10053 static void
10054 rs6000_conditional_register_usage (void)
10056 int i;
10058 if (TARGET_DEBUG_TARGET)
10059 fprintf (stderr, "rs6000_conditional_register_usage called\n");
10061 /* Set MQ register fixed (already call_used) so that it will not be
10062 allocated. */
10063 fixed_regs[64] = 1;
10065 /* 64-bit AIX and Linux reserve GPR13 for thread-private data. */
10066 if (TARGET_64BIT)
10067 fixed_regs[13] = call_used_regs[13]
10068 = call_really_used_regs[13] = 1;
10070 /* Conditionally disable FPRs. */
10071 if (TARGET_SOFT_FLOAT || !TARGET_FPRS)
10072 for (i = 32; i < 64; i++)
10073 fixed_regs[i] = call_used_regs[i]
10074 = call_really_used_regs[i] = 1;
10076 /* The TOC register is not killed across calls in a way that is
10077 visible to the compiler. */
10078 if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
10079 call_really_used_regs[2] = 0;
10081 if (DEFAULT_ABI == ABI_V4 && flag_pic == 2)
10082 fixed_regs[RS6000_PIC_OFFSET_TABLE_REGNUM] = 1;
10084 if (DEFAULT_ABI == ABI_V4 && flag_pic == 1)
10085 fixed_regs[RS6000_PIC_OFFSET_TABLE_REGNUM]
10086 = call_used_regs[RS6000_PIC_OFFSET_TABLE_REGNUM]
10087 = call_really_used_regs[RS6000_PIC_OFFSET_TABLE_REGNUM] = 1;
10089 if (DEFAULT_ABI == ABI_DARWIN && flag_pic)
10090 fixed_regs[RS6000_PIC_OFFSET_TABLE_REGNUM]
10091 = call_used_regs[RS6000_PIC_OFFSET_TABLE_REGNUM]
10092 = call_really_used_regs[RS6000_PIC_OFFSET_TABLE_REGNUM] = 1;
10094 if (TARGET_TOC && TARGET_MINIMAL_TOC)
10095 fixed_regs[RS6000_PIC_OFFSET_TABLE_REGNUM]
10096 = call_used_regs[RS6000_PIC_OFFSET_TABLE_REGNUM] = 1;
10098 if (TARGET_SPE)
10100 global_regs[SPEFSCR_REGNO] = 1;
10101 /* We used to use r14 as FIXED_SCRATCH to address SPE 64-bit
10102 registers in prologues and epilogues. We no longer use r14
10103 for FIXED_SCRATCH, but we're keeping r14 out of the allocation
10104 pool for link-compatibility with older versions of GCC. Once
10105 "old" code has died out, we can return r14 to the allocation
10106 pool. */
10107 fixed_regs[14]
10108 = call_used_regs[14]
10109 = call_really_used_regs[14] = 1;
10112 if (!TARGET_ALTIVEC && !TARGET_VSX)
10114 for (i = FIRST_ALTIVEC_REGNO; i <= LAST_ALTIVEC_REGNO; ++i)
10115 fixed_regs[i] = call_used_regs[i] = call_really_used_regs[i] = 1;
10116 call_really_used_regs[VRSAVE_REGNO] = 1;
10119 if (TARGET_ALTIVEC || TARGET_VSX)
10120 global_regs[VSCR_REGNO] = 1;
10122 if (TARGET_ALTIVEC_ABI)
10124 for (i = FIRST_ALTIVEC_REGNO; i < FIRST_ALTIVEC_REGNO + 20; ++i)
10125 call_used_regs[i] = call_really_used_regs[i] = 1;
10127 /* AIX reserves VR20:31 in non-extended ABI mode. */
10128 if (TARGET_XCOFF)
10129 for (i = FIRST_ALTIVEC_REGNO + 20; i < FIRST_ALTIVEC_REGNO + 32; ++i)
10130 fixed_regs[i] = call_used_regs[i] = call_really_used_regs[i] = 1;
10135 /* Output insns to set DEST equal to the constant SOURCE as a series of
10136 lis, ori and sldi instructions and return TRUE. */
10138 bool
10139 rs6000_emit_set_const (rtx dest, rtx source)
10141 machine_mode mode = GET_MODE (dest);
10142 rtx temp, set;
10143 rtx_insn *insn;
10144 HOST_WIDE_INT c;
10146 gcc_checking_assert (CONST_INT_P (source));
10147 c = INTVAL (source);
10148 switch (mode)
10150 case QImode:
10151 case HImode:
10152 emit_insn (gen_rtx_SET (dest, source));
10153 return true;
10155 case SImode:
10156 temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (SImode);
10158 emit_insn (gen_rtx_SET (copy_rtx (temp),
10159 GEN_INT (c & ~(HOST_WIDE_INT) 0xffff)));
10160 emit_insn (gen_rtx_SET (dest,
10161 gen_rtx_IOR (SImode, copy_rtx (temp),
10162 GEN_INT (c & 0xffff))));
10163 break;
10165 case DImode:
10166 if (!TARGET_POWERPC64)
10168 rtx hi, lo;
10170 hi = operand_subword_force (copy_rtx (dest), WORDS_BIG_ENDIAN == 0,
10171 DImode);
10172 lo = operand_subword_force (dest, WORDS_BIG_ENDIAN != 0,
10173 DImode);
10174 emit_move_insn (hi, GEN_INT (c >> 32));
10175 c = ((c & 0xffffffff) ^ 0x80000000) - 0x80000000;
10176 emit_move_insn (lo, GEN_INT (c));
10178 else
10179 rs6000_emit_set_long_const (dest, c);
10180 break;
10182 default:
10183 gcc_unreachable ();
10186 insn = get_last_insn ();
10187 set = single_set (insn);
10188 if (! CONSTANT_P (SET_SRC (set)))
10189 set_unique_reg_note (insn, REG_EQUAL, GEN_INT (c));
10191 return true;
10194 /* Subroutine of rs6000_emit_set_const, handling PowerPC64 DImode.
10195 Output insns to set DEST equal to the constant C as a series of
10196 lis, ori and sldi instructions. */
10198 static void
10199 rs6000_emit_set_long_const (rtx dest, HOST_WIDE_INT c)
10201 rtx temp;
10202 HOST_WIDE_INT ud1, ud2, ud3, ud4;
10204 ud1 = c & 0xffff;
10205 c = c >> 16;
10206 ud2 = c & 0xffff;
10207 c = c >> 16;
10208 ud3 = c & 0xffff;
10209 c = c >> 16;
10210 ud4 = c & 0xffff;
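/* For example, c = 0x123456789abcdef0 gives ud1 = 0xdef0, ud2 = 0x9abc,
   ud3 = 0x5678 and ud4 = 0x1234.  */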
10212 if ((ud4 == 0xffff && ud3 == 0xffff && ud2 == 0xffff && (ud1 & 0x8000))
10213 || (ud4 == 0 && ud3 == 0 && ud2 == 0 && ! (ud1 & 0x8000)))
10214 emit_move_insn (dest, GEN_INT ((ud1 ^ 0x8000) - 0x8000));
10216 else if ((ud4 == 0xffff && ud3 == 0xffff && (ud2 & 0x8000))
10217 || (ud4 == 0 && ud3 == 0 && ! (ud2 & 0x8000)))
10219 temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode);
10221 emit_move_insn (ud1 != 0 ? copy_rtx (temp) : dest,
10222 GEN_INT (((ud2 << 16) ^ 0x80000000) - 0x80000000));
10223 if (ud1 != 0)
10224 emit_move_insn (dest,
10225 gen_rtx_IOR (DImode, copy_rtx (temp),
10226 GEN_INT (ud1)));
10228 else if (ud3 == 0 && ud4 == 0)
10230 temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode);
10232 gcc_assert (ud2 & 0x8000);
10233 emit_move_insn (copy_rtx (temp),
10234 GEN_INT (((ud2 << 16) ^ 0x80000000) - 0x80000000));
10235 if (ud1 != 0)
10236 emit_move_insn (copy_rtx (temp),
10237 gen_rtx_IOR (DImode, copy_rtx (temp),
10238 GEN_INT (ud1)));
10239 emit_move_insn (dest,
10240 gen_rtx_ZERO_EXTEND (DImode,
10241 gen_lowpart (SImode,
10242 copy_rtx (temp))));
10244 else if ((ud4 == 0xffff && (ud3 & 0x8000))
10245 || (ud4 == 0 && ! (ud3 & 0x8000)))
10247 temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode);
10249 emit_move_insn (copy_rtx (temp),
10250 GEN_INT (((ud3 << 16) ^ 0x80000000) - 0x80000000));
10251 if (ud2 != 0)
10252 emit_move_insn (copy_rtx (temp),
10253 gen_rtx_IOR (DImode, copy_rtx (temp),
10254 GEN_INT (ud2)));
10255 emit_move_insn (ud1 != 0 ? copy_rtx (temp) : dest,
10256 gen_rtx_ASHIFT (DImode, copy_rtx (temp),
10257 GEN_INT (16)));
10258 if (ud1 != 0)
10259 emit_move_insn (dest,
10260 gen_rtx_IOR (DImode, copy_rtx (temp),
10261 GEN_INT (ud1)));
10263 else
10265 temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode);
10267 emit_move_insn (copy_rtx (temp),
10268 GEN_INT (((ud4 << 16) ^ 0x80000000) - 0x80000000));
10269 if (ud3 != 0)
10270 emit_move_insn (copy_rtx (temp),
10271 gen_rtx_IOR (DImode, copy_rtx (temp),
10272 GEN_INT (ud3)));
10274 emit_move_insn (ud2 != 0 || ud1 != 0 ? copy_rtx (temp) : dest,
10275 gen_rtx_ASHIFT (DImode, copy_rtx (temp),
10276 GEN_INT (32)));
10277 if (ud2 != 0)
10278 emit_move_insn (ud1 != 0 ? copy_rtx (temp) : dest,
10279 gen_rtx_IOR (DImode, copy_rtx (temp),
10280 GEN_INT (ud2 << 16)));
10281 if (ud1 != 0)
10282 emit_move_insn (dest,
10283 gen_rtx_IOR (DImode, copy_rtx (temp),
10284 GEN_INT (ud1)));
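/* In this most general case the emitted insns correspond to the sequence
   lis rD,ud4; ori rD,rD,ud3; sldi rD,rD,32; oris rD,rD,ud2; ori rD,rD,ud1,
   with the ori/oris steps dropped for halfwords that are zero.  */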
10288 /* Helper for the following. Get rid of [r+r] memory refs
10289 in cases where it won't work (TImode, TFmode, TDmode, PTImode). */
10291 static void
10292 rs6000_eliminate_indexed_memrefs (rtx operands[2])
10294 if (reload_in_progress)
10295 return;
10297 if (GET_CODE (operands[0]) == MEM
10298 && GET_CODE (XEXP (operands[0], 0)) != REG
10299 && ! legitimate_constant_pool_address_p (XEXP (operands[0], 0),
10300 GET_MODE (operands[0]), false))
10301 operands[0]
10302 = replace_equiv_address (operands[0],
10303 copy_addr_to_reg (XEXP (operands[0], 0)));
10305 if (GET_CODE (operands[1]) == MEM
10306 && GET_CODE (XEXP (operands[1], 0)) != REG
10307 && ! legitimate_constant_pool_address_p (XEXP (operands[1], 0),
10308 GET_MODE (operands[1]), false))
10309 operands[1]
10310 = replace_equiv_address (operands[1],
10311 copy_addr_to_reg (XEXP (operands[1], 0)));
10314 /* Generate a vector of constants to permute MODE for a little-endian
10315 storage operation by swapping the two halves of a vector. */
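/* For V4SImode, for instance, this yields the selector { 2, 3, 0, 1 },
   i.e. a swap of the two 64-bit halves of the vector.  */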
10316 static rtvec
10317 rs6000_const_vec (machine_mode mode)
10319 int i, subparts;
10320 rtvec v;
10322 switch (mode)
10324 case V1TImode:
10325 subparts = 1;
10326 break;
10327 case V2DFmode:
10328 case V2DImode:
10329 subparts = 2;
10330 break;
10331 case V4SFmode:
10332 case V4SImode:
10333 subparts = 4;
10334 break;
10335 case V8HImode:
10336 subparts = 8;
10337 break;
10338 case V16QImode:
10339 subparts = 16;
10340 break;
10341 default:
10342 gcc_unreachable();
10345 v = rtvec_alloc (subparts);
10347 for (i = 0; i < subparts / 2; ++i)
10348 RTVEC_ELT (v, i) = gen_rtx_CONST_INT (DImode, i + subparts / 2);
10349 for (i = subparts / 2; i < subparts; ++i)
10350 RTVEC_ELT (v, i) = gen_rtx_CONST_INT (DImode, i - subparts / 2);
10352 return v;
10355 /* Generate a permute rtx that represents an lxvd2x, stxvd2x, or xxpermdi
10356 for a VSX load or store operation. */
10357 rtx
10358 rs6000_gen_le_vsx_permute (rtx source, machine_mode mode)
10360 /* Use ROTATE instead of VEC_SELECT on IEEE 128-bit floating point, and
10361 128-bit integers if they are allowed in VSX registers. */
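/* (Rotating a 128-bit scalar by 64 bits swaps its two doublewords, which
   matches the element reordering that lxvd2x/stxvd2x perform.)  */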
10362 if (FLOAT128_VECTOR_P (mode) || mode == TImode)
10363 return gen_rtx_ROTATE (mode, source, GEN_INT (64));
10364 else
10366 rtx par = gen_rtx_PARALLEL (VOIDmode, rs6000_const_vec (mode));
10367 return gen_rtx_VEC_SELECT (mode, source, par);
10371 /* Emit a little-endian load from vector memory location SOURCE to VSX
10372 register DEST in mode MODE. The load is done with two permuting
10373 insns that represent an lxvd2x and an xxpermdi. */
10374 void
10375 rs6000_emit_le_vsx_load (rtx dest, rtx source, machine_mode mode)
10377 rtx tmp, permute_mem, permute_reg;
10379 /* Use V2DImode to do swaps of types with 128-bit scalar parts (TImode,
10380 V1TImode). */
10381 if (mode == TImode || mode == V1TImode)
10383 mode = V2DImode;
10384 dest = gen_lowpart (V2DImode, dest);
10385 source = adjust_address (source, V2DImode, 0);
10388 tmp = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (dest) : dest;
10389 permute_mem = rs6000_gen_le_vsx_permute (source, mode);
10390 permute_reg = rs6000_gen_le_vsx_permute (tmp, mode);
10391 emit_insn (gen_rtx_SET (tmp, permute_mem));
10392 emit_insn (gen_rtx_SET (dest, permute_reg));
10395 /* Emit a little-endian store to vector memory location DEST from VSX
10396 register SOURCE in mode MODE. The store is done with two permuting
10397 insns that represent an xxpermdi and an stxvd2x. */
10398 void
10399 rs6000_emit_le_vsx_store (rtx dest, rtx source, machine_mode mode)
10401 rtx tmp, permute_src, permute_tmp;
10403 /* This should never be called during or after reload, because it does
10404 not re-permute the source register. It is intended only for use
10405 during expand. */
10406 gcc_assert (!reload_in_progress && !lra_in_progress && !reload_completed);
10408 /* Use V2DImode to do swaps of types with 128-bit scalar parts (TImode,
10409 V1TImode). */
10410 if (mode == TImode || mode == V1TImode)
10412 mode = V2DImode;
10413 dest = adjust_address (dest, V2DImode, 0);
10414 source = gen_lowpart (V2DImode, source);
10417 tmp = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (source) : source;
10418 permute_src = rs6000_gen_le_vsx_permute (source, mode);
10419 permute_tmp = rs6000_gen_le_vsx_permute (tmp, mode);
10420 emit_insn (gen_rtx_SET (tmp, permute_src));
10421 emit_insn (gen_rtx_SET (dest, permute_tmp));
10424 /* Emit a sequence representing a little-endian VSX load or store,
10425 moving data from SOURCE to DEST in mode MODE. This is done
10426 separately from rs6000_emit_move to ensure it is called only
10427 during expand. LE VSX loads and stores introduced later are
10428 handled with a split. The expand-time RTL generation allows
10429 us to optimize away redundant pairs of register-permutes. */
10430 void
10431 rs6000_emit_le_vsx_move (rtx dest, rtx source, machine_mode mode)
10433 gcc_assert (!BYTES_BIG_ENDIAN
10434 && VECTOR_MEM_VSX_P (mode)
10435 && !TARGET_P9_VECTOR
10436 && !gpr_or_gpr_p (dest, source)
10437 && (MEM_P (source) ^ MEM_P (dest)));
10439 if (MEM_P (source))
10441 gcc_assert (REG_P (dest) || GET_CODE (dest) == SUBREG);
10442 rs6000_emit_le_vsx_load (dest, source, mode);
10444 else
10446 if (!REG_P (source))
10447 source = force_reg (mode, source);
10448 rs6000_emit_le_vsx_store (dest, source, mode);
10452 /* Return whether an SFmode or SImode move can be done without converting one
10453 mode to another. This arises when we have:
10455 (SUBREG:SF (REG:SI ...))
10456 (SUBREG:SI (REG:SF ...))
10458 and one of the values is in a floating point/vector register, where SFmode
10459 scalars are stored in DFmode format. */
10461 bool
10462 valid_sf_si_move (rtx dest, rtx src, machine_mode mode)
10464 if (TARGET_ALLOW_SF_SUBREG)
10465 return true;
10467 if (mode != SFmode && GET_MODE_CLASS (mode) != MODE_INT)
10468 return true;
10470 if (!SUBREG_P (src) || !sf_subreg_operand (src, mode))
10471 return true;
10473 /* Allow (set (SUBREG:SI (REG:SF)) (SUBREG:SI (REG:SF))). */
10474 if (SUBREG_P (dest))
10476 rtx dest_subreg = SUBREG_REG (dest);
10477 rtx src_subreg = SUBREG_REG (src);
10478 return GET_MODE (dest_subreg) == GET_MODE (src_subreg);
10481 return false;
10485 /* Helper function to change moves with:
10487 (SUBREG:SF (REG:SI)) and
10488 (SUBREG:SI (REG:SF))
10490 into separate UNSPEC insns. In the PowerPC architecture, scalar SFmode
10491 values are stored as DFmode values in the VSX registers. We need to convert
10492 the bits before we can use a direct move or operate on the bits in the
10493 vector register as an integer type.
10495 Skip things like (set (SUBREG:SI (...)) (SUBREG:SI (...))). */
10497 static bool
10498 rs6000_emit_move_si_sf_subreg (rtx dest, rtx source, machine_mode mode)
10500 if (TARGET_DIRECT_MOVE_64BIT && !reload_in_progress && !reload_completed
10501 && !lra_in_progress
10502 && (!SUBREG_P (dest) || !sf_subreg_operand (dest, mode))
10503 && SUBREG_P (source) && sf_subreg_operand (source, mode))
10505 rtx inner_source = SUBREG_REG (source);
10506 machine_mode inner_mode = GET_MODE (inner_source);
10508 if (mode == SImode && inner_mode == SFmode)
10510 emit_insn (gen_movsi_from_sf (dest, inner_source));
10511 return true;
10514 if (mode == SFmode && inner_mode == SImode)
10516 emit_insn (gen_movsf_from_si (dest, inner_source));
10517 return true;
10521 return false;
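/* E.g. (set (reg:SI r) (subreg:SI (reg:SF f) 0)) becomes
   gen_movsi_from_sf (r, f), which converts the DFmode-format bits in the
   vector register before doing the direct move.  */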
10524 /* Emit a move from SOURCE to DEST in mode MODE. */
10525 void
10526 rs6000_emit_move (rtx dest, rtx source, machine_mode mode)
10528 rtx operands[2];
10529 operands[0] = dest;
10530 operands[1] = source;
10532 if (TARGET_DEBUG_ADDR)
10534 fprintf (stderr,
10535 "\nrs6000_emit_move: mode = %s, reload_in_progress = %d, "
10536 "reload_completed = %d, can_create_pseudos = %d.\ndest:\n",
10537 GET_MODE_NAME (mode),
10538 reload_in_progress,
10539 reload_completed,
10540 can_create_pseudo_p ());
10541 debug_rtx (dest);
10542 fprintf (stderr, "source:\n");
10543 debug_rtx (source);
10546 /* Sanity checks. Check that we get CONST_WIDE_INT only when we should. */
10547 if (CONST_WIDE_INT_P (operands[1])
10548 && GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT)
10550 /* This should be fixed with the introduction of CONST_WIDE_INT. */
10551 gcc_unreachable ();
10554 /* See if we need to special case SImode/SFmode SUBREG moves. */
10555 if ((mode == SImode || mode == SFmode) && SUBREG_P (source)
10556 && rs6000_emit_move_si_sf_subreg (dest, source, mode))
10557 return;
10559 /* Check if GCC is setting up a block move that will end up using FP
10560 registers as temporaries. We must make sure this is acceptable. */
10561 if (GET_CODE (operands[0]) == MEM
10562 && GET_CODE (operands[1]) == MEM
10563 && mode == DImode
10564 && (SLOW_UNALIGNED_ACCESS (DImode, MEM_ALIGN (operands[0]))
10565 || SLOW_UNALIGNED_ACCESS (DImode, MEM_ALIGN (operands[1])))
10566 && ! (SLOW_UNALIGNED_ACCESS (SImode, (MEM_ALIGN (operands[0]) > 32
10567 ? 32 : MEM_ALIGN (operands[0])))
10568 || SLOW_UNALIGNED_ACCESS (SImode, (MEM_ALIGN (operands[1]) > 32
10569 ? 32
10570 : MEM_ALIGN (operands[1]))))
10571 && ! MEM_VOLATILE_P (operands [0])
10572 && ! MEM_VOLATILE_P (operands [1]))
10574 emit_move_insn (adjust_address (operands[0], SImode, 0),
10575 adjust_address (operands[1], SImode, 0));
10576 emit_move_insn (adjust_address (copy_rtx (operands[0]), SImode, 4),
10577 adjust_address (copy_rtx (operands[1]), SImode, 4));
10578 return;
10581 if (can_create_pseudo_p () && GET_CODE (operands[0]) == MEM
10582 && !gpc_reg_operand (operands[1], mode))
10583 operands[1] = force_reg (mode, operands[1]);
10585 /* Recognize the case where operand[1] is a reference to thread-local
10586 data and load its address to a register. */
10587 if (tls_referenced_p (operands[1]))
10589 enum tls_model model;
10590 rtx tmp = operands[1];
10591 rtx addend = NULL;
10593 if (GET_CODE (tmp) == CONST && GET_CODE (XEXP (tmp, 0)) == PLUS)
10595 addend = XEXP (XEXP (tmp, 0), 1);
10596 tmp = XEXP (XEXP (tmp, 0), 0);
10599 gcc_assert (GET_CODE (tmp) == SYMBOL_REF);
10600 model = SYMBOL_REF_TLS_MODEL (tmp);
10601 gcc_assert (model != 0);
10603 tmp = rs6000_legitimize_tls_address (tmp, model);
10604 if (addend)
10606 tmp = gen_rtx_PLUS (mode, tmp, addend);
10607 tmp = force_operand (tmp, operands[0]);
10609 operands[1] = tmp;
10612 /* Handle the case where reload calls us with an invalid address. */
10613 if (reload_in_progress && mode == Pmode
10614 && (! general_operand (operands[1], mode)
10615 || ! nonimmediate_operand (operands[0], mode)))
10616 goto emit_set;
10618 /* 128-bit constant floating-point values on Darwin should really be loaded
10619 as two parts. However, this premature splitting is a problem when DFmode
10620 values can go into Altivec registers. */
10621 if (FLOAT128_IBM_P (mode) && !reg_addr[DFmode].scalar_in_vmx_p
10622 && GET_CODE (operands[1]) == CONST_DOUBLE)
10624 rs6000_emit_move (simplify_gen_subreg (DFmode, operands[0], mode, 0),
10625 simplify_gen_subreg (DFmode, operands[1], mode, 0),
10626 DFmode);
10627 rs6000_emit_move (simplify_gen_subreg (DFmode, operands[0], mode,
10628 GET_MODE_SIZE (DFmode)),
10629 simplify_gen_subreg (DFmode, operands[1], mode,
10630 GET_MODE_SIZE (DFmode)),
10631 DFmode);
10632 return;
10635 if (reload_in_progress && cfun->machine->sdmode_stack_slot != NULL_RTX)
10636 cfun->machine->sdmode_stack_slot =
10637 eliminate_regs (cfun->machine->sdmode_stack_slot, VOIDmode, NULL_RTX);
10640 /* Transform (p0:DD, (SUBREG:DD p1:SD)) to ((SUBREG:SD p0:DD),
10641 p1:SD) if p1 is not of floating point class and p0 is spilled, as
10642 we can have no analogous movsd_store for this. */
10643 if (lra_in_progress && mode == DDmode
10644 && REG_P (operands[0]) && REGNO (operands[0]) >= FIRST_PSEUDO_REGISTER
10645 && reg_preferred_class (REGNO (operands[0])) == NO_REGS
10646 && GET_CODE (operands[1]) == SUBREG && REG_P (SUBREG_REG (operands[1]))
10647 && GET_MODE (SUBREG_REG (operands[1])) == SDmode)
10649 enum reg_class cl;
10650 int regno = REGNO (SUBREG_REG (operands[1]));
10652 if (regno >= FIRST_PSEUDO_REGISTER)
10654 cl = reg_preferred_class (regno);
10655 regno = cl == NO_REGS ? -1 : ira_class_hard_regs[cl][1];
10657 if (regno >= 0 && ! FP_REGNO_P (regno))
10659 mode = SDmode;
10660 operands[0] = gen_lowpart_SUBREG (SDmode, operands[0]);
10661 operands[1] = SUBREG_REG (operands[1]);
10664 if (lra_in_progress
10665 && mode == SDmode
10666 && REG_P (operands[0]) && REGNO (operands[0]) >= FIRST_PSEUDO_REGISTER
10667 && reg_preferred_class (REGNO (operands[0])) == NO_REGS
10668 && (REG_P (operands[1])
10669 || (GET_CODE (operands[1]) == SUBREG
10670 && REG_P (SUBREG_REG (operands[1])))))
10672 int regno = REGNO (GET_CODE (operands[1]) == SUBREG
10673 ? SUBREG_REG (operands[1]) : operands[1]);
10674 enum reg_class cl;
10676 if (regno >= FIRST_PSEUDO_REGISTER)
10678 cl = reg_preferred_class (regno);
10679 gcc_assert (cl != NO_REGS);
10680 regno = ira_class_hard_regs[cl][0];
10682 if (FP_REGNO_P (regno))
10684 if (GET_MODE (operands[0]) != DDmode)
10685 operands[0] = gen_rtx_SUBREG (DDmode, operands[0], 0);
10686 emit_insn (gen_movsd_store (operands[0], operands[1]));
10688 else if (INT_REGNO_P (regno))
10689 emit_insn (gen_movsd_hardfloat (operands[0], operands[1]));
10690 else
10691 gcc_unreachable();
10692 return;
10694 /* Transform ((SUBREG:DD p0:SD), p1:DD) to (p0:SD, (SUBREG:SD
10695 p1:DD)) if p0 is not of floating point class and p1 is spilled, as
10696 we can have no analogous movsd_load for this. */
10697 if (lra_in_progress && mode == DDmode
10698 && GET_CODE (operands[0]) == SUBREG && REG_P (SUBREG_REG (operands[0]))
10699 && GET_MODE (SUBREG_REG (operands[0])) == SDmode
10700 && REG_P (operands[1]) && REGNO (operands[1]) >= FIRST_PSEUDO_REGISTER
10701 && reg_preferred_class (REGNO (operands[1])) == NO_REGS)
10703 enum reg_class cl;
10704 int regno = REGNO (SUBREG_REG (operands[0]));
10706 if (regno >= FIRST_PSEUDO_REGISTER)
10708 cl = reg_preferred_class (regno);
10709 regno = cl == NO_REGS ? -1 : ira_class_hard_regs[cl][0];
10711 if (regno >= 0 && ! FP_REGNO_P (regno))
10713 mode = SDmode;
10714 operands[0] = SUBREG_REG (operands[0]);
10715 operands[1] = gen_lowpart_SUBREG (SDmode, operands[1]);
10718 if (lra_in_progress
10719 && mode == SDmode
10720 && (REG_P (operands[0])
10721 || (GET_CODE (operands[0]) == SUBREG
10722 && REG_P (SUBREG_REG (operands[0]))))
10723 && REG_P (operands[1]) && REGNO (operands[1]) >= FIRST_PSEUDO_REGISTER
10724 && reg_preferred_class (REGNO (operands[1])) == NO_REGS)
10726 int regno = REGNO (GET_CODE (operands[0]) == SUBREG
10727 ? SUBREG_REG (operands[0]) : operands[0]);
10728 enum reg_class cl;
10730 if (regno >= FIRST_PSEUDO_REGISTER)
10732 cl = reg_preferred_class (regno);
10733 gcc_assert (cl != NO_REGS);
10734 regno = ira_class_hard_regs[cl][0];
10736 if (FP_REGNO_P (regno))
10738 if (GET_MODE (operands[1]) != DDmode)
10739 operands[1] = gen_rtx_SUBREG (DDmode, operands[1], 0);
10740 emit_insn (gen_movsd_load (operands[0], operands[1]));
10742 else if (INT_REGNO_P (regno))
10743 emit_insn (gen_movsd_hardfloat (operands[0], operands[1]));
10744 else
10745 gcc_unreachable();
10746 return;
10749 if (reload_in_progress
10750 && mode == SDmode
10751 && cfun->machine->sdmode_stack_slot != NULL_RTX
10752 && MEM_P (operands[0])
10753 && rtx_equal_p (operands[0], cfun->machine->sdmode_stack_slot)
10754 && REG_P (operands[1]))
10756 if (FP_REGNO_P (REGNO (operands[1])))
10758 rtx mem = adjust_address_nv (operands[0], DDmode, 0);
10759 mem = eliminate_regs (mem, VOIDmode, NULL_RTX);
10760 emit_insn (gen_movsd_store (mem, operands[1]));
10762 else if (INT_REGNO_P (REGNO (operands[1])))
10764 rtx mem = operands[0];
10765 if (BYTES_BIG_ENDIAN)
10766 mem = adjust_address_nv (mem, mode, 4);
10767 mem = eliminate_regs (mem, VOIDmode, NULL_RTX);
10768 emit_insn (gen_movsd_hardfloat (mem, operands[1]));
10770 else
10771 gcc_unreachable();
10772 return;
10774 if (reload_in_progress
10775 && mode == SDmode
10776 && REG_P (operands[0])
10777 && MEM_P (operands[1])
10778 && cfun->machine->sdmode_stack_slot != NULL_RTX
10779 && rtx_equal_p (operands[1], cfun->machine->sdmode_stack_slot))
10781 if (FP_REGNO_P (REGNO (operands[0])))
10783 rtx mem = adjust_address_nv (operands[1], DDmode, 0);
10784 mem = eliminate_regs (mem, VOIDmode, NULL_RTX);
10785 emit_insn (gen_movsd_load (operands[0], mem));
10787 else if (INT_REGNO_P (REGNO (operands[0])))
10789 rtx mem = operands[1];
10790 if (BYTES_BIG_ENDIAN)
10791 mem = adjust_address_nv (mem, mode, 4);
10792 mem = eliminate_regs (mem, VOIDmode, NULL_RTX);
10793 emit_insn (gen_movsd_hardfloat (operands[0], mem));
10795 else
10796 gcc_unreachable();
10797 return;
10800 /* FIXME: In the long term, this switch statement should go away
10801 and be replaced by a sequence of tests based on things like
10802 mode == Pmode. */
10803 switch (mode)
10805 case HImode:
10806 case QImode:
10807 if (CONSTANT_P (operands[1])
10808 && GET_CODE (operands[1]) != CONST_INT)
10809 operands[1] = force_const_mem (mode, operands[1]);
10810 break;
10812 case TFmode:
10813 case TDmode:
10814 case IFmode:
10815 case KFmode:
10816 if (FLOAT128_2REG_P (mode))
10817 rs6000_eliminate_indexed_memrefs (operands);
10818 /* fall through */
10820 case DFmode:
10821 case DDmode:
10822 case SFmode:
10823 case SDmode:
10824 if (CONSTANT_P (operands[1])
10825 && ! easy_fp_constant (operands[1], mode))
10826 operands[1] = force_const_mem (mode, operands[1]);
10827 break;
10829 case V16QImode:
10830 case V8HImode:
10831 case V4SFmode:
10832 case V4SImode:
10833 case V4HImode:
10834 case V2SFmode:
10835 case V2SImode:
10836 case V1DImode:
10837 case V2DFmode:
10838 case V2DImode:
10839 case V1TImode:
10840 if (CONSTANT_P (operands[1])
10841 && !easy_vector_constant (operands[1], mode))
10842 operands[1] = force_const_mem (mode, operands[1]);
10843 break;
10845 case SImode:
10846 case DImode:
10847 /* Use the default pattern for the address of ELF small data. */
10848 if (TARGET_ELF
10849 && mode == Pmode
10850 && DEFAULT_ABI == ABI_V4
10851 && (GET_CODE (operands[1]) == SYMBOL_REF
10852 || GET_CODE (operands[1]) == CONST)
10853 && small_data_operand (operands[1], mode))
10855 emit_insn (gen_rtx_SET (operands[0], operands[1]));
10856 return;
10859 if (DEFAULT_ABI == ABI_V4
10860 && mode == Pmode && mode == SImode
10861 && flag_pic == 1 && got_operand (operands[1], mode))
10863 emit_insn (gen_movsi_got (operands[0], operands[1]));
10864 return;
10867 if ((TARGET_ELF || DEFAULT_ABI == ABI_DARWIN)
10868 && TARGET_NO_TOC
10869 && ! flag_pic
10870 && mode == Pmode
10871 && CONSTANT_P (operands[1])
10872 && GET_CODE (operands[1]) != HIGH
10873 && GET_CODE (operands[1]) != CONST_INT)
10875 rtx target = (!can_create_pseudo_p ()
10876 ? operands[0]
10877 : gen_reg_rtx (mode));
10879 /* If this is a function address on -mcall-aixdesc,
10880 convert it to the address of the descriptor. */
10881 if (DEFAULT_ABI == ABI_AIX
10882 && GET_CODE (operands[1]) == SYMBOL_REF
10883 && XSTR (operands[1], 0)[0] == '.')
10885 const char *name = XSTR (operands[1], 0);
10886 rtx new_ref;
10887 while (*name == '.')
10888 name++;
10889 new_ref = gen_rtx_SYMBOL_REF (Pmode, name);
10890 CONSTANT_POOL_ADDRESS_P (new_ref)
10891 = CONSTANT_POOL_ADDRESS_P (operands[1]);
10892 SYMBOL_REF_FLAGS (new_ref) = SYMBOL_REF_FLAGS (operands[1]);
10893 SYMBOL_REF_USED (new_ref) = SYMBOL_REF_USED (operands[1]);
10894 SYMBOL_REF_DATA (new_ref) = SYMBOL_REF_DATA (operands[1]);
10895 operands[1] = new_ref;
10898 if (DEFAULT_ABI == ABI_DARWIN)
10900 #if TARGET_MACHO
10901 if (MACHO_DYNAMIC_NO_PIC_P)
10903 /* Take care of any required data indirection. */
10904 operands[1] = rs6000_machopic_legitimize_pic_address (
10905 operands[1], mode, operands[0]);
10906 if (operands[0] != operands[1])
10907 emit_insn (gen_rtx_SET (operands[0], operands[1]));
10908 return;
10910 #endif
10911 emit_insn (gen_macho_high (target, operands[1]));
10912 emit_insn (gen_macho_low (operands[0], target, operands[1]));
10913 return;
10916 emit_insn (gen_elf_high (target, operands[1]));
10917 emit_insn (gen_elf_low (operands[0], target, operands[1]));
10918 return;
10921 /* If this is a SYMBOL_REF that refers to a constant pool entry,
10922 and we have put it in the TOC, we just need to make a TOC-relative
10923 reference to it. */
10924 if (TARGET_TOC
10925 && GET_CODE (operands[1]) == SYMBOL_REF
10926 && use_toc_relative_ref (operands[1], mode))
10927 operands[1] = create_TOC_reference (operands[1], operands[0]);
10928 else if (mode == Pmode
10929 && CONSTANT_P (operands[1])
10930 && GET_CODE (operands[1]) != HIGH
10931 && ((GET_CODE (operands[1]) != CONST_INT
10932 && ! easy_fp_constant (operands[1], mode))
10933 || (GET_CODE (operands[1]) == CONST_INT
10934 && (num_insns_constant (operands[1], mode)
10935 > (TARGET_CMODEL != CMODEL_SMALL ? 3 : 2)))
10936 || (GET_CODE (operands[0]) == REG
10937 && FP_REGNO_P (REGNO (operands[0]))))
10938 && !toc_relative_expr_p (operands[1], false)
10939 && (TARGET_CMODEL == CMODEL_SMALL
10940 || can_create_pseudo_p ()
10941 || (REG_P (operands[0])
10942 && INT_REG_OK_FOR_BASE_P (operands[0], true))))
10945 #if TARGET_MACHO
10946 /* Darwin uses a special PIC legitimizer. */
10947 if (DEFAULT_ABI == ABI_DARWIN && MACHOPIC_INDIRECT)
10949 operands[1] =
10950 rs6000_machopic_legitimize_pic_address (operands[1], mode,
10951 operands[0]);
10952 if (operands[0] != operands[1])
10953 emit_insn (gen_rtx_SET (operands[0], operands[1]));
10954 return;
10956 #endif
10958 /* If we are to limit the number of things we put in the TOC and
10959 this is a symbol plus a constant we can add in one insn,
10960 just put the symbol in the TOC and add the constant. Don't do
10961 this if reload is in progress. */
10962 if (GET_CODE (operands[1]) == CONST
10963 && TARGET_NO_SUM_IN_TOC && ! reload_in_progress
10964 && GET_CODE (XEXP (operands[1], 0)) == PLUS
10965 && add_operand (XEXP (XEXP (operands[1], 0), 1), mode)
10966 && (GET_CODE (XEXP (XEXP (operands[1], 0), 0)) == LABEL_REF
10967 || GET_CODE (XEXP (XEXP (operands[1], 0), 0)) == SYMBOL_REF)
10968 && ! side_effects_p (operands[0]))
10970 rtx sym =
10971 force_const_mem (mode, XEXP (XEXP (operands[1], 0), 0));
10972 rtx other = XEXP (XEXP (operands[1], 0), 1);
10974 sym = force_reg (mode, sym);
10975 emit_insn (gen_add3_insn (operands[0], sym, other));
10976 return;
10979 operands[1] = force_const_mem (mode, operands[1]);
10981 if (TARGET_TOC
10982 && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
10983 && constant_pool_expr_p (XEXP (operands[1], 0))
10984 && ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (
10985 get_pool_constant (XEXP (operands[1], 0)),
10986 get_pool_mode (XEXP (operands[1], 0))))
10988 rtx tocref = create_TOC_reference (XEXP (operands[1], 0),
10989 operands[0]);
10990 operands[1] = gen_const_mem (mode, tocref);
10991 set_mem_alias_set (operands[1], get_TOC_alias_set ());
10994 break;
10996 case TImode:
10997 if (!VECTOR_MEM_VSX_P (TImode))
10998 rs6000_eliminate_indexed_memrefs (operands);
10999 break;
11001 case PTImode:
11002 rs6000_eliminate_indexed_memrefs (operands);
11003 break;
11005 default:
11006 fatal_insn ("bad move", gen_rtx_SET (dest, source));
11009 /* Above, we may have called force_const_mem which may have returned
11010 an invalid address. If we can, fix this up; otherwise, reload will
11011 have to deal with it. */
11012 if (GET_CODE (operands[1]) == MEM && ! reload_in_progress)
11013 operands[1] = validize_mem (operands[1]);
11015 emit_set:
11016 emit_insn (gen_rtx_SET (operands[0], operands[1]));
11019 /* Return true if a structure, union or array containing FIELD should be
11020 accessed using `BLKmode'.
11022 For the SPE, simd types are V2SI, and gcc can be tempted to put the
11023 entire thing in a DI and use subregs to access the internals.
11024 store_bit_field() will force (subreg:DI (reg:V2SI x))'s to the
11025 back-end. Because a single GPR can hold a V2SI, but not a DI, the
11026 best thing to do is set structs to BLKmode and avoid Severe Tire
11027 Damage.
11029 On e500 v2, DF and DI modes suffer from the same anomaly. DF can
11030 fit into 1, whereas DI still needs two. */
11032 static bool
11033 rs6000_member_type_forces_blk (const_tree field, machine_mode mode)
11035 return ((TARGET_SPE && TREE_CODE (TREE_TYPE (field)) == VECTOR_TYPE)
11036 || (TARGET_E500_DOUBLE && mode == DFmode));
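/* Illustration with hypothetical user code: under -mspe,

     typedef int v2si __attribute__ ((vector_size (8)));
     struct s { v2si v; };

   gets BLKmode from the hook above, so gcc is not tempted to place the
   struct in a DI and reach the V2SI field through
   (subreg:DI (reg:V2SI ...)), which a single GPR cannot back, since a
   GPR can hold a V2SI but not a DI.  */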
11039 /* Nonzero if we can use a floating-point register to pass this arg. */
11040 #define USE_FP_FOR_ARG_P(CUM,MODE) \
11041 (SCALAR_FLOAT_MODE_NOT_VECTOR_P (MODE) \
11042 && (CUM)->fregno <= FP_ARG_MAX_REG \
11043 && TARGET_HARD_FLOAT && TARGET_FPRS)
11045 /* Nonzero if we can use an AltiVec register to pass this arg. */
11046 #define USE_ALTIVEC_FOR_ARG_P(CUM,MODE,NAMED) \
11047 (ALTIVEC_OR_VSX_VECTOR_MODE (MODE) \
11048 && (CUM)->vregno <= ALTIVEC_ARG_MAX_REG \
11049 && TARGET_ALTIVEC_ABI \
11050 && (NAMED))
11052 /* Walk down the type tree of TYPE counting consecutive base elements.
11053 If *MODEP is VOIDmode, then set it to the first valid floating point
11054 or vector type. If a non-floating point or vector type is found, or
11055 if a floating point or vector type that doesn't match a non-VOIDmode
11056 *MODEP is found, then return -1, otherwise return the count in the
11057 sub-tree. */
11059 static int
11060 rs6000_aggregate_candidate (const_tree type, machine_mode *modep)
11062 machine_mode mode;
11063 HOST_WIDE_INT size;
11065 switch (TREE_CODE (type))
11067 case REAL_TYPE:
11068 mode = TYPE_MODE (type);
11069 if (!SCALAR_FLOAT_MODE_P (mode))
11070 return -1;
11072 if (*modep == VOIDmode)
11073 *modep = mode;
11075 if (*modep == mode)
11076 return 1;
11078 break;
11080 case COMPLEX_TYPE:
11081 mode = TYPE_MODE (TREE_TYPE (type));
11082 if (!SCALAR_FLOAT_MODE_P (mode))
11083 return -1;
11085 if (*modep == VOIDmode)
11086 *modep = mode;
11088 if (*modep == mode)
11089 return 2;
11091 break;
11093 case VECTOR_TYPE:
11094 if (!TARGET_ALTIVEC_ABI || !TARGET_ALTIVEC)
11095 return -1;
11097 /* Use V4SImode as representative of all 128-bit vector types. */
11098 size = int_size_in_bytes (type);
11099 switch (size)
11101 case 16:
11102 mode = V4SImode;
11103 break;
11104 default:
11105 return -1;
11108 if (*modep == VOIDmode)
11109 *modep = mode;
11111 /* Vector modes are considered to be opaque: two vectors are
11112 equivalent for the purposes of being homogeneous aggregates
11113 if they are the same size. */
11114 if (*modep == mode)
11115 return 1;
11117 break;
11119 case ARRAY_TYPE:
11121 int count;
11122 tree index = TYPE_DOMAIN (type);
11124 /* Can't handle incomplete types nor sizes that are not
11125 fixed. */
11126 if (!COMPLETE_TYPE_P (type)
11127 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
11128 return -1;
11130 count = rs6000_aggregate_candidate (TREE_TYPE (type), modep);
11131 if (count == -1
11132 || !index
11133 || !TYPE_MAX_VALUE (index)
11134 || !tree_fits_uhwi_p (TYPE_MAX_VALUE (index))
11135 || !TYPE_MIN_VALUE (index)
11136 || !tree_fits_uhwi_p (TYPE_MIN_VALUE (index))
11137 || count < 0)
11138 return -1;
11140 count *= (1 + tree_to_uhwi (TYPE_MAX_VALUE (index))
11141 - tree_to_uhwi (TYPE_MIN_VALUE (index)));
11143 /* There must be no padding. */
11144 if (wi::ne_p (TYPE_SIZE (type), count * GET_MODE_BITSIZE (*modep)))
11145 return -1;
11147 return count;
11150 case RECORD_TYPE:
11152 int count = 0;
11153 int sub_count;
11154 tree field;
11156 /* Can't handle incomplete types nor sizes that are not
11157 fixed. */
11158 if (!COMPLETE_TYPE_P (type)
11159 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
11160 return -1;
11162 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
11164 if (TREE_CODE (field) != FIELD_DECL)
11165 continue;
11167 sub_count = rs6000_aggregate_candidate (TREE_TYPE (field), modep);
11168 if (sub_count < 0)
11169 return -1;
11170 count += sub_count;
11173 /* There must be no padding. */
11174 if (wi::ne_p (TYPE_SIZE (type), count * GET_MODE_BITSIZE (*modep)))
11175 return -1;
11177 return count;
11180 case UNION_TYPE:
11181 case QUAL_UNION_TYPE:
11183 /* These aren't very interesting except in a degenerate case. */
11184 int count = 0;
11185 int sub_count;
11186 tree field;
11188 /* Can't handle incomplete types nor sizes that are not
11189 fixed. */
11190 if (!COMPLETE_TYPE_P (type)
11191 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
11192 return -1;
11194 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
11196 if (TREE_CODE (field) != FIELD_DECL)
11197 continue;
11199 sub_count = rs6000_aggregate_candidate (TREE_TYPE (field), modep);
11200 if (sub_count < 0)
11201 return -1;
11202 count = count > sub_count ? count : sub_count;
11205 /* There must be no padding. */
11206 if (wi::ne_p (TYPE_SIZE (type), count * GET_MODE_BITSIZE (*modep)))
11207 return -1;
11209 return count;
11212 default:
11213 break;
11216 return -1;
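/* Worked example (hypothetical type): for

     struct hfa { double a; double b; _Complex double c; };

   the walk above counts 1 + 1 + 2 DFmode elements, so it returns 4
   with *modep == DFmode; any non-DFmode leaf field, or padding that
   makes TYPE_SIZE differ from 4 * 64 bits, would yield -1 instead.  */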
11219 /* If an argument, whose type is described by TYPE and MODE, is a homogeneous
11220 float or vector aggregate that shall be passed in FP/vector registers
11221 according to the ELFv2 ABI, return the homogeneous element mode in
11222 *ELT_MODE and the number of elements in *N_ELTS, and return TRUE.
11224 Otherwise, set *ELT_MODE to MODE and *N_ELTS to 1, and return FALSE. */
11226 static bool
11227 rs6000_discover_homogeneous_aggregate (machine_mode mode, const_tree type,
11228 machine_mode *elt_mode,
11229 int *n_elts)
11231 /* Note that we do not accept complex types at the top level as
11232 homogeneous aggregates; these types are handled via the
11233 targetm.calls.split_complex_arg mechanism. Complex types
11234 can be elements of homogeneous aggregates, however. */
11235 if (DEFAULT_ABI == ABI_ELFv2 && type && AGGREGATE_TYPE_P (type))
11237 machine_mode field_mode = VOIDmode;
11238 int field_count = rs6000_aggregate_candidate (type, &field_mode);
11240 if (field_count > 0)
11242 int n_regs = (SCALAR_FLOAT_MODE_P (field_mode) ?
11243 (GET_MODE_SIZE (field_mode) + 7) >> 3 : 1);
11245 /* The ELFv2 ABI allows homogeneous aggregates to occupy
11246 up to AGGR_ARG_NUM_REG registers. */
11247 if (field_count * n_regs <= AGGR_ARG_NUM_REG)
11249 if (elt_mode)
11250 *elt_mode = field_mode;
11251 if (n_elts)
11252 *n_elts = field_count;
11253 return true;
11258 if (elt_mode)
11259 *elt_mode = mode;
11260 if (n_elts)
11261 *n_elts = 1;
11262 return false;
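/* Sketch of the limit check above, assuming AGGR_ARG_NUM_REG is 8:
   a hypothetical struct { double d[8]; } gives field_count == 8 and
   n_regs == 1 (one FPR per double), so 8 * 1 <= 8 qualifies it as a
   homogeneous aggregate, while struct { double d[9]; } fails the
   test and is passed like any other aggregate.  */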
11265 /* Return a nonzero value to say to return the function value in
11266 memory, just as large structures are always returned. TYPE will be
11267 the data type of the value, and FNTYPE will be the type of the
11268 function doing the returning, or @code{NULL} for libcalls.
11270 The AIX ABI for the RS/6000 specifies that all structures are
11271 returned in memory. The Darwin ABI does the same.
11273 For the Darwin 64 Bit ABI, a function result can be returned in
11274 registers or in memory, depending on the size of the return data
11275 type. If it is returned in registers, the value occupies the same
11276 registers as it would if it were the first and only function
11277 argument. Otherwise, the function places its result in memory at
11278 the location pointed to by GPR3.
11280 The SVR4 ABI specifies that structures <= 8 bytes are returned in r3/r4,
11281 but a draft put them in memory, and GCC used to implement the draft
11282 instead of the final standard. Therefore, aix_struct_return
11283 controls this instead of DEFAULT_ABI; V.4 targets needing backward
11284 compatibility can change DRAFT_V4_STRUCT_RET to override the
11285 default, and -m switches get the final word. See
11286 rs6000_option_override_internal for more details.
11288 The PPC32 SVR4 ABI uses IEEE double extended for long double, if 128-bit
11289 long double support is enabled. These values are returned in memory.
11291 int_size_in_bytes returns -1 for variable size objects, which go in
11292 memory always. The cast to unsigned makes -1 > 8. */
11294 static bool
11295 rs6000_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
11297 /* For the Darwin64 ABI, test if we can fit the return value in regs. */
11298 if (TARGET_MACHO
11299 && rs6000_darwin64_abi
11300 && TREE_CODE (type) == RECORD_TYPE
11301 && int_size_in_bytes (type) > 0)
11303 CUMULATIVE_ARGS valcum;
11304 rtx valret;
11306 valcum.words = 0;
11307 valcum.fregno = FP_ARG_MIN_REG;
11308 valcum.vregno = ALTIVEC_ARG_MIN_REG;
11309 /* Do a trial code generation as if this were going to be passed
11310 as an argument; if any part goes in memory, we return NULL. */
11311 valret = rs6000_darwin64_record_arg (&valcum, type, true, true);
11312 if (valret)
11313 return false;
11314 /* Otherwise fall through to more conventional ABI rules. */
11317 /* The ELFv2 ABI returns homogeneous float/vector aggregates in registers. */
11318 if (rs6000_discover_homogeneous_aggregate (TYPE_MODE (type), type,
11319 NULL, NULL))
11320 return false;
11322 /* The ELFv2 ABI returns aggregates of up to 16 bytes in registers. */
11323 if (DEFAULT_ABI == ABI_ELFv2 && AGGREGATE_TYPE_P (type)
11324 && (unsigned HOST_WIDE_INT) int_size_in_bytes (type) <= 16)
11325 return false;
11327 if (AGGREGATE_TYPE_P (type)
11328 && (aix_struct_return
11329 || (unsigned HOST_WIDE_INT) int_size_in_bytes (type) > 8))
11330 return true;
11332 /* Allow -maltivec -mabi=no-altivec without warning. Altivec vector
11333 modes only exist for GCC vector types if -maltivec. */
11334 if (TARGET_32BIT && !TARGET_ALTIVEC_ABI
11335 && ALTIVEC_VECTOR_MODE (TYPE_MODE (type)))
11336 return false;
11338 /* Return synthetic vectors in memory. */
11339 if (TREE_CODE (type) == VECTOR_TYPE
11340 && int_size_in_bytes (type) > (TARGET_ALTIVEC_ABI ? 16 : 8))
11342 static bool warned_for_return_big_vectors = false;
11343 if (!warned_for_return_big_vectors)
11345 warning (OPT_Wpsabi, "GCC vector returned by reference: "
11346 "non-standard ABI extension with no compatibility guarantee");
11347 warned_for_return_big_vectors = true;
11349 return true;
11352 if (DEFAULT_ABI == ABI_V4 && TARGET_IEEEQUAD
11353 && FLOAT128_IEEE_P (TYPE_MODE (type)))
11354 return true;
11356 return false;
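/* For instance (hypothetical types): under ELFv2 a 16-byte
   struct { long a; long b; } is returned in registers, while with
   aix_struct_return set the same struct is returned in memory, as is
   any aggregate larger than 8 bytes.  */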
11359 /* Specify whether values returned in registers should be at the most
11360 significant end of a register. We want aggregates returned by
11361 value to match the way aggregates are passed to functions. */
11363 static bool
11364 rs6000_return_in_msb (const_tree valtype)
11366 return (DEFAULT_ABI == ABI_ELFv2
11367 && BYTES_BIG_ENDIAN
11368 && AGGREGATE_TYPE_P (valtype)
11369 && FUNCTION_ARG_PADDING (TYPE_MODE (valtype), valtype) == upward);
11372 #ifdef HAVE_AS_GNU_ATTRIBUTE
11373 /* Return TRUE if a call to function FNDECL may be one that
11374 potentially affects the function calling ABI of the object file. */
11376 static bool
11377 call_ABI_of_interest (tree fndecl)
11379 if (rs6000_gnu_attr && symtab->state == EXPANSION)
11381 struct cgraph_node *c_node;
11383 /* Libcalls are always interesting. */
11384 if (fndecl == NULL_TREE)
11385 return true;
11387 /* Any call to an external function is interesting. */
11388 if (DECL_EXTERNAL (fndecl))
11389 return true;
11391 /* Interesting functions that we are emitting in this object file. */
11392 c_node = cgraph_node::get (fndecl);
11393 c_node = c_node->ultimate_alias_target ();
11394 return !c_node->only_called_directly_p ();
11396 return false;
11398 #endif
11400 /* Initialize a variable CUM of type CUMULATIVE_ARGS
11401 for a call to a function whose data type is FNTYPE.
11402 For a library call, FNTYPE is 0 and RETURN_MODE the return value mode.
11404 For incoming args we set the number of arguments in the prototype large
11405 so we never return a PARALLEL. */
11407 void
11408 init_cumulative_args (CUMULATIVE_ARGS *cum, tree fntype,
11409 rtx libname ATTRIBUTE_UNUSED, int incoming,
11410 int libcall, int n_named_args,
11411 tree fndecl ATTRIBUTE_UNUSED,
11412 machine_mode return_mode ATTRIBUTE_UNUSED)
11414 static CUMULATIVE_ARGS zero_cumulative;
11416 *cum = zero_cumulative;
11417 cum->words = 0;
11418 cum->fregno = FP_ARG_MIN_REG;
11419 cum->vregno = ALTIVEC_ARG_MIN_REG;
11420 cum->prototype = (fntype && prototype_p (fntype));
11421 cum->call_cookie = ((DEFAULT_ABI == ABI_V4 && libcall)
11422 ? CALL_LIBCALL : CALL_NORMAL);
11423 cum->sysv_gregno = GP_ARG_MIN_REG;
11424 cum->stdarg = stdarg_p (fntype);
11425 cum->libcall = libcall;
11427 cum->nargs_prototype = 0;
11428 if (incoming || cum->prototype)
11429 cum->nargs_prototype = n_named_args;
11431 /* Check for a longcall attribute. */
11432 if ((!fntype && rs6000_default_long_calls)
11433 || (fntype
11434 && lookup_attribute ("longcall", TYPE_ATTRIBUTES (fntype))
11435 && !lookup_attribute ("shortcall", TYPE_ATTRIBUTES (fntype))))
11436 cum->call_cookie |= CALL_LONG;
11438 if (TARGET_DEBUG_ARG)
11440 fprintf (stderr, "\ninit_cumulative_args:");
11441 if (fntype)
11443 tree ret_type = TREE_TYPE (fntype);
11444 fprintf (stderr, " ret code = %s,",
11445 get_tree_code_name (TREE_CODE (ret_type)));
11448 if (cum->call_cookie & CALL_LONG)
11449 fprintf (stderr, " longcall,");
11451 fprintf (stderr, " proto = %d, nargs = %d\n",
11452 cum->prototype, cum->nargs_prototype);
11455 #ifdef HAVE_AS_GNU_ATTRIBUTE
11456 if (TARGET_ELF && (TARGET_64BIT || DEFAULT_ABI == ABI_V4))
11458 cum->escapes = call_ABI_of_interest (fndecl);
11459 if (cum->escapes)
11461 tree return_type;
11463 if (fntype)
11465 return_type = TREE_TYPE (fntype);
11466 return_mode = TYPE_MODE (return_type);
11468 else
11469 return_type = lang_hooks.types.type_for_mode (return_mode, 0);
11471 if (return_type != NULL)
11473 if (TREE_CODE (return_type) == RECORD_TYPE
11474 && TYPE_TRANSPARENT_AGGR (return_type))
11476 return_type = TREE_TYPE (first_field (return_type));
11477 return_mode = TYPE_MODE (return_type);
11479 if (AGGREGATE_TYPE_P (return_type)
11480 && ((unsigned HOST_WIDE_INT) int_size_in_bytes (return_type)
11481 <= 8))
11482 rs6000_returns_struct = true;
11484 if (SCALAR_FLOAT_MODE_P (return_mode))
11486 rs6000_passes_float = true;
11487 if ((HAVE_LD_PPC_GNU_ATTR_LONG_DOUBLE || TARGET_64BIT)
11488 && (FLOAT128_IBM_P (return_mode)
11489 || FLOAT128_IEEE_P (return_mode)
11490 || (return_type != NULL
11491 && (TYPE_MAIN_VARIANT (return_type)
11492 == long_double_type_node))))
11493 rs6000_passes_long_double = true;
11495 if (ALTIVEC_OR_VSX_VECTOR_MODE (return_mode)
11496 || SPE_VECTOR_MODE (return_mode))
11497 rs6000_passes_vector = true;
11500 #endif
11502 if (fntype
11503 && !TARGET_ALTIVEC
11504 && TARGET_ALTIVEC_ABI
11505 && ALTIVEC_VECTOR_MODE (TYPE_MODE (TREE_TYPE (fntype))))
11507 error ("cannot return value in vector register because"
11508 " altivec instructions are disabled, use -maltivec"
11509 " to enable them");
11513 /* The mode the ABI uses for a word. This is not the same as word_mode
11514 for -m32 -mpowerpc64. This is used to implement various target hooks. */
11516 static machine_mode
11517 rs6000_abi_word_mode (void)
11519 return TARGET_32BIT ? SImode : DImode;
11522 /* Implement the TARGET_OFFLOAD_OPTIONS hook. */
11523 static char *
11524 rs6000_offload_options (void)
11526 if (TARGET_64BIT)
11527 return xstrdup ("-foffload-abi=lp64");
11528 else
11529 return xstrdup ("-foffload-abi=ilp32");
11532 /* On rs6000, function arguments are promoted, as are function return
11533 values. */
11535 static machine_mode
11536 rs6000_promote_function_mode (const_tree type ATTRIBUTE_UNUSED,
11537 machine_mode mode,
11538 int *punsignedp ATTRIBUTE_UNUSED,
11539 const_tree, int)
11541 PROMOTE_MODE (mode, *punsignedp, type);
11543 return mode;
11546 /* Return true if TYPE must be passed on the stack and not in registers. */
11548 static bool
11549 rs6000_must_pass_in_stack (machine_mode mode, const_tree type)
11551 if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2 || TARGET_64BIT)
11552 return must_pass_in_stack_var_size (mode, type);
11553 else
11554 return must_pass_in_stack_var_size_or_pad (mode, type);
11557 static inline bool
11558 is_complex_IBM_long_double (machine_mode mode)
11560 return mode == ICmode || (!TARGET_IEEEQUAD && mode == TCmode);
11563 /* Whether ABI_V4 passes MODE args to a function in floating point
11564 registers. */
11566 static bool
11567 abi_v4_pass_in_fpr (machine_mode mode)
11569 if (!TARGET_FPRS || !TARGET_HARD_FLOAT)
11570 return false;
11571 if (TARGET_SINGLE_FLOAT && mode == SFmode)
11572 return true;
11573 if (TARGET_DOUBLE_FLOAT && mode == DFmode)
11574 return true;
11575 /* ABI_V4 passes complex IBM long double in 8 gprs.
11576 Stupid, but we can't change the ABI now. */
11577 if (is_complex_IBM_long_double (mode))
11578 return false;
11579 if (FLOAT128_2REG_P (mode))
11580 return true;
11581 if (DECIMAL_FLOAT_MODE_P (mode))
11582 return true;
11583 return false;
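/* For illustration: with ABI_V4 hard float, a hypothetical call
   f (1.5) passes the DFmode argument in an FPR, as would a decimal
   float or an IBM 128-bit long double (FLOAT128_2REG_P), whereas a
   complex IBM long double (ICmode/TCmode) falls through to the GPR
   path and occupies 8 GPRs, as noted above.  */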
11586 /* If defined, a C expression which determines whether, and in which
11587 direction, to pad out an argument with extra space. The value
11588 should be of type `enum direction': either `upward' to pad above
11589 the argument, `downward' to pad below, or `none' to inhibit
11590 padding.
11592 For the AIX ABI structs are always stored left shifted in their
11593 argument slot. */
11595 enum direction
11596 function_arg_padding (machine_mode mode, const_tree type)
11598 #ifndef AGGREGATE_PADDING_FIXED
11599 #define AGGREGATE_PADDING_FIXED 0
11600 #endif
11601 #ifndef AGGREGATES_PAD_UPWARD_ALWAYS
11602 #define AGGREGATES_PAD_UPWARD_ALWAYS 0
11603 #endif
11605 if (!AGGREGATE_PADDING_FIXED)
11607 /* GCC used to pass structures of the same size as integer types as
11608 if they were in fact integers, ignoring FUNCTION_ARG_PADDING.
11609 i.e. Structures of size 1 or 2 (or 4 when TARGET_64BIT) were
11610 passed padded downward, except that -mstrict-align further
11611 muddied the water in that multi-component structures of 2 and 4
11612 bytes in size were passed padded upward.
11614 The following arranges for best compatibility with previous
11615 versions of gcc, but removes the -mstrict-align dependency. */
11616 if (BYTES_BIG_ENDIAN)
11618 HOST_WIDE_INT size = 0;
11620 if (mode == BLKmode)
11622 if (type && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST)
11623 size = int_size_in_bytes (type);
11625 else
11626 size = GET_MODE_SIZE (mode);
11628 if (size == 1 || size == 2 || size == 4)
11629 return downward;
11631 return upward;
11634 if (AGGREGATES_PAD_UPWARD_ALWAYS)
11636 if (type != 0 && AGGREGATE_TYPE_P (type))
11637 return upward;
11640 /* Fall back to the default. */
11641 return DEFAULT_FUNCTION_ARG_PADDING (mode, type);
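/* Example of the big-endian rule above (hypothetical types): a
   2-byte struct { short s; } is padded downward, sitting in the
   low-order end of its slot like a promoted short would, while a
   3-byte struct { char a, b, c; } is padded upward, occupying the
   first three bytes of the slot.  */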
11644 /* If defined, a C expression that gives the alignment boundary, in bits,
11645 of an argument with the specified mode and type. If it is not defined,
11646 PARM_BOUNDARY is used for all arguments.
11648 V.4 wants long longs and doubles to be double word aligned. Just
11649 testing the mode size is a boneheaded way to do this as it means
11650 that other types such as complex int are also double word aligned.
11651 However, we're stuck with this because changing the ABI might break
11652 existing library interfaces.
11654 Doubleword align SPE vectors.
11655 Quadword align Altivec/VSX vectors.
11656 Quadword align large synthetic vector types. */
11658 static unsigned int
11659 rs6000_function_arg_boundary (machine_mode mode, const_tree type)
11661 machine_mode elt_mode;
11662 int n_elts;
11664 rs6000_discover_homogeneous_aggregate (mode, type, &elt_mode, &n_elts);
11666 if (DEFAULT_ABI == ABI_V4
11667 && (GET_MODE_SIZE (mode) == 8
11668 || (TARGET_HARD_FLOAT
11669 && TARGET_FPRS
11670 && !is_complex_IBM_long_double (mode)
11671 && FLOAT128_2REG_P (mode))))
11672 return 64;
11673 else if (FLOAT128_VECTOR_P (mode))
11674 return 128;
11675 else if (SPE_VECTOR_MODE (mode)
11676 || (type && TREE_CODE (type) == VECTOR_TYPE
11677 && int_size_in_bytes (type) >= 8
11678 && int_size_in_bytes (type) < 16))
11679 return 64;
11680 else if (ALTIVEC_OR_VSX_VECTOR_MODE (elt_mode)
11681 || (type && TREE_CODE (type) == VECTOR_TYPE
11682 && int_size_in_bytes (type) >= 16))
11683 return 128;
11685 /* Aggregate types that need > 8 byte alignment are quadword-aligned
11686 in the parameter area in the ELFv2 ABI, and in the AIX ABI unless
11687 -mcompat-align-parm is used. */
11688 if (((DEFAULT_ABI == ABI_AIX && !rs6000_compat_align_parm)
11689 || DEFAULT_ABI == ABI_ELFv2)
11690 && type && TYPE_ALIGN (type) > 64)
11692 /* "Aggregate" means any AGGREGATE_TYPE except for single-element
11693 or homogeneous float/vector aggregates here. We already handled
11694 vector aggregates above, but still need to check for float here. */
11695 bool aggregate_p = (AGGREGATE_TYPE_P (type)
11696 && !SCALAR_FLOAT_MODE_P (elt_mode));
11698 /* We used to check for BLKmode instead of the above aggregate type
11699 check. Warn when this results in any difference to the ABI. */
11700 if (aggregate_p != (mode == BLKmode))
11702 static bool warned;
11703 if (!warned && warn_psabi)
11705 warned = true;
11706 inform (input_location,
11707 "the ABI of passing aggregates with %d-byte alignment"
11708 " has changed in GCC 5",
11709 (int) TYPE_ALIGN (type) / BITS_PER_UNIT);
11713 if (aggregate_p)
11714 return 128;
11717 /* Similar for the Darwin64 ABI. Note that for historical reasons we
11718 implement the "aggregate type" check as a BLKmode check here; this
11719 means certain aggregate types are in fact not aligned. */
11720 if (TARGET_MACHO && rs6000_darwin64_abi
11721 && mode == BLKmode
11722 && type && TYPE_ALIGN (type) > 64)
11723 return 128;
11725 return PARM_BOUNDARY;
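/* Examples of the resulting boundaries (hypothetical arguments): on
   V.4 a double or long long is 8 bytes and gets 64-bit alignment; an
   AltiVec vector gets 128; and under ELFv2 an aggregate declared with
   __attribute__ ((aligned (16))) that is not a homogeneous
   float/vector aggregate is also aligned to 128 bits.  */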
11728 /* The offset in words to the start of the parameter save area. */
11730 static unsigned int
11731 rs6000_parm_offset (void)
11733 return (DEFAULT_ABI == ABI_V4 ? 2
11734 : DEFAULT_ABI == ABI_ELFv2 ? 4
11735 : 6);
11738 /* For a function parm of MODE and TYPE, return the starting word in
11739 the parameter area. NWORDS of the parameter area are already used. */
11741 static unsigned int
11742 rs6000_parm_start (machine_mode mode, const_tree type,
11743 unsigned int nwords)
11745 unsigned int align;
11747 align = rs6000_function_arg_boundary (mode, type) / PARM_BOUNDARY - 1;
11748 return nwords + (-(rs6000_parm_offset () + nwords) & align);
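/* Worked example, assuming the 64-bit AIX ABI where PARM_BOUNDARY is
   64 and rs6000_parm_offset () is 6: a 128-bit-aligned argument gives
   align == 128 / 64 - 1 == 1, and with nwords == 3 already used the
   result is 3 + (-(6 + 3) & 1) == 4, so the argument starts at word
   4; byte (6 + 4) * 8 == 80 from the initial SP is 16-byte aligned.  */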
11751 /* Compute the size (in words) of a function argument. */
11753 static unsigned long
11754 rs6000_arg_size (machine_mode mode, const_tree type)
11756 unsigned long size;
11758 if (mode != BLKmode)
11759 size = GET_MODE_SIZE (mode);
11760 else
11761 size = int_size_in_bytes (type);
11763 if (TARGET_32BIT)
11764 return (size + 3) >> 2;
11765 else
11766 return (size + 7) >> 3;
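/* For example, a hypothetical 9-byte BLKmode struct occupies
   (9 + 3) >> 2 == 3 words on a 32-bit target and (9 + 7) >> 3 == 2
   words on a 64-bit target.  */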
11769 /* Use this to flush pending int fields. */
11771 static void
11772 rs6000_darwin64_record_arg_advance_flush (CUMULATIVE_ARGS *cum,
11773 HOST_WIDE_INT bitpos, int final)
11775 unsigned int startbit, endbit;
11776 int intregs, intoffset;
11777 machine_mode mode;
11779 /* Handle the situations where a float is taking up the first half
11780 of the GPR, and the other half is empty (typically due to
11781 alignment restrictions). We can detect this by an 8-byte-aligned
11782 int field, or by seeing that this is the final flush for this
11783 argument. Count the word and continue on. */
11784 if (cum->floats_in_gpr == 1
11785 && (cum->intoffset % 64 == 0
11786 || (cum->intoffset == -1 && final)))
11788 cum->words++;
11789 cum->floats_in_gpr = 0;
11792 if (cum->intoffset == -1)
11793 return;
11795 intoffset = cum->intoffset;
11796 cum->intoffset = -1;
11797 cum->floats_in_gpr = 0;
11799 if (intoffset % BITS_PER_WORD != 0)
11801 mode = mode_for_size (BITS_PER_WORD - intoffset % BITS_PER_WORD,
11802 MODE_INT, 0);
11803 if (mode == BLKmode)
11805 /* We couldn't find an appropriate mode, which happens,
11806 e.g., in packed structs when there are 3 bytes to load.
11807 Move intoffset back to the beginning of the word in this
11808 case. */
11809 intoffset = ROUND_DOWN (intoffset, BITS_PER_WORD);
11813 startbit = ROUND_DOWN (intoffset, BITS_PER_WORD);
11814 endbit = ROUND_UP (bitpos, BITS_PER_WORD);
11815 intregs = (endbit - startbit) / BITS_PER_WORD;
11816 cum->words += intregs;
11817 /* words should be unsigned. */
11818 if ((unsigned)cum->words < (endbit/BITS_PER_WORD))
11820 int pad = (endbit/BITS_PER_WORD) - cum->words;
11821 cum->words += pad;
11825 /* The darwin64 ABI calls for us to recurse down through structs,
11826 looking for elements passed in registers. Unfortunately, we have
11827 to track int register count here also because of misalignments
11828 in powerpc alignment mode. */
11830 static void
11831 rs6000_darwin64_record_arg_advance_recurse (CUMULATIVE_ARGS *cum,
11832 const_tree type,
11833 HOST_WIDE_INT startbitpos)
11835 tree f;
11837 for (f = TYPE_FIELDS (type); f ; f = DECL_CHAIN (f))
11838 if (TREE_CODE (f) == FIELD_DECL)
11840 HOST_WIDE_INT bitpos = startbitpos;
11841 tree ftype = TREE_TYPE (f);
11842 machine_mode mode;
11843 if (ftype == error_mark_node)
11844 continue;
11845 mode = TYPE_MODE (ftype);
11847 if (DECL_SIZE (f) != 0
11848 && tree_fits_uhwi_p (bit_position (f)))
11849 bitpos += int_bit_position (f);
11851 /* ??? FIXME: else assume zero offset. */
11853 if (TREE_CODE (ftype) == RECORD_TYPE)
11854 rs6000_darwin64_record_arg_advance_recurse (cum, ftype, bitpos);
11855 else if (USE_FP_FOR_ARG_P (cum, mode))
11857 unsigned n_fpregs = (GET_MODE_SIZE (mode) + 7) >> 3;
11858 rs6000_darwin64_record_arg_advance_flush (cum, bitpos, 0);
11859 cum->fregno += n_fpregs;
11860 /* Single-precision floats present a special problem for
11861 us, because they are smaller than an 8-byte GPR, and so
11862 the structure-packing rules combined with the standard
11863 varargs behavior mean that we want to pack float/float
11864 and float/int combinations into a single register's
11865 space. This is complicated by the arg advance flushing,
11866 which works on arbitrarily large groups of int-type
11867 fields. */
11868 if (mode == SFmode)
11870 if (cum->floats_in_gpr == 1)
11872 /* Two floats in a word; count the word and reset
11873 the float count. */
11874 cum->words++;
11875 cum->floats_in_gpr = 0;
11877 else if (bitpos % 64 == 0)
11879 /* A float at the beginning of an 8-byte word;
11880 count it and put off adjusting cum->words until
11881 we see if an arg advance flush is going to do it
11882 for us. */
11883 cum->floats_in_gpr++;
11885 else
11887 /* The float is at the end of a word, preceded
11888 by integer fields, so the arg advance flush
11889 just above has already set cum->words and
11890 everything is taken care of. */
11893 else
11894 cum->words += n_fpregs;
11896 else if (USE_ALTIVEC_FOR_ARG_P (cum, mode, 1))
11898 rs6000_darwin64_record_arg_advance_flush (cum, bitpos, 0);
11899 cum->vregno++;
11900 cum->words += 2;
11902 else if (cum->intoffset == -1)
11903 cum->intoffset = bitpos;
11907 /* Check for an item that needs to be considered specially under the Darwin
11908 64-bit ABI. These are record types where the mode is BLKmode or the
11909 structure is 8 bytes in size. */
11910 static int
11911 rs6000_darwin64_struct_check_p (machine_mode mode, const_tree type)
11913 return rs6000_darwin64_abi
11914 && ((mode == BLKmode
11915 && TREE_CODE (type) == RECORD_TYPE
11916 && int_size_in_bytes (type) > 0)
11917 || (type && TREE_CODE (type) == RECORD_TYPE
11918 && int_size_in_bytes (type) == 8)) ? 1 : 0;
11921 /* Update the data in CUM to advance over an argument
11922 of mode MODE and data type TYPE.
11923 (TYPE is null for libcalls where that information may not be available.)
11925 Note that for args passed by reference, function_arg will be called
11926 with MODE and TYPE set to that of the pointer to the arg, not the arg
11927 itself. */
11929 static void
11930 rs6000_function_arg_advance_1 (CUMULATIVE_ARGS *cum, machine_mode mode,
11931 const_tree type, bool named, int depth)
11933 machine_mode elt_mode;
11934 int n_elts;
11936 rs6000_discover_homogeneous_aggregate (mode, type, &elt_mode, &n_elts);
11938 /* Only tick off an argument if we're not recursing. */
11939 if (depth == 0)
11940 cum->nargs_prototype--;
11942 #ifdef HAVE_AS_GNU_ATTRIBUTE
11943 if (TARGET_ELF && (TARGET_64BIT || DEFAULT_ABI == ABI_V4)
11944 && cum->escapes)
11946 if (SCALAR_FLOAT_MODE_P (mode))
11948 rs6000_passes_float = true;
11949 if ((HAVE_LD_PPC_GNU_ATTR_LONG_DOUBLE || TARGET_64BIT)
11950 && (FLOAT128_IBM_P (mode)
11951 || FLOAT128_IEEE_P (mode)
11952 || (type != NULL
11953 && TYPE_MAIN_VARIANT (type) == long_double_type_node)))
11954 rs6000_passes_long_double = true;
11956 if ((named && ALTIVEC_OR_VSX_VECTOR_MODE (mode))
11957 || (SPE_VECTOR_MODE (mode)
11958 && !cum->stdarg
11959 && cum->sysv_gregno <= GP_ARG_MAX_REG))
11960 rs6000_passes_vector = true;
11962 #endif
11964 if (TARGET_ALTIVEC_ABI
11965 && (ALTIVEC_OR_VSX_VECTOR_MODE (elt_mode)
11966 || (type && TREE_CODE (type) == VECTOR_TYPE
11967 && int_size_in_bytes (type) == 16)))
11969 bool stack = false;
11971 if (USE_ALTIVEC_FOR_ARG_P (cum, elt_mode, named))
11973 cum->vregno += n_elts;
11975 if (!TARGET_ALTIVEC)
11976 error ("cannot pass argument in vector register because"
11977 " altivec instructions are disabled, use -maltivec"
11978 " to enable them");
11980 /* PowerPC64 Linux and AIX allocate GPRs for a vector argument
11981 even if it is going to be passed in a vector register.
11982 Darwin does the same for variable-argument functions. */
11983 if (((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
11984 && TARGET_64BIT)
11985 || (cum->stdarg && DEFAULT_ABI != ABI_V4))
11986 stack = true;
11988 else
11989 stack = true;
11991 if (stack)
11993 int align;
11995 /* Vector parameters must be 16-byte aligned. In 32-bit
11996 mode this means we need to take into account the offset
11997 to the parameter save area. In 64-bit mode, they just
11998 have to start on an even word, since the parameter save
11999 area is 16-byte aligned. */
12000 if (TARGET_32BIT)
12001 align = -(rs6000_parm_offset () + cum->words) & 3;
12002 else
12003 align = cum->words & 1;
12004 cum->words += align + rs6000_arg_size (mode, type);
12006 if (TARGET_DEBUG_ARG)
12008 fprintf (stderr, "function_adv: words = %2d, align=%d, ",
12009 cum->words, align);
12010 fprintf (stderr, "nargs = %4d, proto = %d, mode = %4s\n",
12011 cum->nargs_prototype, cum->prototype,
12012 GET_MODE_NAME (mode));
12016 else if (TARGET_SPE_ABI && TARGET_SPE && SPE_VECTOR_MODE (mode)
12017 && !cum->stdarg
12018 && cum->sysv_gregno <= GP_ARG_MAX_REG)
12019 cum->sysv_gregno++;
12021 else if (TARGET_MACHO && rs6000_darwin64_struct_check_p (mode, type))
12023 int size = int_size_in_bytes (type);
12024 /* Variable sized types have size == -1 and are
12025 treated as if consisting entirely of ints.
12026 Pad to a 16-byte boundary if needed. */
12027 if (TYPE_ALIGN (type) >= 2 * BITS_PER_WORD
12028 && (cum->words % 2) != 0)
12029 cum->words++;
12030 /* For varargs, we can just go up by the size of the struct. */
12031 if (!named)
12032 cum->words += (size + 7) / 8;
12033 else
12035 /* It is tempting to say int register count just goes up by
12036 sizeof(type)/8, but this is wrong in a case such as
12037 { int; double; int; } [powerpc alignment]. We have to
12038 grovel through the fields for these too. */
12039 cum->intoffset = 0;
12040 cum->floats_in_gpr = 0;
12041 rs6000_darwin64_record_arg_advance_recurse (cum, type, 0);
12042 rs6000_darwin64_record_arg_advance_flush (cum,
12043 size * BITS_PER_UNIT, 1);
12045 if (TARGET_DEBUG_ARG)
12047 fprintf (stderr, "function_adv: words = %2d, align=%d, size=%d",
12048 cum->words, TYPE_ALIGN (type), size);
12049 fprintf (stderr,
12050 "nargs = %4d, proto = %d, mode = %4s (darwin64 abi)\n",
12051 cum->nargs_prototype, cum->prototype,
12052 GET_MODE_NAME (mode));
12055 else if (DEFAULT_ABI == ABI_V4)
12057 if (abi_v4_pass_in_fpr (mode))
12059 /* _Decimal128 must use an even/odd register pair. This assumes
12060 that the register number is odd when fregno is odd. */
12061 if (mode == TDmode && (cum->fregno % 2) == 1)
12062 cum->fregno++;
12064 if (cum->fregno + (FLOAT128_2REG_P (mode) ? 1 : 0)
12065 <= FP_ARG_V4_MAX_REG)
12066 cum->fregno += (GET_MODE_SIZE (mode) + 7) >> 3;
12067 else
12069 cum->fregno = FP_ARG_V4_MAX_REG + 1;
12070 if (mode == DFmode || FLOAT128_IBM_P (mode)
12071 || mode == DDmode || mode == TDmode)
12072 cum->words += cum->words & 1;
12073 cum->words += rs6000_arg_size (mode, type);
12076 else
12078 int n_words = rs6000_arg_size (mode, type);
12079 int gregno = cum->sysv_gregno;
12081 /* Long long and SPE vectors are put in (r3,r4), (r5,r6),
12082 (r7,r8) or (r9,r10), as is any other 2-word item such
12083 as complex int, due to a historical mistake. */
12084 if (n_words == 2)
12085 gregno += (1 - gregno) & 1;
12087 /* Multi-reg args are not split between registers and stack. */
12088 if (gregno + n_words - 1 > GP_ARG_MAX_REG)
12090 /* Long long and SPE vectors are aligned on the stack.
12091 So are other 2-word items such as complex int due to
12092 a historical mistake. */
12093 if (n_words == 2)
12094 cum->words += cum->words & 1;
12095 cum->words += n_words;
12098 /* Note: we keep accumulating gregno even after we've started
12099 spilling to the stack; this lets expand_builtin_saveregs see
12100 that spilling has started. */
12101 cum->sysv_gregno = gregno + n_words;
12104 if (TARGET_DEBUG_ARG)
12106 fprintf (stderr, "function_adv: words = %2d, fregno = %2d, ",
12107 cum->words, cum->fregno);
12108 fprintf (stderr, "gregno = %2d, nargs = %4d, proto = %d, ",
12109 cum->sysv_gregno, cum->nargs_prototype, cum->prototype);
12110 fprintf (stderr, "mode = %4s, named = %d\n",
12111 GET_MODE_NAME (mode), named);
12114 else
12116 int n_words = rs6000_arg_size (mode, type);
12117 int start_words = cum->words;
12118 int align_words = rs6000_parm_start (mode, type, start_words);
12120 cum->words = align_words + n_words;
12122 if (SCALAR_FLOAT_MODE_P (elt_mode) && TARGET_HARD_FLOAT && TARGET_FPRS)
12124 /* _Decimal128 must be passed in an even/odd float register pair.
12125 This assumes that the register number is odd when fregno is
12126 odd. */
12127 if (elt_mode == TDmode && (cum->fregno % 2) == 1)
12128 cum->fregno++;
12129 cum->fregno += n_elts * ((GET_MODE_SIZE (elt_mode) + 7) >> 3);
12132 if (TARGET_DEBUG_ARG)
12134 fprintf (stderr, "function_adv: words = %2d, fregno = %2d, ",
12135 cum->words, cum->fregno);
12136 fprintf (stderr, "nargs = %4d, proto = %d, mode = %4s, ",
12137 cum->nargs_prototype, cum->prototype, GET_MODE_NAME (mode));
12138 fprintf (stderr, "named = %d, align = %d, depth = %d\n",
12139 named, align_words - start_words, depth);
12144 static void
12145 rs6000_function_arg_advance (cumulative_args_t cum, machine_mode mode,
12146 const_tree type, bool named)
12148 rs6000_function_arg_advance_1 (get_cumulative_args (cum), mode, type, named,
12152 static rtx
12153 spe_build_register_parallel (machine_mode mode, int gregno)
12155 rtx r1, r3, r5, r7;
12157 switch (mode)
12159 case DFmode:
12160 r1 = gen_rtx_REG (DImode, gregno);
12161 r1 = gen_rtx_EXPR_LIST (VOIDmode, r1, const0_rtx);
12162 return gen_rtx_PARALLEL (mode, gen_rtvec (1, r1));
12164 case DCmode:
12165 case TFmode:
12166 r1 = gen_rtx_REG (DImode, gregno);
12167 r1 = gen_rtx_EXPR_LIST (VOIDmode, r1, const0_rtx);
12168 r3 = gen_rtx_REG (DImode, gregno + 2);
12169 r3 = gen_rtx_EXPR_LIST (VOIDmode, r3, GEN_INT (8));
12170 return gen_rtx_PARALLEL (mode, gen_rtvec (2, r1, r3));
12172 case TCmode:
12173 r1 = gen_rtx_REG (DImode, gregno);
12174 r1 = gen_rtx_EXPR_LIST (VOIDmode, r1, const0_rtx);
12175 r3 = gen_rtx_REG (DImode, gregno + 2);
12176 r3 = gen_rtx_EXPR_LIST (VOIDmode, r3, GEN_INT (8));
12177 r5 = gen_rtx_REG (DImode, gregno + 4);
12178 r5 = gen_rtx_EXPR_LIST (VOIDmode, r5, GEN_INT (16));
12179 r7 = gen_rtx_REG (DImode, gregno + 6);
12180 r7 = gen_rtx_EXPR_LIST (VOIDmode, r7, GEN_INT (24));
12181 return gen_rtx_PARALLEL (mode, gen_rtvec (4, r1, r3, r5, r7));
12183 default:
12184 gcc_unreachable ();
12188 /* Determine where to put a SIMD argument on the SPE. */
12189 static rtx
12190 rs6000_spe_function_arg (const CUMULATIVE_ARGS *cum, machine_mode mode,
12191 const_tree type)
12193 int gregno = cum->sysv_gregno;
12195 /* On E500 v2, double arithmetic is done on the full 64-bit GPR, but
12196 are passed and returned in a pair of GPRs for ABI compatibility. */
12197 if (TARGET_E500_DOUBLE && (mode == DFmode || mode == TFmode
12198 || mode == DCmode || mode == TCmode))
12200 int n_words = rs6000_arg_size (mode, type);
12202 /* Doubles go in an odd/even register pair (r5/r6, etc). */
12203 if (mode == DFmode)
12204 gregno += (1 - gregno) & 1;
12206 /* Multi-reg args are not split between registers and stack. */
12207 if (gregno + n_words - 1 > GP_ARG_MAX_REG)
12208 return NULL_RTX;
12210 return spe_build_register_parallel (mode, gregno);
12212 if (cum->stdarg)
12214 int n_words = rs6000_arg_size (mode, type);
12216 /* SPE vectors are put in odd registers. */
12217 if (n_words == 2 && (gregno & 1) == 0)
12218 gregno += 1;
12220 if (gregno + n_words - 1 <= GP_ARG_MAX_REG)
12222 rtx r1, r2;
12223 machine_mode m = SImode;
12225 r1 = gen_rtx_REG (m, gregno);
12226 r1 = gen_rtx_EXPR_LIST (m, r1, const0_rtx);
12227 r2 = gen_rtx_REG (m, gregno + 1);
12228 r2 = gen_rtx_EXPR_LIST (m, r2, GEN_INT (4));
12229 return gen_rtx_PARALLEL (mode, gen_rtvec (2, r1, r2));
12231 else
12232 return NULL_RTX;
12234 else
12236 if (gregno <= GP_ARG_MAX_REG)
12237 return gen_rtx_REG (mode, gregno);
12238 else
12239 return NULL_RTX;
12243 /* A subroutine of rs6000_darwin64_record_arg. Assign the bits of the
12244 structure between cum->intoffset and bitpos to integer registers. */
12246 static void
12247 rs6000_darwin64_record_arg_flush (CUMULATIVE_ARGS *cum,
12248 HOST_WIDE_INT bitpos, rtx rvec[], int *k)
12250 machine_mode mode;
12251 unsigned int regno;
12252 unsigned int startbit, endbit;
12253 int this_regno, intregs, intoffset;
12254 rtx reg;
12256 if (cum->intoffset == -1)
12257 return;
12259 intoffset = cum->intoffset;
12260 cum->intoffset = -1;
12262 /* If this is the trailing part of a word, try to only load that
12263 much into the register. Otherwise load the whole register. Note
12264 that in the latter case we may pick up unwanted bits. It's not a
12265 problem at the moment, but we may wish to revisit this. */
12267 if (intoffset % BITS_PER_WORD != 0)
12269 mode = mode_for_size (BITS_PER_WORD - intoffset % BITS_PER_WORD,
12270 MODE_INT, 0);
12271 if (mode == BLKmode)
12273 /* We couldn't find an appropriate mode, which happens,
12274 e.g., in packed structs when there are 3 bytes to load.
12275 Move intoffset back to the beginning of the word in this
12276 case. */
12277 intoffset = ROUND_DOWN (intoffset, BITS_PER_WORD);
12278 mode = word_mode;
12281 else
12282 mode = word_mode;
12284 startbit = ROUND_DOWN (intoffset, BITS_PER_WORD);
12285 endbit = ROUND_UP (bitpos, BITS_PER_WORD);
12286 intregs = (endbit - startbit) / BITS_PER_WORD;
12287 this_regno = cum->words + intoffset / BITS_PER_WORD;
12289 if (intregs > 0 && intregs > GP_ARG_NUM_REG - this_regno)
12290 cum->use_stack = 1;
12292 intregs = MIN (intregs, GP_ARG_NUM_REG - this_regno);
12293 if (intregs <= 0)
12294 return;
12296 intoffset /= BITS_PER_UNIT;
12297 do
12298 {
12299 regno = GP_ARG_MIN_REG + this_regno;
12300 reg = gen_rtx_REG (mode, regno);
12301 rvec[(*k)++] =
12302 gen_rtx_EXPR_LIST (VOIDmode, reg, GEN_INT (intoffset));
12304 this_regno += 1;
12305 intoffset = (intoffset | (UNITS_PER_WORD-1)) + 1;
12306 mode = word_mode;
12307 intregs -= 1;
12308 }
12309 while (intregs > 0);
12312 /* Recursive workhorse for the following. */
12314 static void
12315 rs6000_darwin64_record_arg_recurse (CUMULATIVE_ARGS *cum, const_tree type,
12316 HOST_WIDE_INT startbitpos, rtx rvec[],
12317 int *k)
12319 tree f;
12321 for (f = TYPE_FIELDS (type); f ; f = DECL_CHAIN (f))
12322 if (TREE_CODE (f) == FIELD_DECL)
12324 HOST_WIDE_INT bitpos = startbitpos;
12325 tree ftype = TREE_TYPE (f);
12326 machine_mode mode;
12327 if (ftype == error_mark_node)
12328 continue;
12329 mode = TYPE_MODE (ftype);
12331 if (DECL_SIZE (f) != 0
12332 && tree_fits_uhwi_p (bit_position (f)))
12333 bitpos += int_bit_position (f);
12335 /* ??? FIXME: else assume zero offset. */
12337 if (TREE_CODE (ftype) == RECORD_TYPE)
12338 rs6000_darwin64_record_arg_recurse (cum, ftype, bitpos, rvec, k);
12339 else if (cum->named && USE_FP_FOR_ARG_P (cum, mode))
12341 unsigned n_fpreg = (GET_MODE_SIZE (mode) + 7) >> 3;
12342 #if 0
12343 switch (mode)
12345 case SCmode: mode = SFmode; break;
12346 case DCmode: mode = DFmode; break;
12347 case TCmode: mode = TFmode; break;
12348 default: break;
12350 #endif
12351 rs6000_darwin64_record_arg_flush (cum, bitpos, rvec, k);
12352 if (cum->fregno + n_fpreg > FP_ARG_MAX_REG + 1)
12354 gcc_assert (cum->fregno == FP_ARG_MAX_REG
12355 && (mode == TFmode || mode == TDmode));
12356 /* Long double or _Decimal128 split over regs and memory. */
12357 mode = DECIMAL_FLOAT_MODE_P (mode) ? DDmode : DFmode;
12358 cum->use_stack = 1;
12360 rvec[(*k)++]
12361 = gen_rtx_EXPR_LIST (VOIDmode,
12362 gen_rtx_REG (mode, cum->fregno++),
12363 GEN_INT (bitpos / BITS_PER_UNIT));
12364 if (FLOAT128_2REG_P (mode))
12365 cum->fregno++;
12367 else if (cum->named && USE_ALTIVEC_FOR_ARG_P (cum, mode, 1))
12369 rs6000_darwin64_record_arg_flush (cum, bitpos, rvec, k);
12370 rvec[(*k)++]
12371 = gen_rtx_EXPR_LIST (VOIDmode,
12372 gen_rtx_REG (mode, cum->vregno++),
12373 GEN_INT (bitpos / BITS_PER_UNIT));
12375 else if (cum->intoffset == -1)
12376 cum->intoffset = bitpos;
12380 /* For the darwin64 ABI, we want to construct a PARALLEL consisting of
12381 the register(s) to be used for each field and subfield of a struct
12382 being passed by value, along with the offset of where the
12383 register's value may be found in the block. FP fields go in FP
12384 register, vector fields go in vector registers, and everything
12385 else goes in int registers, packed as in memory.
12387 This code is also used for function return values. RETVAL indicates
12388 whether this is the case.
12390 Much of this is taken from the SPARC V9 port, which has a similar
12391 calling convention. */
12393 static rtx
12394 rs6000_darwin64_record_arg (CUMULATIVE_ARGS *orig_cum, const_tree type,
12395 bool named, bool retval)
12397 rtx rvec[FIRST_PSEUDO_REGISTER];
12398 int k = 1, kbase = 1;
12399 HOST_WIDE_INT typesize = int_size_in_bytes (type);
12400 /* This is a copy; modifications are not visible to our caller. */
12401 CUMULATIVE_ARGS copy_cum = *orig_cum;
12402 CUMULATIVE_ARGS *cum = &copy_cum;
12404 /* Pad to a 16-byte boundary if needed. */
12405 if (!retval && TYPE_ALIGN (type) >= 2 * BITS_PER_WORD
12406 && (cum->words % 2) != 0)
12407 cum->words++;
12409 cum->intoffset = 0;
12410 cum->use_stack = 0;
12411 cum->named = named;
12413 /* Put entries into rvec[] for individual FP and vector fields, and
12414 for the chunks of memory that go in int regs. Note we start at
12415 element 1; 0 is reserved for an indication of using memory, and
12416 may or may not be filled in below. */
12417 rs6000_darwin64_record_arg_recurse (cum, type, /* startbit pos= */ 0, rvec, &k);
12418 rs6000_darwin64_record_arg_flush (cum, typesize * BITS_PER_UNIT, rvec, &k);
12420 /* If any part of the struct went on the stack put all of it there.
12421 This hack is because the generic code for
12422 FUNCTION_ARG_PARTIAL_NREGS cannot handle cases where the register
12423 parts of the struct are not at the beginning. */
12424 if (cum->use_stack)
12426 if (retval)
12427 return NULL_RTX; /* doesn't go in registers at all */
12428 kbase = 0;
12429 rvec[0] = gen_rtx_EXPR_LIST (VOIDmode, NULL_RTX, const0_rtx);
12431 if (k > 1 || cum->use_stack)
12432 return gen_rtx_PARALLEL (BLKmode, gen_rtvec_v (k - kbase, &rvec[kbase]));
12433 else
12434 return NULL_RTX;
12437 /* Determine where to place an argument in 64-bit mode with 32-bit ABI. */
12439 static rtx
12440 rs6000_mixed_function_arg (machine_mode mode, const_tree type,
12441 int align_words)
12443 int n_units;
12444 int i, k;
12445 rtx rvec[GP_ARG_NUM_REG + 1];
12447 if (align_words >= GP_ARG_NUM_REG)
12448 return NULL_RTX;
12450 n_units = rs6000_arg_size (mode, type);
12452 /* Optimize the simple case where the arg fits in one gpr, except in
12453 the case of BLKmode due to assign_parms assuming that registers are
12454 BITS_PER_WORD wide. */
12455 if (n_units == 0
12456 || (n_units == 1 && mode != BLKmode))
12457 return gen_rtx_REG (mode, GP_ARG_MIN_REG + align_words);
12459 k = 0;
12460 if (align_words + n_units > GP_ARG_NUM_REG)
12461 /* Not all of the arg fits in gprs. Say that it goes in memory too,
12462 using a magic NULL_RTX component.
12463 This is not strictly correct. Only some of the arg belongs in
12464 memory, not all of it. However, the normal scheme using
12465 function_arg_partial_nregs can result in unusual subregs, e.g.
12466 (subreg:SI (reg:DF) 4), which are not handled well. The code to
12467 store the whole arg to memory is often more efficient than code
12468 to store pieces, and we know that space is available in the right
12469 place for the whole arg. */
12470 rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, NULL_RTX, const0_rtx);
12472 i = 0;
12473 do
12474 {
12475 rtx r = gen_rtx_REG (SImode, GP_ARG_MIN_REG + align_words);
12476 rtx off = GEN_INT (i++ * 4);
12477 rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, r, off);
12478 }
12479 while (++align_words < GP_ARG_NUM_REG && --n_units != 0);
12481 return gen_rtx_PARALLEL (mode, gen_rtvec_v (k, rvec));
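/* For instance (hypothetical argument): a DFmode arg arriving here
   with align_words == 7 has only r10 left of the eight GPR argument
   registers, so the PARALLEL built above pairs the magic NULL_RTX
   element with (reg:SI r10) at offset 0, meaning half the value goes
   in r10 and the rest in memory.  */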
12484 /* We have an argument of MODE and TYPE that goes into FPRs or VRs,
12485 but must also be copied into the parameter save area starting at
12486 offset ALIGN_WORDS. Fill in RVEC with the elements corresponding
12487 to the GPRs and/or memory. Return the number of elements used. */
12489 static int
12490 rs6000_psave_function_arg (machine_mode mode, const_tree type,
12491 int align_words, rtx *rvec)
12493 int k = 0;
12495 if (align_words < GP_ARG_NUM_REG)
12497 int n_words = rs6000_arg_size (mode, type);
12499 if (align_words + n_words > GP_ARG_NUM_REG
12500 || mode == BLKmode
12501 || (TARGET_32BIT && TARGET_POWERPC64))
12503 /* If this is partially on the stack, then we only
12504 include the portion actually in registers here. */
12505 machine_mode rmode = TARGET_32BIT ? SImode : DImode;
12506 int i = 0;
12508 if (align_words + n_words > GP_ARG_NUM_REG)
12510 /* Not all of the arg fits in gprs. Say that it goes in memory
12511 too, using a magic NULL_RTX component. Also see comment in
12512 rs6000_mixed_function_arg for why the normal
12513 function_arg_partial_nregs scheme doesn't work in this case. */
12514 rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, NULL_RTX, const0_rtx);
12516 do
12517 {
12519 rtx r = gen_rtx_REG (rmode, GP_ARG_MIN_REG + align_words);
12520 rtx off = GEN_INT (i++ * GET_MODE_SIZE (rmode));
12521 rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, r, off);
12522 }
12523 while (++align_words < GP_ARG_NUM_REG && --n_words != 0);
12525 else
12527 /* The whole arg fits in gprs. */
12528 rtx r = gen_rtx_REG (mode, GP_ARG_MIN_REG + align_words);
12529 rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, r, const0_rtx);
12532 else
12534 /* It's entirely in memory. */
12535 rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, NULL_RTX, const0_rtx);
12538 return k;
12541 /* RVEC is a vector of K components of an argument of mode MODE.
12542 Construct the final function_arg return value from it. */
12544 static rtx
12545 rs6000_finish_function_arg (machine_mode mode, rtx *rvec, int k)
12547 gcc_assert (k >= 1);
12549 /* Avoid returning a PARALLEL in the trivial cases. */
12550 if (k == 1)
12552 if (XEXP (rvec[0], 0) == NULL_RTX)
12553 return NULL_RTX;
12555 if (GET_MODE (XEXP (rvec[0], 0)) == mode)
12556 return XEXP (rvec[0], 0);
12559 return gen_rtx_PARALLEL (mode, gen_rtvec_v (k, rvec));
12562 /* Determine where to put an argument to a function.
12563 Value is zero to push the argument on the stack,
12564 or a hard register in which to store the argument.
12566 MODE is the argument's machine mode.
12567 TYPE is the data type of the argument (as a tree).
12568 This is null for libcalls where that information may
12569 not be available.
12570 CUM is a variable of type CUMULATIVE_ARGS which gives info about
12571 the preceding args and about the function being called. It is
12572 not modified in this routine.
12573 NAMED is nonzero if this argument is a named parameter
12574 (otherwise it is an extra parameter matching an ellipsis).
12576 On RS/6000 the first eight words of non-FP are normally in registers
12577 and the rest are pushed. Under AIX, the first 13 FP args are in registers.
12578 Under V.4, the first 8 FP args are in registers.
12580 If this is floating-point and no prototype is specified, we use
12581 both an FP and integer register (or possibly FP reg and stack). Library
12582 functions (when CALL_LIBCALL is set) always have the proper types for args,
12583 so we can pass the FP value just in one register. emit_library_function
12584 doesn't support PARALLEL anyway.
12586 Note that for args passed by reference, function_arg will be called
12587 with MODE and TYPE set to that of the pointer to the arg, not the arg
12588 itself. */
12590 static rtx
12591 rs6000_function_arg (cumulative_args_t cum_v, machine_mode mode,
12592 const_tree type, bool named)
12594 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
12595 enum rs6000_abi abi = DEFAULT_ABI;
12596 machine_mode elt_mode;
12597 int n_elts;
12599 /* Return a marker to indicate whether CR1 needs to be set or cleared
12600 (the bit that V.4 uses to say fp args were passed in registers).
12601 Assume that we don't need the marker for software floating point,
12602 or compiler generated library calls. */
12603 if (mode == VOIDmode)
12605 if (abi == ABI_V4
12606 && (cum->call_cookie & CALL_LIBCALL) == 0
12607 && (cum->stdarg
12608 || (cum->nargs_prototype < 0
12609 && (cum->prototype || TARGET_NO_PROTOTYPE))))
12611 /* For the SPE, we need to crxor CR6 always. */
12612 if (TARGET_SPE_ABI)
12613 return GEN_INT (cum->call_cookie | CALL_V4_SET_FP_ARGS);
12614 else if (TARGET_HARD_FLOAT && TARGET_FPRS)
12615 return GEN_INT (cum->call_cookie
12616 | ((cum->fregno == FP_ARG_MIN_REG)
12617 ? CALL_V4_SET_FP_ARGS
12618 : CALL_V4_CLEAR_FP_ARGS));
12621 return GEN_INT (cum->call_cookie & ~CALL_LIBCALL);
12624 rs6000_discover_homogeneous_aggregate (mode, type, &elt_mode, &n_elts);
12626 if (TARGET_MACHO && rs6000_darwin64_struct_check_p (mode, type))
12628 rtx rslt = rs6000_darwin64_record_arg (cum, type, named, /*retval= */false);
12629 if (rslt != NULL_RTX)
12630 return rslt;
12631 /* Else fall through to usual handling. */
12634 if (USE_ALTIVEC_FOR_ARG_P (cum, elt_mode, named))
12636 rtx rvec[GP_ARG_NUM_REG + AGGR_ARG_NUM_REG + 1];
12637 rtx r, off;
12638 int i, k = 0;
12640 /* Do we also need to pass this argument in the parameter save area?
12641 Library support functions for IEEE 128-bit are assumed to not need the
12642 value passed both in GPRs and in vector registers. */
12643 if (TARGET_64BIT && !cum->prototype
12644 && (!cum->libcall || !FLOAT128_VECTOR_P (elt_mode)))
12646 int align_words = ROUND_UP (cum->words, 2);
12647 k = rs6000_psave_function_arg (mode, type, align_words, rvec);
12650 /* Describe where this argument goes in the vector registers. */
12651 for (i = 0; i < n_elts && cum->vregno + i <= ALTIVEC_ARG_MAX_REG; i++)
12653 r = gen_rtx_REG (elt_mode, cum->vregno + i);
12654 off = GEN_INT (i * GET_MODE_SIZE (elt_mode));
12655 rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, r, off);
12658 return rs6000_finish_function_arg (mode, rvec, k);
12660 else if (TARGET_ALTIVEC_ABI
12661 && (ALTIVEC_OR_VSX_VECTOR_MODE (mode)
12662 || (type && TREE_CODE (type) == VECTOR_TYPE
12663 && int_size_in_bytes (type) == 16)))
12665 if (named || abi == ABI_V4)
12666 return NULL_RTX;
12667 else
12669 /* Vector parameters to varargs functions under AIX or Darwin
12670 get passed in memory and possibly also in GPRs. */
12671 int align, align_words, n_words;
12672 machine_mode part_mode;
12674 /* Vector parameters must be 16-byte aligned. In 32-bit
12675 mode this means we need to take into account the offset
12676 to the parameter save area. In 64-bit mode, they just
12677 have to start on an even word, since the parameter save
12678 area is 16-byte aligned. */
12679 if (TARGET_32BIT)
12680 align = -(rs6000_parm_offset () + cum->words) & 3;
12681 else
12682 align = cum->words & 1;
12683 align_words = cum->words + align;
12685 /* Out of registers? Memory, then. */
12686 if (align_words >= GP_ARG_NUM_REG)
12687 return NULL_RTX;
12689 if (TARGET_32BIT && TARGET_POWERPC64)
12690 return rs6000_mixed_function_arg (mode, type, align_words);
12692 /* The vector value goes in GPRs. Only the part of the
12693 value in GPRs is reported here. */
12694 part_mode = mode;
12695 n_words = rs6000_arg_size (mode, type);
12696 if (align_words + n_words > GP_ARG_NUM_REG)
12697 /* Fortunately, there are only two possibilities: the value
12698 is either wholly in GPRs or half in GPRs and half not. */
12699 part_mode = DImode;
12701 return gen_rtx_REG (part_mode, GP_ARG_MIN_REG + align_words);
12704 else if (TARGET_SPE_ABI && TARGET_SPE
12705 && (SPE_VECTOR_MODE (mode)
12706 || (TARGET_E500_DOUBLE && (mode == DFmode
12707 || mode == DCmode
12708 || mode == TFmode
12709 || mode == TCmode))))
12710 return rs6000_spe_function_arg (cum, mode, type);
12712 else if (abi == ABI_V4)
12714 if (abi_v4_pass_in_fpr (mode))
12716 /* _Decimal128 must use an even/odd register pair. This assumes
12717 that the register number is odd when fregno is odd. */
12718 if (mode == TDmode && (cum->fregno % 2) == 1)
12719 cum->fregno++;
12721 if (cum->fregno + (FLOAT128_2REG_P (mode) ? 1 : 0)
12722 <= FP_ARG_V4_MAX_REG)
12723 return gen_rtx_REG (mode, cum->fregno);
12724 else
12725 return NULL_RTX;
12727 else
12729 int n_words = rs6000_arg_size (mode, type);
12730 int gregno = cum->sysv_gregno;
12732 /* Long long and SPE vectors are put in (r3,r4), (r5,r6),
12733 (r7,r8) or (r9,r10), as is any other 2-word item such
12734 as complex int, due to a historical mistake. */
12735 if (n_words == 2)
12736 gregno += (1 - gregno) & 1;
12738 /* Multi-reg args are not split between registers and stack. */
12739 if (gregno + n_words - 1 > GP_ARG_MAX_REG)
12740 return NULL_RTX;
12742 if (TARGET_32BIT && TARGET_POWERPC64)
12743 return rs6000_mixed_function_arg (mode, type,
12744 gregno - GP_ARG_MIN_REG);
12745 return gen_rtx_REG (mode, gregno);
12748 else
12750 int align_words = rs6000_parm_start (mode, type, cum->words);
12752 /* _Decimal128 must be passed in an even/odd float register pair.
12753 This assumes that the register number is odd when fregno is odd. */
12754 if (elt_mode == TDmode && (cum->fregno % 2) == 1)
12755 cum->fregno++;
12757 if (USE_FP_FOR_ARG_P (cum, elt_mode))
12759 rtx rvec[GP_ARG_NUM_REG + AGGR_ARG_NUM_REG + 1];
12760 rtx r, off;
12761 int i, k = 0;
12762 unsigned long n_fpreg = (GET_MODE_SIZE (elt_mode) + 7) >> 3;
12763 int fpr_words;
12765 /* Do we also need to pass this argument in the parameter
12766 save area? */
12767 if (type && (cum->nargs_prototype <= 0
12768 || ((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
12769 && TARGET_XL_COMPAT
12770 && align_words >= GP_ARG_NUM_REG)))
12771 k = rs6000_psave_function_arg (mode, type, align_words, rvec);
12773 /* Describe where this argument goes in the fprs. */
12774 for (i = 0; i < n_elts
12775 && cum->fregno + i * n_fpreg <= FP_ARG_MAX_REG; i++)
12777 /* Check if the argument is split over registers and memory.
12778 This can only ever happen for long double or _Decimal128;
12779 complex types are handled via split_complex_arg. */
12780 machine_mode fmode = elt_mode;
12781 if (cum->fregno + (i + 1) * n_fpreg > FP_ARG_MAX_REG + 1)
12783 gcc_assert (FLOAT128_2REG_P (fmode));
12784 fmode = DECIMAL_FLOAT_MODE_P (fmode) ? DDmode : DFmode;
12787 r = gen_rtx_REG (fmode, cum->fregno + i * n_fpreg);
12788 off = GEN_INT (i * GET_MODE_SIZE (elt_mode));
12789 rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, r, off);
12792 /* If there were not enough FPRs to hold the argument, the rest
12793 usually goes into memory. However, if the current position
12794 is still within the register parameter area, a portion may
12795 actually have to go into GPRs.
12797 Note that it may happen that the portion of the argument
12798 passed in the first "half" of the first GPR was already
12799 passed in the last FPR as well.
12801 For unnamed arguments, we already set up GPRs to cover the
12802 whole argument in rs6000_psave_function_arg, so there is
12803 nothing further to do at this point. */
12804 fpr_words = (i * GET_MODE_SIZE (elt_mode)) / (TARGET_32BIT ? 4 : 8);
12805 if (i < n_elts && align_words + fpr_words < GP_ARG_NUM_REG
12806 && cum->nargs_prototype > 0)
12808 static bool warned;
12810 machine_mode rmode = TARGET_32BIT ? SImode : DImode;
12811 int n_words = rs6000_arg_size (mode, type);
12813 align_words += fpr_words;
12814 n_words -= fpr_words;
12816 do
12818 r = gen_rtx_REG (rmode, GP_ARG_MIN_REG + align_words);
12819 off = GEN_INT (fpr_words++ * GET_MODE_SIZE (rmode));
12820 rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, r, off);
12822 while (++align_words < GP_ARG_NUM_REG && --n_words != 0);
12824 if (!warned && warn_psabi)
12826 warned = true;
12827 inform (input_location,
12828 "the ABI of passing homogeneous float aggregates"
12829 " has changed in GCC 5");
12833 return rs6000_finish_function_arg (mode, rvec, k);
12835 else if (align_words < GP_ARG_NUM_REG)
12837 if (TARGET_32BIT && TARGET_POWERPC64)
12838 return rs6000_mixed_function_arg (mode, type, align_words);
12840 return gen_rtx_REG (mode, GP_ARG_MIN_REG + align_words);
12842 else
12843 return NULL_RTX;
12847 /* For an arg passed partly in registers and partly in memory, this is
12848 the number of bytes passed in registers. For args passed entirely in
12849 registers or entirely in memory, zero. When an arg is described by a
12850 PARALLEL, perhaps using more than one register type, this function
12851 returns the number of bytes used by the first element of the PARALLEL. */
12853 static int
12854 rs6000_arg_partial_bytes (cumulative_args_t cum_v, machine_mode mode,
12855 tree type, bool named)
12857 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
12858 bool passed_in_gprs = true;
12859 int ret = 0;
12860 int align_words;
12861 machine_mode elt_mode;
12862 int n_elts;
12864 rs6000_discover_homogeneous_aggregate (mode, type, &elt_mode, &n_elts);
12866 if (DEFAULT_ABI == ABI_V4)
12867 return 0;
12869 if (USE_ALTIVEC_FOR_ARG_P (cum, elt_mode, named))
12871 /* If we are passing this arg in the fixed parameter save area (gprs or
12872 memory) as well as VRs, we do not use the partial bytes mechanism;
12873 instead, rs6000_function_arg will return a PARALLEL including a memory
12874 element as necessary. Library support functions for IEEE 128-bit are
12875 assumed to not need the value passed both in GPRs and in vector
12876 registers. */
12877 if (TARGET_64BIT && !cum->prototype
12878 && (!cum->libcall || !FLOAT128_VECTOR_P (elt_mode)))
12879 return 0;
12881 /* Otherwise, we pass in VRs only. Check for partial copies. */
12882 passed_in_gprs = false;
12883 if (cum->vregno + n_elts > ALTIVEC_ARG_MAX_REG + 1)
12884 ret = (ALTIVEC_ARG_MAX_REG + 1 - cum->vregno) * 16;
12887 /* In this complicated case we just disable the partial_nregs code. */
12888 if (TARGET_MACHO && rs6000_darwin64_struct_check_p (mode, type))
12889 return 0;
12891 align_words = rs6000_parm_start (mode, type, cum->words);
12893 if (USE_FP_FOR_ARG_P (cum, elt_mode))
12895 unsigned long n_fpreg = (GET_MODE_SIZE (elt_mode) + 7) >> 3;
12897 /* If we are passing this arg in the fixed parameter save area
12898 (gprs or memory) as well as FPRs, we do not use the partial
12899 bytes mechanism; instead, rs6000_function_arg will return a
12900 PARALLEL including a memory element as necessary. */
12901 if (type
12902 && (cum->nargs_prototype <= 0
12903 || ((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
12904 && TARGET_XL_COMPAT
12905 && align_words >= GP_ARG_NUM_REG)))
12906 return 0;
12908 /* Otherwise, we pass in FPRs only. Check for partial copies. */
12909 passed_in_gprs = false;
12910 if (cum->fregno + n_elts * n_fpreg > FP_ARG_MAX_REG + 1)
12912 /* Compute number of bytes / words passed in FPRs. If there
12913 is still space available in the register parameter area
12914 *after* that amount, a part of the argument will be passed
12915 in GPRs. In that case, the total amount passed in any
12916 registers is equal to the amount that would have been passed
12917 in GPRs if everything were passed there, so we fall back to
12918 the GPR code below to compute the appropriate value. */
12919 int fpr = ((FP_ARG_MAX_REG + 1 - cum->fregno)
12920 * MIN (8, GET_MODE_SIZE (elt_mode)));
12921 int fpr_words = fpr / (TARGET_32BIT ? 4 : 8);
12923 if (align_words + fpr_words < GP_ARG_NUM_REG)
12924 passed_in_gprs = true;
12925 else
12926 ret = fpr;
12930 if (passed_in_gprs
12931 && align_words < GP_ARG_NUM_REG
12932 && GP_ARG_NUM_REG < align_words + rs6000_arg_size (mode, type))
12933 ret = (GP_ARG_NUM_REG - align_words) * (TARGET_32BIT ? 4 : 8);
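/* Worked example (illustrative, 64-bit with FP arguments in
   f1 .. f13): a homogeneous aggregate of four doubles that arrives
   with cum->fregno at f12 has only f12/f13 free, so fpr above is
   2 * 8 == 16; if align_words + 2 is still below GP_ARG_NUM_REG
   the GPR fallback just above recomputes ret, otherwise 16 is
   returned as the number of bytes passed in registers.  */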
12935 if (ret != 0 && TARGET_DEBUG_ARG)
12936 fprintf (stderr, "rs6000_arg_partial_bytes: %d\n", ret);
12938 return ret;
12941 /* A C expression that indicates when an argument must be passed by
12942 reference. If nonzero for an argument, a copy of that argument is
12943 made in memory and a pointer to the argument is passed instead of
12944 the argument itself. The pointer is passed in whatever way is
12945 appropriate for passing a pointer to that type.
12947 Under V.4, aggregates and long double are passed by reference.
12949 As an extension to all 32-bit ABIs, AltiVec vectors are passed by
12950 reference unless the AltiVec vector extension ABI is in force.
12952 As an extension to all ABIs, variable sized types are passed by
12953 reference. */
12955 static bool
12956 rs6000_pass_by_reference (cumulative_args_t cum ATTRIBUTE_UNUSED,
12957 machine_mode mode, const_tree type,
12958 bool named ATTRIBUTE_UNUSED)
12960 if (!type)
12961 return 0;
12963 if (DEFAULT_ABI == ABI_V4 && TARGET_IEEEQUAD
12964 && FLOAT128_IEEE_P (TYPE_MODE (type)))
12966 if (TARGET_DEBUG_ARG)
12967 fprintf (stderr, "function_arg_pass_by_reference: V4 IEEE 128-bit\n");
12968 return 1;
12971 if (DEFAULT_ABI == ABI_V4 && AGGREGATE_TYPE_P (type))
12973 if (TARGET_DEBUG_ARG)
12974 fprintf (stderr, "function_arg_pass_by_reference: V4 aggregate\n");
12975 return 1;
12978 if (int_size_in_bytes (type) < 0)
12980 if (TARGET_DEBUG_ARG)
12981 fprintf (stderr, "function_arg_pass_by_reference: variable size\n");
12982 return 1;
12985 /* Allow -maltivec -mabi=no-altivec without warning. Altivec vector
12986 modes only exist for GCC vector types if -maltivec. */
12987 if (TARGET_32BIT && !TARGET_ALTIVEC_ABI && ALTIVEC_VECTOR_MODE (mode))
12989 if (TARGET_DEBUG_ARG)
12990 fprintf (stderr, "function_arg_pass_by_reference: AltiVec\n");
12991 return 1;
12994 /* Pass synthetic vectors in memory. */
12995 if (TREE_CODE (type) == VECTOR_TYPE
12996 && int_size_in_bytes (type) > (TARGET_ALTIVEC_ABI ? 16 : 8))
12998 static bool warned_for_pass_big_vectors = false;
12999 if (TARGET_DEBUG_ARG)
13000 fprintf (stderr, "function_arg_pass_by_reference: synthetic vector\n");
13001 if (!warned_for_pass_big_vectors)
13003 warning (OPT_Wpsabi, "GCC vector passed by reference: "
13004 "non-standard ABI extension with no compatibility guarantee");
13005 warned_for_pass_big_vectors = true;
13007 return 1;
13010 return 0;
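/* Illustrative cases for the rules above (hypothetical types):
   under -mabi=v4, "struct { int x, y; }" is an aggregate and is
   passed by reference, as is long double when it is IEEE 128-bit;
   on any ABI, a synthetic vector such as
   "int v __attribute__ ((vector_size (32)))" exceeds the 16-byte
   limit (8 without the AltiVec ABI) and also goes by reference,
   after the one-time -Wpsabi warning.  */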
13013 /* Process parameter of type TYPE after ARGS_SO_FAR parameters were
13014 already processed. Return true if the parameter must be passed
13015 (fully or partially) on the stack. */
13017 static bool
13018 rs6000_parm_needs_stack (cumulative_args_t args_so_far, tree type)
13020 machine_mode mode;
13021 int unsignedp;
13022 rtx entry_parm;
13024 /* Catch errors. */
13025 if (type == NULL || type == error_mark_node)
13026 return true;
13028 /* Handle types with no storage requirement. */
13029 if (TYPE_MODE (type) == VOIDmode)
13030 return false;
13032 /* Handle complex types: check the real and the imaginary part separately; each recursive call also advances ARGS_SO_FAR past one part. */
13033 if (TREE_CODE (type) == COMPLEX_TYPE)
13034 return (rs6000_parm_needs_stack (args_so_far, TREE_TYPE (type))
13035 || rs6000_parm_needs_stack (args_so_far, TREE_TYPE (type)));
13037 /* Handle transparent aggregates. */
13038 if ((TREE_CODE (type) == UNION_TYPE || TREE_CODE (type) == RECORD_TYPE)
13039 && TYPE_TRANSPARENT_AGGR (type))
13040 type = TREE_TYPE (first_field (type));
13042 /* See if this arg was passed by invisible reference. */
13043 if (pass_by_reference (get_cumulative_args (args_so_far),
13044 TYPE_MODE (type), type, true))
13045 type = build_pointer_type (type);
13047 /* Find mode as it is passed by the ABI. */
13048 unsignedp = TYPE_UNSIGNED (type);
13049 mode = promote_mode (type, TYPE_MODE (type), &unsignedp);
13051 /* If we must pass in stack, we need a stack. */
13052 if (rs6000_must_pass_in_stack (mode, type))
13053 return true;
13055 /* If there is no incoming register, we need a stack. */
13056 entry_parm = rs6000_function_arg (args_so_far, mode, type, true);
13057 if (entry_parm == NULL)
13058 return true;
13060 /* Likewise if we need to pass both in registers and on the stack. */
13061 if (GET_CODE (entry_parm) == PARALLEL
13062 && XEXP (XVECEXP (entry_parm, 0, 0), 0) == NULL_RTX)
13063 return true;
13065 /* Also true if we're partially in registers and partially not. */
13066 if (rs6000_arg_partial_bytes (args_so_far, mode, type, true) != 0)
13067 return true;
13069 /* Update info on where next arg arrives in registers. */
13070 rs6000_function_arg_advance (args_so_far, mode, type, true);
13071 return false;
13074 /* Return true if FUN has no prototype, has a variable argument
13075 list, or passes any parameter in memory. */
13077 static bool
13078 rs6000_function_parms_need_stack (tree fun, bool incoming)
13080 tree fntype, result;
13081 CUMULATIVE_ARGS args_so_far_v;
13082 cumulative_args_t args_so_far;
13084 if (!fun)
13085 /* Must be a libcall, all of which only use reg parms. */
13086 return false;
13088 fntype = fun;
13089 if (!TYPE_P (fun))
13090 fntype = TREE_TYPE (fun);
13092 /* Varargs functions need the parameter save area. */
13093 if ((!incoming && !prototype_p (fntype)) || stdarg_p (fntype))
13094 return true;
13096 INIT_CUMULATIVE_INCOMING_ARGS (args_so_far_v, fntype, NULL_RTX);
13097 args_so_far = pack_cumulative_args (&args_so_far_v);
13099 /* When incoming, we will have been passed the function decl.
13100 It is necessary to use the decl to handle K&R style functions,
13101 where TYPE_ARG_TYPES may not be available. */
13102 if (incoming)
13104 gcc_assert (DECL_P (fun));
13105 result = DECL_RESULT (fun);
13107 else
13108 result = TREE_TYPE (fntype);
13110 if (result && aggregate_value_p (result, fntype))
13112 if (!TYPE_P (result))
13113 result = TREE_TYPE (result);
13114 result = build_pointer_type (result);
13115 rs6000_parm_needs_stack (args_so_far, result);
13118 if (incoming)
13120 tree parm;
13122 for (parm = DECL_ARGUMENTS (fun);
13123 parm && parm != void_list_node;
13124 parm = TREE_CHAIN (parm))
13125 if (rs6000_parm_needs_stack (args_so_far, TREE_TYPE (parm)))
13126 return true;
13128 else
13130 function_args_iterator args_iter;
13131 tree arg_type;
13133 FOREACH_FUNCTION_ARGS (fntype, arg_type, args_iter)
13134 if (rs6000_parm_needs_stack (args_so_far, arg_type))
13135 return true;
13138 return false;
13141 /* Return the size of the REG_PARM_STACK_SPACE area for FUN. This is
13142 usually a constant depending on the ABI. However, in the ELFv2 ABI
13143 the register parameter area is optional when calling a function that
13144 has a prototype in scope, has no variable argument list, and passes
13145 all parameters in registers. */
13147 int
13148 rs6000_reg_parm_stack_space (tree fun, bool incoming)
13150 int reg_parm_stack_space;
13152 switch (DEFAULT_ABI)
13154 default:
13155 reg_parm_stack_space = 0;
13156 break;
13158 case ABI_AIX:
13159 case ABI_DARWIN:
13160 reg_parm_stack_space = TARGET_64BIT ? 64 : 32;
13161 break;
13163 case ABI_ELFv2:
13164 /* ??? Recomputing this every time is a bit expensive. Is there
13165 a place to cache this information? */
13166 if (rs6000_function_parms_need_stack (fun, incoming))
13167 reg_parm_stack_space = TARGET_64BIT ? 64 : 32;
13168 else
13169 reg_parm_stack_space = 0;
13170 break;
13173 return reg_parm_stack_space;
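/* Illustrative effect (hypothetical declarations): under ELFv2 a
   call to "int f (int, double);" with the prototype in scope
   passes everything in r3/f1 and gets reg_parm_stack_space == 0,
   while a stdarg function such as "int g (const char *, ...);"
   always reserves the full 64-byte area on 64-bit.  */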
13176 static void
13177 rs6000_move_block_from_reg (int regno, rtx x, int nregs)
13179 int i;
13180 machine_mode reg_mode = TARGET_32BIT ? SImode : DImode;
13182 if (nregs == 0)
13183 return;
13185 for (i = 0; i < nregs; i++)
13187 rtx tem = adjust_address_nv (x, reg_mode, i * GET_MODE_SIZE (reg_mode));
13188 if (reload_completed)
13190 if (! strict_memory_address_p (reg_mode, XEXP (tem, 0)))
13191 tem = NULL_RTX;
13192 else
13193 tem = simplify_gen_subreg (reg_mode, x, BLKmode,
13194 i * GET_MODE_SIZE (reg_mode));
13196 else
13197 tem = replace_equiv_address (tem, XEXP (tem, 0));
13199 gcc_assert (tem);
13201 emit_move_insn (tem, gen_rtx_REG (reg_mode, regno + i));
13205 /* Perform any actions needed for a function that is receiving a
13206 variable number of arguments.
13208 CUM is as above.
13210 MODE and TYPE are the mode and type of the current parameter.
13212 PRETEND_SIZE is a variable that should be set to the amount of stack
13213 that must be pushed by the prolog to pretend that our caller pushed
13214 it.
13216 Normally, this macro will push all remaining incoming registers on the
13217 stack and set PRETEND_SIZE to the length of the registers pushed. */
13219 static void
13220 setup_incoming_varargs (cumulative_args_t cum, machine_mode mode,
13221 tree type, int *pretend_size ATTRIBUTE_UNUSED,
13222 int no_rtl)
13224 CUMULATIVE_ARGS next_cum;
13225 int reg_size = TARGET_32BIT ? 4 : 8;
13226 rtx save_area = NULL_RTX, mem;
13227 int first_reg_offset;
13228 alias_set_type set;
13230 /* Skip the last named argument. */
13231 next_cum = *get_cumulative_args (cum);
13232 rs6000_function_arg_advance_1 (&next_cum, mode, type, true, 0);
13234 if (DEFAULT_ABI == ABI_V4)
13236 first_reg_offset = next_cum.sysv_gregno - GP_ARG_MIN_REG;
13238 if (! no_rtl)
13240 int gpr_reg_num = 0, gpr_size = 0, fpr_size = 0;
13241 HOST_WIDE_INT offset = 0;
13243 /* Try to optimize the size of the varargs save area.
13244 The ABI requires that ap.reg_save_area is doubleword
13245 aligned, but we don't need to allocate space for all
13246 the bytes, only those to which we actually will save
13247 anything. */
13248 if (cfun->va_list_gpr_size && first_reg_offset < GP_ARG_NUM_REG)
13249 gpr_reg_num = GP_ARG_NUM_REG - first_reg_offset;
13250 if (TARGET_HARD_FLOAT && TARGET_FPRS
13251 && next_cum.fregno <= FP_ARG_V4_MAX_REG
13252 && cfun->va_list_fpr_size)
13254 if (gpr_reg_num)
13255 fpr_size = (next_cum.fregno - FP_ARG_MIN_REG)
13256 * UNITS_PER_FP_WORD;
13257 if (cfun->va_list_fpr_size
13258 < FP_ARG_V4_MAX_REG + 1 - next_cum.fregno)
13259 fpr_size += cfun->va_list_fpr_size * UNITS_PER_FP_WORD;
13260 else
13261 fpr_size += (FP_ARG_V4_MAX_REG + 1 - next_cum.fregno)
13262 * UNITS_PER_FP_WORD;
13264 if (gpr_reg_num)
13266 offset = -((first_reg_offset * reg_size) & ~7);
13267 if (!fpr_size && gpr_reg_num > cfun->va_list_gpr_size)
13269 gpr_reg_num = cfun->va_list_gpr_size;
13270 if (reg_size == 4 && (first_reg_offset & 1))
13271 gpr_reg_num++;
13273 gpr_size = (gpr_reg_num * reg_size + 7) & ~7;
13275 else if (fpr_size)
13276 offset = - (int) (next_cum.fregno - FP_ARG_MIN_REG)
13277 * UNITS_PER_FP_WORD
13278 - (int) (GP_ARG_NUM_REG * reg_size);
13280 if (gpr_size + fpr_size)
13282 rtx reg_save_area
13283 = assign_stack_local (BLKmode, gpr_size + fpr_size, 64);
13284 gcc_assert (GET_CODE (reg_save_area) == MEM);
13285 reg_save_area = XEXP (reg_save_area, 0);
13286 if (GET_CODE (reg_save_area) == PLUS)
13288 gcc_assert (XEXP (reg_save_area, 0)
13289 == virtual_stack_vars_rtx);
13290 gcc_assert (GET_CODE (XEXP (reg_save_area, 1)) == CONST_INT);
13291 offset += INTVAL (XEXP (reg_save_area, 1));
13293 else
13294 gcc_assert (reg_save_area == virtual_stack_vars_rtx);
13297 cfun->machine->varargs_save_offset = offset;
13298 save_area = plus_constant (Pmode, virtual_stack_vars_rtx, offset);
13301 else
13303 first_reg_offset = next_cum.words;
13304 save_area = crtl->args.internal_arg_pointer;
13306 if (targetm.calls.must_pass_in_stack (mode, type))
13307 first_reg_offset += rs6000_arg_size (TYPE_MODE (type), type);
13310 set = get_varargs_alias_set ();
13311 if (! no_rtl && first_reg_offset < GP_ARG_NUM_REG
13312 && cfun->va_list_gpr_size)
13314 int n_gpr, nregs = GP_ARG_NUM_REG - first_reg_offset;
13316 if (va_list_gpr_counter_field)
13317 /* V4 va_list_gpr_size counts number of registers needed. */
13318 n_gpr = cfun->va_list_gpr_size;
13319 else
13320 /* char * va_list instead counts number of bytes needed. */
13321 n_gpr = (cfun->va_list_gpr_size + reg_size - 1) / reg_size;
13323 if (nregs > n_gpr)
13324 nregs = n_gpr;
13326 mem = gen_rtx_MEM (BLKmode,
13327 plus_constant (Pmode, save_area,
13328 first_reg_offset * reg_size));
13329 MEM_NOTRAP_P (mem) = 1;
13330 set_mem_alias_set (mem, set);
13331 set_mem_align (mem, BITS_PER_WORD);
13333 rs6000_move_block_from_reg (GP_ARG_MIN_REG + first_reg_offset, mem,
13334 nregs);
13337 /* Save FP registers if needed. */
13338 if (DEFAULT_ABI == ABI_V4
13339 && TARGET_HARD_FLOAT && TARGET_FPRS
13340 && ! no_rtl
13341 && next_cum.fregno <= FP_ARG_V4_MAX_REG
13342 && cfun->va_list_fpr_size)
13344 int fregno = next_cum.fregno, nregs;
13345 rtx cr1 = gen_rtx_REG (CCmode, CR1_REGNO);
13346 rtx lab = gen_label_rtx ();
13347 int off = (GP_ARG_NUM_REG * reg_size) + ((fregno - FP_ARG_MIN_REG)
13348 * UNITS_PER_FP_WORD);
13350 emit_jump_insn
13351 (gen_rtx_SET (pc_rtx,
13352 gen_rtx_IF_THEN_ELSE (VOIDmode,
13353 gen_rtx_NE (VOIDmode, cr1,
13354 const0_rtx),
13355 gen_rtx_LABEL_REF (VOIDmode, lab),
13356 pc_rtx)));
13358 for (nregs = 0;
13359 fregno <= FP_ARG_V4_MAX_REG && nregs < cfun->va_list_fpr_size;
13360 fregno++, off += UNITS_PER_FP_WORD, nregs++)
13362 mem = gen_rtx_MEM ((TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT)
13363 ? DFmode : SFmode,
13364 plus_constant (Pmode, save_area, off));
13365 MEM_NOTRAP_P (mem) = 1;
13366 set_mem_alias_set (mem, set);
13367 set_mem_align (mem, GET_MODE_ALIGNMENT (
13368 (TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT)
13369 ? DFmode : SFmode));
13370 emit_move_insn (mem, gen_rtx_REG (
13371 (TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT)
13372 ? DFmode : SFmode, fregno));
13375 emit_label (lab);
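/* Illustrative layout of the register save area addressed above
   (32-bit V4 hard float, assuming the whole area is allocated;
   the code above trims it when possible).  Byte offsets from
   reg_save_area:
      0 ..  31   r3 .. r10  (8 GPRs x 4 bytes)
     32 ..  95   f1 .. f8   (8 FPRs x 8 bytes)
   The gpr/fpr counters in the va_list walk this block; when they
   run out, va_arg continues in overflow_arg_area.  */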
13379 /* Create the va_list data type. */
13381 static tree
13382 rs6000_build_builtin_va_list (void)
13384 tree f_gpr, f_fpr, f_res, f_ovf, f_sav, record, type_decl;
13386 /* For AIX, prefer 'char *' because that's what the system
13387 header files like. */
13388 if (DEFAULT_ABI != ABI_V4)
13389 return build_pointer_type (char_type_node);
13391 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
13392 type_decl = build_decl (BUILTINS_LOCATION, TYPE_DECL,
13393 get_identifier ("__va_list_tag"), record);
13395 f_gpr = build_decl (BUILTINS_LOCATION, FIELD_DECL, get_identifier ("gpr"),
13396 unsigned_char_type_node);
13397 f_fpr = build_decl (BUILTINS_LOCATION, FIELD_DECL, get_identifier ("fpr"),
13398 unsigned_char_type_node);
13399 /* Give the two bytes of padding a name, so that -Wpadded won't warn on
13400 every user file. */
13401 f_res = build_decl (BUILTINS_LOCATION, FIELD_DECL,
13402 get_identifier ("reserved"), short_unsigned_type_node);
13403 f_ovf = build_decl (BUILTINS_LOCATION, FIELD_DECL,
13404 get_identifier ("overflow_arg_area"),
13405 ptr_type_node);
13406 f_sav = build_decl (BUILTINS_LOCATION, FIELD_DECL,
13407 get_identifier ("reg_save_area"),
13408 ptr_type_node);
13410 va_list_gpr_counter_field = f_gpr;
13411 va_list_fpr_counter_field = f_fpr;
13413 DECL_FIELD_CONTEXT (f_gpr) = record;
13414 DECL_FIELD_CONTEXT (f_fpr) = record;
13415 DECL_FIELD_CONTEXT (f_res) = record;
13416 DECL_FIELD_CONTEXT (f_ovf) = record;
13417 DECL_FIELD_CONTEXT (f_sav) = record;
13419 TYPE_STUB_DECL (record) = type_decl;
13420 TYPE_NAME (record) = type_decl;
13421 TYPE_FIELDS (record) = f_gpr;
13422 DECL_CHAIN (f_gpr) = f_fpr;
13423 DECL_CHAIN (f_fpr) = f_res;
13424 DECL_CHAIN (f_res) = f_ovf;
13425 DECL_CHAIN (f_ovf) = f_sav;
13427 layout_type (record);
13429 /* The correct type is an array type of one element. */
13430 return build_array_type (record, build_index_type (size_zero_node));
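/* The record built above corresponds roughly to this C declaration
   (an illustrative sketch, not part of the sources):

     typedef struct __va_list_tag {
       unsigned char gpr;            // next GPR to use: 0 .. 8
       unsigned char fpr;            // next FPR to use: 0 .. 8
       unsigned short reserved;      // the named padding above
       void *overflow_arg_area;      // arguments past the registers
       void *reg_save_area;          // block spilled by the prologue
     } __va_list_tag;

   and the function returns a one-element array of it, matching the
   SVR4 PowerPC ABI's va_list.  */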
13433 /* Implement va_start. */
13435 static void
13436 rs6000_va_start (tree valist, rtx nextarg)
13438 HOST_WIDE_INT words, n_gpr, n_fpr;
13439 tree f_gpr, f_fpr, f_res, f_ovf, f_sav;
13440 tree gpr, fpr, ovf, sav, t;
13442 /* Only SVR4 needs something special. */
13443 if (DEFAULT_ABI != ABI_V4)
13445 std_expand_builtin_va_start (valist, nextarg);
13446 return;
13449 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
13450 f_fpr = DECL_CHAIN (f_gpr);
13451 f_res = DECL_CHAIN (f_fpr);
13452 f_ovf = DECL_CHAIN (f_res);
13453 f_sav = DECL_CHAIN (f_ovf);
13455 valist = build_simple_mem_ref (valist);
13456 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
13457 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), unshare_expr (valist),
13458 f_fpr, NULL_TREE);
13459 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), unshare_expr (valist),
13460 f_ovf, NULL_TREE);
13461 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), unshare_expr (valist),
13462 f_sav, NULL_TREE);
13464 /* Count number of gp and fp argument registers used. */
13465 words = crtl->args.info.words;
13466 n_gpr = MIN (crtl->args.info.sysv_gregno - GP_ARG_MIN_REG,
13467 GP_ARG_NUM_REG);
13468 n_fpr = MIN (crtl->args.info.fregno - FP_ARG_MIN_REG,
13469 FP_ARG_NUM_REG);
13471 if (TARGET_DEBUG_ARG)
13472 fprintf (stderr, "va_start: words = " HOST_WIDE_INT_PRINT_DEC", n_gpr = "
13473 HOST_WIDE_INT_PRINT_DEC", n_fpr = " HOST_WIDE_INT_PRINT_DEC"\n",
13474 words, n_gpr, n_fpr);
13476 if (cfun->va_list_gpr_size)
13478 t = build2 (MODIFY_EXPR, TREE_TYPE (gpr), gpr,
13479 build_int_cst (NULL_TREE, n_gpr));
13480 TREE_SIDE_EFFECTS (t) = 1;
13481 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
13484 if (cfun->va_list_fpr_size)
13486 t = build2 (MODIFY_EXPR, TREE_TYPE (fpr), fpr,
13487 build_int_cst (NULL_TREE, n_fpr));
13488 TREE_SIDE_EFFECTS (t) = 1;
13489 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
13491 #ifdef HAVE_AS_GNU_ATTRIBUTE
13492 if (call_ABI_of_interest (cfun->decl))
13493 rs6000_passes_float = true;
13494 #endif
13497 /* Find the overflow area. */
13498 t = make_tree (TREE_TYPE (ovf), crtl->args.internal_arg_pointer);
13499 if (words != 0)
13500 t = fold_build_pointer_plus_hwi (t, words * MIN_UNITS_PER_WORD);
13501 t = build2 (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
13502 TREE_SIDE_EFFECTS (t) = 1;
13503 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
13505 /* If there were no va_arg invocations, don't set up the register
13506 save area. */
13507 if (!cfun->va_list_gpr_size
13508 && !cfun->va_list_fpr_size
13509 && n_gpr < GP_ARG_NUM_REG
13510 && n_fpr < FP_ARG_V4_MAX_REG)
13511 return;
13513 /* Find the register save area. */
13514 t = make_tree (TREE_TYPE (sav), virtual_stack_vars_rtx);
13515 if (cfun->machine->varargs_save_offset)
13516 t = fold_build_pointer_plus_hwi (t, cfun->machine->varargs_save_offset);
13517 t = build2 (MODIFY_EXPR, TREE_TYPE (sav), sav, t);
13518 TREE_SIDE_EFFECTS (t) = 1;
13519 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
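/* Illustrative outcome (hypothetical prototype): for
   "int f (int a, double b, ...)" under V4 hard float, a lands in
   r3 and b in f1, so the code above stores gpr = 1 and fpr = 1,
   points overflow_arg_area at the caller's stack argument words,
   and points reg_save_area at the block prepared in
   setup_incoming_varargs.  */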
13522 /* Implement va_arg. */
13524 static tree
13525 rs6000_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p,
13526 gimple_seq *post_p)
13528 tree f_gpr, f_fpr, f_res, f_ovf, f_sav;
13529 tree gpr, fpr, ovf, sav, reg, t, u;
13530 int size, rsize, n_reg, sav_ofs, sav_scale;
13531 tree lab_false, lab_over, addr;
13532 int align;
13533 tree ptrtype = build_pointer_type_for_mode (type, ptr_mode, true);
13534 int regalign = 0;
13535 gimple *stmt;
13537 if (pass_by_reference (NULL, TYPE_MODE (type), type, false))
13539 t = rs6000_gimplify_va_arg (valist, ptrtype, pre_p, post_p);
13540 return build_va_arg_indirect_ref (t);
13543 /* We need to deal with the fact that the darwin ppc64 ABI is defined by an
13544 earlier version of gcc, with the property that it always applied alignment
13545 adjustments to the va-args (even for zero-sized types). The cheapest way
13546 to deal with this is to replicate the effect of the part of
13547 std_gimplify_va_arg_expr that carries out the align adjust, for the
13548 relevant case.
13549 We don't need to check for pass-by-reference because of the test above.
13550 We can return a simplified answer, since we know there's no offset to add. */
13552 if (((TARGET_MACHO
13553 && rs6000_darwin64_abi)
13554 || DEFAULT_ABI == ABI_ELFv2
13555 || (DEFAULT_ABI == ABI_AIX && !rs6000_compat_align_parm))
13556 && integer_zerop (TYPE_SIZE (type)))
13558 unsigned HOST_WIDE_INT align, boundary;
13559 tree valist_tmp = get_initialized_tmp_var (valist, pre_p, NULL);
13560 align = PARM_BOUNDARY / BITS_PER_UNIT;
13561 boundary = rs6000_function_arg_boundary (TYPE_MODE (type), type);
13562 if (boundary > MAX_SUPPORTED_STACK_ALIGNMENT)
13563 boundary = MAX_SUPPORTED_STACK_ALIGNMENT;
13564 boundary /= BITS_PER_UNIT;
13565 if (boundary > align)
13567 tree t;
13568 /* This updates arg ptr by the amount that would be necessary
13569 to align the zero-sized (but not zero-alignment) item. */
13570 t = build2 (MODIFY_EXPR, TREE_TYPE (valist), valist_tmp,
13571 fold_build_pointer_plus_hwi (valist_tmp, boundary - 1));
13572 gimplify_and_add (t, pre_p);
13574 t = fold_convert (sizetype, valist_tmp);
13575 t = build2 (MODIFY_EXPR, TREE_TYPE (valist), valist_tmp,
13576 fold_convert (TREE_TYPE (valist),
13577 fold_build2 (BIT_AND_EXPR, sizetype, t,
13578 size_int (-boundary))));
13579 t = build2 (MODIFY_EXPR, TREE_TYPE (valist), valist, t);
13580 gimplify_and_add (t, pre_p);
13582 /* Since it is zero-sized there's no increment for the item itself. */
13583 valist_tmp = fold_convert (build_pointer_type (type), valist_tmp);
13584 return build_va_arg_indirect_ref (valist_tmp);
13587 if (DEFAULT_ABI != ABI_V4)
13589 if (targetm.calls.split_complex_arg && TREE_CODE (type) == COMPLEX_TYPE)
13591 tree elem_type = TREE_TYPE (type);
13592 machine_mode elem_mode = TYPE_MODE (elem_type);
13593 int elem_size = GET_MODE_SIZE (elem_mode);
13595 if (elem_size < UNITS_PER_WORD)
13597 tree real_part, imag_part;
13598 gimple_seq post = NULL;
13600 real_part = rs6000_gimplify_va_arg (valist, elem_type, pre_p,
13601 &post);
13602 /* Copy the value into a temporary, lest the formal temporary
13603 be reused out from under us. */
13604 real_part = get_initialized_tmp_var (real_part, pre_p, &post);
13605 gimple_seq_add_seq (pre_p, post);
13607 imag_part = rs6000_gimplify_va_arg (valist, elem_type, pre_p,
13608 post_p);
13610 return build2 (COMPLEX_EXPR, type, real_part, imag_part);
13614 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
13617 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
13618 f_fpr = DECL_CHAIN (f_gpr);
13619 f_res = DECL_CHAIN (f_fpr);
13620 f_ovf = DECL_CHAIN (f_res);
13621 f_sav = DECL_CHAIN (f_ovf);
13623 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
13624 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), unshare_expr (valist),
13625 f_fpr, NULL_TREE);
13626 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), unshare_expr (valist),
13627 f_ovf, NULL_TREE);
13628 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), unshare_expr (valist),
13629 f_sav, NULL_TREE);
13631 size = int_size_in_bytes (type);
13632 rsize = (size + 3) / 4;
13633 align = 1;
13635 machine_mode mode = TYPE_MODE (type);
13636 if (abi_v4_pass_in_fpr (mode))
13638 /* FP args go in FP registers, if present. */
13639 reg = fpr;
13640 n_reg = (size + 7) / 8;
13641 sav_ofs = ((TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT) ? 8 : 4) * 4;
13642 sav_scale = ((TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT) ? 8 : 4);
13643 if (mode != SFmode && mode != SDmode)
13644 align = 8;
13646 else
13648 /* Otherwise into GP registers. */
13649 reg = gpr;
13650 n_reg = rsize;
13651 sav_ofs = 0;
13652 sav_scale = 4;
13653 if (n_reg == 2)
13654 align = 8;
13657 /* Pull the value out of the saved registers.... */
13659 lab_over = NULL;
13660 addr = create_tmp_var (ptr_type_node, "addr");
13662 /* AltiVec vectors never go in registers when -mabi=altivec. */
13663 if (TARGET_ALTIVEC_ABI && ALTIVEC_VECTOR_MODE (mode))
13664 align = 16;
13665 else
13667 lab_false = create_artificial_label (input_location);
13668 lab_over = create_artificial_label (input_location);
13670 /* Long long and SPE vectors are aligned in the registers.
13671 As is any other 2-GPR item such as complex int, due to a
13672 historical mistake. */
13673 u = reg;
13674 if (n_reg == 2 && reg == gpr)
13676 regalign = 1;
13677 u = build2 (BIT_AND_EXPR, TREE_TYPE (reg), unshare_expr (reg),
13678 build_int_cst (TREE_TYPE (reg), n_reg - 1));
13679 u = build2 (POSTINCREMENT_EXPR, TREE_TYPE (reg),
13680 unshare_expr (reg), u);
13682 /* _Decimal128 is passed in even/odd fpr pairs; the stored
13683 reg number is 0 for f1, so we want to make it odd. */
13684 else if (reg == fpr && mode == TDmode)
13686 t = build2 (BIT_IOR_EXPR, TREE_TYPE (reg), unshare_expr (reg),
13687 build_int_cst (TREE_TYPE (reg), 1));
13688 u = build2 (MODIFY_EXPR, void_type_node, unshare_expr (reg), t);
13691 t = fold_convert (TREE_TYPE (reg), size_int (8 - n_reg + 1));
13692 t = build2 (GE_EXPR, boolean_type_node, u, t);
13693 u = build1 (GOTO_EXPR, void_type_node, lab_false);
13694 t = build3 (COND_EXPR, void_type_node, t, u, NULL_TREE);
13695 gimplify_and_add (t, pre_p);
13697 t = sav;
13698 if (sav_ofs)
13699 t = fold_build_pointer_plus_hwi (sav, sav_ofs);
13701 u = build2 (POSTINCREMENT_EXPR, TREE_TYPE (reg), unshare_expr (reg),
13702 build_int_cst (TREE_TYPE (reg), n_reg));
13703 u = fold_convert (sizetype, u);
13704 u = build2 (MULT_EXPR, sizetype, u, size_int (sav_scale));
13705 t = fold_build_pointer_plus (t, u);
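/* Worked example (illustrative): pulling a double under V4 hard
   float uses reg == fpr, n_reg == 1, sav_ofs == 32 and
   sav_scale == 8, so the address computed above is
   reg_save_area + 32 + old_fpr * 8, i.e. the slot where register
   f1 + old_fpr was saved; the counter is post-incremented by
   n_reg.  */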
13707 /* _Decimal32 varargs are located in the second word of the 64-bit
13708 FP register for 32-bit binaries. */
13709 if (TARGET_32BIT
13710 && TARGET_HARD_FLOAT && TARGET_FPRS
13711 && mode == SDmode)
13712 t = fold_build_pointer_plus_hwi (t, size);
13714 gimplify_assign (addr, t, pre_p);
13716 gimple_seq_add_stmt (pre_p, gimple_build_goto (lab_over));
13718 stmt = gimple_build_label (lab_false);
13719 gimple_seq_add_stmt (pre_p, stmt);
13721 if ((n_reg == 2 && !regalign) || n_reg > 2)
13723 /* Ensure that we don't find any more args in regs.
13724 Alignment has been taken care of for the special cases. */
13725 gimplify_assign (reg, build_int_cst (TREE_TYPE (reg), 8), pre_p);
13729 /* ... otherwise out of the overflow area. */
13731 /* Care for on-stack alignment if needed. */
13732 t = ovf;
13733 if (align != 1)
13735 t = fold_build_pointer_plus_hwi (t, align - 1);
13736 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
13737 build_int_cst (TREE_TYPE (t), -align));
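/* This is the standard round-up-to-alignment idiom, equivalent to
   ovf = (ovf + align - 1) & -align; for example, with align == 8
   an overflow pointer at offset 20 advances to offset 24.  */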
13739 gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue);
13741 gimplify_assign (unshare_expr (addr), t, pre_p);
13743 t = fold_build_pointer_plus_hwi (t, size);
13744 gimplify_assign (unshare_expr (ovf), t, pre_p);
13746 if (lab_over)
13748 stmt = gimple_build_label (lab_over);
13749 gimple_seq_add_stmt (pre_p, stmt);
13752 if (STRICT_ALIGNMENT
13753 && (TYPE_ALIGN (type)
13754 > (unsigned) BITS_PER_UNIT * (align < 4 ? 4 : align)))
13756 /* The value (of type complex double, for example) may not be
13757 aligned in memory in the saved registers, so copy via a
13758 temporary. (This is the same code as used for SPARC.) */
13759 tree tmp = create_tmp_var (type, "va_arg_tmp");
13760 tree dest_addr = build_fold_addr_expr (tmp);
13762 tree copy = build_call_expr (builtin_decl_implicit (BUILT_IN_MEMCPY),
13763 3, dest_addr, addr, size_int (rsize * 4));
13765 gimplify_and_add (copy, pre_p);
13766 addr = dest_addr;
13769 addr = fold_convert (ptrtype, addr);
13770 return build_va_arg_indirect_ref (addr);
13773 /* Builtins. */
13775 static void
13776 def_builtin (const char *name, tree type, enum rs6000_builtins code)
13778 tree t;
13779 unsigned classify = rs6000_builtin_info[(int)code].attr;
13780 const char *attr_string = "";
13782 gcc_assert (name != NULL);
13783 gcc_assert (IN_RANGE ((int)code, 0, (int)RS6000_BUILTIN_COUNT));
13785 if (rs6000_builtin_decls[(int)code])
13786 fatal_error (input_location,
13787 "internal error: builtin function %s already processed", name);
13789 rs6000_builtin_decls[(int)code] = t =
13790 add_builtin_function (name, type, (int)code, BUILT_IN_MD, NULL, NULL_TREE);
13792 /* Set any special attributes. */
13793 if ((classify & RS6000_BTC_CONST) != 0)
13795 /* const function, function only depends on the inputs. */
13796 TREE_READONLY (t) = 1;
13797 TREE_NOTHROW (t) = 1;
13798 attr_string = ", const";
13800 else if ((classify & RS6000_BTC_PURE) != 0)
13802 /* pure function, function can read global memory, but does not set any
13803 external state. */
13804 DECL_PURE_P (t) = 1;
13805 TREE_NOTHROW (t) = 1;
13806 attr_string = ", pure";
13808 else if ((classify & RS6000_BTC_FP) != 0)
13810 /* Function is a math function. If rounding mode is on, then treat the
13811 function as not reading global memory, but it can have arbitrary side
13812 effects. If it is off, then assume the function is a const function.
13813 This mimics the ATTR_MATHFN_FPROUNDING attribute in
13814 builtin-attribute.def that is used for the math functions. */
13815 TREE_NOTHROW (t) = 1;
13816 if (flag_rounding_math)
13818 DECL_PURE_P (t) = 1;
13819 DECL_IS_NOVOPS (t) = 1;
13820 attr_string = ", fp, pure";
13822 else
13824 TREE_READONLY (t) = 1;
13825 attr_string = ", fp, const";
13828 else if ((classify & RS6000_BTC_ATTR_MASK) != 0)
13829 gcc_unreachable ();
13831 if (TARGET_DEBUG_BUILTIN)
13832 fprintf (stderr, "rs6000_builtin, code = %4d, %s%s\n",
13833 (int)code, name, attr_string);
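/* An illustrative reading of the classification above: a builtin
   tagged RS6000_BTC_CONST becomes TREE_READONLY + TREE_NOTHROW, so
   the middle end may CSE repeated calls on the same operands; an
   RS6000_BTC_FP builtin is treated the same way only under
   -fno-rounding-math, since with -frounding-math its result
   depends on the dynamic rounding mode and it degrades to
   pure + NOVOPS.  */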
13836 /* Simple ternary operations: VECd = foo (VECa, VECb, VECc). */
13838 #undef RS6000_BUILTIN_0
13839 #undef RS6000_BUILTIN_1
13840 #undef RS6000_BUILTIN_2
13841 #undef RS6000_BUILTIN_3
13842 #undef RS6000_BUILTIN_A
13843 #undef RS6000_BUILTIN_D
13844 #undef RS6000_BUILTIN_E
13845 #undef RS6000_BUILTIN_H
13846 #undef RS6000_BUILTIN_P
13847 #undef RS6000_BUILTIN_Q
13848 #undef RS6000_BUILTIN_S
13849 #undef RS6000_BUILTIN_X
13851 #define RS6000_BUILTIN_0(ENUM, NAME, MASK, ATTR, ICODE)
13852 #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE)
13853 #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE)
13854 #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE) \
13855 { MASK, ICODE, NAME, ENUM },
13857 #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE)
13858 #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE)
13859 #define RS6000_BUILTIN_E(ENUM, NAME, MASK, ATTR, ICODE)
13860 #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE)
13861 #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE)
13862 #define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE)
13863 #define RS6000_BUILTIN_S(ENUM, NAME, MASK, ATTR, ICODE)
13864 #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)
13866 static const struct builtin_description bdesc_3arg[] =
13868 #include "rs6000-builtin.def"
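/* A sketch of the X-macro pattern used for this and each table
   below (the entry shown is illustrative, not quoted from
   rs6000-builtin.def): the .def file holds one macro invocation
   per builtin, e.g.

     RS6000_BUILTIN_3 (ALTIVEC_BUILTIN_VMADDFP,
                       "__builtin_altivec_vmaddfp", MASK, ATTR, ICODE)

   Ahead of every table exactly one of the twelve RS6000_BUILTIN_*
   macros is redefined to expand to "{ MASK, ICODE, NAME, ENUM },"
   and the other eleven to nothing, so re-including the .def file
   emits only the entries of that one arity or class.  */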
13871 /* DST operations: void foo (void *, const int, const char). */
13873 #undef RS6000_BUILTIN_0
13874 #undef RS6000_BUILTIN_1
13875 #undef RS6000_BUILTIN_2
13876 #undef RS6000_BUILTIN_3
13877 #undef RS6000_BUILTIN_A
13878 #undef RS6000_BUILTIN_D
13879 #undef RS6000_BUILTIN_E
13880 #undef RS6000_BUILTIN_H
13881 #undef RS6000_BUILTIN_P
13882 #undef RS6000_BUILTIN_Q
13883 #undef RS6000_BUILTIN_S
13884 #undef RS6000_BUILTIN_X
13886 #define RS6000_BUILTIN_0(ENUM, NAME, MASK, ATTR, ICODE)
13887 #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE)
13888 #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE)
13889 #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE)
13890 #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE)
13891 #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE) \
13892 { MASK, ICODE, NAME, ENUM },
13894 #define RS6000_BUILTIN_E(ENUM, NAME, MASK, ATTR, ICODE)
13895 #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE)
13896 #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE)
13897 #define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE)
13898 #define RS6000_BUILTIN_S(ENUM, NAME, MASK, ATTR, ICODE)
13899 #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)
13901 static const struct builtin_description bdesc_dst[] =
13903 #include "rs6000-builtin.def"
13906 /* Simple binary operations: VECc = foo (VECa, VECb). */
13908 #undef RS6000_BUILTIN_0
13909 #undef RS6000_BUILTIN_1
13910 #undef RS6000_BUILTIN_2
13911 #undef RS6000_BUILTIN_3
13912 #undef RS6000_BUILTIN_A
13913 #undef RS6000_BUILTIN_D
13914 #undef RS6000_BUILTIN_E
13915 #undef RS6000_BUILTIN_H
13916 #undef RS6000_BUILTIN_P
13917 #undef RS6000_BUILTIN_Q
13918 #undef RS6000_BUILTIN_S
13919 #undef RS6000_BUILTIN_X
13921 #define RS6000_BUILTIN_0(ENUM, NAME, MASK, ATTR, ICODE)
13922 #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE)
13923 #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE) \
13924 { MASK, ICODE, NAME, ENUM },
13926 #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE)
13927 #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE)
13928 #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE)
13929 #define RS6000_BUILTIN_E(ENUM, NAME, MASK, ATTR, ICODE)
13930 #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE)
13931 #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE)
13932 #define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE)
13933 #define RS6000_BUILTIN_S(ENUM, NAME, MASK, ATTR, ICODE)
13934 #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)
13936 static const struct builtin_description bdesc_2arg[] =
13938 #include "rs6000-builtin.def"
13941 #undef RS6000_BUILTIN_0
13942 #undef RS6000_BUILTIN_1
13943 #undef RS6000_BUILTIN_2
13944 #undef RS6000_BUILTIN_3
13945 #undef RS6000_BUILTIN_A
13946 #undef RS6000_BUILTIN_D
13947 #undef RS6000_BUILTIN_E
13948 #undef RS6000_BUILTIN_H
13949 #undef RS6000_BUILTIN_P
13950 #undef RS6000_BUILTIN_Q
13951 #undef RS6000_BUILTIN_S
13952 #undef RS6000_BUILTIN_X
13954 #define RS6000_BUILTIN_0(ENUM, NAME, MASK, ATTR, ICODE)
13955 #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE)
13956 #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE)
13957 #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE)
13958 #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE)
13959 #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE)
13960 #define RS6000_BUILTIN_E(ENUM, NAME, MASK, ATTR, ICODE)
13961 #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE)
13962 #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE) \
13963 { MASK, ICODE, NAME, ENUM },
13965 #define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE)
13966 #define RS6000_BUILTIN_S(ENUM, NAME, MASK, ATTR, ICODE)
13967 #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)
13969 /* AltiVec predicates. */
13971 static const struct builtin_description bdesc_altivec_preds[] =
13973 #include "rs6000-builtin.def"
13976 /* SPE predicates. */
13977 #undef RS6000_BUILTIN_0
13978 #undef RS6000_BUILTIN_1
13979 #undef RS6000_BUILTIN_2
13980 #undef RS6000_BUILTIN_3
13981 #undef RS6000_BUILTIN_A
13982 #undef RS6000_BUILTIN_D
13983 #undef RS6000_BUILTIN_E
13984 #undef RS6000_BUILTIN_H
13985 #undef RS6000_BUILTIN_P
13986 #undef RS6000_BUILTIN_Q
13987 #undef RS6000_BUILTIN_S
13988 #undef RS6000_BUILTIN_X
13990 #define RS6000_BUILTIN_0(ENUM, NAME, MASK, ATTR, ICODE)
13991 #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE)
13992 #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE)
13993 #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE)
13994 #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE)
13995 #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE)
13996 #define RS6000_BUILTIN_E(ENUM, NAME, MASK, ATTR, ICODE)
13997 #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE)
13998 #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE)
13999 #define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE)
14000 #define RS6000_BUILTIN_S(ENUM, NAME, MASK, ATTR, ICODE) \
14001 { MASK, ICODE, NAME, ENUM },
14003 #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)
14005 static const struct builtin_description bdesc_spe_predicates[] =
14007 #include "rs6000-builtin.def"
14010 /* SPE evsel predicates. */
14011 #undef RS6000_BUILTIN_0
14012 #undef RS6000_BUILTIN_1
14013 #undef RS6000_BUILTIN_2
14014 #undef RS6000_BUILTIN_3
14015 #undef RS6000_BUILTIN_A
14016 #undef RS6000_BUILTIN_D
14017 #undef RS6000_BUILTIN_E
14018 #undef RS6000_BUILTIN_H
14019 #undef RS6000_BUILTIN_P
14020 #undef RS6000_BUILTIN_Q
14021 #undef RS6000_BUILTIN_S
14022 #undef RS6000_BUILTIN_X
14024 #define RS6000_BUILTIN_0(ENUM, NAME, MASK, ATTR, ICODE)
14025 #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE)
14026 #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE)
14027 #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE)
14028 #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE)
14029 #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE)
14030 #define RS6000_BUILTIN_E(ENUM, NAME, MASK, ATTR, ICODE) \
14031 { MASK, ICODE, NAME, ENUM },
14033 #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE)
14034 #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE)
14035 #define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE)
14036 #define RS6000_BUILTIN_S(ENUM, NAME, MASK, ATTR, ICODE)
14037 #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)
14039 static const struct builtin_description bdesc_spe_evsel[] =
14041 #include "rs6000-builtin.def"
14044 /* PAIRED predicates. */
14045 #undef RS6000_BUILTIN_0
14046 #undef RS6000_BUILTIN_1
14047 #undef RS6000_BUILTIN_2
14048 #undef RS6000_BUILTIN_3
14049 #undef RS6000_BUILTIN_A
14050 #undef RS6000_BUILTIN_D
14051 #undef RS6000_BUILTIN_E
14052 #undef RS6000_BUILTIN_H
14053 #undef RS6000_BUILTIN_P
14054 #undef RS6000_BUILTIN_Q
14055 #undef RS6000_BUILTIN_S
14056 #undef RS6000_BUILTIN_X
14058 #define RS6000_BUILTIN_0(ENUM, NAME, MASK, ATTR, ICODE)
14059 #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE)
14060 #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE)
14061 #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE)
14062 #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE)
14063 #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE)
14064 #define RS6000_BUILTIN_E(ENUM, NAME, MASK, ATTR, ICODE)
14065 #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE)
14066 #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE)
14067 #define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE) \
14068 { MASK, ICODE, NAME, ENUM },
14070 #define RS6000_BUILTIN_S(ENUM, NAME, MASK, ATTR, ICODE)
14071 #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)
14073 static const struct builtin_description bdesc_paired_preds[] =
14075 #include "rs6000-builtin.def"
14078 /* ABS* operations. */
14080 #undef RS6000_BUILTIN_0
14081 #undef RS6000_BUILTIN_1
14082 #undef RS6000_BUILTIN_2
14083 #undef RS6000_BUILTIN_3
14084 #undef RS6000_BUILTIN_A
14085 #undef RS6000_BUILTIN_D
14086 #undef RS6000_BUILTIN_E
14087 #undef RS6000_BUILTIN_H
14088 #undef RS6000_BUILTIN_P
14089 #undef RS6000_BUILTIN_Q
14090 #undef RS6000_BUILTIN_S
14091 #undef RS6000_BUILTIN_X
14093 #define RS6000_BUILTIN_0(ENUM, NAME, MASK, ATTR, ICODE)
14094 #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE)
14095 #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE)
14096 #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE)
14097 #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE) \
14098 { MASK, ICODE, NAME, ENUM },
14100 #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE)
14101 #define RS6000_BUILTIN_E(ENUM, NAME, MASK, ATTR, ICODE)
14102 #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE)
14103 #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE)
14104 #define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE)
14105 #define RS6000_BUILTIN_S(ENUM, NAME, MASK, ATTR, ICODE)
14106 #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)
14108 static const struct builtin_description bdesc_abs[] =
14110 #include "rs6000-builtin.def"
14113 /* Simple unary operations: VECb = foo (unsigned literal) or VECb =
14114 foo (VECa). */
14116 #undef RS6000_BUILTIN_0
14117 #undef RS6000_BUILTIN_1
14118 #undef RS6000_BUILTIN_2
14119 #undef RS6000_BUILTIN_3
14120 #undef RS6000_BUILTIN_A
14121 #undef RS6000_BUILTIN_D
14122 #undef RS6000_BUILTIN_E
14123 #undef RS6000_BUILTIN_H
14124 #undef RS6000_BUILTIN_P
14125 #undef RS6000_BUILTIN_Q
14126 #undef RS6000_BUILTIN_S
14127 #undef RS6000_BUILTIN_X
14129 #define RS6000_BUILTIN_0(ENUM, NAME, MASK, ATTR, ICODE)
14130 #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE) \
14131 { MASK, ICODE, NAME, ENUM },
14133 #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE)
14134 #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE)
14135 #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE)
14136 #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE)
14137 #define RS6000_BUILTIN_E(ENUM, NAME, MASK, ATTR, ICODE)
14138 #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE)
14139 #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE)
14140 #define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE)
14141 #define RS6000_BUILTIN_S(ENUM, NAME, MASK, ATTR, ICODE)
14142 #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)
14144 static const struct builtin_description bdesc_1arg[] =
14146 #include "rs6000-builtin.def"
14149 /* Simple no-argument operations: result = __builtin_darn_32 (). */
14151 #undef RS6000_BUILTIN_0
14152 #undef RS6000_BUILTIN_1
14153 #undef RS6000_BUILTIN_2
14154 #undef RS6000_BUILTIN_3
14155 #undef RS6000_BUILTIN_A
14156 #undef RS6000_BUILTIN_D
14157 #undef RS6000_BUILTIN_E
14158 #undef RS6000_BUILTIN_H
14159 #undef RS6000_BUILTIN_P
14160 #undef RS6000_BUILTIN_Q
14161 #undef RS6000_BUILTIN_S
14162 #undef RS6000_BUILTIN_X
14164 #define RS6000_BUILTIN_0(ENUM, NAME, MASK, ATTR, ICODE) \
14165 { MASK, ICODE, NAME, ENUM },
14167 #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE)
14168 #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE)
14169 #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE)
14170 #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE)
14171 #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE)
14172 #define RS6000_BUILTIN_E(ENUM, NAME, MASK, ATTR, ICODE)
14173 #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE)
14174 #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE)
14175 #define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE)
14176 #define RS6000_BUILTIN_S(ENUM, NAME, MASK, ATTR, ICODE)
14177 #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)
14179 static const struct builtin_description bdesc_0arg[] =
14181 #include "rs6000-builtin.def"
14184 /* HTM builtins. */
14185 #undef RS6000_BUILTIN_0
14186 #undef RS6000_BUILTIN_1
14187 #undef RS6000_BUILTIN_2
14188 #undef RS6000_BUILTIN_3
14189 #undef RS6000_BUILTIN_A
14190 #undef RS6000_BUILTIN_D
14191 #undef RS6000_BUILTIN_E
14192 #undef RS6000_BUILTIN_H
14193 #undef RS6000_BUILTIN_P
14194 #undef RS6000_BUILTIN_Q
14195 #undef RS6000_BUILTIN_S
14196 #undef RS6000_BUILTIN_X
14198 #define RS6000_BUILTIN_0(ENUM, NAME, MASK, ATTR, ICODE)
14199 #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE)
14200 #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE)
14201 #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE)
14202 #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE)
14203 #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE)
14204 #define RS6000_BUILTIN_E(ENUM, NAME, MASK, ATTR, ICODE)
14205 #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE) \
14206 { MASK, ICODE, NAME, ENUM },
14208 #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE)
14209 #define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE)
14210 #define RS6000_BUILTIN_S(ENUM, NAME, MASK, ATTR, ICODE)
14211 #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)
14213 static const struct builtin_description bdesc_htm[] =
14215 #include "rs6000-builtin.def"
14218 #undef RS6000_BUILTIN_0
14219 #undef RS6000_BUILTIN_1
14220 #undef RS6000_BUILTIN_2
14221 #undef RS6000_BUILTIN_3
14222 #undef RS6000_BUILTIN_A
14223 #undef RS6000_BUILTIN_D
14224 #undef RS6000_BUILTIN_E
14225 #undef RS6000_BUILTIN_H
14226 #undef RS6000_BUILTIN_P
14227 #undef RS6000_BUILTIN_Q
14228 #undef RS6000_BUILTIN_S
14230 /* Return true if a builtin function is overloaded. */
14231 bool
14232 rs6000_overloaded_builtin_p (enum rs6000_builtins fncode)
14234 return (rs6000_builtin_info[(int)fncode].attr & RS6000_BTC_OVERLOADED) != 0;
14237 const char *
14238 rs6000_overloaded_builtin_name (enum rs6000_builtins fncode)
14240 return rs6000_builtin_info[(int)fncode].name;
14243 /* Expand an expression EXP that calls a builtin without arguments. */
14244 static rtx
14245 rs6000_expand_zeroop_builtin (enum insn_code icode, rtx target)
14247 rtx pat;
14248 machine_mode tmode = insn_data[icode].operand[0].mode;
14250 if (icode == CODE_FOR_nothing)
14251 /* Builtin not supported on this processor. */
14252 return 0;
14254 if (target == 0
14255 || GET_MODE (target) != tmode
14256 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
14257 target = gen_reg_rtx (tmode);
14259 pat = GEN_FCN (icode) (target);
14260 if (! pat)
14261 return 0;
14262 emit_insn (pat);
14264 return target;
14268 static rtx
14269 rs6000_expand_mtfsf_builtin (enum insn_code icode, tree exp)
14271 rtx pat;
14272 tree arg0 = CALL_EXPR_ARG (exp, 0);
14273 tree arg1 = CALL_EXPR_ARG (exp, 1);
14274 rtx op0 = expand_normal (arg0);
14275 rtx op1 = expand_normal (arg1);
14276 machine_mode mode0 = insn_data[icode].operand[0].mode;
14277 machine_mode mode1 = insn_data[icode].operand[1].mode;
14279 if (icode == CODE_FOR_nothing)
14280 /* Builtin not supported on this processor. */
14281 return 0;
14283 /* If we got invalid arguments bail out before generating bad rtl. */
14284 if (arg0 == error_mark_node || arg1 == error_mark_node)
14285 return const0_rtx;
14287 if (GET_CODE (op0) != CONST_INT
14288 || INTVAL (op0) > 255
14289 || INTVAL (op0) < 0)
14291 error ("argument 1 must be an 8-bit field value");
14292 return const0_rtx;
14295 if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
14296 op0 = copy_to_mode_reg (mode0, op0);
14298 if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
14299 op1 = copy_to_mode_reg (mode1, op1);
14301 pat = GEN_FCN (icode) (op0, op1);
14302 if (! pat)
14303 return const0_rtx;
14304 emit_insn (pat);
14306 return NULL_RTX;
14309 static rtx
14310 rs6000_expand_unop_builtin (enum insn_code icode, tree exp, rtx target)
14312 rtx pat;
14313 tree arg0 = CALL_EXPR_ARG (exp, 0);
14314 rtx op0 = expand_normal (arg0);
14315 machine_mode tmode = insn_data[icode].operand[0].mode;
14316 machine_mode mode0 = insn_data[icode].operand[1].mode;
14318 if (icode == CODE_FOR_nothing)
14319 /* Builtin not supported on this processor. */
14320 return 0;
14322 /* If we got invalid arguments bail out before generating bad rtl. */
14323 if (arg0 == error_mark_node)
14324 return const0_rtx;
14326 if (icode == CODE_FOR_altivec_vspltisb
14327 || icode == CODE_FOR_altivec_vspltish
14328 || icode == CODE_FOR_altivec_vspltisw
14329 || icode == CODE_FOR_spe_evsplatfi
14330 || icode == CODE_FOR_spe_evsplati)
14332 /* Only allow 5-bit *signed* literals. */
14333 if (GET_CODE (op0) != CONST_INT
14334 || INTVAL (op0) > 15
14335 || INTVAL (op0) < -16)
14337 error ("argument 1 must be a 5-bit signed literal");
14338 return const0_rtx;
14342 if (target == 0
14343 || GET_MODE (target) != tmode
14344 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
14345 target = gen_reg_rtx (tmode);
14347 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
14348 op0 = copy_to_mode_reg (mode0, op0);
14350 pat = GEN_FCN (icode) (target, op0);
14351 if (! pat)
14352 return 0;
14353 emit_insn (pat);
14355 return target;
14358 static rtx
14359 altivec_expand_abs_builtin (enum insn_code icode, tree exp, rtx target)
14361 rtx pat, scratch1, scratch2;
14362 tree arg0 = CALL_EXPR_ARG (exp, 0);
14363 rtx op0 = expand_normal (arg0);
14364 machine_mode tmode = insn_data[icode].operand[0].mode;
14365 machine_mode mode0 = insn_data[icode].operand[1].mode;
14367 /* If we have invalid arguments, bail out before generating bad rtl. */
14368 if (arg0 == error_mark_node)
14369 return const0_rtx;
14371 if (target == 0
14372 || GET_MODE (target) != tmode
14373 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
14374 target = gen_reg_rtx (tmode);
14376 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
14377 op0 = copy_to_mode_reg (mode0, op0);
14379 scratch1 = gen_reg_rtx (mode0);
14380 scratch2 = gen_reg_rtx (mode0);
14382 pat = GEN_FCN (icode) (target, op0, scratch1, scratch2);
14383 if (! pat)
14384 return 0;
14385 emit_insn (pat);
14387 return target;
14390 static rtx
14391 rs6000_expand_binop_builtin (enum insn_code icode, tree exp, rtx target)
14393 rtx pat;
14394 tree arg0 = CALL_EXPR_ARG (exp, 0);
14395 tree arg1 = CALL_EXPR_ARG (exp, 1);
14396 rtx op0 = expand_normal (arg0);
14397 rtx op1 = expand_normal (arg1);
14398 machine_mode tmode = insn_data[icode].operand[0].mode;
14399 machine_mode mode0 = insn_data[icode].operand[1].mode;
14400 machine_mode mode1 = insn_data[icode].operand[2].mode;
14402 if (icode == CODE_FOR_nothing)
14403 /* Builtin not supported on this processor. */
14404 return 0;
14406 /* If we got invalid arguments bail out before generating bad rtl. */
14407 if (arg0 == error_mark_node || arg1 == error_mark_node)
14408 return const0_rtx;
14410 if (icode == CODE_FOR_altivec_vcfux
14411 || icode == CODE_FOR_altivec_vcfsx
14412 || icode == CODE_FOR_altivec_vctsxs
14413 || icode == CODE_FOR_altivec_vctuxs
14414 || icode == CODE_FOR_altivec_vspltb
14415 || icode == CODE_FOR_altivec_vsplth
14416 || icode == CODE_FOR_altivec_vspltw
14417 || icode == CODE_FOR_spe_evaddiw
14418 || icode == CODE_FOR_spe_evldd
14419 || icode == CODE_FOR_spe_evldh
14420 || icode == CODE_FOR_spe_evldw
14421 || icode == CODE_FOR_spe_evlhhesplat
14422 || icode == CODE_FOR_spe_evlhhossplat
14423 || icode == CODE_FOR_spe_evlhhousplat
14424 || icode == CODE_FOR_spe_evlwhe
14425 || icode == CODE_FOR_spe_evlwhos
14426 || icode == CODE_FOR_spe_evlwhou
14427 || icode == CODE_FOR_spe_evlwhsplat
14428 || icode == CODE_FOR_spe_evlwwsplat
14429 || icode == CODE_FOR_spe_evrlwi
14430 || icode == CODE_FOR_spe_evslwi
14431 || icode == CODE_FOR_spe_evsrwis
14432 || icode == CODE_FOR_spe_evsubifw
14433 || icode == CODE_FOR_spe_evsrwiu)
14435 /* Only allow 5-bit unsigned literals. */
14436 STRIP_NOPS (arg1);
14437 if (TREE_CODE (arg1) != INTEGER_CST
14438 || TREE_INT_CST_LOW (arg1) & ~0x1f)
14440 error ("argument 2 must be a 5-bit unsigned literal");
14441 return const0_rtx;
14444 else if (icode == CODE_FOR_dfptstsfi_eq_dd
14445 || icode == CODE_FOR_dfptstsfi_lt_dd
14446 || icode == CODE_FOR_dfptstsfi_gt_dd
14447 || icode == CODE_FOR_dfptstsfi_unordered_dd
14448 || icode == CODE_FOR_dfptstsfi_eq_td
14449 || icode == CODE_FOR_dfptstsfi_lt_td
14450 || icode == CODE_FOR_dfptstsfi_gt_td
14451 || icode == CODE_FOR_dfptstsfi_unordered_td)
14453 /* Only allow 6-bit unsigned literals. */
14454 STRIP_NOPS (arg0);
14455 if (TREE_CODE (arg0) != INTEGER_CST
14456 || !IN_RANGE (TREE_INT_CST_LOW (arg0), 0, 63))
14458 error ("argument 1 must be a 6-bit unsigned literal");
14459 return CONST0_RTX (tmode);
14462 else if (icode == CODE_FOR_xststdcdp
14463 || icode == CODE_FOR_xststdcsp
14464 || icode == CODE_FOR_xvtstdcdp
14465 || icode == CODE_FOR_xvtstdcsp)
14467 /* Only allow 7-bit unsigned literals. */
14468 STRIP_NOPS (arg1);
14469 if (TREE_CODE (arg1) != INTEGER_CST
14470 || !IN_RANGE (TREE_INT_CST_LOW (arg1), 0, 127))
14472 error ("argument 2 must be a 7-bit unsigned literal");
14473 return CONST0_RTX (tmode);
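  /* The three checks above mirror the width of each instruction's
     immediate field (5, 6, or 7 bits): an out-of-range literal must be
     rejected here at expand time rather than silently truncated when
     the instruction is encoded.  */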
14477 if (target == 0
14478 || GET_MODE (target) != tmode
14479 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
14480 target = gen_reg_rtx (tmode);
14482 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
14483 op0 = copy_to_mode_reg (mode0, op0);
14484 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
14485 op1 = copy_to_mode_reg (mode1, op1);
14487 pat = GEN_FCN (icode) (target, op0, op1);
14488 if (! pat)
14489 return 0;
14490 emit_insn (pat);
14492 return target;
14495 static rtx
14496 altivec_expand_predicate_builtin (enum insn_code icode, tree exp, rtx target)
14498 rtx pat, scratch;
14499 tree cr6_form = CALL_EXPR_ARG (exp, 0);
14500 tree arg0 = CALL_EXPR_ARG (exp, 1);
14501 tree arg1 = CALL_EXPR_ARG (exp, 2);
14502 rtx op0 = expand_normal (arg0);
14503 rtx op1 = expand_normal (arg1);
14504 machine_mode tmode = SImode;
14505 machine_mode mode0 = insn_data[icode].operand[1].mode;
14506 machine_mode mode1 = insn_data[icode].operand[2].mode;
14507 int cr6_form_int;
14509 if (TREE_CODE (cr6_form) != INTEGER_CST)
14511 error ("argument 1 of __builtin_altivec_predicate must be a constant");
14512 return const0_rtx;
14514 else
14515 cr6_form_int = TREE_INT_CST_LOW (cr6_form);
14517 gcc_assert (mode0 == mode1);
14519 /* If we have invalid arguments, bail out before generating bad rtl. */
14520 if (arg0 == error_mark_node || arg1 == error_mark_node)
14521 return const0_rtx;
14523 if (target == 0
14524 || GET_MODE (target) != tmode
14525 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
14526 target = gen_reg_rtx (tmode);
14528 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
14529 op0 = copy_to_mode_reg (mode0, op0);
14530 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
14531 op1 = copy_to_mode_reg (mode1, op1);
14533 /* Note that for many of the relevant operations (e.g. cmpne or
14534 cmpeq) with float or double operands, it makes more sense for the
14536      mode of the allocated scratch register to be a vector of
14537      integers.  But the choice to copy the mode of operand 0 was made
14537 long ago and there are no plans to change it. */
14538 scratch = gen_reg_rtx (mode0);
14540 pat = GEN_FCN (icode) (scratch, op0, op1);
14541 if (! pat)
14542 return 0;
14543 emit_insn (pat);
14545 /* The vec_any* and vec_all* predicates use the same opcodes for two
14546 different operations, but the bits in CR6 will be different
14547 depending on what information we want. So we have to play tricks
14548 with CR6 to get the right bits out.
14550 If you think this is disgusting, look at the specs for the
14551 AltiVec predicates. */
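  /* A sketch of the mapping, judging from the generators used below:
     forms 0 and 1 test the CR6 "all elements false" bit directly and
     inverted; forms 2 and 3 do the same for the "all elements true"
     bit.  */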
14553 switch (cr6_form_int)
14555 case 0:
14556 emit_insn (gen_cr6_test_for_zero (target));
14557 break;
14558 case 1:
14559 emit_insn (gen_cr6_test_for_zero_reverse (target));
14560 break;
14561 case 2:
14562 emit_insn (gen_cr6_test_for_lt (target));
14563 break;
14564 case 3:
14565 emit_insn (gen_cr6_test_for_lt_reverse (target));
14566 break;
14567 default:
14568 error ("argument 1 of __builtin_altivec_predicate is out of range");
14569 break;
14572 return target;
14575 static rtx
14576 paired_expand_lv_builtin (enum insn_code icode, tree exp, rtx target)
14578 rtx pat, addr;
14579 tree arg0 = CALL_EXPR_ARG (exp, 0);
14580 tree arg1 = CALL_EXPR_ARG (exp, 1);
14581 machine_mode tmode = insn_data[icode].operand[0].mode;
14582 machine_mode mode0 = Pmode;
14583 machine_mode mode1 = Pmode;
14584 rtx op0 = expand_normal (arg0);
14585 rtx op1 = expand_normal (arg1);
14587 if (icode == CODE_FOR_nothing)
14588 /* Builtin not supported on this processor. */
14589 return 0;
14591   /* If we got invalid arguments, bail out before generating bad rtl.  */
14592 if (arg0 == error_mark_node || arg1 == error_mark_node)
14593 return const0_rtx;
14595 if (target == 0
14596 || GET_MODE (target) != tmode
14597 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
14598 target = gen_reg_rtx (tmode);
14600 op1 = copy_to_mode_reg (mode1, op1);
14602 if (op0 == const0_rtx)
14604 addr = gen_rtx_MEM (tmode, op1);
14606 else
14608 op0 = copy_to_mode_reg (mode0, op0);
14609 addr = gen_rtx_MEM (tmode, gen_rtx_PLUS (Pmode, op0, op1));
14612 pat = GEN_FCN (icode) (target, addr);
14614 if (! pat)
14615 return 0;
14616 emit_insn (pat);
14618 return target;
14621 /* Return a constant vector for use as a little-endian permute control vector
14622 to reverse the order of elements of the given vector mode. */
14623 static rtx
14624 swap_selector_for_mode (machine_mode mode)
14626 /* These are little endian vectors, so their elements are reversed
14627 from what you would normally expect for a permute control vector. */
14628 unsigned int swap2[16] = {7,6,5,4,3,2,1,0,15,14,13,12,11,10,9,8};
14629 unsigned int swap4[16] = {3,2,1,0,7,6,5,4,11,10,9,8,15,14,13,12};
14630 unsigned int swap8[16] = {1,0,3,2,5,4,7,6,9,8,11,10,13,12,15,14};
14631 unsigned int swap16[16] = {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15};
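  /* For example, the selector built from swap4 for V4SImode reads
     {12,13,14,15, 8,9,10,11, 4,5,6,7, 0,1,2,3} in vperm's big-endian
     byte numbering, so byte 0 of the result comes from byte 12 of the
     source: word 3 moves to word 0 and the element order is reversed.  */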
14632 unsigned int *swaparray, i;
14633 rtx perm[16];
14635 switch (mode)
14637 case V2DFmode:
14638 case V2DImode:
14639 swaparray = swap2;
14640 break;
14641 case V4SFmode:
14642 case V4SImode:
14643 swaparray = swap4;
14644 break;
14645 case V8HImode:
14646 swaparray = swap8;
14647 break;
14648 case V16QImode:
14649 swaparray = swap16;
14650 break;
14651 default:
14652 gcc_unreachable ();
14655 for (i = 0; i < 16; ++i)
14656 perm[i] = GEN_INT (swaparray[i]);
14658 return force_reg (V16QImode, gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, perm)));
14661 /* Generate code for an "lvxl", or "lve*x" built-in for a little endian target
14662 with -maltivec=be specified. Issue the load followed by an element-
14663 reversing permute. */
14664 void
14665 altivec_expand_lvx_be (rtx op0, rtx op1, machine_mode mode, unsigned unspec)
14667 rtx tmp = gen_reg_rtx (mode);
14668 rtx load = gen_rtx_SET (tmp, op1);
14669 rtx lvx = gen_rtx_UNSPEC (mode, gen_rtvec (1, const0_rtx), unspec);
14670 rtx par = gen_rtx_PARALLEL (mode, gen_rtvec (2, load, lvx));
14671 rtx sel = swap_selector_for_mode (mode);
14672 rtx vperm = gen_rtx_UNSPEC (mode, gen_rtvec (3, tmp, tmp, sel), UNSPEC_VPERM);
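  /* The UNSPEC carried alongside the SET in PAR is what distinguishes
     this load from a plain vector move, so the RTL matches the dedicated
     lvxl/lve*x patterns in altivec.md rather than a generic load.  */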
14674 gcc_assert (REG_P (op0));
14675 emit_insn (par);
14676 emit_insn (gen_rtx_SET (op0, vperm));
14679 /* Generate code for a "stvxl" built-in for a little endian target with
14680 -maltivec=be specified. Issue the store preceded by an element-reversing
14681 permute. */
14682 void
14683 altivec_expand_stvx_be (rtx op0, rtx op1, machine_mode mode, unsigned unspec)
14685 rtx tmp = gen_reg_rtx (mode);
14686 rtx store = gen_rtx_SET (op0, tmp);
14687 rtx stvx = gen_rtx_UNSPEC (mode, gen_rtvec (1, const0_rtx), unspec);
14688 rtx par = gen_rtx_PARALLEL (mode, gen_rtvec (2, store, stvx));
14689 rtx sel = swap_selector_for_mode (mode);
14690 rtx vperm;
14692 gcc_assert (REG_P (op1));
14693 vperm = gen_rtx_UNSPEC (mode, gen_rtvec (3, op1, op1, sel), UNSPEC_VPERM);
14694 emit_insn (gen_rtx_SET (tmp, vperm));
14695 emit_insn (par);
14698 /* Generate code for a "stve*x" built-in for a little endian target with -maltivec=be
14699 specified. Issue the store preceded by an element-reversing permute. */
14700 void
14701 altivec_expand_stvex_be (rtx op0, rtx op1, machine_mode mode, unsigned unspec)
14703 machine_mode inner_mode = GET_MODE_INNER (mode);
14704 rtx tmp = gen_reg_rtx (mode);
14705 rtx stvx = gen_rtx_UNSPEC (inner_mode, gen_rtvec (1, tmp), unspec);
14706 rtx sel = swap_selector_for_mode (mode);
14707 rtx vperm;
14709 gcc_assert (REG_P (op1));
14710 vperm = gen_rtx_UNSPEC (mode, gen_rtvec (3, op1, op1, sel), UNSPEC_VPERM);
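  /* The permute runs before the store so that the element the user
     addressed under big-endian numbering sits where the little-endian
     stve*x instruction will pick it up.  */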
14711 emit_insn (gen_rtx_SET (tmp, vperm));
14712 emit_insn (gen_rtx_SET (op0, stvx));
14715 static rtx
14716 altivec_expand_lv_builtin (enum insn_code icode, tree exp, rtx target, bool blk)
14718 rtx pat, addr;
14719 tree arg0 = CALL_EXPR_ARG (exp, 0);
14720 tree arg1 = CALL_EXPR_ARG (exp, 1);
14721 machine_mode tmode = insn_data[icode].operand[0].mode;
14722 machine_mode mode0 = Pmode;
14723 machine_mode mode1 = Pmode;
14724 rtx op0 = expand_normal (arg0);
14725 rtx op1 = expand_normal (arg1);
14727 if (icode == CODE_FOR_nothing)
14728 /* Builtin not supported on this processor. */
14729 return 0;
14731   /* If we got invalid arguments, bail out before generating bad rtl.  */
14732 if (arg0 == error_mark_node || arg1 == error_mark_node)
14733 return const0_rtx;
14735 if (target == 0
14736 || GET_MODE (target) != tmode
14737 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
14738 target = gen_reg_rtx (tmode);
14740 op1 = copy_to_mode_reg (mode1, op1);
14742   /* For LVX, express the RTL accurately by ANDing the address with -16,
14743      since the hardware ignores the low four address bits.  LVXL and LVE*X
14744      expand to use UNSPECs to hide their special behavior, so the raw address is fine.  */
14745 if (icode == CODE_FOR_altivec_lvx_v2df_2op
14746 || icode == CODE_FOR_altivec_lvx_v2di_2op
14747 || icode == CODE_FOR_altivec_lvx_v4sf_2op
14748 || icode == CODE_FOR_altivec_lvx_v4si_2op
14749 || icode == CODE_FOR_altivec_lvx_v8hi_2op
14750 || icode == CODE_FOR_altivec_lvx_v16qi_2op)
14752 rtx rawaddr;
14753 if (op0 == const0_rtx)
14754 rawaddr = op1;
14755 else
14757 op0 = copy_to_mode_reg (mode0, op0);
14758 rawaddr = gen_rtx_PLUS (Pmode, op1, op0);
14760 addr = gen_rtx_AND (Pmode, rawaddr, gen_rtx_CONST_INT (Pmode, -16));
14761 addr = gen_rtx_MEM (blk ? BLKmode : tmode, addr);
14763 /* For -maltivec=be, emit the load and follow it up with a
14764 permute to swap the elements. */
14765 if (!BYTES_BIG_ENDIAN && VECTOR_ELT_ORDER_BIG)
14767 rtx temp = gen_reg_rtx (tmode);
14768 emit_insn (gen_rtx_SET (temp, addr));
14770 rtx sel = swap_selector_for_mode (tmode);
14771 rtx vperm = gen_rtx_UNSPEC (tmode, gen_rtvec (3, temp, temp, sel),
14772 UNSPEC_VPERM);
14773 emit_insn (gen_rtx_SET (target, vperm));
14775 else
14776 emit_insn (gen_rtx_SET (target, addr));
14778 else
14780 if (op0 == const0_rtx)
14781 addr = gen_rtx_MEM (blk ? BLKmode : tmode, op1);
14782 else
14784 op0 = copy_to_mode_reg (mode0, op0);
14785 addr = gen_rtx_MEM (blk ? BLKmode : tmode,
14786 gen_rtx_PLUS (Pmode, op1, op0));
14789 pat = GEN_FCN (icode) (target, addr);
14790 if (! pat)
14791 return 0;
14792 emit_insn (pat);
14795 return target;
14798 static rtx
14799 spe_expand_stv_builtin (enum insn_code icode, tree exp)
14801 tree arg0 = CALL_EXPR_ARG (exp, 0);
14802 tree arg1 = CALL_EXPR_ARG (exp, 1);
14803 tree arg2 = CALL_EXPR_ARG (exp, 2);
14804 rtx op0 = expand_normal (arg0);
14805 rtx op1 = expand_normal (arg1);
14806 rtx op2 = expand_normal (arg2);
14807 rtx pat;
14808 machine_mode mode0 = insn_data[icode].operand[0].mode;
14809 machine_mode mode1 = insn_data[icode].operand[1].mode;
14810 machine_mode mode2 = insn_data[icode].operand[2].mode;
14812 /* Invalid arguments. Bail before doing anything stoopid! */
14813 if (arg0 == error_mark_node
14814 || arg1 == error_mark_node
14815 || arg2 == error_mark_node)
14816 return const0_rtx;
14818 if (! (*insn_data[icode].operand[2].predicate) (op0, mode2))
14819 op0 = copy_to_mode_reg (mode2, op0);
14820 if (! (*insn_data[icode].operand[0].predicate) (op1, mode0))
14821 op1 = copy_to_mode_reg (mode0, op1);
14822 if (! (*insn_data[icode].operand[1].predicate) (op2, mode1))
14823 op2 = copy_to_mode_reg (mode1, op2);
14825 pat = GEN_FCN (icode) (op1, op2, op0);
14826 if (pat)
14827 emit_insn (pat);
14828 return NULL_RTX;
14831 static rtx
14832 paired_expand_stv_builtin (enum insn_code icode, tree exp)
14834 tree arg0 = CALL_EXPR_ARG (exp, 0);
14835 tree arg1 = CALL_EXPR_ARG (exp, 1);
14836 tree arg2 = CALL_EXPR_ARG (exp, 2);
14837 rtx op0 = expand_normal (arg0);
14838 rtx op1 = expand_normal (arg1);
14839 rtx op2 = expand_normal (arg2);
14840 rtx pat, addr;
14841 machine_mode tmode = insn_data[icode].operand[0].mode;
14842 machine_mode mode1 = Pmode;
14843 machine_mode mode2 = Pmode;
14845 /* Invalid arguments. Bail before doing anything stoopid! */
14846 if (arg0 == error_mark_node
14847 || arg1 == error_mark_node
14848 || arg2 == error_mark_node)
14849 return const0_rtx;
14851 if (! (*insn_data[icode].operand[1].predicate) (op0, tmode))
14852 op0 = copy_to_mode_reg (tmode, op0);
14854 op2 = copy_to_mode_reg (mode2, op2);
14856 if (op1 == const0_rtx)
14858 addr = gen_rtx_MEM (tmode, op2);
14860 else
14862 op1 = copy_to_mode_reg (mode1, op1);
14863 addr = gen_rtx_MEM (tmode, gen_rtx_PLUS (Pmode, op1, op2));
14866 pat = GEN_FCN (icode) (addr, op0);
14867 if (pat)
14868 emit_insn (pat);
14869 return NULL_RTX;
14872 static rtx
14873 altivec_expand_stxvl_builtin (enum insn_code icode, tree exp)
14875 rtx pat;
14876 tree arg0 = CALL_EXPR_ARG (exp, 0);
14877 tree arg1 = CALL_EXPR_ARG (exp, 1);
14878 tree arg2 = CALL_EXPR_ARG (exp, 2);
14879 rtx op0 = expand_normal (arg0);
14880 rtx op1 = expand_normal (arg1);
14881 rtx op2 = expand_normal (arg2);
14882 machine_mode mode0 = insn_data[icode].operand[0].mode;
14883 machine_mode mode1 = insn_data[icode].operand[1].mode;
14884 machine_mode mode2 = insn_data[icode].operand[2].mode;
14886 if (icode == CODE_FOR_nothing)
14887 /* Builtin not supported on this processor. */
14888 return NULL_RTX;
14890   /* If we got invalid arguments, bail out before generating bad rtl.  */
14891 if (arg0 == error_mark_node
14892 || arg1 == error_mark_node
14893 || arg2 == error_mark_node)
14894 return NULL_RTX;
14896 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
14897 op0 = copy_to_mode_reg (mode0, op0);
14898 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
14899 op1 = copy_to_mode_reg (mode1, op1);
14900 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
14901 op2 = copy_to_mode_reg (mode2, op2);
14903 pat = GEN_FCN (icode) (op0, op1, op2);
14904 if (pat)
14905 emit_insn (pat);
14907 return NULL_RTX;
14910 static rtx
14911 altivec_expand_stv_builtin (enum insn_code icode, tree exp)
14913 tree arg0 = CALL_EXPR_ARG (exp, 0);
14914 tree arg1 = CALL_EXPR_ARG (exp, 1);
14915 tree arg2 = CALL_EXPR_ARG (exp, 2);
14916 rtx op0 = expand_normal (arg0);
14917 rtx op1 = expand_normal (arg1);
14918 rtx op2 = expand_normal (arg2);
14919 rtx pat, addr, rawaddr;
14920 machine_mode tmode = insn_data[icode].operand[0].mode;
14921 machine_mode smode = insn_data[icode].operand[1].mode;
14922 machine_mode mode1 = Pmode;
14923 machine_mode mode2 = Pmode;
14925 /* Invalid arguments. Bail before doing anything stoopid! */
14926 if (arg0 == error_mark_node
14927 || arg1 == error_mark_node
14928 || arg2 == error_mark_node)
14929 return const0_rtx;
14931 op2 = copy_to_mode_reg (mode2, op2);
14933   /* For STVX, express the RTL accurately by ANDing the address with -16,
14934      since the hardware ignores the low four address bits.  STVXL and STVE*X
14935      expand to use UNSPECs to hide their special behavior, so the raw address is fine.  */
14936 if (icode == CODE_FOR_altivec_stvx_v2df_2op
14937 || icode == CODE_FOR_altivec_stvx_v2di_2op
14938 || icode == CODE_FOR_altivec_stvx_v4sf_2op
14939 || icode == CODE_FOR_altivec_stvx_v4si_2op
14940 || icode == CODE_FOR_altivec_stvx_v8hi_2op
14941 || icode == CODE_FOR_altivec_stvx_v16qi_2op)
14943 if (op1 == const0_rtx)
14944 rawaddr = op2;
14945 else
14947 op1 = copy_to_mode_reg (mode1, op1);
14948 rawaddr = gen_rtx_PLUS (Pmode, op2, op1);
14951 addr = gen_rtx_AND (Pmode, rawaddr, gen_rtx_CONST_INT (Pmode, -16));
14952 addr = gen_rtx_MEM (tmode, addr);
14954 op0 = copy_to_mode_reg (tmode, op0);
14956 /* For -maltivec=be, emit a permute to swap the elements, followed
14957 by the store. */
14958 if (!BYTES_BIG_ENDIAN && VECTOR_ELT_ORDER_BIG)
14960 rtx temp = gen_reg_rtx (tmode);
14961 rtx sel = swap_selector_for_mode (tmode);
14962 rtx vperm = gen_rtx_UNSPEC (tmode, gen_rtvec (3, op0, op0, sel),
14963 UNSPEC_VPERM);
14964 emit_insn (gen_rtx_SET (temp, vperm));
14965 emit_insn (gen_rtx_SET (addr, temp));
14967 else
14968 emit_insn (gen_rtx_SET (addr, op0));
14970 else
14972 if (! (*insn_data[icode].operand[1].predicate) (op0, smode))
14973 op0 = copy_to_mode_reg (smode, op0);
14975 if (op1 == const0_rtx)
14976 addr = gen_rtx_MEM (tmode, op2);
14977 else
14979 op1 = copy_to_mode_reg (mode1, op1);
14980 addr = gen_rtx_MEM (tmode, gen_rtx_PLUS (Pmode, op2, op1));
14983 pat = GEN_FCN (icode) (addr, op0);
14984 if (pat)
14985 emit_insn (pat);
14988 return NULL_RTX;
14991 /* Return the appropriate SPR number associated with the given builtin. */
14992 static inline HOST_WIDE_INT
14993 htm_spr_num (enum rs6000_builtins code)
14995 if (code == HTM_BUILTIN_GET_TFHAR
14996 || code == HTM_BUILTIN_SET_TFHAR)
14997 return TFHAR_SPR;
14998 else if (code == HTM_BUILTIN_GET_TFIAR
14999 || code == HTM_BUILTIN_SET_TFIAR)
15000 return TFIAR_SPR;
15001 else if (code == HTM_BUILTIN_GET_TEXASR
15002 || code == HTM_BUILTIN_SET_TEXASR)
15003 return TEXASR_SPR;
15004 gcc_assert (code == HTM_BUILTIN_GET_TEXASRU
15005 || code == HTM_BUILTIN_SET_TEXASRU);
15006 return TEXASRU_SPR;
15009 /* Return the appropriate SPR regno associated with the given builtin. */
15010 static inline HOST_WIDE_INT
15011 htm_spr_regno (enum rs6000_builtins code)
15013 if (code == HTM_BUILTIN_GET_TFHAR
15014 || code == HTM_BUILTIN_SET_TFHAR)
15015 return TFHAR_REGNO;
15016 else if (code == HTM_BUILTIN_GET_TFIAR
15017 || code == HTM_BUILTIN_SET_TFIAR)
15018 return TFIAR_REGNO;
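  /* TEXASRU is the upper half of TEXASR, which is why both map onto the
     same register number below even though htm_spr_num gives them
     distinct SPR encodings.  */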
15019 gcc_assert (code == HTM_BUILTIN_GET_TEXASR
15020 || code == HTM_BUILTIN_SET_TEXASR
15021 || code == HTM_BUILTIN_GET_TEXASRU
15022 || code == HTM_BUILTIN_SET_TEXASRU);
15023 return TEXASR_REGNO;
15026 /* Return the correct ICODE value depending on whether we are
15027 setting or reading the HTM SPRs. */
15028 static inline enum insn_code
15029 rs6000_htm_spr_icode (bool nonvoid)
15031 if (nonvoid)
15032 return (TARGET_POWERPC64) ? CODE_FOR_htm_mfspr_di : CODE_FOR_htm_mfspr_si;
15033 else
15034 return (TARGET_POWERPC64) ? CODE_FOR_htm_mtspr_di : CODE_FOR_htm_mtspr_si;
15037 /* Expand the HTM builtin in EXP and store the result in TARGET.
15038 Store true in *EXPANDEDP if we found a builtin to expand. */
15039 static rtx
15040 htm_expand_builtin (tree exp, rtx target, bool * expandedp)
15042 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
15043 bool nonvoid = TREE_TYPE (TREE_TYPE (fndecl)) != void_type_node;
15044 enum rs6000_builtins fcode = (enum rs6000_builtins) DECL_FUNCTION_CODE (fndecl);
15045 const struct builtin_description *d;
15046 size_t i;
15048 *expandedp = true;
15050 if (!TARGET_POWERPC64
15051 && (fcode == HTM_BUILTIN_TABORTDC
15052 || fcode == HTM_BUILTIN_TABORTDCI))
15054 size_t uns_fcode = (size_t)fcode;
15055 const char *name = rs6000_builtin_info[uns_fcode].name;
15056 error ("builtin %s is only valid in 64-bit mode", name);
15057 return const0_rtx;
15060 /* Expand the HTM builtins. */
15061 d = bdesc_htm;
15062 for (i = 0; i < ARRAY_SIZE (bdesc_htm); i++, d++)
15063 if (d->code == fcode)
15065 rtx op[MAX_HTM_OPERANDS], pat;
15066 int nopnds = 0;
15067 tree arg;
15068 call_expr_arg_iterator iter;
15069 unsigned attr = rs6000_builtin_info[fcode].attr;
15070 enum insn_code icode = d->icode;
15071 const struct insn_operand_data *insn_op;
15072 bool uses_spr = (attr & RS6000_BTC_SPR);
15073 rtx cr = NULL_RTX;
15075 if (uses_spr)
15076 icode = rs6000_htm_spr_icode (nonvoid);
15077 insn_op = &insn_data[icode].operand[0];
15079 if (nonvoid)
15081 machine_mode tmode = (uses_spr) ? insn_op->mode : SImode;
15082 if (!target
15083 || GET_MODE (target) != tmode
15084 || (uses_spr && !(*insn_op->predicate) (target, tmode)))
15085 target = gen_reg_rtx (tmode);
15086 if (uses_spr)
15087 op[nopnds++] = target;
15090 FOR_EACH_CALL_EXPR_ARG (arg, iter, exp)
15092 if (arg == error_mark_node || nopnds >= MAX_HTM_OPERANDS)
15093 return const0_rtx;
15095 insn_op = &insn_data[icode].operand[nopnds];
15097 op[nopnds] = expand_normal (arg);
15099 if (!(*insn_op->predicate) (op[nopnds], insn_op->mode))
15101 if (!strcmp (insn_op->constraint, "n"))
15103 int arg_num = (nonvoid) ? nopnds : nopnds + 1;
15104 if (!CONST_INT_P (op[nopnds]))
15105 error ("argument %d must be an unsigned literal", arg_num);
15106 else
15107 error ("argument %d is an unsigned literal that is "
15108 "out of range", arg_num);
15109 return const0_rtx;
15111 op[nopnds] = copy_to_mode_reg (insn_op->mode, op[nopnds]);
15114 nopnds++;
15117 /* Handle the builtins for extended mnemonics. These accept
15118 no arguments, but map to builtins that take arguments. */
15119 switch (fcode)
15121 case HTM_BUILTIN_TENDALL: /* Alias for: tend. 1 */
15122 case HTM_BUILTIN_TRESUME: /* Alias for: tsr. 1 */
15123 op[nopnds++] = GEN_INT (1);
15124 if (flag_checking)
15125 attr |= RS6000_BTC_UNARY;
15126 break;
15127 case HTM_BUILTIN_TSUSPEND: /* Alias for: tsr. 0 */
15128 op[nopnds++] = GEN_INT (0);
15129 if (flag_checking)
15130 attr |= RS6000_BTC_UNARY;
15131 break;
15132 default:
15133 break;
15136 /* If this builtin accesses SPRs, then pass in the appropriate
15137 SPR number and SPR regno as the last two operands. */
15138 if (uses_spr)
15140 machine_mode mode = (TARGET_POWERPC64) ? DImode : SImode;
15141 op[nopnds++] = gen_rtx_CONST_INT (mode, htm_spr_num (fcode));
15142 op[nopnds++] = gen_rtx_REG (mode, htm_spr_regno (fcode));
15144 /* If this builtin accesses a CR, then pass in a scratch
15145 CR as the last operand. */
15146 else if (attr & RS6000_BTC_CR)
15147 	  cr = gen_reg_rtx (CCmode);
15148 op[nopnds++] = cr;
15151 if (flag_checking)
15153 int expected_nopnds = 0;
15154 if ((attr & RS6000_BTC_TYPE_MASK) == RS6000_BTC_UNARY)
15155 expected_nopnds = 1;
15156 else if ((attr & RS6000_BTC_TYPE_MASK) == RS6000_BTC_BINARY)
15157 expected_nopnds = 2;
15158 else if ((attr & RS6000_BTC_TYPE_MASK) == RS6000_BTC_TERNARY)
15159 expected_nopnds = 3;
15160 if (!(attr & RS6000_BTC_VOID))
15161 expected_nopnds += 1;
15162 if (uses_spr)
15163 expected_nopnds += 2;
15165 gcc_assert (nopnds == expected_nopnds
15166 && nopnds <= MAX_HTM_OPERANDS);
15169 switch (nopnds)
15171 case 1:
15172 pat = GEN_FCN (icode) (op[0]);
15173 break;
15174 case 2:
15175 pat = GEN_FCN (icode) (op[0], op[1]);
15176 break;
15177 case 3:
15178 pat = GEN_FCN (icode) (op[0], op[1], op[2]);
15179 break;
15180 case 4:
15181 pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3]);
15182 break;
15183 default:
15184 gcc_unreachable ();
15186 if (!pat)
15187 return NULL_RTX;
15188 emit_insn (pat);
15190 if (attr & RS6000_BTC_CR)
15192 if (fcode == HTM_BUILTIN_TBEGIN)
15194 /* Emit code to set TARGET to true or false depending on
15195 	       whether the tbegin. instruction succeeded or failed
15196 to start a transaction. We do this by placing the 1's
15197 complement of CR's EQ bit into TARGET. */
15198 rtx scratch = gen_reg_rtx (SImode);
15199 emit_insn (gen_rtx_SET (scratch,
15200 gen_rtx_EQ (SImode, cr,
15201 const0_rtx)));
15202 emit_insn (gen_rtx_SET (target,
15203 gen_rtx_XOR (SImode, scratch,
15204 GEN_INT (1))));
15206 else
15208 /* Emit code to copy the 4-bit condition register field
15209 CR into the least significant end of register TARGET. */
15210 rtx scratch1 = gen_reg_rtx (SImode);
15211 rtx scratch2 = gen_reg_rtx (SImode);
15212 rtx subreg = simplify_gen_subreg (CCmode, scratch1, SImode, 0);
15213 emit_insn (gen_movcc (subreg, cr));
15214 emit_insn (gen_lshrsi3 (scratch2, scratch1, GEN_INT (28)));
15215 emit_insn (gen_andsi3 (target, scratch2, GEN_INT (0xf)));
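	    /* In other words: copy the condition register into a GPR,
	       shift the 4-bit field down from the top, and mask away the
	       rest; the shift count of 28 assumes the copy leaves the
	       field in bits 31:28.  */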
15219 if (nonvoid)
15220 return target;
15221 return const0_rtx;
15224 *expandedp = false;
15225 return NULL_RTX;
15228 /* Expand the CPU builtin in FCODE and store the result in TARGET. */
15230 static rtx
15231 cpu_expand_builtin (enum rs6000_builtins fcode, tree exp ATTRIBUTE_UNUSED,
15232 rtx target)
15234 /* __builtin_cpu_init () is a nop, so expand to nothing. */
15235 if (fcode == RS6000_BUILTIN_CPU_INIT)
15236 return const0_rtx;
15238 if (target == 0 || GET_MODE (target) != SImode)
15239 target = gen_reg_rtx (SImode);
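  /* When glibc provides the AT_HWCAP words and the platform id in the
     thread control block, __builtin_cpu_is and __builtin_cpu_supports
     expand to a thread-pointer-relative load plus a compare, with no
     run-time function call.  */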
15241 #ifdef TARGET_LIBC_PROVIDES_HWCAP_IN_TCB
15242 tree arg = TREE_OPERAND (CALL_EXPR_ARG (exp, 0), 0);
15243 if (TREE_CODE (arg) != STRING_CST)
15245 error ("builtin %s only accepts a string argument",
15246 rs6000_builtin_info[(size_t) fcode].name);
15247 return const0_rtx;
15250 if (fcode == RS6000_BUILTIN_CPU_IS)
15252 const char *cpu = TREE_STRING_POINTER (arg);
15253 rtx cpuid = NULL_RTX;
15254 for (size_t i = 0; i < ARRAY_SIZE (cpu_is_info); i++)
15255 if (strcmp (cpu, cpu_is_info[i].cpu) == 0)
15257 /* The CPUID value in the TCB is offset by _DL_FIRST_PLATFORM. */
15258 cpuid = GEN_INT (cpu_is_info[i].cpuid + _DL_FIRST_PLATFORM);
15259 break;
15261 if (cpuid == NULL_RTX)
15263 /* Invalid CPU argument. */
15264 error ("cpu %s is an invalid argument to builtin %s",
15265 cpu, rs6000_builtin_info[(size_t) fcode].name);
15266 return const0_rtx;
15269 rtx platform = gen_reg_rtx (SImode);
15270 rtx tcbmem = gen_const_mem (SImode,
15271 gen_rtx_PLUS (Pmode,
15272 gen_rtx_REG (Pmode, TLS_REGNUM),
15273 GEN_INT (TCB_PLATFORM_OFFSET)));
15274 emit_move_insn (platform, tcbmem);
15275 emit_insn (gen_eqsi3 (target, platform, cpuid));
15277 else if (fcode == RS6000_BUILTIN_CPU_SUPPORTS)
15279 const char *hwcap = TREE_STRING_POINTER (arg);
15280 rtx mask = NULL_RTX;
15281 int hwcap_offset;
15282 for (size_t i = 0; i < ARRAY_SIZE (cpu_supports_info); i++)
15283 if (strcmp (hwcap, cpu_supports_info[i].hwcap) == 0)
15285 mask = GEN_INT (cpu_supports_info[i].mask);
15286 hwcap_offset = TCB_HWCAP_OFFSET (cpu_supports_info[i].id);
15287 break;
15289 if (mask == NULL_RTX)
15291 /* Invalid HWCAP argument. */
15292 error ("hwcap %s is an invalid argument to builtin %s",
15293 hwcap, rs6000_builtin_info[(size_t) fcode].name);
15294 return const0_rtx;
15297 rtx tcb_hwcap = gen_reg_rtx (SImode);
15298 rtx tcbmem = gen_const_mem (SImode,
15299 gen_rtx_PLUS (Pmode,
15300 gen_rtx_REG (Pmode, TLS_REGNUM),
15301 GEN_INT (hwcap_offset)));
15302 emit_move_insn (tcb_hwcap, tcbmem);
15303 rtx scratch1 = gen_reg_rtx (SImode);
15304 emit_insn (gen_rtx_SET (scratch1, gen_rtx_AND (SImode, tcb_hwcap, mask)));
15305 rtx scratch2 = gen_reg_rtx (SImode);
15306 emit_insn (gen_eqsi3 (scratch2, scratch1, const0_rtx));
15307 emit_insn (gen_rtx_SET (target, gen_rtx_XOR (SImode, scratch2, const1_rtx)));
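      /* The sequence just emitted computes
	   target = ((tcb_hwcap & mask) == 0) ^ 1,
	 i.e. 1 iff any requested HWCAP bit is set.  */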
15310 /* Record that we have expanded a CPU builtin, so that we can later
15311 emit a reference to the special symbol exported by LIBC to ensure we
15312 do not link against an old LIBC that doesn't support this feature. */
15313 cpu_builtin_p = true;
15315 #else
15316 /* For old LIBCs, always return FALSE. */
15317 emit_move_insn (target, GEN_INT (0));
15318 #endif /* TARGET_LIBC_PROVIDES_HWCAP_IN_TCB */
15320 return target;
15323 static rtx
15324 rs6000_expand_ternop_builtin (enum insn_code icode, tree exp, rtx target)
15326 rtx pat;
15327 tree arg0 = CALL_EXPR_ARG (exp, 0);
15328 tree arg1 = CALL_EXPR_ARG (exp, 1);
15329 tree arg2 = CALL_EXPR_ARG (exp, 2);
15330 rtx op0 = expand_normal (arg0);
15331 rtx op1 = expand_normal (arg1);
15332 rtx op2 = expand_normal (arg2);
15333 machine_mode tmode = insn_data[icode].operand[0].mode;
15334 machine_mode mode0 = insn_data[icode].operand[1].mode;
15335 machine_mode mode1 = insn_data[icode].operand[2].mode;
15336 machine_mode mode2 = insn_data[icode].operand[3].mode;
15338 if (icode == CODE_FOR_nothing)
15339 /* Builtin not supported on this processor. */
15340 return 0;
15342   /* If we got invalid arguments, bail out before generating bad rtl.  */
15343 if (arg0 == error_mark_node
15344 || arg1 == error_mark_node
15345 || arg2 == error_mark_node)
15346 return const0_rtx;
15348 /* Check and prepare argument depending on the instruction code.
15350 Note that a switch statement instead of the sequence of tests
15351 would be incorrect as many of the CODE_FOR values could be
15352      CODE_FOR_nothing, and that would yield multiple case labels
15353      with identical values, which is ill-formed.  We would never
15354      reach here at runtime in that case anyway.  */
15355 if (icode == CODE_FOR_altivec_vsldoi_v4sf
15356 || icode == CODE_FOR_altivec_vsldoi_v2df
15357 || icode == CODE_FOR_altivec_vsldoi_v4si
15358 || icode == CODE_FOR_altivec_vsldoi_v8hi
15359 || icode == CODE_FOR_altivec_vsldoi_v16qi)
15361 /* Only allow 4-bit unsigned literals. */
15362 STRIP_NOPS (arg2);
15363 if (TREE_CODE (arg2) != INTEGER_CST
15364 || TREE_INT_CST_LOW (arg2) & ~0xf)
15366 error ("argument 3 must be a 4-bit unsigned literal");
15367 return const0_rtx;
15370 else if (icode == CODE_FOR_vsx_xxpermdi_v2df
15371 || icode == CODE_FOR_vsx_xxpermdi_v2di
15372 || icode == CODE_FOR_vsx_xxsldwi_v16qi
15373 || icode == CODE_FOR_vsx_xxsldwi_v8hi
15374 || icode == CODE_FOR_vsx_xxsldwi_v4si
15375 || icode == CODE_FOR_vsx_xxsldwi_v4sf
15376 || icode == CODE_FOR_vsx_xxsldwi_v2di
15377 || icode == CODE_FOR_vsx_xxsldwi_v2df)
15379 /* Only allow 2-bit unsigned literals. */
15380 STRIP_NOPS (arg2);
15381 if (TREE_CODE (arg2) != INTEGER_CST
15382 || TREE_INT_CST_LOW (arg2) & ~0x3)
15384 error ("argument 3 must be a 2-bit unsigned literal");
15385 return const0_rtx;
15388 else if (icode == CODE_FOR_vsx_set_v2df
15389 || icode == CODE_FOR_vsx_set_v2di
15390 || icode == CODE_FOR_bcdadd
15391 || icode == CODE_FOR_bcdadd_lt
15392 || icode == CODE_FOR_bcdadd_eq
15393 || icode == CODE_FOR_bcdadd_gt
15394 || icode == CODE_FOR_bcdsub
15395 || icode == CODE_FOR_bcdsub_lt
15396 || icode == CODE_FOR_bcdsub_eq
15397 || icode == CODE_FOR_bcdsub_gt)
15399 /* Only allow 1-bit unsigned literals. */
15400 STRIP_NOPS (arg2);
15401 if (TREE_CODE (arg2) != INTEGER_CST
15402 || TREE_INT_CST_LOW (arg2) & ~0x1)
15404 error ("argument 3 must be a 1-bit unsigned literal");
15405 return const0_rtx;
15408 else if (icode == CODE_FOR_dfp_ddedpd_dd
15409 || icode == CODE_FOR_dfp_ddedpd_td)
15411 /* Only allow 2-bit unsigned literals where the value is 0 or 2. */
15412 STRIP_NOPS (arg0);
15413 if (TREE_CODE (arg0) != INTEGER_CST
15414 	  || TREE_INT_CST_LOW (arg0) & ~0x3)
15416 error ("argument 1 must be 0 or 2");
15417 return const0_rtx;
15420 else if (icode == CODE_FOR_dfp_denbcd_dd
15421 || icode == CODE_FOR_dfp_denbcd_td)
15423 /* Only allow 1-bit unsigned literals. */
15424 STRIP_NOPS (arg0);
15425 if (TREE_CODE (arg0) != INTEGER_CST
15426 || TREE_INT_CST_LOW (arg0) & ~0x1)
15428 error ("argument 1 must be a 1-bit unsigned literal");
15429 return const0_rtx;
15432 else if (icode == CODE_FOR_dfp_dscli_dd
15433 || icode == CODE_FOR_dfp_dscli_td
15434 || icode == CODE_FOR_dfp_dscri_dd
15435 || icode == CODE_FOR_dfp_dscri_td)
15437 /* Only allow 6-bit unsigned literals. */
15438 STRIP_NOPS (arg1);
15439 if (TREE_CODE (arg1) != INTEGER_CST
15440 || TREE_INT_CST_LOW (arg1) & ~0x3f)
15442 error ("argument 2 must be a 6-bit unsigned literal");
15443 return const0_rtx;
15446 else if (icode == CODE_FOR_crypto_vshasigmaw
15447 || icode == CODE_FOR_crypto_vshasigmad)
15449 /* Check whether the 2nd and 3rd arguments are integer constants and in
15450 range and prepare arguments. */
15451 STRIP_NOPS (arg1);
15452 if (TREE_CODE (arg1) != INTEGER_CST || wi::geu_p (arg1, 2))
15454 error ("argument 2 must be 0 or 1");
15455 return const0_rtx;
15458 STRIP_NOPS (arg2);
15459       if (TREE_CODE (arg2) != INTEGER_CST || wi::geu_p (arg2, 16))
15461 error ("argument 3 must be in the range 0..15");
15462 return const0_rtx;
15466 if (target == 0
15467 || GET_MODE (target) != tmode
15468 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
15469 target = gen_reg_rtx (tmode);
15471 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
15472 op0 = copy_to_mode_reg (mode0, op0);
15473 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
15474 op1 = copy_to_mode_reg (mode1, op1);
15475 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
15476 op2 = copy_to_mode_reg (mode2, op2);
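  /* The paired-float select pattern appears to compare its condition
     input against an explicit operand, hence the extra CONST0_RTX
     (SFmode) argument below.  */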
15478 if (TARGET_PAIRED_FLOAT && icode == CODE_FOR_selv2sf4)
15479 pat = GEN_FCN (icode) (target, op0, op1, op2, CONST0_RTX (SFmode));
15480 else
15481 pat = GEN_FCN (icode) (target, op0, op1, op2);
15482 if (! pat)
15483 return 0;
15484 emit_insn (pat);
15486 return target;
15489 /* Expand the lvx builtins. */
15490 static rtx
15491 altivec_expand_ld_builtin (tree exp, rtx target, bool *expandedp)
15493 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
15494 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
15495 tree arg0;
15496 machine_mode tmode, mode0;
15497 rtx pat, op0;
15498 enum insn_code icode;
15500 switch (fcode)
15502 case ALTIVEC_BUILTIN_LD_INTERNAL_16qi:
15503 icode = CODE_FOR_vector_altivec_load_v16qi;
15504 break;
15505 case ALTIVEC_BUILTIN_LD_INTERNAL_8hi:
15506 icode = CODE_FOR_vector_altivec_load_v8hi;
15507 break;
15508 case ALTIVEC_BUILTIN_LD_INTERNAL_4si:
15509 icode = CODE_FOR_vector_altivec_load_v4si;
15510 break;
15511 case ALTIVEC_BUILTIN_LD_INTERNAL_4sf:
15512 icode = CODE_FOR_vector_altivec_load_v4sf;
15513 break;
15514 case ALTIVEC_BUILTIN_LD_INTERNAL_2df:
15515 icode = CODE_FOR_vector_altivec_load_v2df;
15516 break;
15517 case ALTIVEC_BUILTIN_LD_INTERNAL_2di:
15518 icode = CODE_FOR_vector_altivec_load_v2di;
15519 break;
15520 case ALTIVEC_BUILTIN_LD_INTERNAL_1ti:
15521 icode = CODE_FOR_vector_altivec_load_v1ti;
15522 break;
15523 default:
15524 *expandedp = false;
15525 return NULL_RTX;
15528 *expandedp = true;
15530 arg0 = CALL_EXPR_ARG (exp, 0);
15531 op0 = expand_normal (arg0);
15532 tmode = insn_data[icode].operand[0].mode;
15533 mode0 = insn_data[icode].operand[1].mode;
15535 if (target == 0
15536 || GET_MODE (target) != tmode
15537 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
15538 target = gen_reg_rtx (tmode);
15540 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
15541 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
15543 pat = GEN_FCN (icode) (target, op0);
15544 if (! pat)
15545 return 0;
15546 emit_insn (pat);
15547 return target;
15550 /* Expand the stvx builtins. */
15551 static rtx
15552 altivec_expand_st_builtin (tree exp, rtx target ATTRIBUTE_UNUSED,
15553 bool *expandedp)
15555 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
15556 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
15557 tree arg0, arg1;
15558 machine_mode mode0, mode1;
15559 rtx pat, op0, op1;
15560 enum insn_code icode;
15562 switch (fcode)
15564 case ALTIVEC_BUILTIN_ST_INTERNAL_16qi:
15565 icode = CODE_FOR_vector_altivec_store_v16qi;
15566 break;
15567 case ALTIVEC_BUILTIN_ST_INTERNAL_8hi:
15568 icode = CODE_FOR_vector_altivec_store_v8hi;
15569 break;
15570 case ALTIVEC_BUILTIN_ST_INTERNAL_4si:
15571 icode = CODE_FOR_vector_altivec_store_v4si;
15572 break;
15573 case ALTIVEC_BUILTIN_ST_INTERNAL_4sf:
15574 icode = CODE_FOR_vector_altivec_store_v4sf;
15575 break;
15576 case ALTIVEC_BUILTIN_ST_INTERNAL_2df:
15577 icode = CODE_FOR_vector_altivec_store_v2df;
15578 break;
15579 case ALTIVEC_BUILTIN_ST_INTERNAL_2di:
15580 icode = CODE_FOR_vector_altivec_store_v2di;
15581 break;
15582 case ALTIVEC_BUILTIN_ST_INTERNAL_1ti:
15583 icode = CODE_FOR_vector_altivec_store_v1ti;
15584 break;
15585 default:
15586 *expandedp = false;
15587 return NULL_RTX;
15590 arg0 = CALL_EXPR_ARG (exp, 0);
15591 arg1 = CALL_EXPR_ARG (exp, 1);
15592 op0 = expand_normal (arg0);
15593 op1 = expand_normal (arg1);
15594 mode0 = insn_data[icode].operand[0].mode;
15595 mode1 = insn_data[icode].operand[1].mode;
15597 if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
15598 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
15599 if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
15600 op1 = copy_to_mode_reg (mode1, op1);
15602 pat = GEN_FCN (icode) (op0, op1);
15603 if (pat)
15604 emit_insn (pat);
15606 *expandedp = true;
15607 return NULL_RTX;
15610 /* Expand the dst builtins. */
15611 static rtx
15612 altivec_expand_dst_builtin (tree exp, rtx target ATTRIBUTE_UNUSED,
15613 bool *expandedp)
15615 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
15616 enum rs6000_builtins fcode = (enum rs6000_builtins) DECL_FUNCTION_CODE (fndecl);
15617 tree arg0, arg1, arg2;
15618 machine_mode mode0, mode1;
15619 rtx pat, op0, op1, op2;
15620 const struct builtin_description *d;
15621 size_t i;
15623 *expandedp = false;
15625 /* Handle DST variants. */
15626 d = bdesc_dst;
15627 for (i = 0; i < ARRAY_SIZE (bdesc_dst); i++, d++)
15628 if (d->code == fcode)
15630 arg0 = CALL_EXPR_ARG (exp, 0);
15631 arg1 = CALL_EXPR_ARG (exp, 1);
15632 arg2 = CALL_EXPR_ARG (exp, 2);
15633 op0 = expand_normal (arg0);
15634 op1 = expand_normal (arg1);
15635 op2 = expand_normal (arg2);
15636 mode0 = insn_data[d->icode].operand[0].mode;
15637 mode1 = insn_data[d->icode].operand[1].mode;
15639 /* Invalid arguments, bail out before generating bad rtl. */
15640 if (arg0 == error_mark_node
15641 || arg1 == error_mark_node
15642 || arg2 == error_mark_node)
15643 return const0_rtx;
15645 *expandedp = true;
15646 STRIP_NOPS (arg2);
15647 if (TREE_CODE (arg2) != INTEGER_CST
15648 || TREE_INT_CST_LOW (arg2) & ~0x3)
15650 error ("argument to %qs must be a 2-bit unsigned literal", d->name);
15651 return const0_rtx;
15654 if (! (*insn_data[d->icode].operand[0].predicate) (op0, mode0))
15655 op0 = copy_to_mode_reg (Pmode, op0);
15656 if (! (*insn_data[d->icode].operand[1].predicate) (op1, mode1))
15657 op1 = copy_to_mode_reg (mode1, op1);
15659 pat = GEN_FCN (d->icode) (op0, op1, op2);
15660 if (pat != 0)
15661 emit_insn (pat);
15663 return NULL_RTX;
15666 return NULL_RTX;
15669 /* Expand vec_init builtin. */
15670 static rtx
15671 altivec_expand_vec_init_builtin (tree type, tree exp, rtx target)
15673 machine_mode tmode = TYPE_MODE (type);
15674 machine_mode inner_mode = GET_MODE_INNER (tmode);
15675 int i, n_elt = GET_MODE_NUNITS (tmode);
15677 gcc_assert (VECTOR_MODE_P (tmode));
15678 gcc_assert (n_elt == call_expr_nargs (exp));
15680 if (!target || !register_operand (target, tmode))
15681 target = gen_reg_rtx (tmode);
15683   /* If we have a vector composed of a single element, such as V1TImode, do
15684 the initialization directly. */
15685 if (n_elt == 1 && GET_MODE_SIZE (tmode) == GET_MODE_SIZE (inner_mode))
15687 rtx x = expand_normal (CALL_EXPR_ARG (exp, 0));
15688 emit_move_insn (target, gen_lowpart (tmode, x));
15690 else
15692 rtvec v = rtvec_alloc (n_elt);
15694 for (i = 0; i < n_elt; ++i)
15696 rtx x = expand_normal (CALL_EXPR_ARG (exp, i));
15697 RTVEC_ELT (v, i) = gen_lowpart (inner_mode, x);
15700 rs6000_expand_vector_init (target, gen_rtx_PARALLEL (tmode, v));
15703 return target;
15706 /* Return the integer constant in ARG. Constrain it to be in the range
15707 of the subparts of VEC_TYPE; issue an error if not. */
15709 static int
15710 get_element_number (tree vec_type, tree arg)
15712 unsigned HOST_WIDE_INT elt, max = TYPE_VECTOR_SUBPARTS (vec_type) - 1;
15714 if (!tree_fits_uhwi_p (arg)
15715 || (elt = tree_to_uhwi (arg), elt > max))
15717 error ("selector must be an integer constant in the range 0..%wi", max);
15718 return 0;
15721 return elt;
15724 /* Expand vec_set builtin. */
15725 static rtx
15726 altivec_expand_vec_set_builtin (tree exp)
15728 machine_mode tmode, mode1;
15729 tree arg0, arg1, arg2;
15730 int elt;
15731 rtx op0, op1;
15733 arg0 = CALL_EXPR_ARG (exp, 0);
15734 arg1 = CALL_EXPR_ARG (exp, 1);
15735 arg2 = CALL_EXPR_ARG (exp, 2);
15737 tmode = TYPE_MODE (TREE_TYPE (arg0));
15738 mode1 = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
15739 gcc_assert (VECTOR_MODE_P (tmode));
15741 op0 = expand_expr (arg0, NULL_RTX, tmode, EXPAND_NORMAL);
15742 op1 = expand_expr (arg1, NULL_RTX, mode1, EXPAND_NORMAL);
15743 elt = get_element_number (TREE_TYPE (arg0), arg2);
15745 if (GET_MODE (op1) != mode1 && GET_MODE (op1) != VOIDmode)
15746 op1 = convert_modes (mode1, GET_MODE (op1), op1, true);
15748 op0 = force_reg (tmode, op0);
15749 op1 = force_reg (mode1, op1);
15751 rs6000_expand_vector_set (op0, op1, elt);
15753 return op0;
15756 /* Expand vec_ext builtin. */
15757 static rtx
15758 altivec_expand_vec_ext_builtin (tree exp, rtx target)
15760 machine_mode tmode, mode0;
15761 tree arg0, arg1;
15762 rtx op0;
15763 rtx op1;
15765 arg0 = CALL_EXPR_ARG (exp, 0);
15766 arg1 = CALL_EXPR_ARG (exp, 1);
15768 op0 = expand_normal (arg0);
15769 op1 = expand_normal (arg1);
15771 /* Call get_element_number to validate arg1 if it is a constant. */
15772 if (TREE_CODE (arg1) == INTEGER_CST)
15773 (void) get_element_number (TREE_TYPE (arg0), arg1);
15775 tmode = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
15776 mode0 = TYPE_MODE (TREE_TYPE (arg0));
15777 gcc_assert (VECTOR_MODE_P (mode0));
15779 op0 = force_reg (mode0, op0);
15781 if (optimize || !target || !register_operand (target, tmode))
15782 target = gen_reg_rtx (tmode);
15784 rs6000_expand_vector_extract (target, op0, op1);
15786 return target;
15789 /* Expand the builtin in EXP and store the result in TARGET. Store
15790 true in *EXPANDEDP if we found a builtin to expand. */
15791 static rtx
15792 altivec_expand_builtin (tree exp, rtx target, bool *expandedp)
15794 const struct builtin_description *d;
15795 size_t i;
15796 enum insn_code icode;
15797 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
15798 tree arg0, arg1, arg2;
15799 rtx op0, pat;
15800 machine_mode tmode, mode0;
15801 enum rs6000_builtins fcode
15802 = (enum rs6000_builtins) DECL_FUNCTION_CODE (fndecl);
15804 if (rs6000_overloaded_builtin_p (fcode))
15806 *expandedp = true;
15807 error ("unresolved overload for Altivec builtin %qF", fndecl);
15809 /* Given it is invalid, just generate a normal call. */
15810 return expand_call (exp, target, false);
15813 target = altivec_expand_ld_builtin (exp, target, expandedp);
15814 if (*expandedp)
15815 return target;
15817 target = altivec_expand_st_builtin (exp, target, expandedp);
15818 if (*expandedp)
15819 return target;
15821 target = altivec_expand_dst_builtin (exp, target, expandedp);
15822 if (*expandedp)
15823 return target;
15825 *expandedp = true;
15827 switch (fcode)
15829 case ALTIVEC_BUILTIN_STVX_V2DF:
15830 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvx_v2df_2op, exp);
15831 case ALTIVEC_BUILTIN_STVX_V2DI:
15832 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvx_v2di_2op, exp);
15833 case ALTIVEC_BUILTIN_STVX_V4SF:
15834 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvx_v4sf_2op, exp);
15835 case ALTIVEC_BUILTIN_STVX:
15836 case ALTIVEC_BUILTIN_STVX_V4SI:
15837 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvx_v4si_2op, exp);
15838 case ALTIVEC_BUILTIN_STVX_V8HI:
15839 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvx_v8hi_2op, exp);
15840 case ALTIVEC_BUILTIN_STVX_V16QI:
15841 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvx_v16qi_2op, exp);
15842 case ALTIVEC_BUILTIN_STVEBX:
15843 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvebx, exp);
15844 case ALTIVEC_BUILTIN_STVEHX:
15845 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvehx, exp);
15846 case ALTIVEC_BUILTIN_STVEWX:
15847 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvewx, exp);
15848 case ALTIVEC_BUILTIN_STVXL_V2DF:
15849 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvxl_v2df, exp);
15850 case ALTIVEC_BUILTIN_STVXL_V2DI:
15851 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvxl_v2di, exp);
15852 case ALTIVEC_BUILTIN_STVXL_V4SF:
15853 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvxl_v4sf, exp);
15854 case ALTIVEC_BUILTIN_STVXL:
15855 case ALTIVEC_BUILTIN_STVXL_V4SI:
15856 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvxl_v4si, exp);
15857 case ALTIVEC_BUILTIN_STVXL_V8HI:
15858 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvxl_v8hi, exp);
15859 case ALTIVEC_BUILTIN_STVXL_V16QI:
15860 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvxl_v16qi, exp);
15862 case ALTIVEC_BUILTIN_STVLX:
15863 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvlx, exp);
15864 case ALTIVEC_BUILTIN_STVLXL:
15865 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvlxl, exp);
15866 case ALTIVEC_BUILTIN_STVRX:
15867 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvrx, exp);
15868 case ALTIVEC_BUILTIN_STVRXL:
15869 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvrxl, exp);
15871 case P9V_BUILTIN_STXVL:
15872 return altivec_expand_stxvl_builtin (CODE_FOR_stxvl, exp);
15874 case VSX_BUILTIN_STXVD2X_V1TI:
15875 return altivec_expand_stv_builtin (CODE_FOR_vsx_store_v1ti, exp);
15876 case VSX_BUILTIN_STXVD2X_V2DF:
15877 return altivec_expand_stv_builtin (CODE_FOR_vsx_store_v2df, exp);
15878 case VSX_BUILTIN_STXVD2X_V2DI:
15879 return altivec_expand_stv_builtin (CODE_FOR_vsx_store_v2di, exp);
15880 case VSX_BUILTIN_STXVW4X_V4SF:
15881 return altivec_expand_stv_builtin (CODE_FOR_vsx_store_v4sf, exp);
15882 case VSX_BUILTIN_STXVW4X_V4SI:
15883 return altivec_expand_stv_builtin (CODE_FOR_vsx_store_v4si, exp);
15884 case VSX_BUILTIN_STXVW4X_V8HI:
15885 return altivec_expand_stv_builtin (CODE_FOR_vsx_store_v8hi, exp);
15886 case VSX_BUILTIN_STXVW4X_V16QI:
15887 return altivec_expand_stv_builtin (CODE_FOR_vsx_store_v16qi, exp);
15889 /* For the following on big endian, it's ok to use any appropriate
15890 unaligned-supporting store, so use a generic expander. For
15891 little-endian, the exact element-reversing instruction must
15892 be used. */
15893 case VSX_BUILTIN_ST_ELEMREV_V2DF:
15895 enum insn_code code = (BYTES_BIG_ENDIAN ? CODE_FOR_vsx_store_v2df
15896 : CODE_FOR_vsx_st_elemrev_v2df);
15897 return altivec_expand_stv_builtin (code, exp);
15899 case VSX_BUILTIN_ST_ELEMREV_V2DI:
15901 enum insn_code code = (BYTES_BIG_ENDIAN ? CODE_FOR_vsx_store_v2di
15902 : CODE_FOR_vsx_st_elemrev_v2di);
15903 return altivec_expand_stv_builtin (code, exp);
15905 case VSX_BUILTIN_ST_ELEMREV_V4SF:
15907 enum insn_code code = (BYTES_BIG_ENDIAN ? CODE_FOR_vsx_store_v4sf
15908 : CODE_FOR_vsx_st_elemrev_v4sf);
15909 return altivec_expand_stv_builtin (code, exp);
15911 case VSX_BUILTIN_ST_ELEMREV_V4SI:
15913 enum insn_code code = (BYTES_BIG_ENDIAN ? CODE_FOR_vsx_store_v4si
15914 : CODE_FOR_vsx_st_elemrev_v4si);
15915 return altivec_expand_stv_builtin (code, exp);
15917 case VSX_BUILTIN_ST_ELEMREV_V8HI:
15919 enum insn_code code = (BYTES_BIG_ENDIAN ? CODE_FOR_vsx_store_v8hi
15920 : CODE_FOR_vsx_st_elemrev_v8hi);
15921 return altivec_expand_stv_builtin (code, exp);
15923 case VSX_BUILTIN_ST_ELEMREV_V16QI:
15925 enum insn_code code = (BYTES_BIG_ENDIAN ? CODE_FOR_vsx_store_v16qi
15926 : CODE_FOR_vsx_st_elemrev_v16qi);
15927 return altivec_expand_stv_builtin (code, exp);
15930 case ALTIVEC_BUILTIN_MFVSCR:
15931 icode = CODE_FOR_altivec_mfvscr;
15932 tmode = insn_data[icode].operand[0].mode;
15934 if (target == 0
15935 || GET_MODE (target) != tmode
15936 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
15937 target = gen_reg_rtx (tmode);
15939 pat = GEN_FCN (icode) (target);
15940 if (! pat)
15941 return 0;
15942 emit_insn (pat);
15943 return target;
15945 case ALTIVEC_BUILTIN_MTVSCR:
15946 icode = CODE_FOR_altivec_mtvscr;
15947 arg0 = CALL_EXPR_ARG (exp, 0);
15948 op0 = expand_normal (arg0);
15949 mode0 = insn_data[icode].operand[0].mode;
15951       /* If we got invalid arguments, bail out before generating bad rtl.  */
15952 if (arg0 == error_mark_node)
15953 return const0_rtx;
15955 if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
15956 op0 = copy_to_mode_reg (mode0, op0);
15958 pat = GEN_FCN (icode) (op0);
15959 if (pat)
15960 emit_insn (pat);
15961 return NULL_RTX;
15963 case ALTIVEC_BUILTIN_DSSALL:
15964 emit_insn (gen_altivec_dssall ());
15965 return NULL_RTX;
15967 case ALTIVEC_BUILTIN_DSS:
15968 icode = CODE_FOR_altivec_dss;
15969 arg0 = CALL_EXPR_ARG (exp, 0);
15970 STRIP_NOPS (arg0);
15971 op0 = expand_normal (arg0);
15972 mode0 = insn_data[icode].operand[0].mode;
15974       /* If we got invalid arguments, bail out before generating bad rtl.  */
15975 if (arg0 == error_mark_node)
15976 return const0_rtx;
15978 if (TREE_CODE (arg0) != INTEGER_CST
15979 || TREE_INT_CST_LOW (arg0) & ~0x3)
15981 error ("argument to dss must be a 2-bit unsigned literal");
15982 return const0_rtx;
15985 if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
15986 op0 = copy_to_mode_reg (mode0, op0);
15988 emit_insn (gen_altivec_dss (op0));
15989 return NULL_RTX;
15991 case ALTIVEC_BUILTIN_VEC_INIT_V4SI:
15992 case ALTIVEC_BUILTIN_VEC_INIT_V8HI:
15993 case ALTIVEC_BUILTIN_VEC_INIT_V16QI:
15994 case ALTIVEC_BUILTIN_VEC_INIT_V4SF:
15995 case VSX_BUILTIN_VEC_INIT_V2DF:
15996 case VSX_BUILTIN_VEC_INIT_V2DI:
15997 case VSX_BUILTIN_VEC_INIT_V1TI:
15998 return altivec_expand_vec_init_builtin (TREE_TYPE (exp), exp, target);
16000 case ALTIVEC_BUILTIN_VEC_SET_V4SI:
16001 case ALTIVEC_BUILTIN_VEC_SET_V8HI:
16002 case ALTIVEC_BUILTIN_VEC_SET_V16QI:
16003 case ALTIVEC_BUILTIN_VEC_SET_V4SF:
16004 case VSX_BUILTIN_VEC_SET_V2DF:
16005 case VSX_BUILTIN_VEC_SET_V2DI:
16006 case VSX_BUILTIN_VEC_SET_V1TI:
16007 return altivec_expand_vec_set_builtin (exp);
16009 case ALTIVEC_BUILTIN_VEC_EXT_V4SI:
16010 case ALTIVEC_BUILTIN_VEC_EXT_V8HI:
16011 case ALTIVEC_BUILTIN_VEC_EXT_V16QI:
16012 case ALTIVEC_BUILTIN_VEC_EXT_V4SF:
16013 case VSX_BUILTIN_VEC_EXT_V2DF:
16014 case VSX_BUILTIN_VEC_EXT_V2DI:
16015 case VSX_BUILTIN_VEC_EXT_V1TI:
16016 return altivec_expand_vec_ext_builtin (exp, target);
16018 case P9V_BUILTIN_VEXTRACT4B:
16019 case P9V_BUILTIN_VEC_VEXTRACT4B:
16020 arg1 = CALL_EXPR_ARG (exp, 1);
16021 STRIP_NOPS (arg1);
16023 /* Generate a normal call if it is invalid. */
16024 if (arg1 == error_mark_node)
16025 return expand_call (exp, target, false);
16027 if (TREE_CODE (arg1) != INTEGER_CST || TREE_INT_CST_LOW (arg1) > 12)
16029 error ("second argument to vec_vextract4b must be 0..12");
16030 return expand_call (exp, target, false);
16032 break;
16034 case P9V_BUILTIN_VINSERT4B:
16035 case P9V_BUILTIN_VINSERT4B_DI:
16036 case P9V_BUILTIN_VEC_VINSERT4B:
16037 arg2 = CALL_EXPR_ARG (exp, 2);
16038 STRIP_NOPS (arg2);
16040 /* Generate a normal call if it is invalid. */
16041 if (arg2 == error_mark_node)
16042 return expand_call (exp, target, false);
16044 if (TREE_CODE (arg2) != INTEGER_CST || TREE_INT_CST_LOW (arg2) > 12)
16046 error ("third argument to vec_vinsert4b must be 0..12");
16047 return expand_call (exp, target, false);
16049 break;
16051 default:
16052 break;
16053       /* Fall through to the code after the switch.  */
16056 /* Expand abs* operations. */
16057 d = bdesc_abs;
16058 for (i = 0; i < ARRAY_SIZE (bdesc_abs); i++, d++)
16059 if (d->code == fcode)
16060 return altivec_expand_abs_builtin (d->icode, exp, target);
16062 /* Expand the AltiVec predicates. */
16063 d = bdesc_altivec_preds;
16064 for (i = 0; i < ARRAY_SIZE (bdesc_altivec_preds); i++, d++)
16065 if (d->code == fcode)
16066 return altivec_expand_predicate_builtin (d->icode, exp, target);
16068 /* LV* are funky. We initialized them differently. */
16069 switch (fcode)
16071 case ALTIVEC_BUILTIN_LVSL:
16072 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvsl,
16073 exp, target, false);
16074 case ALTIVEC_BUILTIN_LVSR:
16075 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvsr,
16076 exp, target, false);
16077 case ALTIVEC_BUILTIN_LVEBX:
16078 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvebx,
16079 exp, target, false);
16080 case ALTIVEC_BUILTIN_LVEHX:
16081 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvehx,
16082 exp, target, false);
16083 case ALTIVEC_BUILTIN_LVEWX:
16084 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvewx,
16085 exp, target, false);
16086 case ALTIVEC_BUILTIN_LVXL_V2DF:
16087 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvxl_v2df,
16088 exp, target, false);
16089 case ALTIVEC_BUILTIN_LVXL_V2DI:
16090 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvxl_v2di,
16091 exp, target, false);
16092 case ALTIVEC_BUILTIN_LVXL_V4SF:
16093 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvxl_v4sf,
16094 exp, target, false);
16095 case ALTIVEC_BUILTIN_LVXL:
16096 case ALTIVEC_BUILTIN_LVXL_V4SI:
16097 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvxl_v4si,
16098 exp, target, false);
16099 case ALTIVEC_BUILTIN_LVXL_V8HI:
16100 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvxl_v8hi,
16101 exp, target, false);
16102 case ALTIVEC_BUILTIN_LVXL_V16QI:
16103 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvxl_v16qi,
16104 exp, target, false);
16105 case ALTIVEC_BUILTIN_LVX_V2DF:
16106 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvx_v2df_2op,
16107 exp, target, false);
16108 case ALTIVEC_BUILTIN_LVX_V2DI:
16109 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvx_v2di_2op,
16110 exp, target, false);
16111 case ALTIVEC_BUILTIN_LVX_V4SF:
16112 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvx_v4sf_2op,
16113 exp, target, false);
16114 case ALTIVEC_BUILTIN_LVX:
16115 case ALTIVEC_BUILTIN_LVX_V4SI:
16116 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvx_v4si_2op,
16117 exp, target, false);
16118 case ALTIVEC_BUILTIN_LVX_V8HI:
16119 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvx_v8hi_2op,
16120 exp, target, false);
16121 case ALTIVEC_BUILTIN_LVX_V16QI:
16122 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvx_v16qi_2op,
16123 exp, target, false);
16124 case ALTIVEC_BUILTIN_LVLX:
16125 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvlx,
16126 exp, target, true);
16127 case ALTIVEC_BUILTIN_LVLXL:
16128 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvlxl,
16129 exp, target, true);
16130 case ALTIVEC_BUILTIN_LVRX:
16131 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvrx,
16132 exp, target, true);
16133 case ALTIVEC_BUILTIN_LVRXL:
16134 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvrxl,
16135 exp, target, true);
16136 case VSX_BUILTIN_LXVD2X_V1TI:
16137 return altivec_expand_lv_builtin (CODE_FOR_vsx_load_v1ti,
16138 exp, target, false);
16139 case VSX_BUILTIN_LXVD2X_V2DF:
16140 return altivec_expand_lv_builtin (CODE_FOR_vsx_load_v2df,
16141 exp, target, false);
16142 case VSX_BUILTIN_LXVD2X_V2DI:
16143 return altivec_expand_lv_builtin (CODE_FOR_vsx_load_v2di,
16144 exp, target, false);
16145 case VSX_BUILTIN_LXVW4X_V4SF:
16146 return altivec_expand_lv_builtin (CODE_FOR_vsx_load_v4sf,
16147 exp, target, false);
16148 case VSX_BUILTIN_LXVW4X_V4SI:
16149 return altivec_expand_lv_builtin (CODE_FOR_vsx_load_v4si,
16150 exp, target, false);
16151 case VSX_BUILTIN_LXVW4X_V8HI:
16152 return altivec_expand_lv_builtin (CODE_FOR_vsx_load_v8hi,
16153 exp, target, false);
16154 case VSX_BUILTIN_LXVW4X_V16QI:
16155 return altivec_expand_lv_builtin (CODE_FOR_vsx_load_v16qi,
16156 exp, target, false);
16157 /* For the following on big endian, it's ok to use any appropriate
16158 unaligned-supporting load, so use a generic expander. For
16159 little-endian, the exact element-reversing instruction must
16160 be used. */
16161 case VSX_BUILTIN_LD_ELEMREV_V2DF:
16163 enum insn_code code = (BYTES_BIG_ENDIAN ? CODE_FOR_vsx_load_v2df
16164 : CODE_FOR_vsx_ld_elemrev_v2df);
16165 return altivec_expand_lv_builtin (code, exp, target, false);
16167 case VSX_BUILTIN_LD_ELEMREV_V2DI:
16169 enum insn_code code = (BYTES_BIG_ENDIAN ? CODE_FOR_vsx_load_v2di
16170 : CODE_FOR_vsx_ld_elemrev_v2di);
16171 return altivec_expand_lv_builtin (code, exp, target, false);
16173 case VSX_BUILTIN_LD_ELEMREV_V4SF:
16175 enum insn_code code = (BYTES_BIG_ENDIAN ? CODE_FOR_vsx_load_v4sf
16176 : CODE_FOR_vsx_ld_elemrev_v4sf);
16177 return altivec_expand_lv_builtin (code, exp, target, false);
16179 case VSX_BUILTIN_LD_ELEMREV_V4SI:
16181 enum insn_code code = (BYTES_BIG_ENDIAN ? CODE_FOR_vsx_load_v4si
16182 : CODE_FOR_vsx_ld_elemrev_v4si);
16183 return altivec_expand_lv_builtin (code, exp, target, false);
16185 case VSX_BUILTIN_LD_ELEMREV_V8HI:
16187 enum insn_code code = (BYTES_BIG_ENDIAN ? CODE_FOR_vsx_load_v8hi
16188 : CODE_FOR_vsx_ld_elemrev_v8hi);
16189 return altivec_expand_lv_builtin (code, exp, target, false);
16191 case VSX_BUILTIN_LD_ELEMREV_V16QI:
16193 enum insn_code code = (BYTES_BIG_ENDIAN ? CODE_FOR_vsx_load_v16qi
16194 : CODE_FOR_vsx_ld_elemrev_v16qi);
16195 return altivec_expand_lv_builtin (code, exp, target, false);
16197 break;
16198 default:
16199 break;
16200       /* Fall through to the code after the switch.  */
16203 *expandedp = false;
16204 return NULL_RTX;
16207 /* Expand the builtin in EXP and store the result in TARGET. Store
16208 true in *EXPANDEDP if we found a builtin to expand. */
16209 static rtx
16210 paired_expand_builtin (tree exp, rtx target, bool * expandedp)
16212 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
16213 enum rs6000_builtins fcode = (enum rs6000_builtins) DECL_FUNCTION_CODE (fndecl);
16214 const struct builtin_description *d;
16215 size_t i;
16217 *expandedp = true;
16219 switch (fcode)
16221 case PAIRED_BUILTIN_STX:
16222 return paired_expand_stv_builtin (CODE_FOR_paired_stx, exp);
16223 case PAIRED_BUILTIN_LX:
16224 return paired_expand_lv_builtin (CODE_FOR_paired_lx, exp, target);
16225 default:
16226 break;
16227       /* Fall through to the code after the switch.  */
16230 /* Expand the paired predicates. */
16231 d = bdesc_paired_preds;
16232 for (i = 0; i < ARRAY_SIZE (bdesc_paired_preds); i++, d++)
16233 if (d->code == fcode)
16234 return paired_expand_predicate_builtin (d->icode, exp, target);
16236 *expandedp = false;
16237 return NULL_RTX;
16240 /* Binops that need to be initialized manually, but can be expanded
16241 automagically by rs6000_expand_binop_builtin. */
16242 static const struct builtin_description bdesc_2arg_spe[] =
16244 { RS6000_BTM_SPE, CODE_FOR_spe_evlddx, "__builtin_spe_evlddx", SPE_BUILTIN_EVLDDX },
16245 { RS6000_BTM_SPE, CODE_FOR_spe_evldwx, "__builtin_spe_evldwx", SPE_BUILTIN_EVLDWX },
16246 { RS6000_BTM_SPE, CODE_FOR_spe_evldhx, "__builtin_spe_evldhx", SPE_BUILTIN_EVLDHX },
16247 { RS6000_BTM_SPE, CODE_FOR_spe_evlwhex, "__builtin_spe_evlwhex", SPE_BUILTIN_EVLWHEX },
16248 { RS6000_BTM_SPE, CODE_FOR_spe_evlwhoux, "__builtin_spe_evlwhoux", SPE_BUILTIN_EVLWHOUX },
16249 { RS6000_BTM_SPE, CODE_FOR_spe_evlwhosx, "__builtin_spe_evlwhosx", SPE_BUILTIN_EVLWHOSX },
16250 { RS6000_BTM_SPE, CODE_FOR_spe_evlwwsplatx, "__builtin_spe_evlwwsplatx", SPE_BUILTIN_EVLWWSPLATX },
16251 { RS6000_BTM_SPE, CODE_FOR_spe_evlwhsplatx, "__builtin_spe_evlwhsplatx", SPE_BUILTIN_EVLWHSPLATX },
16252 { RS6000_BTM_SPE, CODE_FOR_spe_evlhhesplatx, "__builtin_spe_evlhhesplatx", SPE_BUILTIN_EVLHHESPLATX },
16253 { RS6000_BTM_SPE, CODE_FOR_spe_evlhhousplatx, "__builtin_spe_evlhhousplatx", SPE_BUILTIN_EVLHHOUSPLATX },
16254 { RS6000_BTM_SPE, CODE_FOR_spe_evlhhossplatx, "__builtin_spe_evlhhossplatx", SPE_BUILTIN_EVLHHOSSPLATX },
16255 { RS6000_BTM_SPE, CODE_FOR_spe_evldd, "__builtin_spe_evldd", SPE_BUILTIN_EVLDD },
16256 { RS6000_BTM_SPE, CODE_FOR_spe_evldw, "__builtin_spe_evldw", SPE_BUILTIN_EVLDW },
16257 { RS6000_BTM_SPE, CODE_FOR_spe_evldh, "__builtin_spe_evldh", SPE_BUILTIN_EVLDH },
16258 { RS6000_BTM_SPE, CODE_FOR_spe_evlwhe, "__builtin_spe_evlwhe", SPE_BUILTIN_EVLWHE },
16259 { RS6000_BTM_SPE, CODE_FOR_spe_evlwhou, "__builtin_spe_evlwhou", SPE_BUILTIN_EVLWHOU },
16260 { RS6000_BTM_SPE, CODE_FOR_spe_evlwhos, "__builtin_spe_evlwhos", SPE_BUILTIN_EVLWHOS },
16261 { RS6000_BTM_SPE, CODE_FOR_spe_evlwwsplat, "__builtin_spe_evlwwsplat", SPE_BUILTIN_EVLWWSPLAT },
16262 { RS6000_BTM_SPE, CODE_FOR_spe_evlwhsplat, "__builtin_spe_evlwhsplat", SPE_BUILTIN_EVLWHSPLAT },
16263 { RS6000_BTM_SPE, CODE_FOR_spe_evlhhesplat, "__builtin_spe_evlhhesplat", SPE_BUILTIN_EVLHHESPLAT },
16264 { RS6000_BTM_SPE, CODE_FOR_spe_evlhhousplat, "__builtin_spe_evlhhousplat", SPE_BUILTIN_EVLHHOUSPLAT },
16265 { RS6000_BTM_SPE, CODE_FOR_spe_evlhhossplat, "__builtin_spe_evlhhossplat", SPE_BUILTIN_EVLHHOSSPLAT }
16268 /* Expand the builtin in EXP and store the result in TARGET. Store
16269 true in *EXPANDEDP if we found a builtin to expand.
16271 This expands the SPE builtins that are not simple unary and binary
16272 operations. */
16273 static rtx
16274 spe_expand_builtin (tree exp, rtx target, bool *expandedp)
16276 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
16277 tree arg1, arg0;
16278 enum rs6000_builtins fcode = (enum rs6000_builtins) DECL_FUNCTION_CODE (fndecl);
16279 enum insn_code icode;
16280 machine_mode tmode, mode0;
16281 rtx pat, op0;
16282 const struct builtin_description *d;
16283 size_t i;
16285 *expandedp = true;
16287 /* Syntax check for a 5-bit unsigned immediate. */
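/* E.g. __builtin_spe_evstdd (v, p, 31) is accepted, while an
   out-of-range literal such as 32, or a non-constant argument, draws
   the error below.  */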
16288 switch (fcode)
16290 case SPE_BUILTIN_EVSTDD:
16291 case SPE_BUILTIN_EVSTDH:
16292 case SPE_BUILTIN_EVSTDW:
16293 case SPE_BUILTIN_EVSTWHE:
16294 case SPE_BUILTIN_EVSTWHO:
16295 case SPE_BUILTIN_EVSTWWE:
16296 case SPE_BUILTIN_EVSTWWO:
16297 arg1 = CALL_EXPR_ARG (exp, 2);
16298 if (TREE_CODE (arg1) != INTEGER_CST
16299 || TREE_INT_CST_LOW (arg1) & ~0x1f)
16301 error ("argument 3 must be a 5-bit unsigned literal");
16302 return const0_rtx;
16304 break;
16305 default:
16306 break;
16309 /* The evsplat*i instructions are not quite generic. */
16310 switch (fcode)
16312 case SPE_BUILTIN_EVSPLATFI:
16313 return rs6000_expand_unop_builtin (CODE_FOR_spe_evsplatfi,
16314 exp, target);
16315 case SPE_BUILTIN_EVSPLATI:
16316 return rs6000_expand_unop_builtin (CODE_FOR_spe_evsplati,
16317 exp, target);
16318 default:
16319 break;
16322 d = bdesc_2arg_spe;
16323 for (i = 0; i < ARRAY_SIZE (bdesc_2arg_spe); ++i, ++d)
16324 if (d->code == fcode)
16325 return rs6000_expand_binop_builtin (d->icode, exp, target);
16327 d = bdesc_spe_predicates;
16328 for (i = 0; i < ARRAY_SIZE (bdesc_spe_predicates); ++i, ++d)
16329 if (d->code == fcode)
16330 return spe_expand_predicate_builtin (d->icode, exp, target);
16332 d = bdesc_spe_evsel;
16333 for (i = 0; i < ARRAY_SIZE (bdesc_spe_evsel); ++i, ++d)
16334 if (d->code == fcode)
16335 return spe_expand_evsel_builtin (d->icode, exp, target);
16337 switch (fcode)
16339 case SPE_BUILTIN_EVSTDDX:
16340 return spe_expand_stv_builtin (CODE_FOR_spe_evstddx, exp);
16341 case SPE_BUILTIN_EVSTDHX:
16342 return spe_expand_stv_builtin (CODE_FOR_spe_evstdhx, exp);
16343 case SPE_BUILTIN_EVSTDWX:
16344 return spe_expand_stv_builtin (CODE_FOR_spe_evstdwx, exp);
16345 case SPE_BUILTIN_EVSTWHEX:
16346 return spe_expand_stv_builtin (CODE_FOR_spe_evstwhex, exp);
16347 case SPE_BUILTIN_EVSTWHOX:
16348 return spe_expand_stv_builtin (CODE_FOR_spe_evstwhox, exp);
16349 case SPE_BUILTIN_EVSTWWEX:
16350 return spe_expand_stv_builtin (CODE_FOR_spe_evstwwex, exp);
16351 case SPE_BUILTIN_EVSTWWOX:
16352 return spe_expand_stv_builtin (CODE_FOR_spe_evstwwox, exp);
16353 case SPE_BUILTIN_EVSTDD:
16354 return spe_expand_stv_builtin (CODE_FOR_spe_evstdd, exp);
16355 case SPE_BUILTIN_EVSTDH:
16356 return spe_expand_stv_builtin (CODE_FOR_spe_evstdh, exp);
16357 case SPE_BUILTIN_EVSTDW:
16358 return spe_expand_stv_builtin (CODE_FOR_spe_evstdw, exp);
16359 case SPE_BUILTIN_EVSTWHE:
16360 return spe_expand_stv_builtin (CODE_FOR_spe_evstwhe, exp);
16361 case SPE_BUILTIN_EVSTWHO:
16362 return spe_expand_stv_builtin (CODE_FOR_spe_evstwho, exp);
16363 case SPE_BUILTIN_EVSTWWE:
16364 return spe_expand_stv_builtin (CODE_FOR_spe_evstwwe, exp);
16365 case SPE_BUILTIN_EVSTWWO:
16366 return spe_expand_stv_builtin (CODE_FOR_spe_evstwwo, exp);
16367 case SPE_BUILTIN_MFSPEFSCR:
16368 icode = CODE_FOR_spe_mfspefscr;
16369 tmode = insn_data[icode].operand[0].mode;
16371 if (target == 0
16372 || GET_MODE (target) != tmode
16373 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
16374 target = gen_reg_rtx (tmode);
16376 pat = GEN_FCN (icode) (target);
16377 if (! pat)
16378 return 0;
16379 emit_insn (pat);
16380 return target;
16381 case SPE_BUILTIN_MTSPEFSCR:
16382 icode = CODE_FOR_spe_mtspefscr;
16383 arg0 = CALL_EXPR_ARG (exp, 0);
16384 op0 = expand_normal (arg0);
16385 mode0 = insn_data[icode].operand[0].mode;
16387 if (arg0 == error_mark_node)
16388 return const0_rtx;
16390 if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
16391 op0 = copy_to_mode_reg (mode0, op0);
16393 pat = GEN_FCN (icode) (op0);
16394 if (pat)
16395 emit_insn (pat);
16396 return NULL_RTX;
16397 default:
16398 break;
16401 *expandedp = false;
16402 return NULL_RTX;
16405 static rtx
16406 paired_expand_predicate_builtin (enum insn_code icode, tree exp, rtx target)
16408 rtx pat, scratch, tmp;
16409 tree form = CALL_EXPR_ARG (exp, 0);
16410 tree arg0 = CALL_EXPR_ARG (exp, 1);
16411 tree arg1 = CALL_EXPR_ARG (exp, 2);
16412 rtx op0 = expand_normal (arg0);
16413 rtx op1 = expand_normal (arg1);
16414 machine_mode mode0 = insn_data[icode].operand[1].mode;
16415 machine_mode mode1 = insn_data[icode].operand[2].mode;
16416 int form_int;
16417 enum rtx_code code;
16419 if (TREE_CODE (form) != INTEGER_CST)
16421 error ("argument 1 of __builtin_paired_predicate must be a constant");
16422 return const0_rtx;
16424 else
16425 form_int = TREE_INT_CST_LOW (form);
16427 gcc_assert (mode0 == mode1);
16429 if (arg0 == error_mark_node || arg1 == error_mark_node)
16430 return const0_rtx;
16432 if (target == 0
16433 || GET_MODE (target) != SImode
16434 || !(*insn_data[icode].operand[0].predicate) (target, SImode))
16435 target = gen_reg_rtx (SImode);
16436 if (!(*insn_data[icode].operand[1].predicate) (op0, mode0))
16437 op0 = copy_to_mode_reg (mode0, op0);
16438 if (!(*insn_data[icode].operand[2].predicate) (op1, mode1))
16439 op1 = copy_to_mode_reg (mode1, op1);
16441 scratch = gen_reg_rtx (CCFPmode);
16443 pat = GEN_FCN (icode) (scratch, op0, op1);
16444 if (!pat)
16445 return const0_rtx;
16447 emit_insn (pat);
16449 switch (form_int)
16451 /* LT bit. */
16452 case 0:
16453 code = LT;
16454 break;
16455 /* GT bit. */
16456 case 1:
16457 code = GT;
16458 break;
16459 /* EQ bit. */
16460 case 2:
16461 code = EQ;
16462 break;
16463 /* UN bit. */
16464 case 3:
16465 emit_insn (gen_move_from_CR_ov_bit (target, scratch));
16466 return target;
16467 default:
16468 error ("argument 1 of __builtin_paired_predicate is out of range");
16469 return const0_rtx;
16472 tmp = gen_rtx_fmt_ee (code, SImode, scratch, const0_rtx);
16473 emit_move_insn (target, tmp);
16474 return target;
16477 static rtx
16478 spe_expand_predicate_builtin (enum insn_code icode, tree exp, rtx target)
16480 rtx pat, scratch, tmp;
16481 tree form = CALL_EXPR_ARG (exp, 0);
16482 tree arg0 = CALL_EXPR_ARG (exp, 1);
16483 tree arg1 = CALL_EXPR_ARG (exp, 2);
16484 rtx op0 = expand_normal (arg0);
16485 rtx op1 = expand_normal (arg1);
16486 machine_mode mode0 = insn_data[icode].operand[1].mode;
16487 machine_mode mode1 = insn_data[icode].operand[2].mode;
16488 int form_int;
16489 enum rtx_code code;
16491 if (TREE_CODE (form) != INTEGER_CST)
16493 error ("argument 1 of __builtin_spe_predicate must be a constant");
16494 return const0_rtx;
16496 else
16497 form_int = TREE_INT_CST_LOW (form);
16499 gcc_assert (mode0 == mode1);
16501 if (arg0 == error_mark_node || arg1 == error_mark_node)
16502 return const0_rtx;
16504 if (target == 0
16505 || GET_MODE (target) != SImode
16506 || ! (*insn_data[icode].operand[0].predicate) (target, SImode))
16507 target = gen_reg_rtx (SImode);
16509 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
16510 op0 = copy_to_mode_reg (mode0, op0);
16511 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
16512 op1 = copy_to_mode_reg (mode1, op1);
16514 scratch = gen_reg_rtx (CCmode);
16516 pat = GEN_FCN (icode) (scratch, op0, op1);
16517 if (! pat)
16518 return const0_rtx;
16519 emit_insn (pat);
16521 /* There are 4 variants for each predicate: _any_, _all_, _upper_,
16522 _lower_. We use one compare, but look in different bits of the
16523 CR for each variant.
16525 There are 2 elements in each SPE simd type (upper/lower). The CR
16526 bits are set as follows:
16528 BIT 0 | BIT 1 | BIT 2 | BIT 3
16529 U | L | (U | L) | (U & L)
16531 So, for an "all" relationship, BIT 3 would be set.
16532 For an "any" relationship, BIT 2 would be set. Etc.
16534 Following traditional nomenclature, these bits map to:
16536 BIT 0 | BIT 1 | BIT 2 | BIT 3
16537 LT | GT | EQ | OV
16539 Later, we will generate rtl to look in the OV/EQ/LT/GT bits (for the all/any/upper/lower variants, respectively).
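/* A worked example of the table above: if the upper-element compare
   holds and the lower one does not, the CR field holds U=1, L=0,
   (U|L)=1, (U&L)=0, i.e. LT=1, GT=0, EQ=1, OV=0; the "any" variant
   (EQ) then returns 1, the "all" variant (OV) returns 0, and the
   "upper"/"lower" variants (LT/GT) return 1 and 0 respectively.  */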
16542 switch (form_int)
16544 /* All variant. OV bit. */
16545 case 0:
16546 /* We need to get to the OV bit, which is the ORDERED bit. We
16547 could generate (ordered:SI (reg:CC xx) (const_int 0)), but
16548 that's ugly and will make validate_condition_mode die.
16549 So let's just use another pattern. */
16550 emit_insn (gen_move_from_CR_ov_bit (target, scratch));
16551 return target;
16552 /* Any variant. EQ bit. */
16553 case 1:
16554 code = EQ;
16555 break;
16556 /* Upper variant. LT bit. */
16557 case 2:
16558 code = LT;
16559 break;
16560 /* Lower variant. GT bit. */
16561 case 3:
16562 code = GT;
16563 break;
16564 default:
16565 error ("argument 1 of __builtin_spe_predicate is out of range");
16566 return const0_rtx;
16569 tmp = gen_rtx_fmt_ee (code, SImode, scratch, const0_rtx);
16570 emit_move_insn (target, tmp);
16572 return target;
16575 /* The evsel builtins look like this:
16577 e = __builtin_spe_evsel_OP (a, b, c, d);
16579 and work like this:
16581 e[upper] = a[upper] *OP* b[upper] ? c[upper] : d[upper];
16582 e[lower] = a[lower] *OP* b[lower] ? c[lower] : d[lower];
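/* The expansion below follows that semantics in two steps: GEN_FCN
   (icode) emits the SPE compare of a and b into a condition-register
   field, and gen_spe_evsel (or gen_spe_evsel_fs for V2SF) then picks
   c or d per element according to that CR field.  */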
16585 static rtx
16586 spe_expand_evsel_builtin (enum insn_code icode, tree exp, rtx target)
16588 rtx pat, scratch;
16589 tree arg0 = CALL_EXPR_ARG (exp, 0);
16590 tree arg1 = CALL_EXPR_ARG (exp, 1);
16591 tree arg2 = CALL_EXPR_ARG (exp, 2);
16592 tree arg3 = CALL_EXPR_ARG (exp, 3);
16593 rtx op0 = expand_normal (arg0);
16594 rtx op1 = expand_normal (arg1);
16595 rtx op2 = expand_normal (arg2);
16596 rtx op3 = expand_normal (arg3);
16597 machine_mode mode0 = insn_data[icode].operand[1].mode;
16598 machine_mode mode1 = insn_data[icode].operand[2].mode;
16600 gcc_assert (mode0 == mode1);
16602 if (arg0 == error_mark_node || arg1 == error_mark_node
16603 || arg2 == error_mark_node || arg3 == error_mark_node)
16604 return const0_rtx;
16606 if (target == 0
16607 || GET_MODE (target) != mode0
16608 || ! (*insn_data[icode].operand[0].predicate) (target, mode0))
16609 target = gen_reg_rtx (mode0);
16611 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
16612 op0 = copy_to_mode_reg (mode0, op0);
16613 if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
16614 op1 = copy_to_mode_reg (mode0, op1);
16615 if (! (*insn_data[icode].operand[1].predicate) (op2, mode1))
16616 op2 = copy_to_mode_reg (mode0, op2);
16617 if (! (*insn_data[icode].operand[1].predicate) (op3, mode1))
16618 op3 = copy_to_mode_reg (mode0, op3);
16620 /* Generate the compare. */
16621 scratch = gen_reg_rtx (CCmode);
16622 pat = GEN_FCN (icode) (scratch, op0, op1);
16623 if (! pat)
16624 return const0_rtx;
16625 emit_insn (pat);
16627 if (mode0 == V2SImode)
16628 emit_insn (gen_spe_evsel (target, op2, op3, scratch));
16629 else
16630 emit_insn (gen_spe_evsel_fs (target, op2, op3, scratch));
16632 return target;
16635 /* Raise an error message for a builtin function that is called without the
16636 appropriate target options being set. */
16638 static void
16639 rs6000_invalid_builtin (enum rs6000_builtins fncode)
16641 size_t uns_fncode = (size_t)fncode;
16642 const char *name = rs6000_builtin_info[uns_fncode].name;
16643 HOST_WIDE_INT fnmask = rs6000_builtin_info[uns_fncode].mask;
16645 gcc_assert (name != NULL);
16646 if ((fnmask & RS6000_BTM_CELL) != 0)
16647 error ("builtin function %s is only valid for the cell processor", name);
16648 else if ((fnmask & RS6000_BTM_VSX) != 0)
16649 error ("builtin function %s requires the -mvsx option", name);
16650 else if ((fnmask & RS6000_BTM_HTM) != 0)
16651 error ("builtin function %s requires the -mhtm option", name);
16652 else if ((fnmask & RS6000_BTM_ALTIVEC) != 0)
16653 error ("builtin function %s requires the -maltivec option", name);
16654 else if ((fnmask & RS6000_BTM_PAIRED) != 0)
16655 error ("builtin function %s requires the -mpaired option", name);
16656 else if ((fnmask & RS6000_BTM_SPE) != 0)
16657 error ("builtin function %s requires the -mspe option", name);
16658 else if ((fnmask & (RS6000_BTM_DFP | RS6000_BTM_P8_VECTOR))
16659 == (RS6000_BTM_DFP | RS6000_BTM_P8_VECTOR))
16660 error ("builtin function %s requires the -mhard-dfp and"
16661 " -mpower8-vector options", name);
16662 else if ((fnmask & RS6000_BTM_DFP) != 0)
16663 error ("builtin function %s requires the -mhard-dfp option", name);
16664 else if ((fnmask & RS6000_BTM_P8_VECTOR) != 0)
16665 error ("builtin function %s requires the -mpower8-vector option", name);
16666 else if ((fnmask & (RS6000_BTM_P9_VECTOR | RS6000_BTM_64BIT))
16667 == (RS6000_BTM_P9_VECTOR | RS6000_BTM_64BIT))
16668 error ("builtin function %s requires the -mcpu=power9 and"
16669 " -m64 options", name);
16670 else if ((fnmask & RS6000_BTM_P9_VECTOR) != 0)
16671 error ("builtin function %s requires the -mcpu=power9 option", name);
16672 else if ((fnmask & (RS6000_BTM_P9_MISC | RS6000_BTM_64BIT))
16673 == (RS6000_BTM_P9_MISC | RS6000_BTM_64BIT))
16674 error ("builtin function %s requires the -mcpu=power9 and"
16675 " -m64 options", name);
16676 else if ((fnmask & RS6000_BTM_P9_MISC) == RS6000_BTM_P9_MISC)
16677 error ("builtin function %s requires the -mcpu=power9 option", name);
16678 else if ((fnmask & (RS6000_BTM_HARD_FLOAT | RS6000_BTM_LDBL128))
16679 == (RS6000_BTM_HARD_FLOAT | RS6000_BTM_LDBL128))
16680 error ("builtin function %s requires the -mhard-float and"
16681 " -mlong-double-128 options", name);
16682 else if ((fnmask & RS6000_BTM_HARD_FLOAT) != 0)
16683 error ("builtin function %s requires the -mhard-float option", name);
16684 else if ((fnmask & RS6000_BTM_FLOAT128) != 0)
16685 error ("builtin function %s requires the -mfloat128 option", name);
16686 else
16687 error ("builtin function %s is not supported with the current options",
16688 name);
16691 /* Target hook for early folding of built-ins, shamelessly stolen
16692 from ia64.c. */
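/* For example, __builtin_nanq ("") folds here to a constant quiet-NaN
   REAL_CST and __builtin_infq () to a +Inf REAL_CST, so no residual
   call survives to RTL expansion.  */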
16694 static tree
16695 rs6000_fold_builtin (tree fndecl, int n_args ATTRIBUTE_UNUSED,
16696 tree *args, bool ignore ATTRIBUTE_UNUSED)
16698 if (DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_MD)
16700 enum rs6000_builtins fn_code
16701 = (enum rs6000_builtins) DECL_FUNCTION_CODE (fndecl);
16702 switch (fn_code)
16704 case RS6000_BUILTIN_NANQ:
16705 case RS6000_BUILTIN_NANSQ:
16707 tree type = TREE_TYPE (TREE_TYPE (fndecl));
16708 const char *str = c_getstr (*args);
16709 int quiet = fn_code == RS6000_BUILTIN_NANQ;
16710 REAL_VALUE_TYPE real;
16712 if (str && real_nan (&real, str, quiet, TYPE_MODE (type)))
16713 return build_real (type, real);
16714 return NULL_TREE;
16716 case RS6000_BUILTIN_INFQ:
16717 case RS6000_BUILTIN_HUGE_VALQ:
16719 tree type = TREE_TYPE (TREE_TYPE (fndecl));
16720 REAL_VALUE_TYPE inf;
16721 real_inf (&inf);
16722 return build_real (type, inf);
16724 default:
16725 break;
16728 #ifdef SUBTARGET_FOLD_BUILTIN
16729 return SUBTARGET_FOLD_BUILTIN (fndecl, n_args, args, ignore);
16730 #else
16731 return NULL_TREE;
16732 #endif
16735 /* Fold a machine-dependent built-in in GIMPLE. (For folding into
16736 a constant, use rs6000_fold_builtin.) */
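/* For example, a statement such as

     x = __builtin_altivec_vadduwm (a, b);

   is replaced in place by the generic assignment

     x = a + b;

   (a PLUS_EXPR on the vector operands), which later passes such as
   the vectorizer can see through.  */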
16738 bool
16739 rs6000_gimple_fold_builtin (gimple_stmt_iterator *gsi)
16741 gimple *stmt = gsi_stmt (*gsi);
16742 tree fndecl = gimple_call_fndecl (stmt);
16743 gcc_checking_assert (fndecl && DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_MD);
16744 enum rs6000_builtins fn_code
16745 = (enum rs6000_builtins) DECL_FUNCTION_CODE (fndecl);
16746 tree arg0, arg1, lhs;
16748 switch (fn_code)
16750 /* Flavors of vec_add. We deliberately don't expand
16751 P8V_BUILTIN_VADDUQM as it gets lowered from V1TImode to
16752 TImode, resulting in much poorer code generation. */
16753 case ALTIVEC_BUILTIN_VADDUBM:
16754 case ALTIVEC_BUILTIN_VADDUHM:
16755 case ALTIVEC_BUILTIN_VADDUWM:
16756 case P8V_BUILTIN_VADDUDM:
16757 case ALTIVEC_BUILTIN_VADDFP:
16758 case VSX_BUILTIN_XVADDDP:
16760 arg0 = gimple_call_arg (stmt, 0);
16761 arg1 = gimple_call_arg (stmt, 1);
16762 lhs = gimple_call_lhs (stmt);
16763 gimple *g = gimple_build_assign (lhs, PLUS_EXPR, arg0, arg1);
16764 gimple_set_location (g, gimple_location (stmt));
16765 gsi_replace (gsi, g, true);
16766 return true;
16768 /* Flavors of vec_sub. We deliberately don't expand
16769 P8V_BUILTIN_VSUBUQM. */
16770 case ALTIVEC_BUILTIN_VSUBUBM:
16771 case ALTIVEC_BUILTIN_VSUBUHM:
16772 case ALTIVEC_BUILTIN_VSUBUWM:
16773 case P8V_BUILTIN_VSUBUDM:
16774 case ALTIVEC_BUILTIN_VSUBFP:
16775 case VSX_BUILTIN_XVSUBDP:
16777 arg0 = gimple_call_arg (stmt, 0);
16778 arg1 = gimple_call_arg (stmt, 1);
16779 lhs = gimple_call_lhs (stmt);
16780 gimple *g = gimple_build_assign (lhs, MINUS_EXPR, arg0, arg1);
16781 gimple_set_location (g, gimple_location (stmt));
16782 gsi_replace (gsi, g, true);
16783 return true;
16785 /* Even element flavors of vec_mul (signed). */
16786 case ALTIVEC_BUILTIN_VMULESB:
16787 case ALTIVEC_BUILTIN_VMULESH:
16788 /* Even element flavors of vec_mul (unsigned). */
16789 case ALTIVEC_BUILTIN_VMULEUB:
16790 case ALTIVEC_BUILTIN_VMULEUH:
16792 arg0 = gimple_call_arg (stmt, 0);
16793 arg1 = gimple_call_arg (stmt, 1);
16794 lhs = gimple_call_lhs (stmt);
16795 gimple *g = gimple_build_assign (lhs, VEC_WIDEN_MULT_EVEN_EXPR, arg0, arg1);
16796 gimple_set_location (g, gimple_location (stmt));
16797 gsi_replace (gsi, g, true);
16798 return true;
16800 /* Odd element flavors of vec_mul (signed). */
16801 case ALTIVEC_BUILTIN_VMULOSB:
16802 case ALTIVEC_BUILTIN_VMULOSH:
16803 /* Odd element flavors of vec_mul (unsigned). */
16804 case ALTIVEC_BUILTIN_VMULOUB:
16805 case ALTIVEC_BUILTIN_VMULOUH:
16807 arg0 = gimple_call_arg (stmt, 0);
16808 arg1 = gimple_call_arg (stmt, 1);
16809 lhs = gimple_call_lhs (stmt);
16810 gimple *g = gimple_build_assign (lhs, VEC_WIDEN_MULT_ODD_EXPR, arg0, arg1);
16811 gimple_set_location (g, gimple_location (stmt));
16812 gsi_replace (gsi, g, true);
16813 return true;
16816 default:
16817 break;
16820 return false;
16823 /* Expand an expression EXP that calls a built-in function,
16824 with result going to TARGET if that's convenient
16825 (and in mode MODE if that's convenient).
16826 SUBTARGET may be used as the target for computing one of EXP's operands.
16827 IGNORE is nonzero if the value is to be ignored. */
16829 static rtx
16830 rs6000_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
16831 machine_mode mode ATTRIBUTE_UNUSED,
16832 int ignore ATTRIBUTE_UNUSED)
16834 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
16835 enum rs6000_builtins fcode
16836 = (enum rs6000_builtins)DECL_FUNCTION_CODE (fndecl);
16837 size_t uns_fcode = (size_t)fcode;
16838 const struct builtin_description *d;
16839 size_t i;
16840 rtx ret;
16841 bool success;
16842 HOST_WIDE_INT mask = rs6000_builtin_info[uns_fcode].mask;
16843 bool func_valid_p = ((rs6000_builtin_mask & mask) == mask);
16845 if (TARGET_DEBUG_BUILTIN)
16847 enum insn_code icode = rs6000_builtin_info[uns_fcode].icode;
16848 const char *name1 = rs6000_builtin_info[uns_fcode].name;
16849 const char *name2 = ((icode != CODE_FOR_nothing)
16850 ? get_insn_name ((int)icode)
16851 : "nothing");
16852 const char *name3;
16854 switch (rs6000_builtin_info[uns_fcode].attr & RS6000_BTC_TYPE_MASK)
16856 default: name3 = "unknown"; break;
16857 case RS6000_BTC_SPECIAL: name3 = "special"; break;
16858 case RS6000_BTC_UNARY: name3 = "unary"; break;
16859 case RS6000_BTC_BINARY: name3 = "binary"; break;
16860 case RS6000_BTC_TERNARY: name3 = "ternary"; break;
16861 case RS6000_BTC_PREDICATE: name3 = "predicate"; break;
16862 case RS6000_BTC_ABS: name3 = "abs"; break;
16863 case RS6000_BTC_EVSEL: name3 = "evsel"; break;
16864 case RS6000_BTC_DST: name3 = "dst"; break;
16868 fprintf (stderr,
16869 "rs6000_expand_builtin, %s (%d), insn = %s (%d), type=%s%s\n",
16870 (name1) ? name1 : "---", fcode,
16871 (name2) ? name2 : "---", (int)icode,
16872 name3,
16873 func_valid_p ? "" : ", not valid");
16876 if (!func_valid_p)
16878 rs6000_invalid_builtin (fcode);
16880 /* Given it is invalid, just generate a normal call. */
16881 return expand_call (exp, target, ignore);
16884 switch (fcode)
16886 case RS6000_BUILTIN_RECIP:
16887 return rs6000_expand_binop_builtin (CODE_FOR_recipdf3, exp, target);
16889 case RS6000_BUILTIN_RECIPF:
16890 return rs6000_expand_binop_builtin (CODE_FOR_recipsf3, exp, target);
16892 case RS6000_BUILTIN_RSQRTF:
16893 return rs6000_expand_unop_builtin (CODE_FOR_rsqrtsf2, exp, target);
16895 case RS6000_BUILTIN_RSQRT:
16896 return rs6000_expand_unop_builtin (CODE_FOR_rsqrtdf2, exp, target);
16898 case POWER7_BUILTIN_BPERMD:
16899 return rs6000_expand_binop_builtin (((TARGET_64BIT)
16900 ? CODE_FOR_bpermd_di
16901 : CODE_FOR_bpermd_si), exp, target);
16903 case RS6000_BUILTIN_GET_TB:
16904 return rs6000_expand_zeroop_builtin (CODE_FOR_rs6000_get_timebase,
16905 target);
16907 case RS6000_BUILTIN_MFTB:
16908 return rs6000_expand_zeroop_builtin (((TARGET_64BIT)
16909 ? CODE_FOR_rs6000_mftb_di
16910 : CODE_FOR_rs6000_mftb_si),
16911 target);
16913 case RS6000_BUILTIN_MFFS:
16914 return rs6000_expand_zeroop_builtin (CODE_FOR_rs6000_mffs, target);
16916 case RS6000_BUILTIN_MTFSF:
16917 return rs6000_expand_mtfsf_builtin (CODE_FOR_rs6000_mtfsf, exp);
16919 case RS6000_BUILTIN_CPU_INIT:
16920 case RS6000_BUILTIN_CPU_IS:
16921 case RS6000_BUILTIN_CPU_SUPPORTS:
16922 return cpu_expand_builtin (fcode, exp, target);
16924 case ALTIVEC_BUILTIN_MASK_FOR_LOAD:
16925 case ALTIVEC_BUILTIN_MASK_FOR_STORE:
16927 int icode = (BYTES_BIG_ENDIAN ? (int) CODE_FOR_altivec_lvsr_direct
16928 : (int) CODE_FOR_altivec_lvsl_direct);
16929 machine_mode tmode = insn_data[icode].operand[0].mode;
16930 machine_mode mode = insn_data[icode].operand[1].mode;
16931 tree arg;
16932 rtx op, addr, pat;
16934 gcc_assert (TARGET_ALTIVEC);
16936 arg = CALL_EXPR_ARG (exp, 0);
16937 gcc_assert (POINTER_TYPE_P (TREE_TYPE (arg)));
16938 op = expand_expr (arg, NULL_RTX, Pmode, EXPAND_NORMAL);
16939 addr = memory_address (mode, op);
16940 if (fcode == ALTIVEC_BUILTIN_MASK_FOR_STORE)
16941 op = addr;
16942 else
16944 /* For the load case we need to negate the address. */
16945 op = gen_reg_rtx (GET_MODE (addr));
16946 emit_insn (gen_rtx_SET (op, gen_rtx_NEG (GET_MODE (addr), addr)));
16948 op = gen_rtx_MEM (mode, op);
16950 if (target == 0
16951 || GET_MODE (target) != tmode
16952 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
16953 target = gen_reg_rtx (tmode);
16955 pat = GEN_FCN (icode) (target, op);
16956 if (!pat)
16957 return 0;
16958 emit_insn (pat);
16960 return target;
16963 case ALTIVEC_BUILTIN_VCFUX:
16964 case ALTIVEC_BUILTIN_VCFSX:
16965 case ALTIVEC_BUILTIN_VCTUXS:
16966 case ALTIVEC_BUILTIN_VCTSXS:
16967 /* FIXME: There's got to be a nicer way to handle this case than
16968 constructing a new CALL_EXPR. */
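/* The effect is that a one-argument call like
   __builtin_altivec_vcfsx (v) is treated as
   __builtin_altivec_vcfsx (v, 0), i.e. a scale factor of zero.  */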
16969 if (call_expr_nargs (exp) == 1)
16971 exp = build_call_nary (TREE_TYPE (exp), CALL_EXPR_FN (exp),
16972 2, CALL_EXPR_ARG (exp, 0), integer_zero_node);
16974 break;
16976 default:
16977 break;
16980 if (TARGET_ALTIVEC)
16982 ret = altivec_expand_builtin (exp, target, &success);
16984 if (success)
16985 return ret;
16987 if (TARGET_SPE)
16989 ret = spe_expand_builtin (exp, target, &success);
16991 if (success)
16992 return ret;
16994 if (TARGET_PAIRED_FLOAT)
16996 ret = paired_expand_builtin (exp, target, &success);
16998 if (success)
16999 return ret;
17001 if (TARGET_HTM)
17003 ret = htm_expand_builtin (exp, target, &success);
17005 if (success)
17006 return ret;
17009 unsigned attr = rs6000_builtin_info[uns_fcode].attr & RS6000_BTC_TYPE_MASK;
17010 /* RS6000_BTC_SPECIAL represents no-operand operators. */
17011 gcc_assert (attr == RS6000_BTC_UNARY
17012 || attr == RS6000_BTC_BINARY
17013 || attr == RS6000_BTC_TERNARY
17014 || attr == RS6000_BTC_SPECIAL);
17016 /* Handle simple unary operations. */
17017 d = bdesc_1arg;
17018 for (i = 0; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
17019 if (d->code == fcode)
17020 return rs6000_expand_unop_builtin (d->icode, exp, target);
17022 /* Handle simple binary operations. */
17023 d = bdesc_2arg;
17024 for (i = 0; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
17025 if (d->code == fcode)
17026 return rs6000_expand_binop_builtin (d->icode, exp, target);
17028 /* Handle simple ternary operations. */
17029 d = bdesc_3arg;
17030 for (i = 0; i < ARRAY_SIZE (bdesc_3arg); i++, d++)
17031 if (d->code == fcode)
17032 return rs6000_expand_ternop_builtin (d->icode, exp, target);
17034 /* Handle simple no-argument operations. */
17035 d = bdesc_0arg;
17036 for (i = 0; i < ARRAY_SIZE (bdesc_0arg); i++, d++)
17037 if (d->code == fcode)
17038 return rs6000_expand_zeroop_builtin (d->icode, target);
17040 gcc_unreachable ();
17043 static void
17044 rs6000_init_builtins (void)
17046 tree tdecl;
17047 tree ftype;
17048 machine_mode mode;
17050 if (TARGET_DEBUG_BUILTIN)
17051 fprintf (stderr, "rs6000_init_builtins%s%s%s%s\n",
17052 (TARGET_PAIRED_FLOAT) ? ", paired" : "",
17053 (TARGET_SPE) ? ", spe" : "",
17054 (TARGET_ALTIVEC) ? ", altivec" : "",
17055 (TARGET_VSX) ? ", vsx" : "");
17057 V2SI_type_node = build_vector_type (intSI_type_node, 2);
17058 V2SF_type_node = build_vector_type (float_type_node, 2);
17059 V2DI_type_node = build_vector_type (intDI_type_node, 2);
17060 V2DF_type_node = build_vector_type (double_type_node, 2);
17061 V4HI_type_node = build_vector_type (intHI_type_node, 4);
17062 V4SI_type_node = build_vector_type (intSI_type_node, 4);
17063 V4SF_type_node = build_vector_type (float_type_node, 4);
17064 V8HI_type_node = build_vector_type (intHI_type_node, 8);
17065 V16QI_type_node = build_vector_type (intQI_type_node, 16);
17067 unsigned_V16QI_type_node = build_vector_type (unsigned_intQI_type_node, 16);
17068 unsigned_V8HI_type_node = build_vector_type (unsigned_intHI_type_node, 8);
17069 unsigned_V4SI_type_node = build_vector_type (unsigned_intSI_type_node, 4);
17070 unsigned_V2DI_type_node = build_vector_type (unsigned_intDI_type_node, 2);
17072 opaque_V2SF_type_node = build_opaque_vector_type (float_type_node, 2);
17073 opaque_V2SI_type_node = build_opaque_vector_type (intSI_type_node, 2);
17074 opaque_p_V2SI_type_node = build_pointer_type (opaque_V2SI_type_node);
17075 opaque_V4SI_type_node = build_opaque_vector_type (intSI_type_node, 4);
17077 const_str_type_node
17078 = build_pointer_type (build_qualified_type (char_type_node,
17079 TYPE_QUAL_CONST));
17081 /* We use V1TI mode as a special container to hold __int128_t items that
17082 must live in VSX registers. */
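/* The "__vector __int128" and "__vector unsigned __int128" types
   registered further down are built on these V1TI nodes.  */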
17083 if (intTI_type_node)
17085 V1TI_type_node = build_vector_type (intTI_type_node, 1);
17086 unsigned_V1TI_type_node = build_vector_type (unsigned_intTI_type_node, 1);
17089 /* The 'vector bool ...' types must be kept distinct from 'vector unsigned ...'
17090 types, especially in C++ land. Similarly, 'vector pixel' is distinct from
17091 'vector unsigned short'. */
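/* For example, C++ overload resolution and name mangling must be able
   to tell "vector bool char" from "vector unsigned char" even though
   both are sixteen unsigned bytes, so each gets its own distinct type
   node via build_distinct_type_copy below.  */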
17093 bool_char_type_node = build_distinct_type_copy (unsigned_intQI_type_node);
17094 bool_short_type_node = build_distinct_type_copy (unsigned_intHI_type_node);
17095 bool_int_type_node = build_distinct_type_copy (unsigned_intSI_type_node);
17096 bool_long_type_node = build_distinct_type_copy (unsigned_intDI_type_node);
17097 pixel_type_node = build_distinct_type_copy (unsigned_intHI_type_node);
17099 long_integer_type_internal_node = long_integer_type_node;
17100 long_unsigned_type_internal_node = long_unsigned_type_node;
17101 long_long_integer_type_internal_node = long_long_integer_type_node;
17102 long_long_unsigned_type_internal_node = long_long_unsigned_type_node;
17103 intQI_type_internal_node = intQI_type_node;
17104 uintQI_type_internal_node = unsigned_intQI_type_node;
17105 intHI_type_internal_node = intHI_type_node;
17106 uintHI_type_internal_node = unsigned_intHI_type_node;
17107 intSI_type_internal_node = intSI_type_node;
17108 uintSI_type_internal_node = unsigned_intSI_type_node;
17109 intDI_type_internal_node = intDI_type_node;
17110 uintDI_type_internal_node = unsigned_intDI_type_node;
17111 intTI_type_internal_node = intTI_type_node;
17112 uintTI_type_internal_node = unsigned_intTI_type_node;
17113 float_type_internal_node = float_type_node;
17114 double_type_internal_node = double_type_node;
17115 long_double_type_internal_node = long_double_type_node;
17116 dfloat64_type_internal_node = dfloat64_type_node;
17117 dfloat128_type_internal_node = dfloat128_type_node;
17118 void_type_internal_node = void_type_node;
17120 /* 128-bit floating point support. KFmode is IEEE 128-bit floating point.
17121 IFmode is the IBM extended 128-bit format that is a pair of doubles.
17122 TFmode will be either IEEE 128-bit floating point or the IBM double-double
17123 format that uses a pair of doubles, depending on the switches and
17124 defaults.
17126 We do not enable the actual __float128 keyword unless the user explicitly
17127 asks for it, because the library support is not yet complete.
17129 If we don't support either 128-bit IBM double-double or IEEE 128-bit
17130 floating point, we need to make sure the type is non-zero or else the
17131 self-test fails during bootstrap.
17133 We don't register a built-in type for __ibm128 if the type is the same as
17134 long double. Instead, rs6000_cpu_cpp_builtins #defines __ibm128 to
17135 long double. */
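/* So, for example, "__float128 x;" is only accepted directly when
   -mfloat128 is in effect; with just TARGET_FLOAT128_TYPE the type is
   created below under the dummy __ieee128 keyword instead.  */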
17136 if (TARGET_LONG_DOUBLE_128 && FLOAT128_IEEE_P (TFmode))
17138 ibm128_float_type_node = make_node (REAL_TYPE);
17139 TYPE_PRECISION (ibm128_float_type_node) = 128;
17140 SET_TYPE_MODE (ibm128_float_type_node, IFmode);
17141 layout_type (ibm128_float_type_node);
17143 lang_hooks.types.register_builtin_type (ibm128_float_type_node,
17144 "__ibm128");
17146 else
17147 ibm128_float_type_node = long_double_type_node;
17149 if (TARGET_FLOAT128_KEYWORD)
17151 ieee128_float_type_node = float128_type_node;
17152 lang_hooks.types.register_builtin_type (ieee128_float_type_node,
17153 "__float128");
17156 else if (TARGET_FLOAT128_TYPE)
17158 ieee128_float_type_node = make_node (REAL_TYPE);
17159 TYPE_PRECISION (ieee128_float_type_node) = 128;
17160 SET_TYPE_MODE (ieee128_float_type_node, KFmode);
17161 layout_type (ieee128_float_type_node);
17163 /* If we are not exporting the __float128/_Float128 keywords, we need a
17164 keyword to get the types created. Use __ieee128 as the dummy
17165 keyword. */
17166 lang_hooks.types.register_builtin_type (ieee128_float_type_node,
17167 "__ieee128");
17170 else
17171 ieee128_float_type_node = long_double_type_node;
17173 /* Initialize the modes for builtin_function_type, mapping a machine mode to
17174 tree type node. */
17175 builtin_mode_to_type[QImode][0] = integer_type_node;
17176 builtin_mode_to_type[HImode][0] = integer_type_node;
17177 builtin_mode_to_type[SImode][0] = intSI_type_node;
17178 builtin_mode_to_type[SImode][1] = unsigned_intSI_type_node;
17179 builtin_mode_to_type[DImode][0] = intDI_type_node;
17180 builtin_mode_to_type[DImode][1] = unsigned_intDI_type_node;
17181 builtin_mode_to_type[TImode][0] = intTI_type_node;
17182 builtin_mode_to_type[TImode][1] = unsigned_intTI_type_node;
17183 builtin_mode_to_type[SFmode][0] = float_type_node;
17184 builtin_mode_to_type[DFmode][0] = double_type_node;
17185 builtin_mode_to_type[IFmode][0] = ibm128_float_type_node;
17186 builtin_mode_to_type[KFmode][0] = ieee128_float_type_node;
17187 builtin_mode_to_type[TFmode][0] = long_double_type_node;
17188 builtin_mode_to_type[DDmode][0] = dfloat64_type_node;
17189 builtin_mode_to_type[TDmode][0] = dfloat128_type_node;
17190 builtin_mode_to_type[V1TImode][0] = V1TI_type_node;
17191 builtin_mode_to_type[V1TImode][1] = unsigned_V1TI_type_node;
17192 builtin_mode_to_type[V2SImode][0] = V2SI_type_node;
17193 builtin_mode_to_type[V2SFmode][0] = V2SF_type_node;
17194 builtin_mode_to_type[V2DImode][0] = V2DI_type_node;
17195 builtin_mode_to_type[V2DImode][1] = unsigned_V2DI_type_node;
17196 builtin_mode_to_type[V2DFmode][0] = V2DF_type_node;
17197 builtin_mode_to_type[V4HImode][0] = V4HI_type_node;
17198 builtin_mode_to_type[V4SImode][0] = V4SI_type_node;
17199 builtin_mode_to_type[V4SImode][1] = unsigned_V4SI_type_node;
17200 builtin_mode_to_type[V4SFmode][0] = V4SF_type_node;
17201 builtin_mode_to_type[V8HImode][0] = V8HI_type_node;
17202 builtin_mode_to_type[V8HImode][1] = unsigned_V8HI_type_node;
17203 builtin_mode_to_type[V16QImode][0] = V16QI_type_node;
17204 builtin_mode_to_type[V16QImode][1] = unsigned_V16QI_type_node;
17206 tdecl = add_builtin_type ("__bool char", bool_char_type_node);
17207 TYPE_NAME (bool_char_type_node) = tdecl;
17209 tdecl = add_builtin_type ("__bool short", bool_short_type_node);
17210 TYPE_NAME (bool_short_type_node) = tdecl;
17212 tdecl = add_builtin_type ("__bool int", bool_int_type_node);
17213 TYPE_NAME (bool_int_type_node) = tdecl;
17215 tdecl = add_builtin_type ("__pixel", pixel_type_node);
17216 TYPE_NAME (pixel_type_node) = tdecl;
17218 bool_V16QI_type_node = build_vector_type (bool_char_type_node, 16);
17219 bool_V8HI_type_node = build_vector_type (bool_short_type_node, 8);
17220 bool_V4SI_type_node = build_vector_type (bool_int_type_node, 4);
17221 bool_V2DI_type_node = build_vector_type (bool_long_type_node, 2);
17222 pixel_V8HI_type_node = build_vector_type (pixel_type_node, 8);
17224 tdecl = add_builtin_type ("__vector unsigned char", unsigned_V16QI_type_node);
17225 TYPE_NAME (unsigned_V16QI_type_node) = tdecl;
17227 tdecl = add_builtin_type ("__vector signed char", V16QI_type_node);
17228 TYPE_NAME (V16QI_type_node) = tdecl;
17230 tdecl = add_builtin_type ("__vector __bool char", bool_V16QI_type_node);
17231 TYPE_NAME (bool_V16QI_type_node) = tdecl;
17233 tdecl = add_builtin_type ("__vector unsigned short", unsigned_V8HI_type_node);
17234 TYPE_NAME (unsigned_V8HI_type_node) = tdecl;
17236 tdecl = add_builtin_type ("__vector signed short", V8HI_type_node);
17237 TYPE_NAME (V8HI_type_node) = tdecl;
17239 tdecl = add_builtin_type ("__vector __bool short", bool_V8HI_type_node);
17240 TYPE_NAME (bool_V8HI_type_node) = tdecl;
17242 tdecl = add_builtin_type ("__vector unsigned int", unsigned_V4SI_type_node);
17243 TYPE_NAME (unsigned_V4SI_type_node) = tdecl;
17245 tdecl = add_builtin_type ("__vector signed int", V4SI_type_node);
17246 TYPE_NAME (V4SI_type_node) = tdecl;
17248 tdecl = add_builtin_type ("__vector __bool int", bool_V4SI_type_node);
17249 TYPE_NAME (bool_V4SI_type_node) = tdecl;
17251 tdecl = add_builtin_type ("__vector float", V4SF_type_node);
17252 TYPE_NAME (V4SF_type_node) = tdecl;
17254 tdecl = add_builtin_type ("__vector __pixel", pixel_V8HI_type_node);
17255 TYPE_NAME (pixel_V8HI_type_node) = tdecl;
17257 tdecl = add_builtin_type ("__vector double", V2DF_type_node);
17258 TYPE_NAME (V2DF_type_node) = tdecl;
17260 if (TARGET_POWERPC64)
17262 tdecl = add_builtin_type ("__vector long", V2DI_type_node);
17263 TYPE_NAME (V2DI_type_node) = tdecl;
17265 tdecl = add_builtin_type ("__vector unsigned long",
17266 unsigned_V2DI_type_node);
17267 TYPE_NAME (unsigned_V2DI_type_node) = tdecl;
17269 tdecl = add_builtin_type ("__vector __bool long", bool_V2DI_type_node);
17270 TYPE_NAME (bool_V2DI_type_node) = tdecl;
17272 else
17274 tdecl = add_builtin_type ("__vector long long", V2DI_type_node);
17275 TYPE_NAME (V2DI_type_node) = tdecl;
17277 tdecl = add_builtin_type ("__vector unsigned long long",
17278 unsigned_V2DI_type_node);
17279 TYPE_NAME (unsigned_V2DI_type_node) = tdecl;
17281 tdecl = add_builtin_type ("__vector __bool long long",
17282 bool_V2DI_type_node);
17283 TYPE_NAME (bool_V2DI_type_node) = tdecl;
17286 if (V1TI_type_node)
17288 tdecl = add_builtin_type ("__vector __int128", V1TI_type_node);
17289 TYPE_NAME (V1TI_type_node) = tdecl;
17291 tdecl = add_builtin_type ("__vector unsigned __int128",
17292 unsigned_V1TI_type_node);
17293 TYPE_NAME (unsigned_V1TI_type_node) = tdecl;
17296 /* Paired and SPE builtins are only available if the compiler was built
17297 with the corresponding options, so only create them when the matching
17298 compiler option is enabled. Create AltiVec and VSX builtins on machines
17299 with at least the general purpose extensions (970 and newer) to allow the
17300 use of the target attribute. */
17301 if (TARGET_PAIRED_FLOAT)
17302 paired_init_builtins ();
17303 if (TARGET_SPE)
17304 spe_init_builtins ();
17305 if (TARGET_EXTRA_BUILTINS)
17306 altivec_init_builtins ();
17307 if (TARGET_HTM)
17308 htm_init_builtins ();
17310 if (TARGET_EXTRA_BUILTINS || TARGET_SPE || TARGET_PAIRED_FLOAT)
17311 rs6000_common_init_builtins ();
17313 ftype = build_function_type_list (ieee128_float_type_node,
17314 const_str_type_node, NULL_TREE);
17315 def_builtin ("__builtin_nanq", ftype, RS6000_BUILTIN_NANQ);
17316 def_builtin ("__builtin_nansq", ftype, RS6000_BUILTIN_NANSQ);
17318 ftype = build_function_type_list (ieee128_float_type_node, NULL_TREE);
17319 def_builtin ("__builtin_infq", ftype, RS6000_BUILTIN_INFQ);
17320 def_builtin ("__builtin_huge_valq", ftype, RS6000_BUILTIN_HUGE_VALQ);
17322 ftype = builtin_function_type (DFmode, DFmode, DFmode, VOIDmode,
17323 RS6000_BUILTIN_RECIP, "__builtin_recipdiv");
17324 def_builtin ("__builtin_recipdiv", ftype, RS6000_BUILTIN_RECIP);
17326 ftype = builtin_function_type (SFmode, SFmode, SFmode, VOIDmode,
17327 RS6000_BUILTIN_RECIPF, "__builtin_recipdivf");
17328 def_builtin ("__builtin_recipdivf", ftype, RS6000_BUILTIN_RECIPF);
17330 ftype = builtin_function_type (DFmode, DFmode, VOIDmode, VOIDmode,
17331 RS6000_BUILTIN_RSQRT, "__builtin_rsqrt");
17332 def_builtin ("__builtin_rsqrt", ftype, RS6000_BUILTIN_RSQRT);
17334 ftype = builtin_function_type (SFmode, SFmode, VOIDmode, VOIDmode,
17335 RS6000_BUILTIN_RSQRTF, "__builtin_rsqrtf");
17336 def_builtin ("__builtin_rsqrtf", ftype, RS6000_BUILTIN_RSQRTF);
17338 mode = (TARGET_64BIT) ? DImode : SImode;
17339 ftype = builtin_function_type (mode, mode, mode, VOIDmode,
17340 POWER7_BUILTIN_BPERMD, "__builtin_bpermd");
17341 def_builtin ("__builtin_bpermd", ftype, POWER7_BUILTIN_BPERMD);
17343 ftype = build_function_type_list (unsigned_intDI_type_node,
17344 NULL_TREE);
17345 def_builtin ("__builtin_ppc_get_timebase", ftype, RS6000_BUILTIN_GET_TB);
17347 if (TARGET_64BIT)
17348 ftype = build_function_type_list (unsigned_intDI_type_node,
17349 NULL_TREE);
17350 else
17351 ftype = build_function_type_list (unsigned_intSI_type_node,
17352 NULL_TREE);
17353 def_builtin ("__builtin_ppc_mftb", ftype, RS6000_BUILTIN_MFTB);
17355 ftype = build_function_type_list (double_type_node, NULL_TREE);
17356 def_builtin ("__builtin_mffs", ftype, RS6000_BUILTIN_MFFS);
17358 ftype = build_function_type_list (void_type_node,
17359 intSI_type_node, double_type_node,
17360 NULL_TREE);
17361 def_builtin ("__builtin_mtfsf", ftype, RS6000_BUILTIN_MTFSF);
17363 ftype = build_function_type_list (void_type_node, NULL_TREE);
17364 def_builtin ("__builtin_cpu_init", ftype, RS6000_BUILTIN_CPU_INIT);
17366 ftype = build_function_type_list (bool_int_type_node, const_ptr_type_node,
17367 NULL_TREE);
17368 def_builtin ("__builtin_cpu_is", ftype, RS6000_BUILTIN_CPU_IS);
17369 def_builtin ("__builtin_cpu_supports", ftype, RS6000_BUILTIN_CPU_SUPPORTS);
17371 /* AIX libm provides clog as __clog. */
17372 if (TARGET_XCOFF
17373 && (tdecl = builtin_decl_explicit (BUILT_IN_CLOG)) != NULL_TREE)
17374 set_user_assembler_name (tdecl, "__clog");
17376 #ifdef SUBTARGET_INIT_BUILTINS
17377 SUBTARGET_INIT_BUILTINS;
17378 #endif
17381 /* Returns the rs6000 builtin decl for CODE. */
17383 static tree
17384 rs6000_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED)
17386 HOST_WIDE_INT fnmask;
17388 if (code >= RS6000_BUILTIN_COUNT)
17389 return error_mark_node;
17391 fnmask = rs6000_builtin_info[code].mask;
17392 if ((fnmask & rs6000_builtin_mask) != fnmask)
17394 rs6000_invalid_builtin ((enum rs6000_builtins)code);
17395 return error_mark_node;
17398 return rs6000_builtin_decls[code];
17401 static void
17402 spe_init_builtins (void)
17404 tree puint_type_node = build_pointer_type (unsigned_type_node);
17405 tree pushort_type_node = build_pointer_type (short_unsigned_type_node);
17406 const struct builtin_description *d;
17407 size_t i;
17408 HOST_WIDE_INT builtin_mask = rs6000_builtin_mask;
17410 tree v2si_ftype_4_v2si
17411 = build_function_type_list (opaque_V2SI_type_node,
17412 opaque_V2SI_type_node,
17413 opaque_V2SI_type_node,
17414 opaque_V2SI_type_node,
17415 opaque_V2SI_type_node,
17416 NULL_TREE);
17418 tree v2sf_ftype_4_v2sf
17419 = build_function_type_list (opaque_V2SF_type_node,
17420 opaque_V2SF_type_node,
17421 opaque_V2SF_type_node,
17422 opaque_V2SF_type_node,
17423 opaque_V2SF_type_node,
17424 NULL_TREE);
17426 tree int_ftype_int_v2si_v2si
17427 = build_function_type_list (integer_type_node,
17428 integer_type_node,
17429 opaque_V2SI_type_node,
17430 opaque_V2SI_type_node,
17431 NULL_TREE);
17433 tree int_ftype_int_v2sf_v2sf
17434 = build_function_type_list (integer_type_node,
17435 integer_type_node,
17436 opaque_V2SF_type_node,
17437 opaque_V2SF_type_node,
17438 NULL_TREE);
17440 tree void_ftype_v2si_puint_int
17441 = build_function_type_list (void_type_node,
17442 opaque_V2SI_type_node,
17443 puint_type_node,
17444 integer_type_node,
17445 NULL_TREE);
17447 tree void_ftype_v2si_puint_char
17448 = build_function_type_list (void_type_node,
17449 opaque_V2SI_type_node,
17450 puint_type_node,
17451 char_type_node,
17452 NULL_TREE);
17454 tree void_ftype_v2si_pv2si_int
17455 = build_function_type_list (void_type_node,
17456 opaque_V2SI_type_node,
17457 opaque_p_V2SI_type_node,
17458 integer_type_node,
17459 NULL_TREE);
17461 tree void_ftype_v2si_pv2si_char
17462 = build_function_type_list (void_type_node,
17463 opaque_V2SI_type_node,
17464 opaque_p_V2SI_type_node,
17465 char_type_node,
17466 NULL_TREE);
17468 tree void_ftype_int
17469 = build_function_type_list (void_type_node, integer_type_node, NULL_TREE);
17471 tree int_ftype_void
17472 = build_function_type_list (integer_type_node, NULL_TREE);
17474 tree v2si_ftype_pv2si_int
17475 = build_function_type_list (opaque_V2SI_type_node,
17476 opaque_p_V2SI_type_node,
17477 integer_type_node,
17478 NULL_TREE);
17480 tree v2si_ftype_puint_int
17481 = build_function_type_list (opaque_V2SI_type_node,
17482 puint_type_node,
17483 integer_type_node,
17484 NULL_TREE);
17486 tree v2si_ftype_pushort_int
17487 = build_function_type_list (opaque_V2SI_type_node,
17488 pushort_type_node,
17489 integer_type_node,
17490 NULL_TREE);
17492 tree v2si_ftype_signed_char
17493 = build_function_type_list (opaque_V2SI_type_node,
17494 signed_char_type_node,
17495 NULL_TREE);
17497 add_builtin_type ("__ev64_opaque__", opaque_V2SI_type_node);
17499 /* Initialize irregular SPE builtins. */
17501 def_builtin ("__builtin_spe_mtspefscr", void_ftype_int, SPE_BUILTIN_MTSPEFSCR);
17502 def_builtin ("__builtin_spe_mfspefscr", int_ftype_void, SPE_BUILTIN_MFSPEFSCR);
17503 def_builtin ("__builtin_spe_evstddx", void_ftype_v2si_pv2si_int, SPE_BUILTIN_EVSTDDX);
17504 def_builtin ("__builtin_spe_evstdhx", void_ftype_v2si_pv2si_int, SPE_BUILTIN_EVSTDHX);
17505 def_builtin ("__builtin_spe_evstdwx", void_ftype_v2si_pv2si_int, SPE_BUILTIN_EVSTDWX);
17506 def_builtin ("__builtin_spe_evstwhex", void_ftype_v2si_puint_int, SPE_BUILTIN_EVSTWHEX);
17507 def_builtin ("__builtin_spe_evstwhox", void_ftype_v2si_puint_int, SPE_BUILTIN_EVSTWHOX);
17508 def_builtin ("__builtin_spe_evstwwex", void_ftype_v2si_puint_int, SPE_BUILTIN_EVSTWWEX);
17509 def_builtin ("__builtin_spe_evstwwox", void_ftype_v2si_puint_int, SPE_BUILTIN_EVSTWWOX);
17510 def_builtin ("__builtin_spe_evstdd", void_ftype_v2si_pv2si_char, SPE_BUILTIN_EVSTDD);
17511 def_builtin ("__builtin_spe_evstdh", void_ftype_v2si_pv2si_char, SPE_BUILTIN_EVSTDH);
17512 def_builtin ("__builtin_spe_evstdw", void_ftype_v2si_pv2si_char, SPE_BUILTIN_EVSTDW);
17513 def_builtin ("__builtin_spe_evstwhe", void_ftype_v2si_puint_char, SPE_BUILTIN_EVSTWHE);
17514 def_builtin ("__builtin_spe_evstwho", void_ftype_v2si_puint_char, SPE_BUILTIN_EVSTWHO);
17515 def_builtin ("__builtin_spe_evstwwe", void_ftype_v2si_puint_char, SPE_BUILTIN_EVSTWWE);
17516 def_builtin ("__builtin_spe_evstwwo", void_ftype_v2si_puint_char, SPE_BUILTIN_EVSTWWO);
17517 def_builtin ("__builtin_spe_evsplatfi", v2si_ftype_signed_char, SPE_BUILTIN_EVSPLATFI);
17518 def_builtin ("__builtin_spe_evsplati", v2si_ftype_signed_char, SPE_BUILTIN_EVSPLATI);
17520 /* Loads. */
17521 def_builtin ("__builtin_spe_evlddx", v2si_ftype_pv2si_int, SPE_BUILTIN_EVLDDX);
17522 def_builtin ("__builtin_spe_evldwx", v2si_ftype_pv2si_int, SPE_BUILTIN_EVLDWX);
17523 def_builtin ("__builtin_spe_evldhx", v2si_ftype_pv2si_int, SPE_BUILTIN_EVLDHX);
17524 def_builtin ("__builtin_spe_evlwhex", v2si_ftype_puint_int, SPE_BUILTIN_EVLWHEX);
17525 def_builtin ("__builtin_spe_evlwhoux", v2si_ftype_puint_int, SPE_BUILTIN_EVLWHOUX);
17526 def_builtin ("__builtin_spe_evlwhosx", v2si_ftype_puint_int, SPE_BUILTIN_EVLWHOSX);
17527 def_builtin ("__builtin_spe_evlwwsplatx", v2si_ftype_puint_int, SPE_BUILTIN_EVLWWSPLATX);
17528 def_builtin ("__builtin_spe_evlwhsplatx", v2si_ftype_puint_int, SPE_BUILTIN_EVLWHSPLATX);
17529 def_builtin ("__builtin_spe_evlhhesplatx", v2si_ftype_pushort_int, SPE_BUILTIN_EVLHHESPLATX);
17530 def_builtin ("__builtin_spe_evlhhousplatx", v2si_ftype_pushort_int, SPE_BUILTIN_EVLHHOUSPLATX);
17531 def_builtin ("__builtin_spe_evlhhossplatx", v2si_ftype_pushort_int, SPE_BUILTIN_EVLHHOSSPLATX);
17532 def_builtin ("__builtin_spe_evldd", v2si_ftype_pv2si_int, SPE_BUILTIN_EVLDD);
17533 def_builtin ("__builtin_spe_evldw", v2si_ftype_pv2si_int, SPE_BUILTIN_EVLDW);
17534 def_builtin ("__builtin_spe_evldh", v2si_ftype_pv2si_int, SPE_BUILTIN_EVLDH);
17535 def_builtin ("__builtin_spe_evlhhesplat", v2si_ftype_pushort_int, SPE_BUILTIN_EVLHHESPLAT);
17536 def_builtin ("__builtin_spe_evlhhossplat", v2si_ftype_pushort_int, SPE_BUILTIN_EVLHHOSSPLAT);
17537 def_builtin ("__builtin_spe_evlhhousplat", v2si_ftype_pushort_int, SPE_BUILTIN_EVLHHOUSPLAT);
17538 def_builtin ("__builtin_spe_evlwhe", v2si_ftype_puint_int, SPE_BUILTIN_EVLWHE);
17539 def_builtin ("__builtin_spe_evlwhos", v2si_ftype_puint_int, SPE_BUILTIN_EVLWHOS);
17540 def_builtin ("__builtin_spe_evlwhou", v2si_ftype_puint_int, SPE_BUILTIN_EVLWHOU);
17541 def_builtin ("__builtin_spe_evlwhsplat", v2si_ftype_puint_int, SPE_BUILTIN_EVLWHSPLAT);
17542 def_builtin ("__builtin_spe_evlwwsplat", v2si_ftype_puint_int, SPE_BUILTIN_EVLWWSPLAT);
17544 /* Predicates. */
17545 d = bdesc_spe_predicates;
17546 for (i = 0; i < ARRAY_SIZE (bdesc_spe_predicates); ++i, d++)
17548 tree type;
17549 HOST_WIDE_INT mask = d->mask;
17551 if ((mask & builtin_mask) != mask)
17553 if (TARGET_DEBUG_BUILTIN)
17554 fprintf (stderr, "spe_init_builtins, skip predicate %s\n",
17555 d->name);
17556 continue;
17559 /* Cannot define builtin if the instruction is disabled. */
17560 gcc_assert (d->icode != CODE_FOR_nothing);
17561 switch (insn_data[d->icode].operand[1].mode)
17563 case V2SImode:
17564 type = int_ftype_int_v2si_v2si;
17565 break;
17566 case V2SFmode:
17567 type = int_ftype_int_v2sf_v2sf;
17568 break;
17569 default:
17570 gcc_unreachable ();
17573 def_builtin (d->name, type, d->code);
17576 /* Evsel predicates. */
17577 d = bdesc_spe_evsel;
17578 for (i = 0; i < ARRAY_SIZE (bdesc_spe_evsel); ++i, d++)
17580 tree type;
17581 HOST_WIDE_INT mask = d->mask;
17583 if ((mask & builtin_mask) != mask)
17585 if (TARGET_DEBUG_BUILTIN)
17586 fprintf (stderr, "spe_init_builtins, skip evsel %s\n",
17587 d->name);
17588 continue;
17591 /* Cannot define builtin if the instruction is disabled. */
17592 gcc_assert (d->icode != CODE_FOR_nothing);
17593 switch (insn_data[d->icode].operand[1].mode)
17595 case V2SImode:
17596 type = v2si_ftype_4_v2si;
17597 break;
17598 case V2SFmode:
17599 type = v2sf_ftype_4_v2sf;
17600 break;
17601 default:
17602 gcc_unreachable ();
17605 def_builtin (d->name, type, d->code);
17609 static void
17610 paired_init_builtins (void)
17612 const struct builtin_description *d;
17613 size_t i;
17614 HOST_WIDE_INT builtin_mask = rs6000_builtin_mask;
17616 tree int_ftype_int_v2sf_v2sf
17617 = build_function_type_list (integer_type_node,
17618 integer_type_node,
17619 V2SF_type_node,
17620 V2SF_type_node,
17621 NULL_TREE);
17622 tree pcfloat_type_node =
17623 build_pointer_type (build_qualified_type
17624 (float_type_node, TYPE_QUAL_CONST));
17626 tree v2sf_ftype_long_pcfloat = build_function_type_list (V2SF_type_node,
17627 long_integer_type_node,
17628 pcfloat_type_node,
17629 NULL_TREE);
17630 tree void_ftype_v2sf_long_pcfloat =
17631 build_function_type_list (void_type_node,
17632 V2SF_type_node,
17633 long_integer_type_node,
17634 pcfloat_type_node,
17635 NULL_TREE);
17638 def_builtin ("__builtin_paired_lx", v2sf_ftype_long_pcfloat,
17639 PAIRED_BUILTIN_LX);
17642 def_builtin ("__builtin_paired_stx", void_ftype_v2sf_long_pcfloat,
17643 PAIRED_BUILTIN_STX);
17645 /* Predicates. */
17646 d = bdesc_paired_preds;
17647 for (i = 0; i < ARRAY_SIZE (bdesc_paired_preds); ++i, d++)
17649 tree type;
17650 HOST_WIDE_INT mask = d->mask;
17652 if ((mask & builtin_mask) != mask)
17654 if (TARGET_DEBUG_BUILTIN)
17655 fprintf (stderr, "paired_init_builtins, skip predicate %s\n",
17656 d->name);
17657 continue;
17660 /* Cannot define builtin if the instruction is disabled. */
17661 gcc_assert (d->icode != CODE_FOR_nothing);
17663 if (TARGET_DEBUG_BUILTIN)
17664 fprintf (stderr, "paired pred #%d, insn = %s [%d], mode = %s\n",
17665 (int)i, get_insn_name (d->icode), (int)d->icode,
17666 GET_MODE_NAME (insn_data[d->icode].operand[1].mode));
17668 switch (insn_data[d->icode].operand[1].mode)
17670 case V2SFmode:
17671 type = int_ftype_int_v2sf_v2sf;
17672 break;
17673 default:
17674 gcc_unreachable ();
17677 def_builtin (d->name, type, d->code);
17681 static void
17682 altivec_init_builtins (void)
17684 const struct builtin_description *d;
17685 size_t i;
17686 tree ftype;
17687 tree decl;
17688 HOST_WIDE_INT builtin_mask = rs6000_builtin_mask;
17690 tree pvoid_type_node = build_pointer_type (void_type_node);
17692 tree pcvoid_type_node
17693 = build_pointer_type (build_qualified_type (void_type_node,
17694 TYPE_QUAL_CONST));
17696 tree int_ftype_opaque
17697 = build_function_type_list (integer_type_node,
17698 opaque_V4SI_type_node, NULL_TREE);
17699 tree opaque_ftype_opaque
17700 = build_function_type_list (integer_type_node, NULL_TREE);
17701 tree opaque_ftype_opaque_int
17702 = build_function_type_list (opaque_V4SI_type_node,
17703 opaque_V4SI_type_node, integer_type_node, NULL_TREE);
17704 tree opaque_ftype_opaque_opaque_int
17705 = build_function_type_list (opaque_V4SI_type_node,
17706 opaque_V4SI_type_node, opaque_V4SI_type_node,
17707 integer_type_node, NULL_TREE);
17708 tree opaque_ftype_opaque_opaque_opaque
17709 = build_function_type_list (opaque_V4SI_type_node,
17710 opaque_V4SI_type_node, opaque_V4SI_type_node,
17711 opaque_V4SI_type_node, NULL_TREE);
17712 tree opaque_ftype_opaque_opaque
17713 = build_function_type_list (opaque_V4SI_type_node,
17714 opaque_V4SI_type_node, opaque_V4SI_type_node,
17715 NULL_TREE);
17716 tree int_ftype_int_opaque_opaque
17717 = build_function_type_list (integer_type_node,
17718 integer_type_node, opaque_V4SI_type_node,
17719 opaque_V4SI_type_node, NULL_TREE);
17720 tree int_ftype_int_v4si_v4si
17721 = build_function_type_list (integer_type_node,
17722 integer_type_node, V4SI_type_node,
17723 V4SI_type_node, NULL_TREE);
17724 tree int_ftype_int_v2di_v2di
17725 = build_function_type_list (integer_type_node,
17726 integer_type_node, V2DI_type_node,
17727 V2DI_type_node, NULL_TREE);
17728 tree void_ftype_v4si
17729 = build_function_type_list (void_type_node, V4SI_type_node, NULL_TREE);
17730 tree v8hi_ftype_void
17731 = build_function_type_list (V8HI_type_node, NULL_TREE);
17732 tree void_ftype_void
17733 = build_function_type_list (void_type_node, NULL_TREE);
17734 tree void_ftype_int
17735 = build_function_type_list (void_type_node, integer_type_node, NULL_TREE);
17737 tree opaque_ftype_long_pcvoid
17738 = build_function_type_list (opaque_V4SI_type_node,
17739 long_integer_type_node, pcvoid_type_node,
17740 NULL_TREE);
17741 tree v16qi_ftype_long_pcvoid
17742 = build_function_type_list (V16QI_type_node,
17743 long_integer_type_node, pcvoid_type_node,
17744 NULL_TREE);
17745 tree v8hi_ftype_long_pcvoid
17746 = build_function_type_list (V8HI_type_node,
17747 long_integer_type_node, pcvoid_type_node,
17748 NULL_TREE);
17749 tree v4si_ftype_long_pcvoid
17750 = build_function_type_list (V4SI_type_node,
17751 long_integer_type_node, pcvoid_type_node,
17752 NULL_TREE);
17753 tree v4sf_ftype_long_pcvoid
17754 = build_function_type_list (V4SF_type_node,
17755 long_integer_type_node, pcvoid_type_node,
17756 NULL_TREE);
17757 tree v2df_ftype_long_pcvoid
17758 = build_function_type_list (V2DF_type_node,
17759 long_integer_type_node, pcvoid_type_node,
17760 NULL_TREE);
17761 tree v2di_ftype_long_pcvoid
17762 = build_function_type_list (V2DI_type_node,
17763 long_integer_type_node, pcvoid_type_node,
17764 NULL_TREE);
17766 tree void_ftype_opaque_long_pvoid
17767 = build_function_type_list (void_type_node,
17768 opaque_V4SI_type_node, long_integer_type_node,
17769 pvoid_type_node, NULL_TREE);
17770 tree void_ftype_v4si_long_pvoid
17771 = build_function_type_list (void_type_node,
17772 V4SI_type_node, long_integer_type_node,
17773 pvoid_type_node, NULL_TREE);
17774 tree void_ftype_v16qi_long_pvoid
17775 = build_function_type_list (void_type_node,
17776 V16QI_type_node, long_integer_type_node,
17777 pvoid_type_node, NULL_TREE);
17779 tree void_ftype_v16qi_pvoid_long
17780 = build_function_type_list (void_type_node,
17781 V16QI_type_node, pvoid_type_node,
17782 long_integer_type_node, NULL_TREE);
17784 tree void_ftype_v8hi_long_pvoid
17785 = build_function_type_list (void_type_node,
17786 V8HI_type_node, long_integer_type_node,
17787 pvoid_type_node, NULL_TREE);
17788 tree void_ftype_v4sf_long_pvoid
17789 = build_function_type_list (void_type_node,
17790 V4SF_type_node, long_integer_type_node,
17791 pvoid_type_node, NULL_TREE);
17792 tree void_ftype_v2df_long_pvoid
17793 = build_function_type_list (void_type_node,
17794 V2DF_type_node, long_integer_type_node,
17795 pvoid_type_node, NULL_TREE);
17796 tree void_ftype_v2di_long_pvoid
17797 = build_function_type_list (void_type_node,
17798 V2DI_type_node, long_integer_type_node,
17799 pvoid_type_node, NULL_TREE);
17800 tree int_ftype_int_v8hi_v8hi
17801 = build_function_type_list (integer_type_node,
17802 integer_type_node, V8HI_type_node,
17803 V8HI_type_node, NULL_TREE);
17804 tree int_ftype_int_v16qi_v16qi
17805 = build_function_type_list (integer_type_node,
17806 integer_type_node, V16QI_type_node,
17807 V16QI_type_node, NULL_TREE);
17808 tree int_ftype_int_v4sf_v4sf
17809 = build_function_type_list (integer_type_node,
17810 integer_type_node, V4SF_type_node,
17811 V4SF_type_node, NULL_TREE);
17812 tree int_ftype_int_v2df_v2df
17813 = build_function_type_list (integer_type_node,
17814 integer_type_node, V2DF_type_node,
17815 V2DF_type_node, NULL_TREE);
17816 tree v2di_ftype_v2di
17817 = build_function_type_list (V2DI_type_node, V2DI_type_node, NULL_TREE);
17818 tree v4si_ftype_v4si
17819 = build_function_type_list (V4SI_type_node, V4SI_type_node, NULL_TREE);
17820 tree v8hi_ftype_v8hi
17821 = build_function_type_list (V8HI_type_node, V8HI_type_node, NULL_TREE);
17822 tree v16qi_ftype_v16qi
17823 = build_function_type_list (V16QI_type_node, V16QI_type_node, NULL_TREE);
17824 tree v4sf_ftype_v4sf
17825 = build_function_type_list (V4SF_type_node, V4SF_type_node, NULL_TREE);
17826 tree v2df_ftype_v2df
17827 = build_function_type_list (V2DF_type_node, V2DF_type_node, NULL_TREE);
17828 tree void_ftype_pcvoid_int_int
17829 = build_function_type_list (void_type_node,
17830 pcvoid_type_node, integer_type_node,
17831 integer_type_node, NULL_TREE);
17833 def_builtin ("__builtin_altivec_mtvscr", void_ftype_v4si, ALTIVEC_BUILTIN_MTVSCR);
17834 def_builtin ("__builtin_altivec_mfvscr", v8hi_ftype_void, ALTIVEC_BUILTIN_MFVSCR);
17835 def_builtin ("__builtin_altivec_dssall", void_ftype_void, ALTIVEC_BUILTIN_DSSALL);
17836 def_builtin ("__builtin_altivec_dss", void_ftype_int, ALTIVEC_BUILTIN_DSS);
17837 def_builtin ("__builtin_altivec_lvsl", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVSL);
17838 def_builtin ("__builtin_altivec_lvsr", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVSR);
17839 def_builtin ("__builtin_altivec_lvebx", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVEBX);
17840 def_builtin ("__builtin_altivec_lvehx", v8hi_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVEHX);
17841 def_builtin ("__builtin_altivec_lvewx", v4si_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVEWX);
17842 def_builtin ("__builtin_altivec_lvxl", v4si_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVXL);
17843 def_builtin ("__builtin_altivec_lvxl_v2df", v2df_ftype_long_pcvoid,
17844 ALTIVEC_BUILTIN_LVXL_V2DF);
17845 def_builtin ("__builtin_altivec_lvxl_v2di", v2di_ftype_long_pcvoid,
17846 ALTIVEC_BUILTIN_LVXL_V2DI);
17847 def_builtin ("__builtin_altivec_lvxl_v4sf", v4sf_ftype_long_pcvoid,
17848 ALTIVEC_BUILTIN_LVXL_V4SF);
17849 def_builtin ("__builtin_altivec_lvxl_v4si", v4si_ftype_long_pcvoid,
17850 ALTIVEC_BUILTIN_LVXL_V4SI);
17851 def_builtin ("__builtin_altivec_lvxl_v8hi", v8hi_ftype_long_pcvoid,
17852 ALTIVEC_BUILTIN_LVXL_V8HI);
17853 def_builtin ("__builtin_altivec_lvxl_v16qi", v16qi_ftype_long_pcvoid,
17854 ALTIVEC_BUILTIN_LVXL_V16QI);
17855 def_builtin ("__builtin_altivec_lvx", v4si_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVX);
17856 def_builtin ("__builtin_altivec_lvx_v2df", v2df_ftype_long_pcvoid,
17857 ALTIVEC_BUILTIN_LVX_V2DF);
17858 def_builtin ("__builtin_altivec_lvx_v2di", v2di_ftype_long_pcvoid,
17859 ALTIVEC_BUILTIN_LVX_V2DI);
17860 def_builtin ("__builtin_altivec_lvx_v4sf", v4sf_ftype_long_pcvoid,
17861 ALTIVEC_BUILTIN_LVX_V4SF);
17862 def_builtin ("__builtin_altivec_lvx_v4si", v4si_ftype_long_pcvoid,
17863 ALTIVEC_BUILTIN_LVX_V4SI);
17864 def_builtin ("__builtin_altivec_lvx_v8hi", v8hi_ftype_long_pcvoid,
17865 ALTIVEC_BUILTIN_LVX_V8HI);
17866 def_builtin ("__builtin_altivec_lvx_v16qi", v16qi_ftype_long_pcvoid,
17867 ALTIVEC_BUILTIN_LVX_V16QI);
17868 def_builtin ("__builtin_altivec_stvx", void_ftype_v4si_long_pvoid, ALTIVEC_BUILTIN_STVX);
17869 def_builtin ("__builtin_altivec_stvx_v2df", void_ftype_v2df_long_pvoid,
17870 ALTIVEC_BUILTIN_STVX_V2DF);
17871 def_builtin ("__builtin_altivec_stvx_v2di", void_ftype_v2di_long_pvoid,
17872 ALTIVEC_BUILTIN_STVX_V2DI);
17873 def_builtin ("__builtin_altivec_stvx_v4sf", void_ftype_v4sf_long_pvoid,
17874 ALTIVEC_BUILTIN_STVX_V4SF);
17875 def_builtin ("__builtin_altivec_stvx_v4si", void_ftype_v4si_long_pvoid,
17876 ALTIVEC_BUILTIN_STVX_V4SI);
17877 def_builtin ("__builtin_altivec_stvx_v8hi", void_ftype_v8hi_long_pvoid,
17878 ALTIVEC_BUILTIN_STVX_V8HI);
17879 def_builtin ("__builtin_altivec_stvx_v16qi", void_ftype_v16qi_long_pvoid,
17880 ALTIVEC_BUILTIN_STVX_V16QI);
17881 def_builtin ("__builtin_altivec_stvewx", void_ftype_v4si_long_pvoid, ALTIVEC_BUILTIN_STVEWX);
17882 def_builtin ("__builtin_altivec_stvxl", void_ftype_v4si_long_pvoid, ALTIVEC_BUILTIN_STVXL);
17883 def_builtin ("__builtin_altivec_stvxl_v2df", void_ftype_v2df_long_pvoid,
17884 ALTIVEC_BUILTIN_STVXL_V2DF);
17885 def_builtin ("__builtin_altivec_stvxl_v2di", void_ftype_v2di_long_pvoid,
17886 ALTIVEC_BUILTIN_STVXL_V2DI);
17887 def_builtin ("__builtin_altivec_stvxl_v4sf", void_ftype_v4sf_long_pvoid,
17888 ALTIVEC_BUILTIN_STVXL_V4SF);
17889 def_builtin ("__builtin_altivec_stvxl_v4si", void_ftype_v4si_long_pvoid,
17890 ALTIVEC_BUILTIN_STVXL_V4SI);
17891 def_builtin ("__builtin_altivec_stvxl_v8hi", void_ftype_v8hi_long_pvoid,
17892 ALTIVEC_BUILTIN_STVXL_V8HI);
17893 def_builtin ("__builtin_altivec_stvxl_v16qi", void_ftype_v16qi_long_pvoid,
17894 ALTIVEC_BUILTIN_STVXL_V16QI);
17895 def_builtin ("__builtin_altivec_stvebx", void_ftype_v16qi_long_pvoid, ALTIVEC_BUILTIN_STVEBX);
17896 def_builtin ("__builtin_altivec_stvehx", void_ftype_v8hi_long_pvoid, ALTIVEC_BUILTIN_STVEHX);
17897 def_builtin ("__builtin_vec_ld", opaque_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LD);
17898 def_builtin ("__builtin_vec_lde", opaque_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LDE);
17899 def_builtin ("__builtin_vec_ldl", opaque_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LDL);
17900 def_builtin ("__builtin_vec_lvsl", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LVSL);
17901 def_builtin ("__builtin_vec_lvsr", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LVSR);
17902 def_builtin ("__builtin_vec_lvebx", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LVEBX);
17903 def_builtin ("__builtin_vec_lvehx", v8hi_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LVEHX);
17904 def_builtin ("__builtin_vec_lvewx", v4si_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LVEWX);
17905 def_builtin ("__builtin_vec_st", void_ftype_opaque_long_pvoid, ALTIVEC_BUILTIN_VEC_ST);
17906 def_builtin ("__builtin_vec_ste", void_ftype_opaque_long_pvoid, ALTIVEC_BUILTIN_VEC_STE);
17907 def_builtin ("__builtin_vec_stl", void_ftype_opaque_long_pvoid, ALTIVEC_BUILTIN_VEC_STL);
17908 def_builtin ("__builtin_vec_stvewx", void_ftype_opaque_long_pvoid, ALTIVEC_BUILTIN_VEC_STVEWX);
17909 def_builtin ("__builtin_vec_stvebx", void_ftype_opaque_long_pvoid, ALTIVEC_BUILTIN_VEC_STVEBX);
17910 def_builtin ("__builtin_vec_stvehx", void_ftype_opaque_long_pvoid, ALTIVEC_BUILTIN_VEC_STVEHX);
17912 def_builtin ("__builtin_vsx_lxvd2x_v2df", v2df_ftype_long_pcvoid,
17913 VSX_BUILTIN_LXVD2X_V2DF);
17914 def_builtin ("__builtin_vsx_lxvd2x_v2di", v2di_ftype_long_pcvoid,
17915 VSX_BUILTIN_LXVD2X_V2DI);
17916 def_builtin ("__builtin_vsx_lxvw4x_v4sf", v4sf_ftype_long_pcvoid,
17917 VSX_BUILTIN_LXVW4X_V4SF);
17918 def_builtin ("__builtin_vsx_lxvw4x_v4si", v4si_ftype_long_pcvoid,
17919 VSX_BUILTIN_LXVW4X_V4SI);
17920 def_builtin ("__builtin_vsx_lxvw4x_v8hi", v8hi_ftype_long_pcvoid,
17921 VSX_BUILTIN_LXVW4X_V8HI);
17922 def_builtin ("__builtin_vsx_lxvw4x_v16qi", v16qi_ftype_long_pcvoid,
17923 VSX_BUILTIN_LXVW4X_V16QI);
17924 def_builtin ("__builtin_vsx_stxvd2x_v2df", void_ftype_v2df_long_pvoid,
17925 VSX_BUILTIN_STXVD2X_V2DF);
17926 def_builtin ("__builtin_vsx_stxvd2x_v2di", void_ftype_v2di_long_pvoid,
17927 VSX_BUILTIN_STXVD2X_V2DI);
17928 def_builtin ("__builtin_vsx_stxvw4x_v4sf", void_ftype_v4sf_long_pvoid,
17929 VSX_BUILTIN_STXVW4X_V4SF);
17930 def_builtin ("__builtin_vsx_stxvw4x_v4si", void_ftype_v4si_long_pvoid,
17931 VSX_BUILTIN_STXVW4X_V4SI);
17932 def_builtin ("__builtin_vsx_stxvw4x_v8hi", void_ftype_v8hi_long_pvoid,
17933 VSX_BUILTIN_STXVW4X_V8HI);
17934 def_builtin ("__builtin_vsx_stxvw4x_v16qi", void_ftype_v16qi_long_pvoid,
17935 VSX_BUILTIN_STXVW4X_V16QI);
17937 def_builtin ("__builtin_vsx_ld_elemrev_v2df", v2df_ftype_long_pcvoid,
17938 VSX_BUILTIN_LD_ELEMREV_V2DF);
17939 def_builtin ("__builtin_vsx_ld_elemrev_v2di", v2di_ftype_long_pcvoid,
17940 VSX_BUILTIN_LD_ELEMREV_V2DI);
17941 def_builtin ("__builtin_vsx_ld_elemrev_v4sf", v4sf_ftype_long_pcvoid,
17942 VSX_BUILTIN_LD_ELEMREV_V4SF);
17943 def_builtin ("__builtin_vsx_ld_elemrev_v4si", v4si_ftype_long_pcvoid,
17944 VSX_BUILTIN_LD_ELEMREV_V4SI);
17945 def_builtin ("__builtin_vsx_st_elemrev_v2df", void_ftype_v2df_long_pvoid,
17946 VSX_BUILTIN_ST_ELEMREV_V2DF);
17947 def_builtin ("__builtin_vsx_st_elemrev_v2di", void_ftype_v2di_long_pvoid,
17948 VSX_BUILTIN_ST_ELEMREV_V2DI);
17949 def_builtin ("__builtin_vsx_st_elemrev_v4sf", void_ftype_v4sf_long_pvoid,
17950 VSX_BUILTIN_ST_ELEMREV_V4SF);
17951 def_builtin ("__builtin_vsx_st_elemrev_v4si", void_ftype_v4si_long_pvoid,
17952 VSX_BUILTIN_ST_ELEMREV_V4SI);
17954 if (TARGET_P9_VECTOR)
17956 def_builtin ("__builtin_vsx_ld_elemrev_v8hi", v8hi_ftype_long_pcvoid,
17957 VSX_BUILTIN_LD_ELEMREV_V8HI);
17958 def_builtin ("__builtin_vsx_ld_elemrev_v16qi", v16qi_ftype_long_pcvoid,
17959 VSX_BUILTIN_LD_ELEMREV_V16QI);
17960 def_builtin ("__builtin_vsx_st_elemrev_v8hi",
17961 void_ftype_v8hi_long_pvoid, VSX_BUILTIN_ST_ELEMREV_V8HI);
17962 def_builtin ("__builtin_vsx_st_elemrev_v16qi",
17963 void_ftype_v16qi_long_pvoid, VSX_BUILTIN_ST_ELEMREV_V16QI);
17966 def_builtin ("__builtin_vec_vsx_ld", opaque_ftype_long_pcvoid,
17967 VSX_BUILTIN_VEC_LD);
17968 def_builtin ("__builtin_vec_vsx_st", void_ftype_opaque_long_pvoid,
17969 VSX_BUILTIN_VEC_ST);
17970 def_builtin ("__builtin_vec_xl", opaque_ftype_long_pcvoid,
17971 VSX_BUILTIN_VEC_XL);
17972 def_builtin ("__builtin_vec_xst", void_ftype_opaque_long_pvoid,
17973 VSX_BUILTIN_VEC_XST);
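/* Usage sketch (illustrative, assuming <altivec.h> maps the user-visible
   intrinsics onto these overloaded builtins, e.g. vec_xl onto
   __builtin_vec_xl):

     #include <altivec.h>

     vector signed int
     load_four_ints (const signed int *p)
     {
       return vec_xl (0, p);   // resolves through VSX_BUILTIN_VEC_XL
     }

   The overload is dispatched on the pointee type at the call site.  */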
17975 def_builtin ("__builtin_vec_step", int_ftype_opaque, ALTIVEC_BUILTIN_VEC_STEP);
17976 def_builtin ("__builtin_vec_splats", opaque_ftype_opaque, ALTIVEC_BUILTIN_VEC_SPLATS);
17977 def_builtin ("__builtin_vec_promote", opaque_ftype_opaque, ALTIVEC_BUILTIN_VEC_PROMOTE);
17979 def_builtin ("__builtin_vec_sld", opaque_ftype_opaque_opaque_int, ALTIVEC_BUILTIN_VEC_SLD);
17980 def_builtin ("__builtin_vec_splat", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_SPLAT);
17981 def_builtin ("__builtin_vec_extract", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_EXTRACT);
17982 def_builtin ("__builtin_vec_insert", opaque_ftype_opaque_opaque_int, ALTIVEC_BUILTIN_VEC_INSERT);
17983 def_builtin ("__builtin_vec_vspltw", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_VSPLTW);
17984 def_builtin ("__builtin_vec_vsplth", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_VSPLTH);
17985 def_builtin ("__builtin_vec_vspltb", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_VSPLTB);
17986 def_builtin ("__builtin_vec_ctf", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_CTF);
17987 def_builtin ("__builtin_vec_vcfsx", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_VCFSX);
17988 def_builtin ("__builtin_vec_vcfux", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_VCFUX);
17989 def_builtin ("__builtin_vec_cts", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_CTS);
17990 def_builtin ("__builtin_vec_ctu", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_CTU);
17992 def_builtin ("__builtin_vec_adde", opaque_ftype_opaque_opaque_opaque,
17993 ALTIVEC_BUILTIN_VEC_ADDE);
17994 def_builtin ("__builtin_vec_addec", opaque_ftype_opaque_opaque_opaque,
17995 ALTIVEC_BUILTIN_VEC_ADDEC);
17996 def_builtin ("__builtin_vec_cmpne", opaque_ftype_opaque_opaque,
17997 ALTIVEC_BUILTIN_VEC_CMPNE);
17998 def_builtin ("__builtin_vec_mul", opaque_ftype_opaque_opaque,
17999 ALTIVEC_BUILTIN_VEC_MUL);
18001 /* Cell builtins. */
18002 def_builtin ("__builtin_altivec_lvlx", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVLX);
18003 def_builtin ("__builtin_altivec_lvlxl", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVLXL);
18004 def_builtin ("__builtin_altivec_lvrx", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVRX);
18005 def_builtin ("__builtin_altivec_lvrxl", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVRXL);
18007 def_builtin ("__builtin_vec_lvlx", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LVLX);
18008 def_builtin ("__builtin_vec_lvlxl", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LVLXL);
18009 def_builtin ("__builtin_vec_lvrx", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LVRX);
18010 def_builtin ("__builtin_vec_lvrxl", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LVRXL);
18012 def_builtin ("__builtin_altivec_stvlx", void_ftype_v16qi_long_pvoid, ALTIVEC_BUILTIN_STVLX);
18013 def_builtin ("__builtin_altivec_stvlxl", void_ftype_v16qi_long_pvoid, ALTIVEC_BUILTIN_STVLXL);
18014 def_builtin ("__builtin_altivec_stvrx", void_ftype_v16qi_long_pvoid, ALTIVEC_BUILTIN_STVRX);
18015 def_builtin ("__builtin_altivec_stvrxl", void_ftype_v16qi_long_pvoid, ALTIVEC_BUILTIN_STVRXL);
18017 def_builtin ("__builtin_vec_stvlx", void_ftype_v16qi_long_pvoid, ALTIVEC_BUILTIN_VEC_STVLX);
18018 def_builtin ("__builtin_vec_stvlxl", void_ftype_v16qi_long_pvoid, ALTIVEC_BUILTIN_VEC_STVLXL);
18019 def_builtin ("__builtin_vec_stvrx", void_ftype_v16qi_long_pvoid, ALTIVEC_BUILTIN_VEC_STVRX);
18020 def_builtin ("__builtin_vec_stvrxl", void_ftype_v16qi_long_pvoid, ALTIVEC_BUILTIN_VEC_STVRXL);
18022 if (TARGET_P9_VECTOR)
18023 def_builtin ("__builtin_altivec_stxvl", void_ftype_v16qi_pvoid_long,
18024 P9V_BUILTIN_STXVL);
18026 /* Add the DST variants. */
18027 d = bdesc_dst;
18028 for (i = 0; i < ARRAY_SIZE (bdesc_dst); i++, d++)
18030 HOST_WIDE_INT mask = d->mask;
18032 /* It is expected that these dst built-in functions may have
18033 d->icode equal to CODE_FOR_nothing. */
18034 if ((mask & builtin_mask) != mask)
18036 if (TARGET_DEBUG_BUILTIN)
18037 fprintf (stderr, "altivec_init_builtins, skip dst %s\n",
18038 d->name);
18039 continue;
18041 def_builtin (d->name, void_ftype_pcvoid_int_int, d->code);
18044 /* Initialize the predicates. */
18045 d = bdesc_altivec_preds;
18046 for (i = 0; i < ARRAY_SIZE (bdesc_altivec_preds); i++, d++)
18048 machine_mode mode1;
18049 tree type;
18050 HOST_WIDE_INT mask = d->mask;
18052 if ((mask & builtin_mask) != mask)
18054 if (TARGET_DEBUG_BUILTIN)
18055 fprintf (stderr, "altivec_init_builtins, skip predicate %s\n",
18056 d->name);
18057 continue;
18060 if (rs6000_overloaded_builtin_p (d->code))
18061 mode1 = VOIDmode;
18062 else
18064 /* Cannot define builtin if the instruction is disabled. */
18065 gcc_assert (d->icode != CODE_FOR_nothing);
18066 mode1 = insn_data[d->icode].operand[1].mode;
18069 switch (mode1)
18071 case VOIDmode:
18072 type = int_ftype_int_opaque_opaque;
18073 break;
18074 case V2DImode:
18075 type = int_ftype_int_v2di_v2di;
18076 break;
18077 case V4SImode:
18078 type = int_ftype_int_v4si_v4si;
18079 break;
18080 case V8HImode:
18081 type = int_ftype_int_v8hi_v8hi;
18082 break;
18083 case V16QImode:
18084 type = int_ftype_int_v16qi_v16qi;
18085 break;
18086 case V4SFmode:
18087 type = int_ftype_int_v4sf_v4sf;
18088 break;
18089 case V2DFmode:
18090 type = int_ftype_int_v2df_v2df;
18091 break;
18092 default:
18093 gcc_unreachable ();
18096 def_builtin (d->name, type, d->code);
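/* For example, the predicate ALTIVEC_BUILTIN_VCMPEQUW_P compares V4SImode
   operands, so mode1 is V4SImode and the builtin is registered with type
   int_ftype_int_v4si_v4si, i.e. (illustratively)

     int __builtin_altivec_vcmpequw_p (int, vector signed int,
                                       vector signed int);

   where the leading int argument selects which CR6 predicate (all/any)
   to test.  */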
18099 /* Initialize the abs* operators. */
18100 d = bdesc_abs;
18101 for (i = 0; i < ARRAY_SIZE (bdesc_abs); i++, d++)
18103 machine_mode mode0;
18104 tree type;
18105 HOST_WIDE_INT mask = d->mask;
18107 if ((mask & builtin_mask) != mask)
18109 if (TARGET_DEBUG_BUILTIN)
18110 fprintf (stderr, "altivec_init_builtins, skip abs %s\n",
18111 d->name);
18112 continue;
18115 /* Cannot define builtin if the instruction is disabled. */
18116 gcc_assert (d->icode != CODE_FOR_nothing);
18117 mode0 = insn_data[d->icode].operand[0].mode;
18119 switch (mode0)
18121 case V2DImode:
18122 type = v2di_ftype_v2di;
18123 break;
18124 case V4SImode:
18125 type = v4si_ftype_v4si;
18126 break;
18127 case V8HImode:
18128 type = v8hi_ftype_v8hi;
18129 break;
18130 case V16QImode:
18131 type = v16qi_ftype_v16qi;
18132 break;
18133 case V4SFmode:
18134 type = v4sf_ftype_v4sf;
18135 break;
18136 case V2DFmode:
18137 type = v2df_ftype_v2df;
18138 break;
18139 default:
18140 gcc_unreachable ();
18143 def_builtin (d->name, type, d->code);
18146 /* Initialize target builtin that implements
18147 targetm.vectorize.builtin_mask_for_load. */
18149 decl = add_builtin_function ("__builtin_altivec_mask_for_load",
18150 v16qi_ftype_long_pcvoid,
18151 ALTIVEC_BUILTIN_MASK_FOR_LOAD,
18152 BUILT_IN_MD, NULL, NULL_TREE);
18153 TREE_READONLY (decl) = 1;
18154 /* Record the decl. Will be used by rs6000_builtin_mask_for_load. */
18155 altivec_builtin_mask_for_load = decl;
18157 /* Access to the vec_init patterns. */
18158 ftype = build_function_type_list (V4SI_type_node, integer_type_node,
18159 integer_type_node, integer_type_node,
18160 integer_type_node, NULL_TREE);
18161 def_builtin ("__builtin_vec_init_v4si", ftype, ALTIVEC_BUILTIN_VEC_INIT_V4SI);
18163 ftype = build_function_type_list (V8HI_type_node, short_integer_type_node,
18164 short_integer_type_node,
18165 short_integer_type_node,
18166 short_integer_type_node,
18167 short_integer_type_node,
18168 short_integer_type_node,
18169 short_integer_type_node,
18170 short_integer_type_node, NULL_TREE);
18171 def_builtin ("__builtin_vec_init_v8hi", ftype, ALTIVEC_BUILTIN_VEC_INIT_V8HI);
18173 ftype = build_function_type_list (V16QI_type_node, char_type_node,
18174 char_type_node, char_type_node,
18175 char_type_node, char_type_node,
18176 char_type_node, char_type_node,
18177 char_type_node, char_type_node,
18178 char_type_node, char_type_node,
18179 char_type_node, char_type_node,
18180 char_type_node, char_type_node,
18181 char_type_node, NULL_TREE);
18182 def_builtin ("__builtin_vec_init_v16qi", ftype,
18183 ALTIVEC_BUILTIN_VEC_INIT_V16QI);
18185 ftype = build_function_type_list (V4SF_type_node, float_type_node,
18186 float_type_node, float_type_node,
18187 float_type_node, NULL_TREE);
18188 def_builtin ("__builtin_vec_init_v4sf", ftype, ALTIVEC_BUILTIN_VEC_INIT_V4SF);
18190 /* VSX builtins. */
18191 ftype = build_function_type_list (V2DF_type_node, double_type_node,
18192 double_type_node, NULL_TREE);
18193 def_builtin ("__builtin_vec_init_v2df", ftype, VSX_BUILTIN_VEC_INIT_V2DF);
18195 ftype = build_function_type_list (V2DI_type_node, intDI_type_node,
18196 intDI_type_node, NULL_TREE);
18197 def_builtin ("__builtin_vec_init_v2di", ftype, VSX_BUILTIN_VEC_INIT_V2DI);
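/* Usage sketch (illustrative): __builtin_vec_init_v4si (1, 2, 3, 4)
   expands through the vec_init pattern into the equivalent of the
   vector literal (vector signed int){1, 2, 3, 4}.  */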
18199 /* Access to the vec_set patterns. */
18200 ftype = build_function_type_list (V4SI_type_node, V4SI_type_node,
18201 intSI_type_node,
18202 integer_type_node, NULL_TREE);
18203 def_builtin ("__builtin_vec_set_v4si", ftype, ALTIVEC_BUILTIN_VEC_SET_V4SI);
18205 ftype = build_function_type_list (V8HI_type_node, V8HI_type_node,
18206 intHI_type_node,
18207 integer_type_node, NULL_TREE);
18208 def_builtin ("__builtin_vec_set_v8hi", ftype, ALTIVEC_BUILTIN_VEC_SET_V8HI);
18210 ftype = build_function_type_list (V16QI_type_node, V16QI_type_node,
18211 intQI_type_node,
18212 integer_type_node, NULL_TREE);
18213 def_builtin ("__builtin_vec_set_v16qi", ftype, ALTIVEC_BUILTIN_VEC_SET_V16QI);
18215 ftype = build_function_type_list (V4SF_type_node, V4SF_type_node,
18216 float_type_node,
18217 integer_type_node, NULL_TREE);
18218 def_builtin ("__builtin_vec_set_v4sf", ftype, ALTIVEC_BUILTIN_VEC_SET_V4SF);
18220 ftype = build_function_type_list (V2DF_type_node, V2DF_type_node,
18221 double_type_node,
18222 integer_type_node, NULL_TREE);
18223 def_builtin ("__builtin_vec_set_v2df", ftype, VSX_BUILTIN_VEC_SET_V2DF);
18225 ftype = build_function_type_list (V2DI_type_node, V2DI_type_node,
18226 intDI_type_node,
18227 integer_type_node, NULL_TREE);
18228 def_builtin ("__builtin_vec_set_v2di", ftype, VSX_BUILTIN_VEC_SET_V2DI);
18230 /* Access to the vec_extract patterns. */
18231 ftype = build_function_type_list (intSI_type_node, V4SI_type_node,
18232 integer_type_node, NULL_TREE);
18233 def_builtin ("__builtin_vec_ext_v4si", ftype, ALTIVEC_BUILTIN_VEC_EXT_V4SI);
18235 ftype = build_function_type_list (intHI_type_node, V8HI_type_node,
18236 integer_type_node, NULL_TREE);
18237 def_builtin ("__builtin_vec_ext_v8hi", ftype, ALTIVEC_BUILTIN_VEC_EXT_V8HI);
18239 ftype = build_function_type_list (intQI_type_node, V16QI_type_node,
18240 integer_type_node, NULL_TREE);
18241 def_builtin ("__builtin_vec_ext_v16qi", ftype, ALTIVEC_BUILTIN_VEC_EXT_V16QI);
18243 ftype = build_function_type_list (float_type_node, V4SF_type_node,
18244 integer_type_node, NULL_TREE);
18245 def_builtin ("__builtin_vec_ext_v4sf", ftype, ALTIVEC_BUILTIN_VEC_EXT_V4SF);
18247 ftype = build_function_type_list (double_type_node, V2DF_type_node,
18248 integer_type_node, NULL_TREE);
18249 def_builtin ("__builtin_vec_ext_v2df", ftype, VSX_BUILTIN_VEC_EXT_V2DF);
18251 ftype = build_function_type_list (intDI_type_node, V2DI_type_node,
18252 integer_type_node, NULL_TREE);
18253 def_builtin ("__builtin_vec_ext_v2di", ftype, VSX_BUILTIN_VEC_EXT_V2DI);
18256 if (V1TI_type_node)
18258 tree v1ti_ftype_long_pcvoid
18259 = build_function_type_list (V1TI_type_node,
18260 long_integer_type_node, pcvoid_type_node,
18261 NULL_TREE);
18262 tree void_ftype_v1ti_long_pvoid
18263 = build_function_type_list (void_type_node,
18264 V1TI_type_node, long_integer_type_node,
18265 pvoid_type_node, NULL_TREE);
18266 def_builtin ("__builtin_vsx_lxvd2x_v1ti", v1ti_ftype_long_pcvoid,
18267 VSX_BUILTIN_LXVD2X_V1TI);
18268 def_builtin ("__builtin_vsx_stxvd2x_v1ti", void_ftype_v1ti_long_pvoid,
18269 VSX_BUILTIN_STXVD2X_V1TI);
18270 ftype = build_function_type_list (V1TI_type_node, intTI_type_node,
18271 NULL_TREE);
18272 def_builtin ("__builtin_vec_init_v1ti", ftype, VSX_BUILTIN_VEC_INIT_V1TI);
18273 ftype = build_function_type_list (V1TI_type_node, V1TI_type_node,
18274 intTI_type_node,
18275 integer_type_node, NULL_TREE);
18276 def_builtin ("__builtin_vec_set_v1ti", ftype, VSX_BUILTIN_VEC_SET_V1TI);
18277 ftype = build_function_type_list (intTI_type_node, V1TI_type_node,
18278 integer_type_node, NULL_TREE);
18279 def_builtin ("__builtin_vec_ext_v1ti", ftype, VSX_BUILTIN_VEC_EXT_V1TI);
18284 static void
18285 htm_init_builtins (void)
18287 HOST_WIDE_INT builtin_mask = rs6000_builtin_mask;
18288 const struct builtin_description *d;
18289 size_t i;
18291 d = bdesc_htm;
18292 for (i = 0; i < ARRAY_SIZE (bdesc_htm); i++, d++)
18294 tree op[MAX_HTM_OPERANDS], type;
18295 HOST_WIDE_INT mask = d->mask;
18296 unsigned attr = rs6000_builtin_info[d->code].attr;
18297 bool void_func = (attr & RS6000_BTC_VOID);
18298 int attr_args = (attr & RS6000_BTC_TYPE_MASK);
18299 int nopnds = 0;
18300 tree gpr_type_node;
18301 tree rettype;
18302 tree argtype;
18304 /* It is expected that these htm built-in functions may have
18305 d->icode equal to CODE_FOR_nothing. */
18307 if (TARGET_32BIT && TARGET_POWERPC64)
18308 gpr_type_node = long_long_unsigned_type_node;
18309 else
18310 gpr_type_node = long_unsigned_type_node;
18312 if (attr & RS6000_BTC_SPR)
18314 rettype = gpr_type_node;
18315 argtype = gpr_type_node;
18317 else if (d->code == HTM_BUILTIN_TABORTDC
18318 || d->code == HTM_BUILTIN_TABORTDCI)
18320 rettype = unsigned_type_node;
18321 argtype = gpr_type_node;
18323 else
18325 rettype = unsigned_type_node;
18326 argtype = unsigned_type_node;
18329 if ((mask & builtin_mask) != mask)
18331 if (TARGET_DEBUG_BUILTIN)
18332 fprintf (stderr, "htm_builtin, skip binary %s\n", d->name);
18333 continue;
18336 if (d->name == 0)
18338 if (TARGET_DEBUG_BUILTIN)
18339 fprintf (stderr, "htm_builtin, bdesc_htm[%ld] no name\n",
18340 (long unsigned) i);
18341 continue;
18344 op[nopnds++] = (void_func) ? void_type_node : rettype;
18346 if (attr_args == RS6000_BTC_UNARY)
18347 op[nopnds++] = argtype;
18348 else if (attr_args == RS6000_BTC_BINARY)
18350 op[nopnds++] = argtype;
18351 op[nopnds++] = argtype;
18353 else if (attr_args == RS6000_BTC_TERNARY)
18355 op[nopnds++] = argtype;
18356 op[nopnds++] = argtype;
18357 op[nopnds++] = argtype;
18360 switch (nopnds)
18362 case 1:
18363 type = build_function_type_list (op[0], NULL_TREE);
18364 break;
18365 case 2:
18366 type = build_function_type_list (op[0], op[1], NULL_TREE);
18367 break;
18368 case 3:
18369 type = build_function_type_list (op[0], op[1], op[2], NULL_TREE);
18370 break;
18371 case 4:
18372 type = build_function_type_list (op[0], op[1], op[2], op[3],
18373 NULL_TREE);
18374 break;
18375 default:
18376 gcc_unreachable ();
18379 def_builtin (d->name, type, d->code);
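/* Worked example (illustrative): for __builtin_tbegin the attribute is
   RS6000_BTC_UNARY and neither RS6000_BTC_VOID nor RS6000_BTC_SPR, so
   op[] becomes { unsigned_type_node, unsigned_type_node } and the
   registered signature is

     unsigned int __builtin_tbegin (unsigned int);  */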
18383 /* Hash function for builtin functions with up to 3 arguments and a return
18384 type. */
18385 hashval_t
18386 builtin_hasher::hash (builtin_hash_struct *bh)
18388 unsigned ret = 0;
18389 int i;
18391 for (i = 0; i < 4; i++)
18393 ret = (ret * (unsigned)MAX_MACHINE_MODE) + ((unsigned)bh->mode[i]);
18394 ret = (ret * 2) + bh->uns_p[i];
18397 return ret;
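/* For example, { mode = { SImode, SImode, VOIDmode, VOIDmode },
   uns_p = { 0, 1, 0, 0 } } folds each slot in as
   ret = ((ret * MAX_MACHINE_MODE + mode[i]) * 2) + uns_p[i], so two
   entries that differ in any mode or signedness flag hash to different
   values (modulo unsigned overflow).  */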
18400 /* Compare builtin hash entries H1 and H2 for equivalence. */
18401 bool
18402 builtin_hasher::equal (builtin_hash_struct *p1, builtin_hash_struct *p2)
18404 return ((p1->mode[0] == p2->mode[0])
18405 && (p1->mode[1] == p2->mode[1])
18406 && (p1->mode[2] == p2->mode[2])
18407 && (p1->mode[3] == p2->mode[3])
18408 && (p1->uns_p[0] == p2->uns_p[0])
18409 && (p1->uns_p[1] == p2->uns_p[1])
18410 && (p1->uns_p[2] == p2->uns_p[2])
18411 && (p1->uns_p[3] == p2->uns_p[3]));
18414 /* Map types for builtin functions with an explicit return type and up to 3
18415 arguments. Functions with fewer than 3 arguments use VOIDmode as the mode
18416 of the unused argument slots. */
18417 static tree
18418 builtin_function_type (machine_mode mode_ret, machine_mode mode_arg0,
18419 machine_mode mode_arg1, machine_mode mode_arg2,
18420 enum rs6000_builtins builtin, const char *name)
18422 struct builtin_hash_struct h;
18423 struct builtin_hash_struct *h2;
18424 int num_args = 3;
18425 int i;
18426 tree ret_type = NULL_TREE;
18427 tree arg_type[3] = { NULL_TREE, NULL_TREE, NULL_TREE };
18429 /* Create builtin_hash_table. */
18430 if (builtin_hash_table == NULL)
18431 builtin_hash_table = hash_table<builtin_hasher>::create_ggc (1500);
18433 h.type = NULL_TREE;
18434 h.mode[0] = mode_ret;
18435 h.mode[1] = mode_arg0;
18436 h.mode[2] = mode_arg1;
18437 h.mode[3] = mode_arg2;
18438 h.uns_p[0] = 0;
18439 h.uns_p[1] = 0;
18440 h.uns_p[2] = 0;
18441 h.uns_p[3] = 0;
18443 /* If the builtin produces unsigned results or takes unsigned arguments,
18444 and it is returned as a decl for the vectorizer (such as widening
18445 multiplies or permute), make sure the arguments and return value
18446 are type correct. */
18447 switch (builtin)
18449 /* unsigned 1 argument functions. */
18450 case CRYPTO_BUILTIN_VSBOX:
18451 case P8V_BUILTIN_VGBBD:
18452 case MISC_BUILTIN_CDTBCD:
18453 case MISC_BUILTIN_CBCDTD:
18454 h.uns_p[0] = 1;
18455 h.uns_p[1] = 1;
18456 break;
18458 /* unsigned 2 argument functions. */
18459 case ALTIVEC_BUILTIN_VMULEUB_UNS:
18460 case ALTIVEC_BUILTIN_VMULEUH_UNS:
18461 case ALTIVEC_BUILTIN_VMULOUB_UNS:
18462 case ALTIVEC_BUILTIN_VMULOUH_UNS:
18463 case CRYPTO_BUILTIN_VCIPHER:
18464 case CRYPTO_BUILTIN_VCIPHERLAST:
18465 case CRYPTO_BUILTIN_VNCIPHER:
18466 case CRYPTO_BUILTIN_VNCIPHERLAST:
18467 case CRYPTO_BUILTIN_VPMSUMB:
18468 case CRYPTO_BUILTIN_VPMSUMH:
18469 case CRYPTO_BUILTIN_VPMSUMW:
18470 case CRYPTO_BUILTIN_VPMSUMD:
18471 case CRYPTO_BUILTIN_VPMSUM:
18472 case MISC_BUILTIN_ADDG6S:
18473 case MISC_BUILTIN_DIVWEU:
18474 case MISC_BUILTIN_DIVWEUO:
18475 case MISC_BUILTIN_DIVDEU:
18476 case MISC_BUILTIN_DIVDEUO:
18477 h.uns_p[0] = 1;
18478 h.uns_p[1] = 1;
18479 h.uns_p[2] = 1;
18480 break;
18482 /* unsigned 3 argument functions. */
18483 case ALTIVEC_BUILTIN_VPERM_16QI_UNS:
18484 case ALTIVEC_BUILTIN_VPERM_8HI_UNS:
18485 case ALTIVEC_BUILTIN_VPERM_4SI_UNS:
18486 case ALTIVEC_BUILTIN_VPERM_2DI_UNS:
18487 case ALTIVEC_BUILTIN_VSEL_16QI_UNS:
18488 case ALTIVEC_BUILTIN_VSEL_8HI_UNS:
18489 case ALTIVEC_BUILTIN_VSEL_4SI_UNS:
18490 case ALTIVEC_BUILTIN_VSEL_2DI_UNS:
18491 case VSX_BUILTIN_VPERM_16QI_UNS:
18492 case VSX_BUILTIN_VPERM_8HI_UNS:
18493 case VSX_BUILTIN_VPERM_4SI_UNS:
18494 case VSX_BUILTIN_VPERM_2DI_UNS:
18495 case VSX_BUILTIN_XXSEL_16QI_UNS:
18496 case VSX_BUILTIN_XXSEL_8HI_UNS:
18497 case VSX_BUILTIN_XXSEL_4SI_UNS:
18498 case VSX_BUILTIN_XXSEL_2DI_UNS:
18499 case CRYPTO_BUILTIN_VPERMXOR:
18500 case CRYPTO_BUILTIN_VPERMXOR_V2DI:
18501 case CRYPTO_BUILTIN_VPERMXOR_V4SI:
18502 case CRYPTO_BUILTIN_VPERMXOR_V8HI:
18503 case CRYPTO_BUILTIN_VPERMXOR_V16QI:
18504 case CRYPTO_BUILTIN_VSHASIGMAW:
18505 case CRYPTO_BUILTIN_VSHASIGMAD:
18506 case CRYPTO_BUILTIN_VSHASIGMA:
18507 h.uns_p[0] = 1;
18508 h.uns_p[1] = 1;
18509 h.uns_p[2] = 1;
18510 h.uns_p[3] = 1;
18511 break;
18513 /* signed permute functions with unsigned char mask. */
18514 case ALTIVEC_BUILTIN_VPERM_16QI:
18515 case ALTIVEC_BUILTIN_VPERM_8HI:
18516 case ALTIVEC_BUILTIN_VPERM_4SI:
18517 case ALTIVEC_BUILTIN_VPERM_4SF:
18518 case ALTIVEC_BUILTIN_VPERM_2DI:
18519 case ALTIVEC_BUILTIN_VPERM_2DF:
18520 case VSX_BUILTIN_VPERM_16QI:
18521 case VSX_BUILTIN_VPERM_8HI:
18522 case VSX_BUILTIN_VPERM_4SI:
18523 case VSX_BUILTIN_VPERM_4SF:
18524 case VSX_BUILTIN_VPERM_2DI:
18525 case VSX_BUILTIN_VPERM_2DF:
18526 h.uns_p[3] = 1;
18527 break;
18529 /* unsigned args, signed return. */
18530 case VSX_BUILTIN_XVCVUXDDP_UNS:
18531 case ALTIVEC_BUILTIN_UNSFLOAT_V4SI_V4SF:
18532 h.uns_p[1] = 1;
18533 break;
18535 /* signed args, unsigned return. */
18536 case VSX_BUILTIN_XVCVDPUXDS_UNS:
18537 case ALTIVEC_BUILTIN_FIXUNS_V4SF_V4SI:
18538 case MISC_BUILTIN_UNPACK_TD:
18539 case MISC_BUILTIN_UNPACK_V1TI:
18540 h.uns_p[0] = 1;
18541 break;
18543 /* unsigned arguments for 128-bit pack instructions. */
18544 case MISC_BUILTIN_PACK_TD:
18545 case MISC_BUILTIN_PACK_V1TI:
18546 h.uns_p[1] = 1;
18547 h.uns_p[2] = 1;
18548 break;
18550 default:
18551 break;
18554 /* Figure out how many args are present. */
18555 while (num_args > 0 && h.mode[num_args] == VOIDmode)
18556 num_args--;
18558 ret_type = builtin_mode_to_type[h.mode[0]][h.uns_p[0]];
18559 if (!ret_type && h.uns_p[0])
18560 ret_type = builtin_mode_to_type[h.mode[0]][0];
18562 if (!ret_type)
18563 fatal_error (input_location,
18564 "internal error: builtin function %s had an unexpected "
18565 "return type %s", name, GET_MODE_NAME (h.mode[0]));
18567 for (i = 0; i < (int) ARRAY_SIZE (arg_type); i++)
18568 arg_type[i] = NULL_TREE;
18570 for (i = 0; i < num_args; i++)
18572 int m = (int) h.mode[i+1];
18573 int uns_p = h.uns_p[i+1];
18575 arg_type[i] = builtin_mode_to_type[m][uns_p];
18576 if (!arg_type[i] && uns_p)
18577 arg_type[i] = builtin_mode_to_type[m][0];
18579 if (!arg_type[i])
18580 fatal_error (input_location,
18581 "internal error: builtin function %s, argument %d "
18582 "had unexpected argument type %s", name, i,
18583 GET_MODE_NAME (m));
18586 builtin_hash_struct **found = builtin_hash_table->find_slot (&h, INSERT);
18587 if (*found == NULL)
18589 h2 = ggc_alloc<builtin_hash_struct> ();
18590 *h2 = h;
18591 *found = h2;
18593 h2->type = build_function_type_list (ret_type, arg_type[0], arg_type[1],
18594 arg_type[2], NULL_TREE);
18597 return (*found)->type;
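/* Illustrative call (hypothetical choice of builtin): typing a signed
   V4SI add as

     builtin_function_type (V4SImode, V4SImode, V4SImode, VOIDmode,
                            ALTIVEC_BUILTIN_VADDUWM,
                            "__builtin_altivec_vadduwm")

   builds "V4SI (*) (V4SI, V4SI)" on first use and returns the cached
   type on every later request with the same modes and signedness.  */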
18600 static void
18601 rs6000_common_init_builtins (void)
18603 const struct builtin_description *d;
18604 size_t i;
18606 tree opaque_ftype_opaque = NULL_TREE;
18607 tree opaque_ftype_opaque_opaque = NULL_TREE;
18608 tree opaque_ftype_opaque_opaque_opaque = NULL_TREE;
18609 tree v2si_ftype = NULL_TREE;
18610 tree v2si_ftype_qi = NULL_TREE;
18611 tree v2si_ftype_v2si_qi = NULL_TREE;
18612 tree v2si_ftype_int_qi = NULL_TREE;
18613 HOST_WIDE_INT builtin_mask = rs6000_builtin_mask;
18615 if (!TARGET_PAIRED_FLOAT)
18617 builtin_mode_to_type[V2SImode][0] = opaque_V2SI_type_node;
18618 builtin_mode_to_type[V2SFmode][0] = opaque_V2SF_type_node;
18621 /* Paired and SPE builtins are only available if you build a compiler with
18622 the appropriate options, so only create those builtins when the
18623 corresponding compiler option is in effect. Create Altivec and VSX
18624 builtins on machines with at least the general purpose extensions
18625 (970 and newer) to allow the use of the target attribute. */
18627 if (TARGET_EXTRA_BUILTINS)
18628 builtin_mask |= RS6000_BTM_COMMON;
18630 /* Add the ternary operators. */
18631 d = bdesc_3arg;
18632 for (i = 0; i < ARRAY_SIZE (bdesc_3arg); i++, d++)
18634 tree type;
18635 HOST_WIDE_INT mask = d->mask;
18637 if ((mask & builtin_mask) != mask)
18639 if (TARGET_DEBUG_BUILTIN)
18640 fprintf (stderr, "rs6000_builtin, skip ternary %s\n", d->name);
18641 continue;
18644 if (rs6000_overloaded_builtin_p (d->code))
18646 if (! (type = opaque_ftype_opaque_opaque_opaque))
18647 type = opaque_ftype_opaque_opaque_opaque
18648 = build_function_type_list (opaque_V4SI_type_node,
18649 opaque_V4SI_type_node,
18650 opaque_V4SI_type_node,
18651 opaque_V4SI_type_node,
18652 NULL_TREE);
18654 else
18656 enum insn_code icode = d->icode;
18657 if (d->name == 0)
18659 if (TARGET_DEBUG_BUILTIN)
18660 fprintf (stderr, "rs6000_builtin, bdesc_3arg[%ld] no name\n",
18661 (long unsigned)i);
18663 continue;
18666 if (icode == CODE_FOR_nothing)
18668 if (TARGET_DEBUG_BUILTIN)
18669 fprintf (stderr, "rs6000_builtin, skip ternary %s (no code)\n",
18670 d->name);
18672 continue;
18675 type = builtin_function_type (insn_data[icode].operand[0].mode,
18676 insn_data[icode].operand[1].mode,
18677 insn_data[icode].operand[2].mode,
18678 insn_data[icode].operand[3].mode,
18679 d->code, d->name);
18682 def_builtin (d->name, type, d->code);
18685 /* Add the binary operators. */
18686 d = bdesc_2arg;
18687 for (i = 0; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
18689 machine_mode mode0, mode1, mode2;
18690 tree type;
18691 HOST_WIDE_INT mask = d->mask;
18693 if ((mask & builtin_mask) != mask)
18695 if (TARGET_DEBUG_BUILTIN)
18696 fprintf (stderr, "rs6000_builtin, skip binary %s\n", d->name);
18697 continue;
18700 if (rs6000_overloaded_builtin_p (d->code))
18702 if (! (type = opaque_ftype_opaque_opaque))
18703 type = opaque_ftype_opaque_opaque
18704 = build_function_type_list (opaque_V4SI_type_node,
18705 opaque_V4SI_type_node,
18706 opaque_V4SI_type_node,
18707 NULL_TREE);
18709 else
18711 enum insn_code icode = d->icode;
18712 if (d->name == 0)
18714 if (TARGET_DEBUG_BUILTIN)
18715 fprintf (stderr, "rs6000_builtin, bdesc_2arg[%ld] no name\n",
18716 (long unsigned)i);
18718 continue;
18721 if (icode == CODE_FOR_nothing)
18723 if (TARGET_DEBUG_BUILTIN)
18724 fprintf (stderr, "rs6000_builtin, skip binary %s (no code)\n",
18725 d->name);
18727 continue;
18730 mode0 = insn_data[icode].operand[0].mode;
18731 mode1 = insn_data[icode].operand[1].mode;
18732 mode2 = insn_data[icode].operand[2].mode;
18734 if (mode0 == V2SImode && mode1 == V2SImode && mode2 == QImode)
18736 if (! (type = v2si_ftype_v2si_qi))
18737 type = v2si_ftype_v2si_qi
18738 = build_function_type_list (opaque_V2SI_type_node,
18739 opaque_V2SI_type_node,
18740 char_type_node,
18741 NULL_TREE);
18744 else if (mode0 == V2SImode && GET_MODE_CLASS (mode1) == MODE_INT
18745 && mode2 == QImode)
18747 if (! (type = v2si_ftype_int_qi))
18748 type = v2si_ftype_int_qi
18749 = build_function_type_list (opaque_V2SI_type_node,
18750 integer_type_node,
18751 char_type_node,
18752 NULL_TREE);
18755 else
18756 type = builtin_function_type (mode0, mode1, mode2, VOIDmode,
18757 d->code, d->name);
18760 def_builtin (d->name, type, d->code);
18763 /* Add the simple unary operators. */
18764 d = bdesc_1arg;
18765 for (i = 0; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
18767 machine_mode mode0, mode1;
18768 tree type;
18769 HOST_WIDE_INT mask = d->mask;
18771 if ((mask & builtin_mask) != mask)
18773 if (TARGET_DEBUG_BUILTIN)
18774 fprintf (stderr, "rs6000_builtin, skip unary %s\n", d->name);
18775 continue;
18778 if (rs6000_overloaded_builtin_p (d->code))
18780 if (! (type = opaque_ftype_opaque))
18781 type = opaque_ftype_opaque
18782 = build_function_type_list (opaque_V4SI_type_node,
18783 opaque_V4SI_type_node,
18784 NULL_TREE);
18786 else
18788 enum insn_code icode = d->icode;
18789 if (d->name == 0)
18791 if (TARGET_DEBUG_BUILTIN)
18792 fprintf (stderr, "rs6000_builtin, bdesc_1arg[%ld] no name\n",
18793 (long unsigned)i);
18795 continue;
18798 if (icode == CODE_FOR_nothing)
18800 if (TARGET_DEBUG_BUILTIN)
18801 fprintf (stderr, "rs6000_builtin, skip unary %s (no code)\n",
18802 d->name);
18804 continue;
18807 mode0 = insn_data[icode].operand[0].mode;
18808 mode1 = insn_data[icode].operand[1].mode;
18810 if (mode0 == V2SImode && mode1 == QImode)
18812 if (! (type = v2si_ftype_qi))
18813 type = v2si_ftype_qi
18814 = build_function_type_list (opaque_V2SI_type_node,
18815 char_type_node,
18816 NULL_TREE);
18819 else
18820 type = builtin_function_type (mode0, mode1, VOIDmode, VOIDmode,
18821 d->code, d->name);
18824 def_builtin (d->name, type, d->code);
18827 /* Add the simple no-argument operators. */
18828 d = bdesc_0arg;
18829 for (i = 0; i < ARRAY_SIZE (bdesc_0arg); i++, d++)
18831 machine_mode mode0;
18832 tree type;
18833 HOST_WIDE_INT mask = d->mask;
18835 if ((mask & builtin_mask) != mask)
18837 if (TARGET_DEBUG_BUILTIN)
18838 fprintf (stderr, "rs6000_builtin, skip no-argument %s\n", d->name);
18839 continue;
18841 if (rs6000_overloaded_builtin_p (d->code))
18843 if (!opaque_ftype_opaque)
18844 opaque_ftype_opaque
18845 = build_function_type_list (opaque_V4SI_type_node, NULL_TREE);
18846 type = opaque_ftype_opaque;
18848 else
18850 enum insn_code icode = d->icode;
18851 if (d->name == 0)
18853 if (TARGET_DEBUG_BUILTIN)
18854 fprintf (stderr, "rs6000_builtin, bdesc_0arg[%lu] no name\n",
18855 (long unsigned) i);
18856 continue;
18858 if (icode == CODE_FOR_nothing)
18860 if (TARGET_DEBUG_BUILTIN)
18861 fprintf (stderr,
18862 "rs6000_builtin, skip no-argument %s (no code)\n",
18863 d->name);
18864 continue;
18866 mode0 = insn_data[icode].operand[0].mode;
18867 if (mode0 == V2SImode)
18869 /* code for SPE */
18870 if (! (type = v2si_ftype))
18872 v2si_ftype
18873 = build_function_type_list (opaque_V2SI_type_node,
18874 NULL_TREE);
18875 type = v2si_ftype;
18878 else
18879 type = builtin_function_type (mode0, VOIDmode, VOIDmode, VOIDmode,
18880 d->code, d->name);
18882 def_builtin (d->name, type, d->code);
18886 /* Set up AIX/Darwin/64-bit Linux quad floating point routines. */
18887 static void
18888 init_float128_ibm (machine_mode mode)
18890 if (!TARGET_XL_COMPAT)
18892 set_optab_libfunc (add_optab, mode, "__gcc_qadd");
18893 set_optab_libfunc (sub_optab, mode, "__gcc_qsub");
18894 set_optab_libfunc (smul_optab, mode, "__gcc_qmul");
18895 set_optab_libfunc (sdiv_optab, mode, "__gcc_qdiv");
18897 if (!(TARGET_HARD_FLOAT && (TARGET_FPRS || TARGET_E500_DOUBLE)))
18899 set_optab_libfunc (neg_optab, mode, "__gcc_qneg");
18900 set_optab_libfunc (eq_optab, mode, "__gcc_qeq");
18901 set_optab_libfunc (ne_optab, mode, "__gcc_qne");
18902 set_optab_libfunc (gt_optab, mode, "__gcc_qgt");
18903 set_optab_libfunc (ge_optab, mode, "__gcc_qge");
18904 set_optab_libfunc (lt_optab, mode, "__gcc_qlt");
18905 set_optab_libfunc (le_optab, mode, "__gcc_qle");
18907 set_conv_libfunc (sext_optab, mode, SFmode, "__gcc_stoq");
18908 set_conv_libfunc (sext_optab, mode, DFmode, "__gcc_dtoq");
18909 set_conv_libfunc (trunc_optab, SFmode, mode, "__gcc_qtos");
18910 set_conv_libfunc (trunc_optab, DFmode, mode, "__gcc_qtod");
18911 set_conv_libfunc (sfix_optab, SImode, mode, "__gcc_qtoi");
18912 set_conv_libfunc (ufix_optab, SImode, mode, "__gcc_qtou");
18913 set_conv_libfunc (sfloat_optab, mode, SImode, "__gcc_itoq");
18914 set_conv_libfunc (ufloat_optab, mode, SImode, "__gcc_utoq");
18917 if (!(TARGET_HARD_FLOAT && TARGET_FPRS))
18918 set_optab_libfunc (unord_optab, mode, "__gcc_qunord");
18920 else
18922 set_optab_libfunc (add_optab, mode, "_xlqadd");
18923 set_optab_libfunc (sub_optab, mode, "_xlqsub");
18924 set_optab_libfunc (smul_optab, mode, "_xlqmul");
18925 set_optab_libfunc (sdiv_optab, mode, "_xlqdiv");
18928 /* Add various conversions for IFmode to use the traditional TFmode
18929 names. */
18930 if (mode == IFmode)
18932 set_conv_libfunc (sext_optab, mode, SDmode, "__dpd_extendsdtf2");
18933 set_conv_libfunc (sext_optab, mode, DDmode, "__dpd_extendddtf2");
18934 set_conv_libfunc (trunc_optab, mode, TDmode, "__dpd_trunctftd2");
18935 set_conv_libfunc (trunc_optab, SDmode, mode, "__dpd_trunctfsd2");
18936 set_conv_libfunc (trunc_optab, DDmode, mode, "__dpd_trunctfdd2");
18937 set_conv_libfunc (sext_optab, TDmode, mode, "__dpd_extendtdtf2");
18939 if (TARGET_POWERPC64)
18941 set_conv_libfunc (sfix_optab, TImode, mode, "__fixtfti");
18942 set_conv_libfunc (ufix_optab, TImode, mode, "__fixunstfti");
18943 set_conv_libfunc (sfloat_optab, mode, TImode, "__floattitf");
18944 set_conv_libfunc (ufloat_optab, mode, TImode, "__floatuntitf");
18949 /* Set up IEEE 128-bit floating point routines. Use different names if the
18950 arguments can be passed in a vector register. The historical PowerPC
18951 implementation of IEEE 128-bit floating point used _q_<op> for the names, so
18952 continue to use that if we aren't using vector registers to pass IEEE
18953 128-bit floating point. */
18955 static void
18956 init_float128_ieee (machine_mode mode)
18958 if (FLOAT128_VECTOR_P (mode))
18960 set_optab_libfunc (add_optab, mode, "__addkf3");
18961 set_optab_libfunc (sub_optab, mode, "__subkf3");
18962 set_optab_libfunc (neg_optab, mode, "__negkf2");
18963 set_optab_libfunc (smul_optab, mode, "__mulkf3");
18964 set_optab_libfunc (sdiv_optab, mode, "__divkf3");
18965 set_optab_libfunc (sqrt_optab, mode, "__sqrtkf2");
18966 set_optab_libfunc (abs_optab, mode, "__abstkf2");
18968 set_optab_libfunc (eq_optab, mode, "__eqkf2");
18969 set_optab_libfunc (ne_optab, mode, "__nekf2");
18970 set_optab_libfunc (gt_optab, mode, "__gtkf2");
18971 set_optab_libfunc (ge_optab, mode, "__gekf2");
18972 set_optab_libfunc (lt_optab, mode, "__ltkf2");
18973 set_optab_libfunc (le_optab, mode, "__lekf2");
18974 set_optab_libfunc (unord_optab, mode, "__unordkf2");
18976 set_conv_libfunc (sext_optab, mode, SFmode, "__extendsfkf2");
18977 set_conv_libfunc (sext_optab, mode, DFmode, "__extenddfkf2");
18978 set_conv_libfunc (trunc_optab, SFmode, mode, "__trunckfsf2");
18979 set_conv_libfunc (trunc_optab, DFmode, mode, "__trunckfdf2");
18981 set_conv_libfunc (sext_optab, mode, IFmode, "__extendtfkf2");
18982 if (mode != TFmode && FLOAT128_IBM_P (TFmode))
18983 set_conv_libfunc (sext_optab, mode, TFmode, "__extendtfkf2");
18985 set_conv_libfunc (trunc_optab, IFmode, mode, "__trunckftf2");
18986 if (mode != TFmode && FLOAT128_IBM_P (TFmode))
18987 set_conv_libfunc (trunc_optab, TFmode, mode, "__trunckftf2");
18989 set_conv_libfunc (sext_optab, mode, SDmode, "__dpd_extendsdkf2");
18990 set_conv_libfunc (sext_optab, mode, DDmode, "__dpd_extendddkf2");
18991 set_conv_libfunc (trunc_optab, mode, TDmode, "__dpd_trunckftd2");
18992 set_conv_libfunc (trunc_optab, SDmode, mode, "__dpd_trunckfsd2");
18993 set_conv_libfunc (trunc_optab, DDmode, mode, "__dpd_trunckfdd2");
18994 set_conv_libfunc (sext_optab, TDmode, mode, "__dpd_extendtdkf2");
18996 set_conv_libfunc (sfix_optab, SImode, mode, "__fixkfsi");
18997 set_conv_libfunc (ufix_optab, SImode, mode, "__fixunskfsi");
18998 set_conv_libfunc (sfix_optab, DImode, mode, "__fixkfdi");
18999 set_conv_libfunc (ufix_optab, DImode, mode, "__fixunskfdi");
19001 set_conv_libfunc (sfloat_optab, mode, SImode, "__floatsikf");
19002 set_conv_libfunc (ufloat_optab, mode, SImode, "__floatunsikf");
19003 set_conv_libfunc (sfloat_optab, mode, DImode, "__floatdikf");
19004 set_conv_libfunc (ufloat_optab, mode, DImode, "__floatundikf");
19006 if (TARGET_POWERPC64)
19008 set_conv_libfunc (sfix_optab, TImode, mode, "__fixkfti");
19009 set_conv_libfunc (ufix_optab, TImode, mode, "__fixunskfti");
19010 set_conv_libfunc (sfloat_optab, mode, TImode, "__floattikf");
19011 set_conv_libfunc (ufloat_optab, mode, TImode, "__floatuntikf");
19015 else
19017 set_optab_libfunc (add_optab, mode, "_q_add");
19018 set_optab_libfunc (sub_optab, mode, "_q_sub");
19019 set_optab_libfunc (neg_optab, mode, "_q_neg");
19020 set_optab_libfunc (smul_optab, mode, "_q_mul");
19021 set_optab_libfunc (sdiv_optab, mode, "_q_div");
19022 if (TARGET_PPC_GPOPT)
19023 set_optab_libfunc (sqrt_optab, mode, "_q_sqrt");
19025 set_optab_libfunc (eq_optab, mode, "_q_feq");
19026 set_optab_libfunc (ne_optab, mode, "_q_fne");
19027 set_optab_libfunc (gt_optab, mode, "_q_fgt");
19028 set_optab_libfunc (ge_optab, mode, "_q_fge");
19029 set_optab_libfunc (lt_optab, mode, "_q_flt");
19030 set_optab_libfunc (le_optab, mode, "_q_fle");
19032 set_conv_libfunc (sext_optab, mode, SFmode, "_q_stoq");
19033 set_conv_libfunc (sext_optab, mode, DFmode, "_q_dtoq");
19034 set_conv_libfunc (trunc_optab, SFmode, mode, "_q_qtos");
19035 set_conv_libfunc (trunc_optab, DFmode, mode, "_q_qtod");
19036 set_conv_libfunc (sfix_optab, SImode, mode, "_q_qtoi");
19037 set_conv_libfunc (ufix_optab, SImode, mode, "_q_qtou");
19038 set_conv_libfunc (sfloat_optab, mode, SImode, "_q_itoq");
19039 set_conv_libfunc (ufloat_optab, mode, SImode, "_q_utoq");
19043 static void
19044 rs6000_init_libfuncs (void)
19046 /* __float128 support. */
19047 if (TARGET_FLOAT128_TYPE)
19049 init_float128_ibm (IFmode);
19050 init_float128_ieee (KFmode);
19053 /* AIX/Darwin/64-bit Linux quad floating point routines. */
19054 if (TARGET_LONG_DOUBLE_128)
19056 if (!TARGET_IEEEQUAD)
19057 init_float128_ibm (TFmode);
19059 /* IEEE 128-bit including 32-bit SVR4 quad floating point routines. */
19060 else
19061 init_float128_ieee (TFmode);
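/* Illustrative effect (assuming default configurations): with IBM
   extended long double, a TFmode addition becomes a call to __gcc_qadd
   (or _xlqadd under XL compatibility), while IEEE 128-bit long double
   routes the same operation to __addkf3.  */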
19066 /* Expand a block clear operation, and return 1 if successful. Return 0
19067 if we should let the compiler generate normal code.
19069 operands[0] is the destination
19070 operands[1] is the length
19071 operands[3] is the alignment */
19073 int
19074 expand_block_clear (rtx operands[])
19076 rtx orig_dest = operands[0];
19077 rtx bytes_rtx = operands[1];
19078 rtx align_rtx = operands[3];
19079 bool constp = (GET_CODE (bytes_rtx) == CONST_INT);
19080 HOST_WIDE_INT align;
19081 HOST_WIDE_INT bytes;
19082 int offset;
19083 int clear_bytes;
19084 int clear_step;
19086 /* If this is not a fixed-size clear, return 0 so the caller falls back to memset */
19087 if (! constp)
19088 return 0;
19090 /* This must be a fixed size alignment */
19091 gcc_assert (GET_CODE (align_rtx) == CONST_INT);
19092 align = INTVAL (align_rtx) * BITS_PER_UNIT;
19094 /* Anything to clear? */
19095 bytes = INTVAL (bytes_rtx);
19096 if (bytes <= 0)
19097 return 1;
19099 /* Use the builtin memset after a point, to avoid huge code bloat.
19100 When optimize_size, avoid any significant code bloat; calling
19101 memset is about 4 instructions, so allow for one instruction to
19102 load zero and three to do clearing. */
19103 if (TARGET_ALTIVEC && align >= 128)
19104 clear_step = 16;
19105 else if (TARGET_POWERPC64 && (align >= 64 || !STRICT_ALIGNMENT))
19106 clear_step = 8;
19107 else if (TARGET_SPE && align >= 64)
19108 clear_step = 8;
19109 else
19110 clear_step = 4;
19112 if (optimize_size && bytes > 3 * clear_step)
19113 return 0;
19114 if (! optimize_size && bytes > 8 * clear_step)
19115 return 0;
19117 for (offset = 0; bytes > 0; offset += clear_bytes, bytes -= clear_bytes)
19119 machine_mode mode = BLKmode;
19120 rtx dest;
19122 if (bytes >= 16 && TARGET_ALTIVEC && align >= 128)
19124 clear_bytes = 16;
19125 mode = V4SImode;
19127 else if (bytes >= 8 && TARGET_SPE && align >= 64)
19129 clear_bytes = 8;
19130 mode = V2SImode;
19132 else if (bytes >= 8 && TARGET_POWERPC64
19133 && (align >= 64 || !STRICT_ALIGNMENT))
19135 clear_bytes = 8;
19136 mode = DImode;
19137 if (offset == 0 && align < 64)
19139 rtx addr;
19141 /* If the address form is reg+offset with offset not a
19142 multiple of four, reload into reg indirect form here
19143 rather than waiting for reload. This way we get one
19144 reload, not one per store. */
19145 addr = XEXP (orig_dest, 0);
19146 if ((GET_CODE (addr) == PLUS || GET_CODE (addr) == LO_SUM)
19147 && GET_CODE (XEXP (addr, 1)) == CONST_INT
19148 && (INTVAL (XEXP (addr, 1)) & 3) != 0)
19150 addr = copy_addr_to_reg (addr);
19151 orig_dest = replace_equiv_address (orig_dest, addr);
19155 else if (bytes >= 4 && (align >= 32 || !STRICT_ALIGNMENT))
19156 { /* move 4 bytes */
19157 clear_bytes = 4;
19158 mode = SImode;
19160 else if (bytes >= 2 && (align >= 16 || !STRICT_ALIGNMENT))
19161 { /* move 2 bytes */
19162 clear_bytes = 2;
19163 mode = HImode;
19165 else /* move 1 byte at a time */
19167 clear_bytes = 1;
19168 mode = QImode;
19171 dest = adjust_address (orig_dest, mode, offset);
19173 emit_move_insn (dest, CONST0_RTX (mode));
19176 return 1;
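/* Illustrative expansion (assuming no AltiVec/SPE path applies):
   clearing 12 bytes at 32-bit alignment on a 32-bit target uses
   clear_step 4 and three loop iterations, emitting an SImode store of
   zero at offsets 0, 4 and 8.  Since 12 <= 8 * clear_step, the
   open-coded sequence is preferred over a memset call.  */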
19179 /* Emit a potentially record-form instruction, setting DST from SRC.
19180 If DOT is 0, that is all; otherwise, set CCREG to the result of the
19181 signed comparison of DST with zero. If DOT is 1, the generated RTL
19182 doesn't care about the DST result; if DOT is 2, it does. If CCREG
19183 is CR0 do a single dot insn (as a PARALLEL); otherwise, do a SET and
19184 a separate COMPARE. */
19186 static void
19187 rs6000_emit_dot_insn (rtx dst, rtx src, int dot, rtx ccreg)
19189 if (dot == 0)
19191 emit_move_insn (dst, src);
19192 return;
19195 if (cc_reg_not_cr0_operand (ccreg, CCmode))
19197 emit_move_insn (dst, src);
19198 emit_move_insn (ccreg, gen_rtx_COMPARE (CCmode, dst, const0_rtx));
19199 return;
19202 rtx ccset = gen_rtx_SET (ccreg, gen_rtx_COMPARE (CCmode, src, const0_rtx));
19203 if (dot == 1)
19205 rtx clobber = gen_rtx_CLOBBER (VOIDmode, dst);
19206 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, ccset, clobber)));
19208 else
19210 rtx set = gen_rtx_SET (dst, src);
19211 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, ccset, set)));
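/* Illustrative RTL for dot == 1 (DST value unused): the emitted
   PARALLEL pairs the CR0 compare with a clobber of DST, roughly

     (parallel [(set (reg:CC CR0) (compare:CC src (const_int 0)))
                (clobber dst)])

   which matches the record-form ("dot") insn patterns.  */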
19215 /* Figure out the correct instructions to generate to load data for
19216 block compare. MODE is used for the read from memory, and
19217 data is zero extended if REG is wider than MODE. If LE code
19218 is being generated, bswap loads are used.
19220 REG is the destination register to move the data into.
19221 MEM is the memory block being read.
19222 MODE is the mode of memory to use for the read. */
19223 static void
19224 do_load_for_compare (rtx reg, rtx mem, machine_mode mode)
19226 switch (GET_MODE (reg))
19228 case DImode:
19229 switch (mode)
19231 case QImode:
19232 emit_insn (gen_zero_extendqidi2 (reg, mem));
19233 break;
19234 case HImode:
19236 rtx src = mem;
19237 if (!BYTES_BIG_ENDIAN)
19239 src = gen_reg_rtx (HImode);
19240 emit_insn (gen_bswaphi2 (src, mem));
19242 emit_insn (gen_zero_extendhidi2 (reg, src));
19243 break;
19245 case SImode:
19247 rtx src = mem;
19248 if (!BYTES_BIG_ENDIAN)
19250 src = gen_reg_rtx (SImode);
19251 emit_insn (gen_bswapsi2 (src, mem));
19253 emit_insn (gen_zero_extendsidi2 (reg, src));
19255 break;
19256 case DImode:
19257 if (!BYTES_BIG_ENDIAN)
19258 emit_insn (gen_bswapdi2 (reg, mem));
19259 else
19260 emit_insn (gen_movdi (reg, mem));
19261 break;
19262 default:
19263 gcc_unreachable ();
19265 break;
19267 case SImode:
19268 switch (mode)
19270 case QImode:
19271 emit_insn (gen_zero_extendqisi2 (reg, mem));
19272 break;
19273 case HImode:
19275 rtx src = mem;
19276 if (!BYTES_BIG_ENDIAN)
19278 src = gen_reg_rtx (HImode);
19279 emit_insn (gen_bswaphi2 (src, mem));
19281 emit_insn (gen_zero_extendhisi2 (reg, src));
19282 break;
19284 case SImode:
19285 if (!BYTES_BIG_ENDIAN)
19286 emit_insn (gen_bswapsi2 (reg, mem));
19287 else
19288 emit_insn (gen_movsi (reg, mem));
19289 break;
19290 case DImode:
19291 /* DImode is larger than the destination reg, so it is not expected. */
19292 gcc_unreachable ();
19293 break;
19294 default:
19295 gcc_unreachable ();
19297 break;
19298 default:
19299 gcc_unreachable ();
19300 break;
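/* For example, on a little-endian target an SImode read into a DImode
   REG goes through a fresh SImode temporary: gen_bswapsi2 restores
   big-endian byte order and gen_zero_extendsidi2 widens the result, so
   the subtract-based comparison sees both operands in a consistent
   byte order.  */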
19304 /* Select the mode to be used for reading the next chunk of bytes
19305 in the compare.
19307 OFFSET is the current read offset from the beginning of the block.
19308 BYTES is the number of bytes remaining to be read.
19309 ALIGN is the minimum alignment of the memory blocks being compared in bytes.
19310 WORD_MODE_OK indicates using WORD_MODE is allowed, else SImode is
19311 the largest allowable mode. */
19312 static machine_mode
19313 select_block_compare_mode (HOST_WIDE_INT offset, HOST_WIDE_INT bytes,
19314 HOST_WIDE_INT align, bool word_mode_ok)
19316 /* First see if we can do a whole load unit
19317 as that will be more efficient than a larger load + shift. */
19319 /* If big, use biggest chunk.
19320 If exactly chunk size, use that size.
19321 If remainder can be done in one piece with shifting, do that.
19322 Do largest chunk possible without violating alignment rules. */
19324 /* The most we can read without potential page crossing. */
19325 HOST_WIDE_INT maxread = ROUND_UP (bytes, align);
19327 if (word_mode_ok && bytes >= UNITS_PER_WORD)
19328 return word_mode;
19329 else if (bytes == GET_MODE_SIZE (SImode))
19330 return SImode;
19331 else if (bytes == GET_MODE_SIZE (HImode))
19332 return HImode;
19333 else if (bytes == GET_MODE_SIZE (QImode))
19334 return QImode;
19335 else if (bytes < GET_MODE_SIZE (SImode)
19336 && offset >= GET_MODE_SIZE (SImode) - bytes)
19337 /* This matches the case where we have SImode and 3 bytes
19338 and offset >= 1 and permits us to move back one and overlap
19339 with the previous read, thus avoiding having to shift
19340 unwanted bytes off of the input. */
19341 return SImode;
19342 else if (word_mode_ok && bytes < UNITS_PER_WORD
19343 && offset >= UNITS_PER_WORD-bytes)
19344 /* Similarly, if we can use DImode it will get matched here and
19345 can do an overlapping read that ends at the end of the block. */
19346 return word_mode;
19347 else if (word_mode_ok && maxread >= UNITS_PER_WORD)
19348 /* It is safe to do all remaining in one load of largest size,
19349 possibly with a shift to get rid of unwanted bytes. */
19350 return word_mode;
19351 else if (maxread >= GET_MODE_SIZE (SImode))
19352 /* It is safe to do all remaining in one SImode load,
19353 possibly with a shift to get rid of unwanted bytes. */
19354 return SImode;
19355 else if (bytes > GET_MODE_SIZE (SImode))
19356 return SImode;
19357 else if (bytes > GET_MODE_SIZE (HImode))
19358 return HImode;
19360 /* final fallback is do one byte */
19361 return QImode;
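/* Worked example (illustrative): with bytes == 3, offset == 5,
   align == 4 and word_mode_ok, none of the exact-size cases match, but
   bytes < GET_MODE_SIZE (SImode) and offset >= 4 - bytes, so SImode is
   returned and the caller backs the read up one byte to overlap the
   previous chunk.  */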
19364 /* Compute the alignment of pointer+OFFSET where the original alignment
19365 of pointer was BASE_ALIGN. */
19366 static HOST_WIDE_INT
19367 compute_current_alignment (HOST_WIDE_INT base_align, HOST_WIDE_INT offset)
19369 if (offset == 0)
19370 return base_align;
19371 return min (base_align, offset & -offset);
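/* E.g. base_align == 8 at offset == 12 yields
   min (8, 12 & -12) == min (8, 4) == 4: the pointer is only guaranteed
   to be 4-byte aligned at that offset.  */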
19374 /* Expand a block compare operation, and return true if successful.
19375 Return false if we should let the compiler generate normal code,
19376 probably a memcmp call.
19378 OPERANDS[0] is the target (result).
19379 OPERANDS[1] is the first source.
19380 OPERANDS[2] is the second source.
19381 OPERANDS[3] is the length.
19382 OPERANDS[4] is the alignment. */
19383 bool
19384 expand_block_compare (rtx operands[])
19386 rtx target = operands[0];
19387 rtx orig_src1 = operands[1];
19388 rtx orig_src2 = operands[2];
19389 rtx bytes_rtx = operands[3];
19390 rtx align_rtx = operands[4];
19391 HOST_WIDE_INT cmp_bytes = 0;
19392 rtx src1 = orig_src1;
19393 rtx src2 = orig_src2;
19395 /* If this is not a fixed size compare, just call memcmp */
19396 if (!CONST_INT_P (bytes_rtx))
19397 return false;
19399 /* This must be a fixed size alignment */
19400 if (!CONST_INT_P (align_rtx))
19401 return false;
19403 int base_align = INTVAL (align_rtx) / BITS_PER_UNIT;
19405 /* SLOW_UNALIGNED_ACCESS -- don't do unaligned stuff */
19406 if (SLOW_UNALIGNED_ACCESS (word_mode, MEM_ALIGN (orig_src1))
19407 || SLOW_UNALIGNED_ACCESS (word_mode, MEM_ALIGN (orig_src2)))
19408 return false;
19410 gcc_assert (GET_MODE (target) == SImode);
19412 /* Anything to compare? */
19413 HOST_WIDE_INT bytes = INTVAL (bytes_rtx);
19414 if (bytes <= 0)
19415 return true;
19417 /* The code generated for p7 and older is not faster than glibc
19418 memcmp if alignment is small and length is not short, so bail
19419 out to avoid those conditions. */
19420 if (!TARGET_EFFICIENT_OVERLAPPING_UNALIGNED
19421 && ((base_align == 1 && bytes > 16)
19422 || (base_align == 2 && bytes > 32)))
19423 return false;
19425 rtx tmp_reg_src1 = gen_reg_rtx (word_mode);
19426 rtx tmp_reg_src2 = gen_reg_rtx (word_mode);
19428 /* If we have an LE target without ldbrx and word_mode is DImode,
19429 then we must avoid using word_mode. */
19430 int word_mode_ok = !(!BYTES_BIG_ENDIAN && !TARGET_LDBRX
19431 && word_mode == DImode);
19433 /* Strategy phase. How many ops will this take and should we expand it? */
19435 int offset = 0;
19436 machine_mode load_mode =
19437 select_block_compare_mode (offset, bytes, base_align, word_mode_ok);
19438 int load_mode_size = GET_MODE_SIZE (load_mode);
19440 /* We don't want to generate too much code. */
19441 if (ROUND_UP (bytes, load_mode_size) / load_mode_size
19442 > rs6000_block_compare_inline_limit)
19443 return false;
19445 bool generate_6432_conversion = false;
19446 rtx convert_label = NULL;
19447 rtx final_label = NULL;
19449 /* Example of code generated for an 11-byte block with 1-byte alignment:
19450 .L10:
19451 ldbrx 10,6,9
19452 ldbrx 9,7,9
19453 subf. 9,9,10
19454 bne 0,.L8
19455 addi 9,4,7
19456 lwbrx 10,0,9
19457 addi 9,5,7
19458 lwbrx 9,0,9
19459 subf 9,9,10
19460 b .L9
19461 .L8: # convert_label
19462 cntlzd 9,9
19463 addi 9,9,-1
19464 xori 9,9,0x3f
19465 .L9: # final_label
19467 If we start off with DImode and compare/branch to something with a
19468 smaller mode, then we will need a block with the DI->SI conversion
19469 that may or may not be executed. */
19471 while (bytes > 0)
19473 int align = compute_current_alignment (base_align, offset);
19474 if (TARGET_EFFICIENT_OVERLAPPING_UNALIGNED)
19475 load_mode = select_block_compare_mode (offset, bytes, align,
19476 word_mode_ok);
19477 else
19478 load_mode = select_block_compare_mode (0, bytes, align, word_mode_ok);
19479 load_mode_size = GET_MODE_SIZE (load_mode);
19480 if (bytes >= load_mode_size)
19481 cmp_bytes = load_mode_size;
19482 else if (TARGET_EFFICIENT_OVERLAPPING_UNALIGNED)
19484 /* Move this load back so it doesn't go past the end.
19485 P8/P9 can do this efficiently. */
19486 int extra_bytes = load_mode_size - bytes;
19487 cmp_bytes = bytes;
19488 if (extra_bytes < offset)
19490 offset -= extra_bytes;
19491 cmp_bytes = load_mode_size;
19492 bytes = cmp_bytes;
19495 else
19496 /* P7 and earlier can't do the overlapping load trick fast,
19497 so this forces a non-overlapping load and a shift to get
19498 rid of the extra bytes. */
19499 cmp_bytes = bytes;
19501 src1 = adjust_address (orig_src1, load_mode, offset);
19502 src2 = adjust_address (orig_src2, load_mode, offset);
19504 if (!REG_P (XEXP (src1, 0)))
19506 rtx src1_reg = copy_addr_to_reg (XEXP (src1, 0));
19507 src1 = replace_equiv_address (src1, src1_reg);
19509 set_mem_size (src1, cmp_bytes);
19511 if (!REG_P (XEXP (src2, 0)))
19513 rtx src2_reg = copy_addr_to_reg (XEXP (src2, 0));
19514 src2 = replace_equiv_address (src2, src2_reg);
19516 set_mem_size (src2, cmp_bytes);
19518 do_load_for_compare (tmp_reg_src1, src1, load_mode);
19519 do_load_for_compare (tmp_reg_src2, src2, load_mode);
19521 if (cmp_bytes < load_mode_size)
19523 /* Shift unneeded bytes off. */
19524 rtx sh = GEN_INT (BITS_PER_UNIT * (load_mode_size - cmp_bytes));
19525 if (word_mode == DImode)
19527 emit_insn (gen_lshrdi3 (tmp_reg_src1, tmp_reg_src1, sh));
19528 emit_insn (gen_lshrdi3 (tmp_reg_src2, tmp_reg_src2, sh));
19530 else
19532 emit_insn (gen_lshrsi3 (tmp_reg_src1, tmp_reg_src1, sh));
19533 emit_insn (gen_lshrsi3 (tmp_reg_src2, tmp_reg_src2, sh));
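/* Illustrative: with word_mode == DImode, an 8-byte load, and
   cmp_bytes == 3, sh is 40, so both operands keep only the three
   bytes being compared and the subtract below cannot be affected by
   the five bytes beyond the data of interest.  */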
19537 /* We previously did a block that needed a 64->32 conversion but
19538 the current block does not, so a label is needed to jump
19539 to the end. */
19540 if (generate_6432_conversion && !final_label
19541 && GET_MODE_SIZE (GET_MODE (target)) >= load_mode_size)
19542 final_label = gen_label_rtx ();
19544 /* Do we need a 64->32 conversion block? */
19545 int remain = bytes - cmp_bytes;
19546 if (GET_MODE_SIZE (GET_MODE (target)) < GET_MODE_SIZE (load_mode))
19548 generate_6432_conversion = true;
19549 if (remain > 0 && !convert_label)
19550 convert_label = gen_label_rtx ();
19553 if (GET_MODE_SIZE (GET_MODE (target)) >= GET_MODE_SIZE (load_mode))
19555 /* Target is larger than load size so we don't need to
19556 reduce result size. */
19557 if (remain > 0)
19559 /* This is not the last block; branch to the end if the result
19560 of this subtract is not zero. */
19561 if (!final_label)
19562 final_label = gen_label_rtx ();
19563 rtx fin_ref = gen_rtx_LABEL_REF (VOIDmode, final_label);
19564 rtx cond = gen_reg_rtx (CCmode);
19565 rtx tmp = gen_rtx_MINUS (word_mode, tmp_reg_src1, tmp_reg_src2);
19566 rs6000_emit_dot_insn (tmp_reg_src2, tmp, 2, cond);
19567 emit_insn (gen_movsi (target, gen_lowpart (SImode, tmp_reg_src2)));
19568 rtx ne_rtx = gen_rtx_NE (VOIDmode, cond, const0_rtx);
19569 rtx ifelse = gen_rtx_IF_THEN_ELSE (VOIDmode, ne_rtx,
19570 fin_ref, pc_rtx);
19571 rtx j = emit_jump_insn (gen_rtx_SET (pc_rtx, ifelse));
19572 JUMP_LABEL (j) = final_label;
19573 LABEL_NUSES (final_label) += 1;
19575 else
19577 if (word_mode == DImode)
19579 emit_insn (gen_subdi3 (tmp_reg_src2, tmp_reg_src1,
19580 tmp_reg_src2));
19581 emit_insn (gen_movsi (target,
19582 gen_lowpart (SImode, tmp_reg_src2)));
19584 else
19585 emit_insn (gen_subsi3 (target, tmp_reg_src1, tmp_reg_src2));
19587 if (final_label)
19589 rtx fin_ref = gen_rtx_LABEL_REF (VOIDmode, final_label);
19590 rtx j = emit_jump_insn (gen_rtx_SET (pc_rtx, fin_ref));
19591 JUMP_LABEL (j) = final_label;
19592 LABEL_NUSES (final_label) += 1;
19593 emit_barrier ();
19597 else
19599 generate_6432_conversion = true;
19600 if (remain > 0)
19602 if (!convert_label)
19603 convert_label = gen_label_rtx ();
19605 /* Compare to zero and branch to convert_label if not zero. */
19606 rtx cvt_ref = gen_rtx_LABEL_REF (VOIDmode, convert_label);
19607 rtx cond = gen_reg_rtx (CCmode);
19608 rtx tmp = gen_rtx_MINUS (DImode, tmp_reg_src1, tmp_reg_src2);
19609 rs6000_emit_dot_insn (tmp_reg_src2, tmp, 2, cond);
19610 rtx ne_rtx = gen_rtx_NE (VOIDmode, cond, const0_rtx);
19611 rtx ifelse = gen_rtx_IF_THEN_ELSE (VOIDmode, ne_rtx,
19612 cvt_ref, pc_rtx);
19613 rtx j = emit_jump_insn (gen_rtx_SET (pc_rtx, ifelse));
19614 JUMP_LABEL (j) = convert_label;
19615 LABEL_NUSES (convert_label) += 1;
19617 else
19619 /* Just do the subtract. Since this is the last block, the
19620 convert code will be generated immediately following. */
19621 emit_insn (gen_subdi3 (tmp_reg_src2, tmp_reg_src1,
19622 tmp_reg_src2));
19626 offset += cmp_bytes;
19627 bytes -= cmp_bytes;
19630 if (generate_6432_conversion)
19632 if (convert_label)
19633 emit_label (convert_label);
19635 /* We need to produce DI result from sub, then convert to target SI
19636 while maintaining <0 / ==0 / >0 properties.
19637 Segher's sequence: cntlzd 3,3 ; addi 3,3,-1 ; xori 3,3,63 */
19638 emit_insn (gen_clzdi2 (tmp_reg_src2, tmp_reg_src2));
19639 emit_insn (gen_adddi3 (tmp_reg_src2, tmp_reg_src2, GEN_INT (-1)));
19640 emit_insn (gen_xordi3 (tmp_reg_src2, tmp_reg_src2, GEN_INT (63)));
19641 emit_insn (gen_movsi (target, gen_lowpart (SImode, tmp_reg_src2)));
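/* Worked mapping for the sequence above: for the DImode difference d,
   cntlzd gives 64 when d == 0, so (64-1)^63 == 0; it gives 0 when
   d < 0, so (0-1)^63 == -64 (negative); and it gives 1..63 when d > 0,
   yielding a value in 1..63 (positive).  The <0/==0/>0 sense of the
   subtract is thus preserved in the SImode lowpart.  */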
19644 if (final_label)
19645 emit_label (final_label);
19647 gcc_assert (bytes == 0);
19648 return true;
19651 /* Generate alignment check and branch code to set up for
19652 strncmp when we don't have DI alignment.
19653 STRNCMP_LABEL is the label to branch if there is a page crossing.
19654 SRC is the string pointer to be examined.
19655 BYTES is the max number of bytes to compare. */
19656 static void
19657 expand_strncmp_align_check (rtx strncmp_label, rtx src, HOST_WIDE_INT bytes)
19659 rtx lab_ref = gen_rtx_LABEL_REF (VOIDmode, strncmp_label);
19660 rtx src_check = copy_addr_to_reg (XEXP (src, 0));
19661 if (GET_MODE (src_check) == SImode)
19662 emit_insn (gen_andsi3 (src_check, src_check, GEN_INT (0xfff)));
19663 else
19664 emit_insn (gen_anddi3 (src_check, src_check, GEN_INT (0xfff)));
19665 rtx cond = gen_reg_rtx (CCmode);
19666 emit_move_insn (cond, gen_rtx_COMPARE (CCmode, src_check,
19667 GEN_INT (4096 - bytes)));
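/* The branch below is taken when the low 12 bits of the address are
   at or above 4096 - BYTES, i.e. whenever the inline load sequence
   could reach or cross a 4 KiB page boundary (a conservative check);
   in that case we fall back to the strncmp library call.  */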
19669 rtx cmp_rtx = gen_rtx_LT (VOIDmode, cond, const0_rtx);
19671 rtx ifelse = gen_rtx_IF_THEN_ELSE (VOIDmode, cmp_rtx,
19672 pc_rtx, lab_ref);
19673 rtx j = emit_jump_insn (gen_rtx_SET (pc_rtx, ifelse));
19674 JUMP_LABEL (j) = strncmp_label;
19675 LABEL_NUSES (strncmp_label) += 1;
19678 /* Expand a string compare operation with length, and return
19679 true if successful. Return false if we should let the
19680 compiler generate normal code, probably a strncmp call.
19682 OPERANDS[0] is the target (result).
19683 OPERANDS[1] is the first source.
19684 OPERANDS[2] is the second source.
19685 OPERANDS[3] is the length.
19686 OPERANDS[4] is the alignment in bytes. */
19687 bool
19688 expand_strn_compare (rtx operands[])
19690 rtx target = operands[0];
19691 rtx orig_src1 = operands[1];
19692 rtx orig_src2 = operands[2];
19693 rtx bytes_rtx = operands[3];
19694 rtx align_rtx = operands[4];
19695 HOST_WIDE_INT cmp_bytes = 0;
19696 rtx src1 = orig_src1;
19697 rtx src2 = orig_src2;
19699 /* If this is not a fixed size compare, just call strncmp. */
19700 if (!CONST_INT_P (bytes_rtx))
19701 return false;
19703 /* This must be a fixed size alignment. */
19704 if (!CONST_INT_P (align_rtx))
19705 return false;
19707 int base_align = INTVAL (align_rtx);
19708 int align1 = MEM_ALIGN (orig_src1) / BITS_PER_UNIT;
19709 int align2 = MEM_ALIGN (orig_src2) / BITS_PER_UNIT;
19711 /* SLOW_UNALIGNED_ACCESS -- don't do unaligned stuff. */
19712 if (SLOW_UNALIGNED_ACCESS (word_mode, align1)
19713 || SLOW_UNALIGNED_ACCESS (word_mode, align2))
19714 return false;
19716 gcc_assert (GET_MODE (target) == SImode);
19718 HOST_WIDE_INT bytes = INTVAL (bytes_rtx);
19720 /* If we have an LE target without ldbrx and word_mode is DImode,
19721 then we must avoid using word_mode. */
19722 int word_mode_ok = !(!BYTES_BIG_ENDIAN && !TARGET_LDBRX
19723 && word_mode == DImode);
19725 int word_mode_size = GET_MODE_SIZE (word_mode);
19727 int offset = 0;
19728 machine_mode load_mode =
19729 select_block_compare_mode (offset, bytes, base_align, word_mode_ok);
19730 int load_mode_size = GET_MODE_SIZE (load_mode);
19732 /* We don't want to generate too much code. Also if bytes is
19733 4096 or larger we always want the library strncmp anyway. */
19734 int groups = ROUND_UP (bytes, load_mode_size) / load_mode_size;
19735 if (bytes >= 4096 || groups > rs6000_string_compare_inline_limit)
19736 return false;
19738 rtx result_reg = gen_reg_rtx (word_mode);
19739 rtx final_move_label = gen_label_rtx ();
19740 rtx final_label = gen_label_rtx ();
19741 rtx begin_compare_label = NULL;
19743 if (base_align < 8)
19745 /* Generate code that checks distance to 4k boundary for this case. */
19746 begin_compare_label = gen_label_rtx ();
19747 rtx strncmp_label = gen_label_rtx ();
19748 rtx jmp;
19750 /* Strncmp for power8 in glibc does this:
19751 rldicl r8,r3,0,52
19752 cmpldi cr7,r8,4096-16
19753 bgt cr7,L(pagecross) */
19755 if (align1 < 8)
19756 expand_strncmp_align_check (strncmp_label, src1, bytes);
19757 if (align2 < 8)
19758 expand_strncmp_align_check (strncmp_label, src2, bytes);
19760 /* Now generate the following sequence:
19761 - branch to begin_compare
19762 - strncmp_label
19763 - call to strncmp
19764 - branch to final_label
19765 - begin_compare_label */
19767 rtx cmp_ref = gen_rtx_LABEL_REF (VOIDmode, begin_compare_label);
19768 jmp = emit_jump_insn (gen_rtx_SET (pc_rtx, cmp_ref));
19769 JUMP_LABEL (jmp) = begin_compare_label;
19770 LABEL_NUSES (begin_compare_label) += 1;
19771 emit_barrier ();
19773 emit_label (strncmp_label);
19775 if (!REG_P (XEXP (src1, 0)))
19777 rtx src1_reg = copy_addr_to_reg (XEXP (src1, 0));
19778 src1 = replace_equiv_address (src1, src1_reg);
19781 if (!REG_P (XEXP (src2, 0)))
19783 rtx src2_reg = copy_addr_to_reg (XEXP (src2, 0));
19784 src2 = replace_equiv_address (src2, src2_reg);
19787 /* -m32 -mpowerpc64 results in word_mode being DImode even
19788 though otherwise it is 32-bit. The length arg to strncmp
19789 is a size_t which will be the same size as pointers. */
19790 rtx len_rtx;
19791 if (TARGET_64BIT)
19792 len_rtx = gen_reg_rtx (DImode);
19793 else
19794 len_rtx = gen_reg_rtx (SImode);
19796 emit_move_insn (len_rtx, bytes_rtx);
19798 emit_library_call_value (gen_rtx_SYMBOL_REF (Pmode, "strncmp"),
19799 target, LCT_NORMAL, GET_MODE (target), 3,
19800 force_reg (Pmode, XEXP (src1, 0)), Pmode,
19801 force_reg (Pmode, XEXP (src2, 0)), Pmode,
19802 len_rtx, GET_MODE (len_rtx));
19804 rtx fin_ref = gen_rtx_LABEL_REF (VOIDmode, final_label);
19805 jmp = emit_jump_insn (gen_rtx_SET (pc_rtx, fin_ref));
19806 JUMP_LABEL (jmp) = final_label;
19807 LABEL_NUSES (final_label) += 1;
19808 emit_barrier ();
19809 emit_label (begin_compare_label);
19812 rtx cleanup_label = NULL;
19813 rtx tmp_reg_src1 = gen_reg_rtx (word_mode);
19814 rtx tmp_reg_src2 = gen_reg_rtx (word_mode);
19816 /* Generate sequence of ld/ldbrx, cmpb to compare out
19817 to the length specified. */
19818 while (bytes > 0)
19820 /* Compare sequence:
19821 check each 8B with: ld/ld cmpd bne
19822 cleanup code at end:
19823 cmpb get byte that differs
19824 cmpb look for zero byte
19825 orc combine
19826 cntlzd get bit of first zero/diff byte
19827 subfic convert for rldcl use
19828 rldcl rldcl extract diff/zero byte
19829 subf subtract for final result
19831 The last compare can branch around the cleanup code if the
19832 result is zero because the strings are exactly equal. */
19833 int align = compute_current_alignment (base_align, offset);
19834 if (TARGET_EFFICIENT_OVERLAPPING_UNALIGNED)
19835 load_mode = select_block_compare_mode (offset, bytes, align,
19836 word_mode_ok);
19837 else
19838 load_mode = select_block_compare_mode (0, bytes, align, word_mode_ok);
19839 load_mode_size = GET_MODE_SIZE (load_mode);
19840 if (bytes >= load_mode_size)
19841 cmp_bytes = load_mode_size;
19842 else if (TARGET_EFFICIENT_OVERLAPPING_UNALIGNED)
19844 /* Move this load back so it doesn't go past the end.
19845 P8/P9 can do this efficiently. */
19846 int extra_bytes = load_mode_size - bytes;
19847 cmp_bytes = bytes;
19848 if (extra_bytes < offset)
19850 offset -= extra_bytes;
19851 cmp_bytes = load_mode_size;
19852 bytes = cmp_bytes;
19855 else
19856 /* P7 and earlier can't do the overlapping load trick fast,
19857 so this forces a non-overlapping load and a shift to get
19858 rid of the extra bytes. */
19859 cmp_bytes = bytes;
19861 src1 = adjust_address (orig_src1, load_mode, offset);
19862 src2 = adjust_address (orig_src2, load_mode, offset);
19864 if (!REG_P (XEXP (src1, 0)))
19866 rtx src1_reg = copy_addr_to_reg (XEXP (src1, 0));
19867 src1 = replace_equiv_address (src1, src1_reg);
19869 set_mem_size (src1, cmp_bytes);
19871 if (!REG_P (XEXP (src2, 0)))
19873 rtx src2_reg = copy_addr_to_reg (XEXP (src2, 0));
19874 src2 = replace_equiv_address (src2, src2_reg);
19876 set_mem_size (src2, cmp_bytes);
19878 do_load_for_compare (tmp_reg_src1, src1, load_mode);
19879 do_load_for_compare (tmp_reg_src2, src2, load_mode);
19881 /* We must always left-align the data we read, and
19882 clear any bytes to the right that are beyond the string.
19883 Otherwise the cmpb sequence won't produce the correct
19884 results. The beginning of the compare is done in
19885 word_mode, so it does not need any extra shift or
19886 clear-right operations. */
19888 if (load_mode_size < word_mode_size)
19890 /* Rotate left first. */
19891 rtx sh = GEN_INT (BITS_PER_UNIT * (word_mode_size - load_mode_size));
19892 if (word_mode == DImode)
19894 emit_insn (gen_rotldi3 (tmp_reg_src1, tmp_reg_src1, sh));
19895 emit_insn (gen_rotldi3 (tmp_reg_src2, tmp_reg_src2, sh));
19897 else
19899 emit_insn (gen_rotlsi3 (tmp_reg_src1, tmp_reg_src1, sh));
19900 emit_insn (gen_rotlsi3 (tmp_reg_src2, tmp_reg_src2, sh));
19904 if (cmp_bytes < word_mode_size)
19906 /* Now clear right. This plus the rotate can be
19907 turned into a rldicr instruction. */
19908 HOST_WIDE_INT mb = BITS_PER_UNIT * (word_mode_size - cmp_bytes);
19909 rtx mask = GEN_INT (HOST_WIDE_INT_M1U << mb);
19910 if (word_mode == DImode)
19912 emit_insn (gen_anddi3_mask (tmp_reg_src1, tmp_reg_src1, mask));
19913 emit_insn (gen_anddi3_mask (tmp_reg_src2, tmp_reg_src2, mask));
19915 else
19917 emit_insn (gen_andsi3_mask (tmp_reg_src1, tmp_reg_src1, mask));
19918 emit_insn (gen_andsi3_mask (tmp_reg_src2, tmp_reg_src2, mask));
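/* Illustrative: with word_mode == DImode, a 4-byte load, and
   cmp_bytes == 3, the rotate above moves the loaded bytes into the
   high half, and mask == ~0 << 40 then clears the low five bytes,
   leaving the three bytes under comparison left-aligned for the
   cmpb-based cleanup code.  */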
19922 int remain = bytes - cmp_bytes;
19924 rtx dst_label;
19925 if (remain > 0)
19927 if (!cleanup_label)
19928 cleanup_label = gen_label_rtx ();
19929 dst_label = cleanup_label;
19931 else
19932 dst_label = final_move_label;
19934 rtx lab_ref = gen_rtx_LABEL_REF (VOIDmode, dst_label);
19935 rtx cond = gen_reg_rtx (CCmode);
19937 if (remain == 0)
19939 /* For the last chunk, subf. also
19940 generates the zero result we need. */
19941 rtx tmp = gen_rtx_MINUS (word_mode, tmp_reg_src1, tmp_reg_src2);
19942 rs6000_emit_dot_insn (result_reg, tmp, 1, cond);
19944 else
19945 emit_move_insn (cond, gen_rtx_COMPARE (CCmode,
19946 tmp_reg_src1, tmp_reg_src2));
19948 rtx cmp_rtx;
19949 if (remain > 0)
19950 cmp_rtx = gen_rtx_NE (VOIDmode, cond, const0_rtx);
19951 else
19952 cmp_rtx = gen_rtx_EQ (VOIDmode, cond, const0_rtx);
19954 rtx ifelse = gen_rtx_IF_THEN_ELSE (VOIDmode, cmp_rtx,
19955 lab_ref, pc_rtx);
19956 rtx j = emit_jump_insn (gen_rtx_SET (pc_rtx, ifelse));
19957 JUMP_LABEL (j) = dst_label;
19958 LABEL_NUSES (dst_label) += 1;
19960 offset += cmp_bytes;
19961 bytes -= cmp_bytes;
19964 if (cleanup_label)
19965 emit_label (cleanup_label);
19967 /* Generate the final sequence that identifies the differing
19968 byte and generates the final result, taking into account
19969 zero bytes:
19971 cmpb cmpb_result1, src1, src2
19972 cmpb cmpb_result2, src1, zero
19973 orc cmpb_result1, cmpb_result1, cmpb_result2
19974 cntlzd get bit of first zero/diff byte
19975 addi convert for rldcl use
19976 rldcl rldcl extract diff/zero byte
19977 subf subtract for final result
19980 rtx cmpb_diff = gen_reg_rtx (word_mode);
19981 rtx cmpb_zero = gen_reg_rtx (word_mode);
19982 rtx rot_amt = gen_reg_rtx (word_mode);
19983 rtx zero_reg = gen_reg_rtx (word_mode);
19985 rtx rot1_1 = gen_reg_rtx (word_mode);
19986 rtx rot1_2 = gen_reg_rtx (word_mode);
19987 rtx rot2_1 = gen_reg_rtx (word_mode);
19988 rtx rot2_2 = gen_reg_rtx (word_mode);
19990 if (word_mode == SImode)
19992 emit_insn (gen_cmpbsi3 (cmpb_diff, tmp_reg_src1, tmp_reg_src2));
19993 emit_insn (gen_movsi (zero_reg, GEN_INT (0)));
19994 emit_insn (gen_cmpbsi3 (cmpb_zero, tmp_reg_src1, zero_reg));
19995 emit_insn (gen_one_cmplsi2 (cmpb_diff, cmpb_diff));
19996 emit_insn (gen_iorsi3 (cmpb_diff, cmpb_diff, cmpb_zero));
19997 emit_insn (gen_clzsi2 (rot_amt, cmpb_diff));
19998 emit_insn (gen_addsi3 (rot_amt, rot_amt, GEN_INT (8)));
19999 emit_insn (gen_rotlsi3 (rot1_1, tmp_reg_src1,
20000 gen_lowpart (SImode, rot_amt)));
20001 emit_insn (gen_andsi3_mask (rot1_2, rot1_1, GEN_INT (0xff)));
20002 emit_insn (gen_rotlsi3 (rot2_1, tmp_reg_src2,
20003 gen_lowpart (SImode, rot_amt)));
20004 emit_insn (gen_andsi3_mask (rot2_2, rot2_1, GEN_INT (0xff)));
20005 emit_insn (gen_subsi3 (result_reg, rot1_2, rot2_2));
20007 else
20009 emit_insn (gen_cmpbdi3 (cmpb_diff, tmp_reg_src1, tmp_reg_src2));
20010 emit_insn (gen_movdi (zero_reg, GEN_INT (0)));
20011 emit_insn (gen_cmpbdi3 (cmpb_zero, tmp_reg_src1, zero_reg));
20012 emit_insn (gen_one_cmpldi2 (cmpb_diff, cmpb_diff));
20013 emit_insn (gen_iordi3 (cmpb_diff, cmpb_diff, cmpb_zero));
20014 emit_insn (gen_clzdi2 (rot_amt, cmpb_diff));
20015 emit_insn (gen_adddi3 (rot_amt, rot_amt, GEN_INT (8)));
20016 emit_insn (gen_rotldi3 (rot1_1, tmp_reg_src1,
20017 gen_lowpart (SImode, rot_amt)));
20018 emit_insn (gen_anddi3_mask (rot1_2, rot1_1, GEN_INT (0xff)));
20019 emit_insn (gen_rotldi3 (rot2_1, tmp_reg_src2,
20020 gen_lowpart (SImode, rot_amt)));
20021 emit_insn (gen_anddi3_mask (rot2_2, rot2_1, GEN_INT (0xff)));
20022 emit_insn (gen_subdi3 (result_reg, rot1_2, rot2_2));
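/* Note on the sequence above: cmpb sets 0xff in each byte position
   where its operands match, so the complemented cmpb_diff marks
   differing bytes and cmpb_zero marks NUL bytes in src1.  After the
   OR, the count of leading zeros is 8 * (index of the first
   interesting byte), so rotating left by clz + 8 brings exactly that
   byte of each operand into the low 8 bits, and the final subtract of
   the two masked bytes gives the usual negative/zero/positive
   result.  */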
20025 emit_label (final_move_label);
20026 emit_insn (gen_movsi (target,
20027 gen_lowpart (SImode, result_reg)));
20028 emit_label (final_label);
20029 return true;
20032 /* Expand a block move operation, and return 1 if successful. Return 0
20033 if we should let the compiler generate normal code.
20035 operands[0] is the destination
20036 operands[1] is the source
20037 operands[2] is the length
20038 operands[3] is the alignment */
20040 #define MAX_MOVE_REG 4
20042 int
20043 expand_block_move (rtx operands[])
20045 rtx orig_dest = operands[0];
20046 rtx orig_src = operands[1];
20047 rtx bytes_rtx = operands[2];
20048 rtx align_rtx = operands[3];
20049 int constp = (GET_CODE (bytes_rtx) == CONST_INT);
20050 int align;
20051 int bytes;
20052 int offset;
20053 int move_bytes;
20054 rtx stores[MAX_MOVE_REG];
20055 int num_reg = 0;
20057 /* If this is not a fixed size move, just call memcpy. */
20058 if (! constp)
20059 return 0;
20061 /* This must be a fixed size alignment. */
20062 gcc_assert (GET_CODE (align_rtx) == CONST_INT);
20063 align = INTVAL (align_rtx) * BITS_PER_UNIT;
20065 /* Anything to move? */
20066 bytes = INTVAL (bytes_rtx);
20067 if (bytes <= 0)
20068 return 1;
20070 if (bytes > rs6000_block_move_inline_limit)
20071 return 0;
20073 for (offset = 0; bytes > 0; offset += move_bytes, bytes -= move_bytes)
20075 union {
20076 rtx (*movmemsi) (rtx, rtx, rtx, rtx);
20077 rtx (*mov) (rtx, rtx);
20078 } gen_func;
20079 machine_mode mode = BLKmode;
20080 rtx src, dest;
20082 /* Altivec first, since it will be faster than a string move
20083 when it applies, and usually not significantly larger. */
20084 if (TARGET_ALTIVEC && bytes >= 16 && align >= 128)
20086 move_bytes = 16;
20087 mode = V4SImode;
20088 gen_func.mov = gen_movv4si;
20090 else if (TARGET_SPE && bytes >= 8 && align >= 64)
20092 move_bytes = 8;
20093 mode = V2SImode;
20094 gen_func.mov = gen_movv2si;
20096 else if (TARGET_STRING
20097 && bytes > 24 /* move up to 32 bytes at a time */
20098 && ! fixed_regs[5]
20099 && ! fixed_regs[6]
20100 && ! fixed_regs[7]
20101 && ! fixed_regs[8]
20102 && ! fixed_regs[9]
20103 && ! fixed_regs[10]
20104 && ! fixed_regs[11]
20105 && ! fixed_regs[12])
20107 move_bytes = (bytes > 32) ? 32 : bytes;
20108 gen_func.movmemsi = gen_movmemsi_8reg;
20110 else if (TARGET_STRING
20111 && bytes > 16 /* move up to 24 bytes at a time */
20112 && ! fixed_regs[5]
20113 && ! fixed_regs[6]
20114 && ! fixed_regs[7]
20115 && ! fixed_regs[8]
20116 && ! fixed_regs[9]
20117 && ! fixed_regs[10])
20119 move_bytes = (bytes > 24) ? 24 : bytes;
20120 gen_func.movmemsi = gen_movmemsi_6reg;
20122 else if (TARGET_STRING
20123 && bytes > 8 /* move up to 16 bytes at a time */
20124 && ! fixed_regs[5]
20125 && ! fixed_regs[6]
20126 && ! fixed_regs[7]
20127 && ! fixed_regs[8])
20129 move_bytes = (bytes > 16) ? 16 : bytes;
20130 gen_func.movmemsi = gen_movmemsi_4reg;
20132 else if (bytes >= 8 && TARGET_POWERPC64
20133 && (align >= 64 || !STRICT_ALIGNMENT))
20135 move_bytes = 8;
20136 mode = DImode;
20137 gen_func.mov = gen_movdi;
20138 if (offset == 0 && align < 64)
20140 rtx addr;
20142 /* If the address form is reg+offset with offset not a
20143 multiple of four, reload into reg indirect form here
20144 rather than waiting for reload. This way we get one
20145 reload, not one per load and/or store. */
20146 addr = XEXP (orig_dest, 0);
20147 if ((GET_CODE (addr) == PLUS || GET_CODE (addr) == LO_SUM)
20148 && GET_CODE (XEXP (addr, 1)) == CONST_INT
20149 && (INTVAL (XEXP (addr, 1)) & 3) != 0)
20151 addr = copy_addr_to_reg (addr);
20152 orig_dest = replace_equiv_address (orig_dest, addr);
20154 addr = XEXP (orig_src, 0);
20155 if ((GET_CODE (addr) == PLUS || GET_CODE (addr) == LO_SUM)
20156 && GET_CODE (XEXP (addr, 1)) == CONST_INT
20157 && (INTVAL (XEXP (addr, 1)) & 3) != 0)
20159 addr = copy_addr_to_reg (addr);
20160 orig_src = replace_equiv_address (orig_src, addr);
20164 else if (TARGET_STRING && bytes > 4 && !TARGET_POWERPC64)
20165 { /* move up to 8 bytes at a time */
20166 move_bytes = (bytes > 8) ? 8 : bytes;
20167 gen_func.movmemsi = gen_movmemsi_2reg;
20169 else if (bytes >= 4 && (align >= 32 || !STRICT_ALIGNMENT))
20170 { /* move 4 bytes */
20171 move_bytes = 4;
20172 mode = SImode;
20173 gen_func.mov = gen_movsi;
20175 else if (bytes >= 2 && (align >= 16 || !STRICT_ALIGNMENT))
20176 { /* move 2 bytes */
20177 move_bytes = 2;
20178 mode = HImode;
20179 gen_func.mov = gen_movhi;
20181 else if (TARGET_STRING && bytes > 1)
20182 { /* move up to 4 bytes at a time */
20183 move_bytes = (bytes > 4) ? 4 : bytes;
20184 gen_func.movmemsi = gen_movmemsi_1reg;
20186 else /* move 1 byte at a time */
20188 move_bytes = 1;
20189 mode = QImode;
20190 gen_func.mov = gen_movqi;
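/* Illustrative walk-through (assuming a 64-bit Altivec target where
   none of the TARGET_STRING branches fire): a 27-byte copy with
   128-bit alignment is decomposed as 16 (V4SImode) + 8 (DImode) +
   2 (HImode) + 1 (QImode), with the register moves buffered in
   STORES and flushed at most MAX_MOVE_REG at a time.  */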
20193 src = adjust_address (orig_src, mode, offset);
20194 dest = adjust_address (orig_dest, mode, offset);
20196 if (mode != BLKmode)
20198 rtx tmp_reg = gen_reg_rtx (mode);
20200 emit_insn ((*gen_func.mov) (tmp_reg, src));
20201 stores[num_reg++] = (*gen_func.mov) (dest, tmp_reg);
20204 if (mode == BLKmode || num_reg >= MAX_MOVE_REG || bytes == move_bytes)
20206 int i;
20207 for (i = 0; i < num_reg; i++)
20208 emit_insn (stores[i]);
20209 num_reg = 0;
20212 if (mode == BLKmode)
20214 /* Move the address into scratch registers. The movmemsi
20215 patterns require zero offset. */
20216 if (!REG_P (XEXP (src, 0)))
20218 rtx src_reg = copy_addr_to_reg (XEXP (src, 0));
20219 src = replace_equiv_address (src, src_reg);
20221 set_mem_size (src, move_bytes);
20223 if (!REG_P (XEXP (dest, 0)))
20225 rtx dest_reg = copy_addr_to_reg (XEXP (dest, 0));
20226 dest = replace_equiv_address (dest, dest_reg);
20228 set_mem_size (dest, move_bytes);
20230 emit_insn ((*gen_func.movmemsi) (dest, src,
20231 GEN_INT (move_bytes & 31),
20232 align_rtx));
20236 return 1;
20240 /* Return a string to perform a load_multiple operation.
20241 operands[0] is the vector.
20242 operands[1] is the source address.
20243 operands[2] is the first destination register. */
20245 const char *
20246 rs6000_output_load_multiple (rtx operands[3])
20248 /* We have to handle the case where the pseudo used to contain the address
20249 is assigned to one of the output registers. */
20250 int i, j;
20251 int words = XVECLEN (operands[0], 0);
20252 rtx xop[10];
20254 if (XVECLEN (operands[0], 0) == 1)
20255 return "lwz %2,0(%1)";
20257 for (i = 0; i < words; i++)
20258 if (refers_to_regno_p (REGNO (operands[2]) + i, operands[1]))
20260 if (i == words-1)
20262 xop[0] = GEN_INT (4 * (words-1));
20263 xop[1] = operands[1];
20264 xop[2] = operands[2];
20265 output_asm_insn ("lswi %2,%1,%0\n\tlwz %1,%0(%1)", xop);
20266 return "";
20268 else if (i == 0)
20270 xop[0] = GEN_INT (4 * (words-1));
20271 xop[1] = operands[1];
20272 xop[2] = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
20273 output_asm_insn ("addi %1,%1,4\n\tlswi %2,%1,%0\n\tlwz %1,-4(%1)", xop);
20274 return "";
20276 else
20278 for (j = 0; j < words; j++)
20279 if (j != i)
20281 xop[0] = GEN_INT (j * 4);
20282 xop[1] = operands[1];
20283 xop[2] = gen_rtx_REG (SImode, REGNO (operands[2]) + j);
20284 output_asm_insn ("lwz %2,%0(%1)", xop);
20286 xop[0] = GEN_INT (i * 4);
20287 xop[1] = operands[1];
20288 output_asm_insn ("lwz %1,%0(%1)", xop);
20289 return "";
20293 return "lswi %2,%1,%N0";
20297 /* A validation routine: say whether CODE, a condition code, and MODE
20298 match. The other alternatives either don't make sense or should
20299 never be generated. */
20301 void
20302 validate_condition_mode (enum rtx_code code, machine_mode mode)
20304 gcc_assert ((GET_RTX_CLASS (code) == RTX_COMPARE
20305 || GET_RTX_CLASS (code) == RTX_COMM_COMPARE)
20306 && GET_MODE_CLASS (mode) == MODE_CC);
20308 /* These don't make sense. */
20309 gcc_assert ((code != GT && code != LT && code != GE && code != LE)
20310 || mode != CCUNSmode);
20312 gcc_assert ((code != GTU && code != LTU && code != GEU && code != LEU)
20313 || mode == CCUNSmode);
20315 gcc_assert (mode == CCFPmode
20316 || (code != ORDERED && code != UNORDERED
20317 && code != UNEQ && code != LTGT
20318 && code != UNGT && code != UNLT
20319 && code != UNGE && code != UNLE));
20321 /* These should never be generated except for
20322 flag_finite_math_only. */
20323 gcc_assert (mode != CCFPmode
20324 || flag_finite_math_only
20325 || (code != LE && code != GE
20326 && code != UNEQ && code != LTGT
20327 && code != UNGT && code != UNLT));
20329 /* These are invalid; the information is not there. */
20330 gcc_assert (mode != CCEQmode || code == EQ || code == NE);
20334 /* Return whether MASK (a CONST_INT) is a valid mask for any rlwinm,
20335 rldicl, rldicr, or rldic instruction in mode MODE. If so, if E is
20336 not zero, store there the bit offset (counted from the right) where
20337 the single stretch of 1 bits begins; and similarly for B, the bit
20338 offset where it ends. */
20340 bool
20341 rs6000_is_valid_mask (rtx mask, int *b, int *e, machine_mode mode)
20343 unsigned HOST_WIDE_INT val = INTVAL (mask);
20344 unsigned HOST_WIDE_INT bit;
20345 int nb, ne;
20346 int n = GET_MODE_PRECISION (mode);
20348 if (mode != DImode && mode != SImode)
20349 return false;
20351 if (INTVAL (mask) >= 0)
20353 bit = val & -val;
20354 ne = exact_log2 (bit);
20355 nb = exact_log2 (val + bit);
20357 else if (val + 1 == 0)
20359 nb = n;
20360 ne = 0;
20362 else if (val & 1)
20364 val = ~val;
20365 bit = val & -val;
20366 nb = exact_log2 (bit);
20367 ne = exact_log2 (val + bit);
20369 else
20371 bit = val & -val;
20372 ne = exact_log2 (bit);
20373 if (val + bit == 0)
20374 nb = n;
20375 else
20376 nb = 0;
20379 nb--;
20381 if (nb < 0 || ne < 0 || nb >= n || ne >= n)
20382 return false;
20384 if (b)
20385 *b = nb;
20386 if (e)
20387 *e = ne;
20389 return true;
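/* Worked example: for MASK == 0x00ffff00 in SImode, bit == 0x100
   gives ne == 8, and exact_log2 (0x00ffff00 + 0x100) == 24 gives
   nb == 23 after the decrement: the single run of ones begins at
   bit 8 and ends at bit 23, counted from the right.  */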
20392 /* Return whether MASK (a CONST_INT) is a valid mask for any rlwinm, rldicl,
20393 or rldicr instruction, to implement an AND with it in mode MODE. */
20395 bool
20396 rs6000_is_valid_and_mask (rtx mask, machine_mode mode)
20398 int nb, ne;
20400 if (!rs6000_is_valid_mask (mask, &nb, &ne, mode))
20401 return false;
20403 /* For DImode, we need a rldicl, rldicr, or a rlwinm with mask that
20404 does not wrap. */
20405 if (mode == DImode)
20406 return (ne == 0 || nb == 63 || (nb < 32 && ne <= nb));
20408 /* For SImode, rlwinm can do everything. */
20409 if (mode == SImode)
20410 return (nb < 32 && ne < 32);
20412 return false;
20415 /* Return the instruction template for an AND with mask in mode MODE, with
20416 operands OPERANDS. If DOT is true, make it a record-form instruction. */
20418 const char *
20419 rs6000_insn_for_and_mask (machine_mode mode, rtx *operands, bool dot)
20421 int nb, ne;
20423 if (!rs6000_is_valid_mask (operands[2], &nb, &ne, mode))
20424 gcc_unreachable ();
20426 if (mode == DImode && ne == 0)
20428 operands[3] = GEN_INT (63 - nb);
20429 if (dot)
20430 return "rldicl. %0,%1,0,%3";
20431 return "rldicl %0,%1,0,%3";
20434 if (mode == DImode && nb == 63)
20436 operands[3] = GEN_INT (63 - ne);
20437 if (dot)
20438 return "rldicr. %0,%1,0,%3";
20439 return "rldicr %0,%1,0,%3";
20442 if (nb < 32 && ne < 32)
20444 operands[3] = GEN_INT (31 - nb);
20445 operands[4] = GEN_INT (31 - ne);
20446 if (dot)
20447 return "rlwinm. %0,%1,0,%3,%4";
20448 return "rlwinm %0,%1,0,%3,%4";
20451 gcc_unreachable ();
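/* Illustrative: an AND with 0x00000000ffffffff in DImode has ne == 0
   and nb == 31, so the first case above emits "rldicl %0,%1,0,32",
   clearing the 32 high-order bits.  */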
20454 /* Return whether MASK (a CONST_INT) is a valid mask for any rlw[i]nm,
20455 rld[i]cl, rld[i]cr, or rld[i]c instruction, to implement an AND with
20456 shift SHIFT (a ROTATE, ASHIFT, or LSHIFTRT) in mode MODE. */
20458 bool
20459 rs6000_is_valid_shift_mask (rtx mask, rtx shift, machine_mode mode)
20461 int nb, ne;
20463 if (!rs6000_is_valid_mask (mask, &nb, &ne, mode))
20464 return false;
20466 int n = GET_MODE_PRECISION (mode);
20467 int sh = -1;
20469 if (CONST_INT_P (XEXP (shift, 1)))
20471 sh = INTVAL (XEXP (shift, 1));
20472 if (sh < 0 || sh >= n)
20473 return false;
20476 rtx_code code = GET_CODE (shift);
20478 /* Convert any shift by 0 to a rotate, to simplify below code. */
20479 if (sh == 0)
20480 code = ROTATE;
20482 /* Convert rotate to simple shift if we can, to make analysis simpler. */
20483 if (code == ROTATE && sh >= 0 && nb >= ne && ne >= sh)
20484 code = ASHIFT;
20485 if (code == ROTATE && sh >= 0 && nb >= ne && nb < sh)
20487 code = LSHIFTRT;
20488 sh = n - sh;
20491 /* DImode rotates need rld*. */
20492 if (mode == DImode && code == ROTATE)
20493 return (nb == 63 || ne == 0 || ne == sh);
20495 /* SImode rotates need rlw*. */
20496 if (mode == SImode && code == ROTATE)
20497 return (nb < 32 && ne < 32 && sh < 32);
20499 /* Wrap-around masks are only okay for rotates. */
20500 if (ne > nb)
20501 return false;
20503 /* Variable shifts are only okay for rotates. */
20504 if (sh < 0)
20505 return false;
20507 /* Don't allow ASHIFT if the mask is wrong for that. */
20508 if (code == ASHIFT && ne < sh)
20509 return false;
20511 /* If we can do it with an rlw*, we can do it. Don't allow LSHIFTRT
20512 if the mask is wrong for that. */
20513 if (nb < 32 && ne < 32 && sh < 32
20514 && !(code == LSHIFTRT && nb >= 32 - sh))
20515 return true;
20517 /* If we can do it with an rld*, we can do it. Don't allow LSHIFTRT
20518 if the mask is wrong for that. */
20519 if (code == LSHIFTRT)
20520 sh = 64 - sh;
20521 if (nb == 63 || ne == 0 || ne == sh)
20522 return !(code == LSHIFTRT && nb >= sh);
20524 return false;
20527 /* Return the instruction template for a shift with mask in mode MODE, with
20528 operands OPERANDS. If DOT is true, make it a record-form instruction. */
20530 const char *
20531 rs6000_insn_for_shift_mask (machine_mode mode, rtx *operands, bool dot)
20533 int nb, ne;
20535 if (!rs6000_is_valid_mask (operands[3], &nb, &ne, mode))
20536 gcc_unreachable ();
20538 if (mode == DImode && ne == 0)
20540 if (GET_CODE (operands[4]) == LSHIFTRT && INTVAL (operands[2]))
20541 operands[2] = GEN_INT (64 - INTVAL (operands[2]));
20542 operands[3] = GEN_INT (63 - nb);
20543 if (dot)
20544 return "rld%I2cl. %0,%1,%2,%3";
20545 return "rld%I2cl %0,%1,%2,%3";
20548 if (mode == DImode && nb == 63)
20550 operands[3] = GEN_INT (63 - ne);
20551 if (dot)
20552 return "rld%I2cr. %0,%1,%2,%3";
20553 return "rld%I2cr %0,%1,%2,%3";
20556 if (mode == DImode
20557 && GET_CODE (operands[4]) != LSHIFTRT
20558 && CONST_INT_P (operands[2])
20559 && ne == INTVAL (operands[2]))
20561 operands[3] = GEN_INT (63 - nb);
20562 if (dot)
20563 return "rld%I2c. %0,%1,%2,%3";
20564 return "rld%I2c %0,%1,%2,%3";
20567 if (nb < 32 && ne < 32)
20569 if (GET_CODE (operands[4]) == LSHIFTRT && INTVAL (operands[2]))
20570 operands[2] = GEN_INT (32 - INTVAL (operands[2]));
20571 operands[3] = GEN_INT (31 - nb);
20572 operands[4] = GEN_INT (31 - ne);
20573 /* This insn can also be a 64-bit rotate with mask that really makes
20574 it just a shift right (with mask); the %h below are to adjust for
20575 that situation (shift count is >= 32 in that case). */
20576 if (dot)
20577 return "rlw%I2nm. %0,%1,%h2,%3,%4";
20578 return "rlw%I2nm %0,%1,%h2,%3,%4";
20581 gcc_unreachable ();
20584 /* Return whether MASK (a CONST_INT) is a valid mask for any rlwimi or
20585 rldimi instruction, to implement an insert with shift SHIFT (a ROTATE,
20586 ASHIFT, or LSHIFTRT) in mode MODE. */
20588 bool
20589 rs6000_is_valid_insert_mask (rtx mask, rtx shift, machine_mode mode)
20591 int nb, ne;
20593 if (!rs6000_is_valid_mask (mask, &nb, &ne, mode))
20594 return false;
20596 int n = GET_MODE_PRECISION (mode);
20598 int sh = INTVAL (XEXP (shift, 1));
20599 if (sh < 0 || sh >= n)
20600 return false;
20602 rtx_code code = GET_CODE (shift);
20604 /* Convert any shift by 0 to a rotate, to simplify below code. */
20605 if (sh == 0)
20606 code = ROTATE;
20608 /* Convert rotate to simple shift if we can, to make analysis simpler. */
20609 if (code == ROTATE && sh >= 0 && nb >= ne && ne >= sh)
20610 code = ASHIFT;
20611 if (code == ROTATE && sh >= 0 && nb >= ne && nb < sh)
20613 code = LSHIFTRT;
20614 sh = n - sh;
20617 /* DImode rotates need rldimi. */
20618 if (mode == DImode && code == ROTATE)
20619 return (ne == sh);
20621 /* SImode rotates need rlwimi. */
20622 if (mode == SImode && code == ROTATE)
20623 return (nb < 32 && ne < 32 && sh < 32);
20625 /* Wrap-around masks are only okay for rotates. */
20626 if (ne > nb)
20627 return false;
20629 /* Don't allow ASHIFT if the mask is wrong for that. */
20630 if (code == ASHIFT && ne < sh)
20631 return false;
20633 /* If we can do it with an rlwimi, we can do it. Don't allow LSHIFTRT
20634 if the mask is wrong for that. */
20635 if (nb < 32 && ne < 32 && sh < 32
20636 && !(code == LSHIFTRT && nb >= 32 - sh))
20637 return true;
20639 /* If we can do it with an rldimi, we can do it. Don't allow LSHIFTRT
20640 if the mask is wrong for that. */
20641 if (code == LSHIFTRT)
20642 sh = 64 - sh;
20643 if (ne == sh)
20644 return !(code == LSHIFTRT && nb >= sh);
20646 return false;
20649 /* Return the instruction template for an insert with mask in mode MODE, with
20650 operands OPERANDS. If DOT is true, make it a record-form instruction. */
20652 const char *
20653 rs6000_insn_for_insert_mask (machine_mode mode, rtx *operands, bool dot)
20655 int nb, ne;
20657 if (!rs6000_is_valid_mask (operands[3], &nb, &ne, mode))
20658 gcc_unreachable ();
20660 /* Prefer rldimi because rlwimi is cracked. */
20661 if (TARGET_POWERPC64
20662 && (!dot || mode == DImode)
20663 && GET_CODE (operands[4]) != LSHIFTRT
20664 && ne == INTVAL (operands[2]))
20666 operands[3] = GEN_INT (63 - nb);
20667 if (dot)
20668 return "rldimi. %0,%1,%2,%3";
20669 return "rldimi %0,%1,%2,%3";
20672 if (nb < 32 && ne < 32)
20674 if (GET_CODE (operands[4]) == LSHIFTRT && INTVAL (operands[2]))
20675 operands[2] = GEN_INT (32 - INTVAL (operands[2]));
20676 operands[3] = GEN_INT (31 - nb);
20677 operands[4] = GEN_INT (31 - ne);
20678 if (dot)
20679 return "rlwimi. %0,%1,%2,%3,%4";
20680 return "rlwimi %0,%1,%2,%3,%4";
20683 gcc_unreachable ();
20686 /* Return whether an AND with C (a CONST_INT) in mode MODE can be done
20687 using two machine instructions. */
20689 bool
20690 rs6000_is_valid_2insn_and (rtx c, machine_mode mode)
20692 /* There are two kinds of AND we can handle with two insns:
20693 1) those we can do with two rl* insns;
20694 2) ori[s];xori[s].
20696 We do not handle that last case yet. */
20698 /* If there is just one stretch of ones, we can do it. */
20699 if (rs6000_is_valid_mask (c, NULL, NULL, mode))
20700 return true;
20702 /* Otherwise, fill in the lowest "hole"; if we can do the result with
20703 one insn, we can do the whole thing with two. */
20704 unsigned HOST_WIDE_INT val = INTVAL (c);
20705 unsigned HOST_WIDE_INT bit1 = val & -val;
20706 unsigned HOST_WIDE_INT bit2 = (val + bit1) & ~val;
20707 unsigned HOST_WIDE_INT val1 = (val + bit1) & val;
20708 unsigned HOST_WIDE_INT bit3 = val1 & -val1;
20709 return rs6000_is_valid_and_mask (GEN_INT (val + bit3 - bit2), mode);
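/* Worked example: for C == 0xff00ff00, bit1 == 0x100, bit2 == 0x10000,
   and bit3 == 0x1000000, so the candidate val + bit3 - bit2 ==
   0xffffff00 is C with its lowest hole (bits 16..23) filled in.  That
   is a single stretch of ones, so the AND can be done with two rl*
   instructions.  */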
20712 /* Emit the two insns to do an AND in mode MODE, with operands OPERANDS.
20713 If EXPAND is true, split rotate-and-mask instructions we generate to
20714 their constituent parts as well (this is used during expand); if DOT
20715 is 1, make the last insn a record-form instruction clobbering the
20716 destination GPR and setting the CC reg (from operands[3]); if 2, set
20717 that GPR as well as the CC reg. */
20719 void
20720 rs6000_emit_2insn_and (machine_mode mode, rtx *operands, bool expand, int dot)
20722 gcc_assert (!(expand && dot));
20724 unsigned HOST_WIDE_INT val = INTVAL (operands[2]);
20726 /* If it is one stretch of ones, it is DImode; shift left, mask, then
20727 shift right. This generates better code than doing the masks without
20728 shifts, or shifting first right and then left. */
20729 int nb, ne;
20730 if (rs6000_is_valid_mask (operands[2], &nb, &ne, mode) && nb >= ne)
20732 gcc_assert (mode == DImode);
20734 int shift = 63 - nb;
20735 if (expand)
20737 rtx tmp1 = gen_reg_rtx (DImode);
20738 rtx tmp2 = gen_reg_rtx (DImode);
20739 emit_insn (gen_ashldi3 (tmp1, operands[1], GEN_INT (shift)));
20740 emit_insn (gen_anddi3 (tmp2, tmp1, GEN_INT (val << shift)));
20741 emit_insn (gen_lshrdi3 (operands[0], tmp2, GEN_INT (shift)));
20743 else
20745 rtx tmp = gen_rtx_ASHIFT (mode, operands[1], GEN_INT (shift));
20746 tmp = gen_rtx_AND (mode, tmp, GEN_INT (val << shift));
20747 emit_move_insn (operands[0], tmp);
20748 tmp = gen_rtx_LSHIFTRT (mode, operands[0], GEN_INT (shift));
20749 rs6000_emit_dot_insn (operands[0], tmp, dot, dot ? operands[3] : 0);
20751 return;
20754 /* Otherwise, make a mask2 that cuts out the lowest "hole", and a mask1
20755 that does the rest. */
20756 unsigned HOST_WIDE_INT bit1 = val & -val;
20757 unsigned HOST_WIDE_INT bit2 = (val + bit1) & ~val;
20758 unsigned HOST_WIDE_INT val1 = (val + bit1) & val;
20759 unsigned HOST_WIDE_INT bit3 = val1 & -val1;
20761 unsigned HOST_WIDE_INT mask1 = -bit3 + bit2 - 1;
20762 unsigned HOST_WIDE_INT mask2 = val + bit3 - bit2;
20764 gcc_assert (rs6000_is_valid_and_mask (GEN_INT (mask2), mode));
20766 /* Two "no-rotate"-and-mask instructions, for SImode. */
20767 if (rs6000_is_valid_and_mask (GEN_INT (mask1), mode))
20769 gcc_assert (mode == SImode);
20771 rtx reg = expand ? gen_reg_rtx (mode) : operands[0];
20772 rtx tmp = gen_rtx_AND (mode, operands[1], GEN_INT (mask1));
20773 emit_move_insn (reg, tmp);
20774 tmp = gen_rtx_AND (mode, reg, GEN_INT (mask2));
20775 rs6000_emit_dot_insn (operands[0], tmp, dot, dot ? operands[3] : 0);
20776 return;
20779 gcc_assert (mode == DImode);
20781 /* Two "no-rotate"-and-mask instructions, for DImode: both are rlwinm
20782 insns; we have to do the first in SImode, because it wraps. */
20783 if (mask2 <= 0xffffffff
20784 && rs6000_is_valid_and_mask (GEN_INT (mask1), SImode))
20786 rtx reg = expand ? gen_reg_rtx (mode) : operands[0];
20787 rtx tmp = gen_rtx_AND (SImode, gen_lowpart (SImode, operands[1]),
20788 GEN_INT (mask1));
20789 rtx reg_low = gen_lowpart (SImode, reg);
20790 emit_move_insn (reg_low, tmp);
20791 tmp = gen_rtx_AND (mode, reg, GEN_INT (mask2));
20792 rs6000_emit_dot_insn (operands[0], tmp, dot, dot ? operands[3] : 0);
20793 return;
20796 /* Two rld* insns: rotate, clear the hole in the middle (which now is
20797 at the top end), rotate back and clear the other hole. */
20798 int right = exact_log2 (bit3);
20799 int left = 64 - right;
20801 /* Rotate the mask too. */
20802 mask1 = (mask1 >> right) | ((bit2 - 1) << left);
20804 if (expand)
20806 rtx tmp1 = gen_reg_rtx (DImode);
20807 rtx tmp2 = gen_reg_rtx (DImode);
20808 rtx tmp3 = gen_reg_rtx (DImode);
20809 emit_insn (gen_rotldi3 (tmp1, operands[1], GEN_INT (left)));
20810 emit_insn (gen_anddi3 (tmp2, tmp1, GEN_INT (mask1)));
20811 emit_insn (gen_rotldi3 (tmp3, tmp2, GEN_INT (right)));
20812 emit_insn (gen_anddi3 (operands[0], tmp3, GEN_INT (mask2)));
20814 else
20816 rtx tmp = gen_rtx_ROTATE (mode, operands[1], GEN_INT (left));
20817 tmp = gen_rtx_AND (mode, tmp, GEN_INT (mask1));
20818 emit_move_insn (operands[0], tmp);
20819 tmp = gen_rtx_ROTATE (mode, operands[0], GEN_INT (right));
20820 tmp = gen_rtx_AND (mode, tmp, GEN_INT (mask2));
20821 rs6000_emit_dot_insn (operands[0], tmp, dot, dot ? operands[3] : 0);
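/* Illustrative example for the two-rld case above: for DImode
   C == 0xff00ff0000000000 we get bit3 == bit 56, so right == 56 and
   left == 8.  Rotating left by 8 moves the hole at bits 48..55 to the
   top, the rotated MASK1 == 0x00ffffffffffffff clears it with one rld
   insn, rotating left by 56 undoes the first rotate, and MASK2 ==
   0xffffff0000000000 clears the remaining low-order hole.  */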
20825 /* Return 1 if REGNO (reg1) == REGNO (reg2) - 1 making them candidates
20826 for lfq and stfq insns iff the registers are hard registers. */
20828 int
20829 registers_ok_for_quad_peep (rtx reg1, rtx reg2)
20831 /* We might have been passed a SUBREG. */
20832 if (GET_CODE (reg1) != REG || GET_CODE (reg2) != REG)
20833 return 0;
20835 /* We might have been passed non-floating-point registers. */
20836 if (!FP_REGNO_P (REGNO (reg1))
20837 || !FP_REGNO_P (REGNO (reg2)))
20838 return 0;
20840 return (REGNO (reg1) == REGNO (reg2) - 1);
20843 /* Return 1 if addr1 and addr2 are suitable for lfq or stfq insn.
20844 addr1 and addr2 must be in consecutive memory locations
20845 (addr2 == addr1 + 8). */
20847 int
20848 mems_ok_for_quad_peep (rtx mem1, rtx mem2)
20850 rtx addr1, addr2;
20851 unsigned int reg1, reg2;
20852 int offset1, offset2;
20854 /* The mems cannot be volatile. */
20855 if (MEM_VOLATILE_P (mem1) || MEM_VOLATILE_P (mem2))
20856 return 0;
20858 addr1 = XEXP (mem1, 0);
20859 addr2 = XEXP (mem2, 0);
20861 /* Extract an offset (if used) from the first addr. */
20862 if (GET_CODE (addr1) == PLUS)
20864 /* If not a REG, return zero. */
20865 if (GET_CODE (XEXP (addr1, 0)) != REG)
20866 return 0;
20867 else
20869 reg1 = REGNO (XEXP (addr1, 0));
20870 /* The offset must be constant! */
20871 if (GET_CODE (XEXP (addr1, 1)) != CONST_INT)
20872 return 0;
20873 offset1 = INTVAL (XEXP (addr1, 1));
20876 else if (GET_CODE (addr1) != REG)
20877 return 0;
20878 else
20880 reg1 = REGNO (addr1);
20881 /* This was a simple (mem (reg)) expression. Offset is 0. */
20882 offset1 = 0;
20885 /* And now for the second addr. */
20886 if (GET_CODE (addr2) == PLUS)
20888 /* If not a REG, return zero. */
20889 if (GET_CODE (XEXP (addr2, 0)) != REG)
20890 return 0;
20891 else
20893 reg2 = REGNO (XEXP (addr2, 0));
20894 /* The offset must be constant. */
20895 if (GET_CODE (XEXP (addr2, 1)) != CONST_INT)
20896 return 0;
20897 offset2 = INTVAL (XEXP (addr2, 1));
20900 else if (GET_CODE (addr2) != REG)
20901 return 0;
20902 else
20904 reg2 = REGNO (addr2);
20905 /* This was a simple (mem (reg)) expression. Offset is 0. */
20906 offset2 = 0;
20909 /* Both of these must have the same base register. */
20910 if (reg1 != reg2)
20911 return 0;
20913 /* The offset for the second addr must be 8 more than the first addr. */
20914 if (offset2 != offset1 + 8)
20915 return 0;
20917 /* All the tests passed. addr1 and addr2 are valid for lfq or stfq
20918 instructions. */
20919 return 1;
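/* Illustrative: (mem (reg X)) followed by
   (mem (plus (reg X) (const_int 8))) qualifies (offsets 0 and 8), as
   does any pair reg+N / reg+N+8 with the same base register.  */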
20923 rtx
20924 rs6000_secondary_memory_needed_rtx (machine_mode mode)
20926 static bool eliminated = false;
20927 rtx ret;
20929 if (mode != SDmode || TARGET_NO_SDMODE_STACK)
20930 ret = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
20931 else
20933 rtx mem = cfun->machine->sdmode_stack_slot;
20934 gcc_assert (mem != NULL_RTX);
20936 if (!eliminated)
20938 mem = eliminate_regs (mem, VOIDmode, NULL_RTX);
20939 cfun->machine->sdmode_stack_slot = mem;
20940 eliminated = true;
20942 ret = mem;
20945 if (TARGET_DEBUG_ADDR)
20947 fprintf (stderr, "\nrs6000_secondary_memory_needed_rtx, mode %s, rtx:\n",
20948 GET_MODE_NAME (mode));
20949 if (!ret)
20950 fprintf (stderr, "\tNULL_RTX\n");
20951 else
20952 debug_rtx (ret);
20955 return ret;
20958 /* Return the mode to be used for memory when a secondary memory
20959 location is needed. For SDmode values we need to use DDmode, in
20960 all other cases we can use the same mode. */
20961 machine_mode
20962 rs6000_secondary_memory_needed_mode (machine_mode mode)
20964 if (lra_in_progress && mode == SDmode)
20965 return DDmode;
20966 return mode;
20969 static tree
20970 rs6000_check_sdmode (tree *tp, int *walk_subtrees, void *data ATTRIBUTE_UNUSED)
20972 /* Don't walk into types. */
20973 if (*tp == NULL_TREE || *tp == error_mark_node || TYPE_P (*tp))
20975 *walk_subtrees = 0;
20976 return NULL_TREE;
20979 switch (TREE_CODE (*tp))
20981 case VAR_DECL:
20982 case PARM_DECL:
20983 case FIELD_DECL:
20984 case RESULT_DECL:
20985 case SSA_NAME:
20986 case REAL_CST:
20987 case MEM_REF:
20988 case VIEW_CONVERT_EXPR:
20989 if (TYPE_MODE (TREE_TYPE (*tp)) == SDmode)
20990 return *tp;
20991 break;
20992 default:
20993 break;
20996 return NULL_TREE;
20999 /* Classify a register type. Because the FMRGOW/FMRGEW instructions only work
21000 on traditional floating point registers, and the VMRGOW/VMRGEW instructions
21001 only work on the traditional altivec registers, note if an altivec register
21002 was chosen. */
21004 static enum rs6000_reg_type
21005 register_to_reg_type (rtx reg, bool *is_altivec)
21007 HOST_WIDE_INT regno;
21008 enum reg_class rclass;
21010 if (GET_CODE (reg) == SUBREG)
21011 reg = SUBREG_REG (reg);
21013 if (!REG_P (reg))
21014 return NO_REG_TYPE;
21016 regno = REGNO (reg);
21017 if (regno >= FIRST_PSEUDO_REGISTER)
21019 if (!lra_in_progress && !reload_in_progress && !reload_completed)
21020 return PSEUDO_REG_TYPE;
21022 regno = true_regnum (reg);
21023 if (regno < 0 || regno >= FIRST_PSEUDO_REGISTER)
21024 return PSEUDO_REG_TYPE;
21027 gcc_assert (regno >= 0);
21029 if (is_altivec && ALTIVEC_REGNO_P (regno))
21030 *is_altivec = true;
21032 rclass = rs6000_regno_regclass[regno];
21033 return reg_class_to_reg_type[(int)rclass];
21036 /* Helper function to return the cost of adding a TOC entry address. */
21038 static inline int
21039 rs6000_secondary_reload_toc_costs (addr_mask_type addr_mask)
21041 int ret;
21043 if (TARGET_CMODEL != CMODEL_SMALL)
21044 ret = ((addr_mask & RELOAD_REG_OFFSET) == 0) ? 1 : 2;
21046 else
21047 ret = (TARGET_MINIMAL_TOC) ? 6 : 3;
21049 return ret;
21052 /* Helper function for rs6000_secondary_reload to determine whether the memory
21053 address (ADDR) with a given register class (RCLASS) and machine mode (MODE)
21054 needs reloading. Return negative if the memory is not handled by the memory
21055 helper functions and to try a different reload method, 0 if no additional
21056 instructions are needed, and positive to give the extra cost for the
21057 memory. */
21059 static int
21060 rs6000_secondary_reload_memory (rtx addr,
21061 enum reg_class rclass,
21062 machine_mode mode)
21064 int extra_cost = 0;
21065 rtx reg, and_arg, plus_arg0, plus_arg1;
21066 addr_mask_type addr_mask;
21067 const char *type = NULL;
21068 const char *fail_msg = NULL;
21070 if (GPR_REG_CLASS_P (rclass))
21071 addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_GPR];
21073 else if (rclass == FLOAT_REGS)
21074 addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_FPR];
21076 else if (rclass == ALTIVEC_REGS)
21077 addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_VMX];
21079 /* For the combined VSX_REGS, turn off Altivec AND -16. */
21080 else if (rclass == VSX_REGS)
21081 addr_mask = (reg_addr[mode].addr_mask[RELOAD_REG_VMX]
21082 & ~RELOAD_REG_AND_M16);
21084 /* If the register allocator hasn't made up its mind yet on the register
21085 class to use, settle on defaults. */
21086 else if (rclass == NO_REGS)
21088 addr_mask = (reg_addr[mode].addr_mask[RELOAD_REG_ANY]
21089 & ~RELOAD_REG_AND_M16);
21091 if ((addr_mask & RELOAD_REG_MULTIPLE) != 0)
21092 addr_mask &= ~(RELOAD_REG_INDEXED
21093 | RELOAD_REG_PRE_INCDEC
21094 | RELOAD_REG_PRE_MODIFY);
21097 else
21098 addr_mask = 0;
21100 /* If the register isn't valid in this register class, just return now. */
21101 if ((addr_mask & RELOAD_REG_VALID) == 0)
21103 if (TARGET_DEBUG_ADDR)
21105 fprintf (stderr,
21106 "rs6000_secondary_reload_memory: mode = %s, class = %s, "
21107 "not valid in class\n",
21108 GET_MODE_NAME (mode), reg_class_names[rclass]);
21109 debug_rtx (addr);
21112 return -1;
21115 switch (GET_CODE (addr))
21117 /* Does the register class support auto-update forms for this mode? We
21118 don't need a scratch register, since the PowerPC only supports
21119 PRE_INC, PRE_DEC, and PRE_MODIFY. */
21120 case PRE_INC:
21121 case PRE_DEC:
21122 reg = XEXP (addr, 0);
21123 if (!base_reg_operand (addr, GET_MODE (reg)))
21125 fail_msg = "no base register #1";
21126 extra_cost = -1;
21129 else if ((addr_mask & RELOAD_REG_PRE_INCDEC) == 0)
21131 extra_cost = 1;
21132 type = "update";
21134 break;
21136 case PRE_MODIFY:
21137 reg = XEXP (addr, 0);
21138 plus_arg1 = XEXP (addr, 1);
21139 if (!base_reg_operand (reg, GET_MODE (reg))
21140 || GET_CODE (plus_arg1) != PLUS
21141 || !rtx_equal_p (reg, XEXP (plus_arg1, 0)))
21143 fail_msg = "bad PRE_MODIFY";
21144 extra_cost = -1;
21147 else if ((addr_mask & RELOAD_REG_PRE_MODIFY) == 0)
21149 extra_cost = 1;
21150 type = "update";
21152 break;
21154 /* Do we need to simulate AND -16 to clear the bottom address bits used
21155 in VMX load/stores? Only allow the AND for vector sizes. */
21156 case AND:
21157 and_arg = XEXP (addr, 0);
21158 if (GET_MODE_SIZE (mode) != 16
21159 || GET_CODE (XEXP (addr, 1)) != CONST_INT
21160 || INTVAL (XEXP (addr, 1)) != -16)
21162 fail_msg = "bad Altivec AND #1";
21163 extra_cost = -1;
21166 if (rclass != ALTIVEC_REGS)
21168 if (legitimate_indirect_address_p (and_arg, false))
21169 extra_cost = 1;
21171 else if (legitimate_indexed_address_p (and_arg, false))
21172 extra_cost = 2;
21174 else
21176 fail_msg = "bad Altivec AND #2";
21177 extra_cost = -1;
21180 type = "and";
21182 break;
21184 /* If this is an indirect address, make sure it is a base register. */
21185 case REG:
21186 case SUBREG:
21187 if (!legitimate_indirect_address_p (addr, false))
21189 extra_cost = 1;
21190 type = "move";
21192 break;
21194 /* If this is an indexed address, make sure the register class can handle
21195 indexed addresses for this mode. */
21196 case PLUS:
21197 plus_arg0 = XEXP (addr, 0);
21198 plus_arg1 = XEXP (addr, 1);
21200 /* (plus (plus (reg) (constant)) (constant)) is generated during
21201 push_reload processing, so handle it now. */
21202 if (GET_CODE (plus_arg0) == PLUS && CONST_INT_P (plus_arg1))
21204 if ((addr_mask & RELOAD_REG_OFFSET) == 0)
21206 extra_cost = 1;
21207 type = "offset";
21211 /* (plus (plus (reg) (constant)) (reg)) is also generated during
21212 push_reload processing, so handle it now. */
21213 else if (GET_CODE (plus_arg0) == PLUS && REG_P (plus_arg1))
21215 if ((addr_mask & RELOAD_REG_INDEXED) == 0)
21217 extra_cost = 1;
21218 type = "indexed #2";
21222 else if (!base_reg_operand (plus_arg0, GET_MODE (plus_arg0)))
21224 fail_msg = "no base register #2";
21225 extra_cost = -1;
21228 else if (int_reg_operand (plus_arg1, GET_MODE (plus_arg1)))
21230 if ((addr_mask & RELOAD_REG_INDEXED) == 0
21231 || !legitimate_indexed_address_p (addr, false))
21233 extra_cost = 1;
21234 type = "indexed";
21238 else if ((addr_mask & RELOAD_REG_QUAD_OFFSET) != 0
21239 && CONST_INT_P (plus_arg1))
21241 if (!quad_address_offset_p (INTVAL (plus_arg1)))
21243 extra_cost = 1;
21244 type = "vector d-form offset";
21248 /* Make sure the register class can handle offset addresses. */
21249 else if (rs6000_legitimate_offset_address_p (mode, addr, false, true))
21251 if ((addr_mask & RELOAD_REG_OFFSET) == 0)
21253 extra_cost = 1;
21254 type = "offset #2";
21258 else
21260 fail_msg = "bad PLUS";
21261 extra_cost = -1;
21264 break;
21266 case LO_SUM:
21267 /* Quad offsets are restricted and can't handle normal addresses. */
21268 if ((addr_mask & RELOAD_REG_QUAD_OFFSET) != 0)
21270 extra_cost = -1;
21271 type = "vector d-form lo_sum";
21274 else if (!legitimate_lo_sum_address_p (mode, addr, false))
21276 fail_msg = "bad LO_SUM";
21277 extra_cost = -1;
21280 if ((addr_mask & RELOAD_REG_OFFSET) == 0)
21282 extra_cost = 1;
21283 type = "lo_sum";
21285 break;
21287 /* Static addresses need to create a TOC entry. */
21288 case CONST:
21289 case SYMBOL_REF:
21290 case LABEL_REF:
21291 if ((addr_mask & RELOAD_REG_QUAD_OFFSET) != 0)
21293 extra_cost = -1;
21294 type = "vector d-form lo_sum #2";
21297 else
21299 type = "address";
21300 extra_cost = rs6000_secondary_reload_toc_costs (addr_mask);
21302 break;
21304 /* TOC references look like offsettable memory. */
21305 case UNSPEC:
21306 if (TARGET_CMODEL == CMODEL_SMALL || XINT (addr, 1) != UNSPEC_TOCREL)
21308 fail_msg = "bad UNSPEC";
21309 extra_cost = -1;
21312 else if ((addr_mask & RELOAD_REG_QUAD_OFFSET) != 0)
21314 extra_cost = -1;
21315 type = "vector d-form lo_sum #3";
21318 else if ((addr_mask & RELOAD_REG_OFFSET) == 0)
21320 extra_cost = 1;
21321 type = "toc reference";
21323 break;
21325 default:
21327 fail_msg = "bad address";
21328 extra_cost = -1;
21332 if (TARGET_DEBUG_ADDR /* && extra_cost != 0 */)
21334 if (extra_cost < 0)
21335 fprintf (stderr,
21336 "rs6000_secondary_reload_memory error: mode = %s, "
21337 "class = %s, addr_mask = '%s', %s\n",
21338 GET_MODE_NAME (mode),
21339 reg_class_names[rclass],
21340 rs6000_debug_addr_mask (addr_mask, false),
21341 (fail_msg != NULL) ? fail_msg : "<bad address>");
21343 else
21344 fprintf (stderr,
21345 "rs6000_secondary_reload_memory: mode = %s, class = %s, "
21346 "addr_mask = '%s', extra cost = %d, %s\n",
21347 GET_MODE_NAME (mode),
21348 reg_class_names[rclass],
21349 rs6000_debug_addr_mask (addr_mask, false),
21350 extra_cost,
21351 (type) ? type : "<none>");
21353 debug_rtx (addr);
21356 return extra_cost;
21359 /* Helper function for rs6000_secondary_reload to return true if a move to a
21360 different register class is really a simple move. */
21362 static bool
21363 rs6000_secondary_reload_simple_move (enum rs6000_reg_type to_type,
21364 enum rs6000_reg_type from_type,
21365 machine_mode mode)
21367 int size = GET_MODE_SIZE (mode);
21369 /* Add support for various direct moves available. In this function, we only
21370 look at cases where we don't need any extra registers, and one or more
21371 simple move insns are issued. Originally small integers are not allowed
21372 in FPR/VSX registers. Single precision binary floating is not a simple
21373 move because we need to convert to the single precision memory layout.
21374 The 4-byte SDmode can be moved. TDmode values are disallowed since they
21375 need special direct move handling, which we do not support yet. */
21376 if (TARGET_DIRECT_MOVE
21377 && ((to_type == GPR_REG_TYPE && from_type == VSX_REG_TYPE)
21378 || (to_type == VSX_REG_TYPE && from_type == GPR_REG_TYPE)))
21380 if (TARGET_POWERPC64)
21382 /* ISA 2.07: MTVSRD or MFVSRD. */
21383 if (size == 8)
21384 return true;
21386 /* ISA 3.0: MTVSRDD or MFVSRD + MFVSRLD. */
21387 if (size == 16 && TARGET_P9_VECTOR && mode != TDmode)
21388 return true;
21391 /* ISA 2.07: MTVSRWZ or MFVSRWZ. */
21392 if (TARGET_VSX_SMALL_INTEGER)
21394 if (mode == SImode)
21395 return true;
21397 if (TARGET_P9_VECTOR && (mode == HImode || mode == QImode))
21398 return true;
21401 /* ISA 2.07: MTVSRWZ or MFVSRWZ. */
21402 if (mode == SDmode)
21403 return true;
21406 /* Power6+: MFTGPR or MFFGPR. */
21407 else if (TARGET_MFPGPR && TARGET_POWERPC64 && size == 8
21408 && ((to_type == GPR_REG_TYPE && from_type == FPR_REG_TYPE)
21409 || (to_type == FPR_REG_TYPE && from_type == GPR_REG_TYPE)))
21410 return true;
21412 /* Move to/from SPR. */
21413 else if ((size == 4 || (TARGET_POWERPC64 && size == 8))
21414 && ((to_type == GPR_REG_TYPE && from_type == SPR_REG_TYPE)
21415 || (to_type == SPR_REG_TYPE && from_type == GPR_REG_TYPE)))
21416 return true;
21418 return false;
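/* A worked illustration of the checks above, assuming a 64-bit power8
   target (TARGET_DIRECT_MOVE and TARGET_POWERPC64 set, ISA 3.0 features
   and -mvsx-small-integer left disabled).  The calls are hypothetical;
   nothing in this file makes them in this form:

     rs6000_secondary_reload_simple_move (VSX_REG_TYPE, GPR_REG_TYPE, DImode)
       => true, a single mtvsrd
     rs6000_secondary_reload_simple_move (GPR_REG_TYPE, VSX_REG_TYPE, DImode)
       => true, a single mfvsrd
     rs6000_secondary_reload_simple_move (VSX_REG_TYPE, GPR_REG_TYPE, V2DImode)
       => false; 16-byte moves are only simple with ISA 3.0's mtvsrdd, so on
	  power8 they fall to rs6000_secondary_reload_direct_move below
     rs6000_secondary_reload_simple_move (VSX_REG_TYPE, GPR_REG_TYPE, SFmode)
       => false; SFmode needs a memory-layout conversion, not a plain copy  */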
21421 /* Direct move helper function for rs6000_secondary_reload. Handle all of the
21422 special direct moves that involve allocating an extra register. Return
21423 true if such a move is handled, recording the helper's insn code and extra
21424 cost in SRI; return false otherwise. */
21426 static bool
21427 rs6000_secondary_reload_direct_move (enum rs6000_reg_type to_type,
21428 enum rs6000_reg_type from_type,
21429 machine_mode mode,
21430 secondary_reload_info *sri,
21431 bool altivec_p)
21433 bool ret = false;
21434 enum insn_code icode = CODE_FOR_nothing;
21435 int cost = 0;
21436 int size = GET_MODE_SIZE (mode);
21438 if (TARGET_POWERPC64 && size == 16)
21440 /* Handle moving 128-bit values from GPRs to VSX registers on
21441 ISA 2.07 (power8, power9) when running in 64-bit mode using
21442 XXPERMDI to glue the two 64-bit values back together. */
21443 if (to_type == VSX_REG_TYPE && from_type == GPR_REG_TYPE)
21445 cost = 3; /* 2 mtvsrd's, 1 xxpermdi. */
21446 icode = reg_addr[mode].reload_vsx_gpr;
21449 /* Handle moving 128-bit values from VSX registers to GPRs on
21450 ISA 2.07 when running in 64-bit mode using XXPERMDI to get access to the
21451 bottom 64-bit value. */
21452 else if (to_type == GPR_REG_TYPE && from_type == VSX_REG_TYPE)
21454 cost = 3; /* 2 mfvsrd's, 1 xxpermdi. */
21455 icode = reg_addr[mode].reload_gpr_vsx;
21459 else if (TARGET_POWERPC64 && mode == SFmode)
21461 if (to_type == GPR_REG_TYPE && from_type == VSX_REG_TYPE)
21463 cost = 3; /* xscvdpspn, mfvsrd, and. */
21464 icode = reg_addr[mode].reload_gpr_vsx;
21467 else if (to_type == VSX_REG_TYPE && from_type == GPR_REG_TYPE)
21469 cost = 2; /* mtvsrwz, xscvspdpn. */
21470 icode = reg_addr[mode].reload_vsx_gpr;
21474 else if (!TARGET_POWERPC64 && size == 8)
21476 /* Handle moving 64-bit values from GPRs to floating point registers on
21477 ISA 2.07 when running in 32-bit mode using FMRGOW to glue the two
21478 32-bit values back together. Altivec register classes must be handled
21479 specially since a different instruction is used, and the secondary
21480 reload support requires a single instruction class in the scratch
21481 register constraint. However, right now TFmode is not allowed in
21482 Altivec registers, so the pattern will never match. */
21483 if (to_type == VSX_REG_TYPE && from_type == GPR_REG_TYPE && !altivec_p)
21485 cost = 3; /* 2 mtvsrwz's, 1 fmrgow. */
21486 icode = reg_addr[mode].reload_fpr_gpr;
21490 if (icode != CODE_FOR_nothing)
21492 ret = true;
21493 if (sri)
21495 sri->icode = icode;
21496 sri->extra_cost = cost;
21500 return ret;
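/* Sketch of the effect on SRI for one concrete case, assuming a 64-bit
   power8 target and a TImode copy from a GPR pair to a VSX register;
   the values simply restate the first branch above:

     secondary_reload_info sri;
     rs6000_secondary_reload_direct_move (VSX_REG_TYPE, GPR_REG_TYPE,
					  TImode, &sri, false);
       => returns true
       => sri.icode == reg_addr[TImode].reload_vsx_gpr
       => sri.extra_cost == 3 (two mtvsrd insns plus one xxpermdi)  */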
21503 /* Return whether a move between two register classes can be done either
21504 directly (simple move) or via a pattern that uses a single extra temporary
21505 (using ISA 2.07's direct move in this case). */
21507 static bool
21508 rs6000_secondary_reload_move (enum rs6000_reg_type to_type,
21509 enum rs6000_reg_type from_type,
21510 machine_mode mode,
21511 secondary_reload_info *sri,
21512 bool altivec_p)
21514 /* Fall back to load/store reloads if either type is not a register. */
21515 if (to_type == NO_REG_TYPE || from_type == NO_REG_TYPE)
21516 return false;
21518 /* If we haven't allocated registers yet, assume the move can be done for the
21519 standard register types. */
21520 if ((to_type == PSEUDO_REG_TYPE && from_type == PSEUDO_REG_TYPE)
21521 || (to_type == PSEUDO_REG_TYPE && IS_STD_REG_TYPE (from_type))
21522 || (from_type == PSEUDO_REG_TYPE && IS_STD_REG_TYPE (to_type)))
21523 return true;
21525 /* A move within the same set of registers is a simple move for
21526 non-specialized registers. */
21527 if (to_type == from_type && IS_STD_REG_TYPE (to_type))
21528 return true;
21530 /* Check whether a simple move can be done directly. */
21531 if (rs6000_secondary_reload_simple_move (to_type, from_type, mode))
21533 if (sri)
21535 sri->icode = CODE_FOR_nothing;
21536 sri->extra_cost = 0;
21538 return true;
21541 /* Now check if we can do it in a few steps. */
21542 return rs6000_secondary_reload_direct_move (to_type, from_type, mode, sri,
21543 altivec_p);
21546 /* Inform reload about cases where moving X with a mode MODE to a register in
21547 RCLASS requires an extra scratch or immediate register. Return the class
21548 needed for the immediate register.
21550 For VSX and Altivec, we may need a register to convert sp+offset into
21551 reg+sp.
21553 For misaligned 64-bit gpr loads and stores we need a register to
21554 convert an offset address to indirect. */
21556 static reg_class_t
21557 rs6000_secondary_reload (bool in_p,
21558 rtx x,
21559 reg_class_t rclass_i,
21560 machine_mode mode,
21561 secondary_reload_info *sri)
21563 enum reg_class rclass = (enum reg_class) rclass_i;
21564 reg_class_t ret = ALL_REGS;
21565 enum insn_code icode;
21566 bool default_p = false;
21567 bool done_p = false;
21569 /* Allow subreg of memory before/during reload. */
21570 bool memory_p = (MEM_P (x)
21571 || (!reload_completed && GET_CODE (x) == SUBREG
21572 && MEM_P (SUBREG_REG (x))));
21574 sri->icode = CODE_FOR_nothing;
21575 sri->t_icode = CODE_FOR_nothing;
21576 sri->extra_cost = 0;
21577 icode = ((in_p)
21578 ? reg_addr[mode].reload_load
21579 : reg_addr[mode].reload_store);
21581 if (REG_P (x) || register_operand (x, mode))
21583 enum rs6000_reg_type to_type = reg_class_to_reg_type[(int)rclass];
21584 bool altivec_p = (rclass == ALTIVEC_REGS);
21585 enum rs6000_reg_type from_type = register_to_reg_type (x, &altivec_p);
21587 if (!in_p)
21588 std::swap (to_type, from_type);
21590 /* Can we do a direct move of some sort? */
21591 if (rs6000_secondary_reload_move (to_type, from_type, mode, sri,
21592 altivec_p))
21594 icode = (enum insn_code)sri->icode;
21595 default_p = false;
21596 done_p = true;
21597 ret = NO_REGS;
21601 /* Make sure 0.0 is not reloaded or forced into memory. */
21602 if (x == CONST0_RTX (mode) && VSX_REG_CLASS_P (rclass))
21604 ret = NO_REGS;
21605 default_p = false;
21606 done_p = true;
21609 /* If this is a scalar floating point value and we want to load it into the
21610 traditional Altivec registers, move it through a traditional floating
21611 point register, unless we have D-form addressing. Also make sure that
21612 non-zero constants use an FPR. */
21613 if (!done_p && reg_addr[mode].scalar_in_vmx_p
21614 && !mode_supports_vmx_dform (mode)
21615 && (rclass == VSX_REGS || rclass == ALTIVEC_REGS)
21616 && (memory_p || (GET_CODE (x) == CONST_DOUBLE)))
21618 ret = FLOAT_REGS;
21619 default_p = false;
21620 done_p = true;
21623 /* Handle reload of load/stores if we have reload helper functions. */
21624 if (!done_p && icode != CODE_FOR_nothing && memory_p)
21626 int extra_cost = rs6000_secondary_reload_memory (XEXP (x, 0), rclass,
21627 mode);
21629 if (extra_cost >= 0)
21631 done_p = true;
21632 ret = NO_REGS;
21633 if (extra_cost > 0)
21635 sri->extra_cost = extra_cost;
21636 sri->icode = icode;
21641 /* Handle unaligned loads and stores of integer registers. */
21642 if (!done_p && TARGET_POWERPC64
21643 && reg_class_to_reg_type[(int)rclass] == GPR_REG_TYPE
21644 && memory_p
21645 && GET_MODE_SIZE (GET_MODE (x)) >= UNITS_PER_WORD)
21647 rtx addr = XEXP (x, 0);
21648 rtx off = address_offset (addr);
21650 if (off != NULL_RTX)
21652 unsigned int extra = GET_MODE_SIZE (GET_MODE (x)) - UNITS_PER_WORD;
21653 unsigned HOST_WIDE_INT offset = INTVAL (off);
21655 /* We need a secondary reload when our legitimate_address_p
21656 says the address is good (as otherwise the entire address
21657 will be reloaded), and the offset is not a multiple of
21658 four or we have an address wrap. Address wrap will only
21659 occur for LO_SUMs since legitimate_offset_address_p
21660 rejects addresses for 16-byte mems that will wrap. */
21661 if (GET_CODE (addr) == LO_SUM
21662 ? (1 /* legitimate_address_p allows any offset for lo_sum */
21663 && ((offset & 3) != 0
21664 || ((offset & 0xffff) ^ 0x8000) >= 0x10000 - extra))
21665 : (offset + 0x8000 < 0x10000 - extra /* legitimate_address_p */
21666 && (offset & 3) != 0))
21668 /* -m32 -mpowerpc64 needs to use a 32-bit scratch register. */
21669 if (in_p)
21670 sri->icode = ((TARGET_32BIT) ? CODE_FOR_reload_si_load
21671 : CODE_FOR_reload_di_load);
21672 else
21673 sri->icode = ((TARGET_32BIT) ? CODE_FOR_reload_si_store
21674 : CODE_FOR_reload_di_store);
21675 sri->extra_cost = 2;
21676 ret = NO_REGS;
21677 done_p = true;
21679 else
21680 default_p = true;
21682 else
21683 default_p = true;
21686 if (!done_p && !TARGET_POWERPC64
21687 && reg_class_to_reg_type[(int)rclass] == GPR_REG_TYPE
21688 && memory_p
21689 && GET_MODE_SIZE (GET_MODE (x)) > UNITS_PER_WORD)
21691 rtx addr = XEXP (x, 0);
21692 rtx off = address_offset (addr);
21694 if (off != NULL_RTX)
21696 unsigned int extra = GET_MODE_SIZE (GET_MODE (x)) - UNITS_PER_WORD;
21697 unsigned HOST_WIDE_INT offset = INTVAL (off);
21699 /* We need a secondary reload when our legitimate_address_p
21700 says the address is good (as otherwise the entire address
21701 will be reloaded), and we have a wrap.
21703 legitimate_lo_sum_address_p allows LO_SUM addresses to
21704 have any offset so test for wrap in the low 16 bits.
21706 legitimate_offset_address_p checks for the range
21707 [-0x8000,0x7fff] for mode size of 8 and [-0x8000,0x7ff7]
21708 for mode size of 16. We wrap at [0x7ffc,0x7fff] and
21709 [0x7ff4,0x7fff] respectively, so test for the
21710 intersection of these ranges, [0x7ffc,0x7fff] and
21711 [0x7ff4,0x7ff7] respectively.
21713 Note that the address we see here may have been
21714 manipulated by legitimize_reload_address. */
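/* Worked numbers for the test below (illustration only): with 32-bit
   registers (UNITS_PER_WORD == 4) and a mode size of 8, extra == 4.
   An offset of 0x7ffc puts the second word at 0x8000, past the 16-bit
   displacement range:
     non-LO_SUM: 0x7ffc - (0x8000 - 4) == 0, which is < 4	  => reload
     LO_SUM:	 ((0x7ffc & 0xffff) ^ 0x8000) == 0xfffc, which is
		 >= 0x10000 - 4 == 0xfffc			  => reload
   whereas an offset of 0x7ff8 passes both tests and needs no help.  */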
21715 if (GET_CODE (addr) == LO_SUM
21716 ? ((offset & 0xffff) ^ 0x8000) >= 0x10000 - extra
21717 : offset - (0x8000 - extra) < UNITS_PER_WORD)
21719 if (in_p)
21720 sri->icode = CODE_FOR_reload_si_load;
21721 else
21722 sri->icode = CODE_FOR_reload_si_store;
21723 sri->extra_cost = 2;
21724 ret = NO_REGS;
21725 done_p = true;
21727 else
21728 default_p = true;
21730 else
21731 default_p = true;
21734 if (!done_p)
21735 default_p = true;
21737 if (default_p)
21738 ret = default_secondary_reload (in_p, x, rclass, mode, sri);
21740 gcc_assert (ret != ALL_REGS);
21742 if (TARGET_DEBUG_ADDR)
21744 fprintf (stderr,
21745 "\nrs6000_secondary_reload, return %s, in_p = %s, rclass = %s, "
21746 "mode = %s",
21747 reg_class_names[ret],
21748 in_p ? "true" : "false",
21749 reg_class_names[rclass],
21750 GET_MODE_NAME (mode));
21752 if (reload_completed)
21753 fputs (", after reload", stderr);
21755 if (!done_p)
21756 fputs (", done_p not set", stderr);
21758 if (default_p)
21759 fputs (", default secondary reload", stderr);
21761 if (sri->icode != CODE_FOR_nothing)
21762 fprintf (stderr, ", reload func = %s, extra cost = %d",
21763 insn_data[sri->icode].name, sri->extra_cost);
21765 else if (sri->extra_cost > 0)
21766 fprintf (stderr, ", extra cost = %d", sri->extra_cost);
21768 fputs ("\n", stderr);
21769 debug_rtx (x);
21772 return ret;
21775 /* Better tracing for rs6000_secondary_reload_inner. */
21777 static void
21778 rs6000_secondary_reload_trace (int line, rtx reg, rtx mem, rtx scratch,
21779 bool store_p)
21781 rtx set, clobber;
21783 gcc_assert (reg != NULL_RTX && mem != NULL_RTX && scratch != NULL_RTX);
21785 fprintf (stderr, "rs6000_secondary_reload_inner:%d, type = %s\n", line,
21786 store_p ? "store" : "load");
21788 if (store_p)
21789 set = gen_rtx_SET (mem, reg);
21790 else
21791 set = gen_rtx_SET (reg, mem);
21793 clobber = gen_rtx_CLOBBER (VOIDmode, scratch);
21794 debug_rtx (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, set, clobber)));
21797 static void rs6000_secondary_reload_fail (int, rtx, rtx, rtx, bool)
21798 ATTRIBUTE_NORETURN;
21800 static void
21801 rs6000_secondary_reload_fail (int line, rtx reg, rtx mem, rtx scratch,
21802 bool store_p)
21804 rs6000_secondary_reload_trace (line, reg, mem, scratch, store_p);
21805 gcc_unreachable ();
21808 /* Fixup reload addresses for values in GPR, FPR, and VMX registers that have
21809 reload helper functions. These were identified in
21810 rs6000_secondary_reload_memory, and if reload decided to use the secondary
21811 reload, it calls the insns:
21812 reload_<RELOAD:mode>_<P:mptrsize>_store
21813 reload_<RELOAD:mode>_<P:mptrsize>_load
21815 which in turn calls this function, to do whatever is necessary to create
21816 valid addresses. */
21818 void
21819 rs6000_secondary_reload_inner (rtx reg, rtx mem, rtx scratch, bool store_p)
21821 int regno = true_regnum (reg);
21822 machine_mode mode = GET_MODE (reg);
21823 addr_mask_type addr_mask;
21824 rtx addr;
21825 rtx new_addr;
21826 rtx op_reg, op0, op1;
21827 rtx and_op;
21828 rtx cc_clobber;
21829 rtvec rv;
21831 if (regno < 0 || regno >= FIRST_PSEUDO_REGISTER || !MEM_P (mem)
21832 || !base_reg_operand (scratch, GET_MODE (scratch)))
21833 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
21835 if (IN_RANGE (regno, FIRST_GPR_REGNO, LAST_GPR_REGNO))
21836 addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_GPR];
21838 else if (IN_RANGE (regno, FIRST_FPR_REGNO, LAST_FPR_REGNO))
21839 addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_FPR];
21841 else if (IN_RANGE (regno, FIRST_ALTIVEC_REGNO, LAST_ALTIVEC_REGNO))
21842 addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_VMX];
21844 else
21845 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
21847 /* Make sure the mode is valid in this register class. */
21848 if ((addr_mask & RELOAD_REG_VALID) == 0)
21849 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
21851 if (TARGET_DEBUG_ADDR)
21852 rs6000_secondary_reload_trace (__LINE__, reg, mem, scratch, store_p);
21854 new_addr = addr = XEXP (mem, 0);
21855 switch (GET_CODE (addr))
21857 /* Does the register class support auto update forms for this mode? If
21858 not, do the update now. We don't need a scratch register, since the
21859 PowerPC only supports PRE_INC, PRE_DEC, and PRE_MODIFY. */
21860 case PRE_INC:
21861 case PRE_DEC:
21862 op_reg = XEXP (addr, 0);
21863 if (!base_reg_operand (op_reg, Pmode))
21864 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
21866 if ((addr_mask & RELOAD_REG_PRE_INCDEC) == 0)
21868 emit_insn (gen_add2_insn (op_reg, GEN_INT (GET_MODE_SIZE (mode))));
21869 new_addr = op_reg;
21871 break;
21873 case PRE_MODIFY:
21874 op0 = XEXP (addr, 0);
21875 op1 = XEXP (addr, 1);
21876 if (!base_reg_operand (op0, Pmode)
21877 || GET_CODE (op1) != PLUS
21878 || !rtx_equal_p (op0, XEXP (op1, 0)))
21879 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
21881 if ((addr_mask & RELOAD_REG_PRE_MODIFY) == 0)
21883 emit_insn (gen_rtx_SET (op0, op1));
21884 new_addr = op0;
21886 break;
21888 /* Do we need to simulate AND -16 to clear the bottom address bits used
21889 in VMX load/stores? */
21890 case AND:
21891 op0 = XEXP (addr, 0);
21892 op1 = XEXP (addr, 1);
21893 if ((addr_mask & RELOAD_REG_AND_M16) == 0)
21895 if (REG_P (op0) || GET_CODE (op0) == SUBREG)
21896 op_reg = op0;
21898 else if (GET_CODE (op1) == PLUS)
21900 emit_insn (gen_rtx_SET (scratch, op1));
21901 op_reg = scratch;
21904 else
21905 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
21907 and_op = gen_rtx_AND (GET_MODE (scratch), op_reg, op1);
21908 cc_clobber = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (CCmode));
21909 rv = gen_rtvec (2, gen_rtx_SET (scratch, and_op), cc_clobber);
21910 emit_insn (gen_rtx_PARALLEL (VOIDmode, rv));
21911 new_addr = scratch;
21913 break;
21915 /* If this is an indirect address, make sure it is a base register. */
21916 case REG:
21917 case SUBREG:
21918 if (!base_reg_operand (addr, GET_MODE (addr)))
21920 emit_insn (gen_rtx_SET (scratch, addr));
21921 new_addr = scratch;
21923 break;
21925 /* If this is an indexed address, make sure the register class can handle
21926 indexed addresses for this mode. */
21927 case PLUS:
21928 op0 = XEXP (addr, 0);
21929 op1 = XEXP (addr, 1);
21930 if (!base_reg_operand (op0, Pmode))
21931 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
21933 else if (int_reg_operand (op1, Pmode))
21935 if ((addr_mask & RELOAD_REG_INDEXED) == 0)
21937 emit_insn (gen_rtx_SET (scratch, addr));
21938 new_addr = scratch;
21942 else if (mode_supports_vsx_dform_quad (mode) && CONST_INT_P (op1))
21944 if (((addr_mask & RELOAD_REG_QUAD_OFFSET) == 0)
21945 || !quad_address_p (addr, mode, false))
21947 emit_insn (gen_rtx_SET (scratch, addr));
21948 new_addr = scratch;
21952 /* Make sure the register class can handle offset addresses. */
21953 else if (rs6000_legitimate_offset_address_p (mode, addr, false, true))
21955 if ((addr_mask & RELOAD_REG_OFFSET) == 0)
21957 emit_insn (gen_rtx_SET (scratch, addr));
21958 new_addr = scratch;
21962 else
21963 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
21965 break;
21967 case LO_SUM:
21968 op0 = XEXP (addr, 0);
21969 op1 = XEXP (addr, 1);
21970 if (!base_reg_operand (op0, Pmode))
21971 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
21973 else if (int_reg_operand (op1, Pmode))
21975 if ((addr_mask & RELOAD_REG_INDEXED) == 0)
21977 emit_insn (gen_rtx_SET (scratch, addr));
21978 new_addr = scratch;
21982 /* Quad offsets are restricted and can't handle normal addresses. */
21983 else if (mode_supports_vsx_dform_quad (mode))
21985 emit_insn (gen_rtx_SET (scratch, addr));
21986 new_addr = scratch;
21989 /* Make sure the register class can handle offset addresses. */
21990 else if (legitimate_lo_sum_address_p (mode, addr, false))
21992 if ((addr_mask & RELOAD_REG_OFFSET) == 0)
21994 emit_insn (gen_rtx_SET (scratch, addr));
21995 new_addr = scratch;
21999 else
22000 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
22002 break;
22004 case SYMBOL_REF:
22005 case CONST:
22006 case LABEL_REF:
22007 rs6000_emit_move (scratch, addr, Pmode);
22008 new_addr = scratch;
22009 break;
22011 default:
22012 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
22015 /* Adjust the address if it changed. */
22016 if (addr != new_addr)
22018 mem = replace_equiv_address_nv (mem, new_addr);
22019 if (TARGET_DEBUG_ADDR)
22020 fprintf (stderr, "\nrs6000_secondary_reload_inner, mem adjusted.\n");
22023 /* Now create the move. */
22024 if (store_p)
22025 emit_insn (gen_rtx_SET (mem, reg));
22026 else
22027 emit_insn (gen_rtx_SET (reg, mem));
22029 return;
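/* A minimal before/after sketch of the PLUS case above, for a DFmode
   value being reloaded into an Altivec register on a pre-ISA 3.0 target,
   where the VMX addr_mask lacks RELOAD_REG_OFFSET.  Register numbers are
   illustrative, not taken from real reload output:

     before:  (set (reg:DF <vN>) (mem:DF (plus (reg:DI 1)
					       (const_int 16))))
     emitted: (set (reg:DI 11) (plus (reg:DI 1) (const_int 16)))
     after:   (set (reg:DF <vN>) (mem:DF (reg:DI 11)))

   The offset address is computed into the scratch register and the
   memory reference is rewritten as indirect, which the Altivec/VSX
   indexed load forms can handle.  */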
22032 /* Convert reloads involving 64-bit gprs and misaligned offset
22033 addressing, or multiple 32-bit gprs and offsets that are too large,
22034 to use indirect addressing. */
22036 void
22037 rs6000_secondary_reload_gpr (rtx reg, rtx mem, rtx scratch, bool store_p)
22039 int regno = true_regnum (reg);
22040 enum reg_class rclass;
22041 rtx addr;
22042 rtx scratch_or_premodify = scratch;
22044 if (TARGET_DEBUG_ADDR)
22046 fprintf (stderr, "\nrs6000_secondary_reload_gpr, type = %s\n",
22047 store_p ? "store" : "load");
22048 fprintf (stderr, "reg:\n");
22049 debug_rtx (reg);
22050 fprintf (stderr, "mem:\n");
22051 debug_rtx (mem);
22052 fprintf (stderr, "scratch:\n");
22053 debug_rtx (scratch);
22056 gcc_assert (regno >= 0 && regno < FIRST_PSEUDO_REGISTER);
22057 gcc_assert (GET_CODE (mem) == MEM);
22058 rclass = REGNO_REG_CLASS (regno);
22059 gcc_assert (rclass == GENERAL_REGS || rclass == BASE_REGS);
22060 addr = XEXP (mem, 0);
22062 if (GET_CODE (addr) == PRE_MODIFY)
22064 gcc_assert (REG_P (XEXP (addr, 0))
22065 && GET_CODE (XEXP (addr, 1)) == PLUS
22066 && XEXP (XEXP (addr, 1), 0) == XEXP (addr, 0));
22067 scratch_or_premodify = XEXP (addr, 0);
22068 if (!HARD_REGISTER_P (scratch_or_premodify))
22069 /* If we have a pseudo here then reload will have arranged
22070 to have it replaced, but only in the original insn.
22071 Use the replacement here too. */
22072 scratch_or_premodify = find_replacement (&XEXP (addr, 0));
22074 /* RTL emitted by rs6000_secondary_reload_gpr uses RTL
22075 expressions from the original insn, without unsharing them.
22076 Any RTL that points into the original insn will of course
22077 have register replacements applied. That is why we don't
22078 need to look for replacements under the PLUS. */
22079 addr = XEXP (addr, 1);
22081 gcc_assert (GET_CODE (addr) == PLUS || GET_CODE (addr) == LO_SUM);
22083 rs6000_emit_move (scratch_or_premodify, addr, Pmode);
22085 mem = replace_equiv_address_nv (mem, scratch_or_premodify);
22087 /* Now create the move. */
22088 if (store_p)
22089 emit_insn (gen_rtx_SET (mem, reg));
22090 else
22091 emit_insn (gen_rtx_SET (reg, mem));
22093 return;
22096 /* Allocate a 64-bit stack slot to be used for copying SDmode values through if
22097 this function has any SDmode references. If we are on a power7 or later, we
22098 don't need the 64-bit stack slot since the LFIWZX and STFIWX instructions
22099 can load/store the value. */
22101 static void
22102 rs6000_alloc_sdmode_stack_slot (void)
22104 tree t;
22105 basic_block bb;
22106 gimple_stmt_iterator gsi;
22108 gcc_assert (cfun->machine->sdmode_stack_slot == NULL_RTX);
22109 /* We use a different approach for dealing with the secondary
22110 memory in LRA. */
22111 if (ira_use_lra_p)
22112 return;
22114 if (TARGET_NO_SDMODE_STACK)
22115 return;
22117 FOR_EACH_BB_FN (bb, cfun)
22118 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
22120 tree ret = walk_gimple_op (gsi_stmt (gsi), rs6000_check_sdmode, NULL);
22121 if (ret)
22123 rtx stack = assign_stack_local (DDmode, GET_MODE_SIZE (DDmode), 0);
22124 cfun->machine->sdmode_stack_slot = adjust_address_nv (stack,
22125 SDmode, 0);
22126 return;
22130 /* Check for any SDmode parameters of the function. */
22131 for (t = DECL_ARGUMENTS (cfun->decl); t; t = DECL_CHAIN (t))
22133 if (TREE_TYPE (t) == error_mark_node)
22134 continue;
22136 if (TYPE_MODE (TREE_TYPE (t)) == SDmode
22137 || TYPE_MODE (DECL_ARG_TYPE (t)) == SDmode)
22139 rtx stack = assign_stack_local (DDmode, GET_MODE_SIZE (DDmode), 0);
22140 cfun->machine->sdmode_stack_slot = adjust_address_nv (stack,
22141 SDmode, 0);
22142 return;
22147 static void
22148 rs6000_instantiate_decls (void)
22150 if (cfun->machine->sdmode_stack_slot != NULL_RTX)
22151 instantiate_decl_rtl (cfun->machine->sdmode_stack_slot);
22154 /* Given an rtx X being reloaded into a reg required to be
22155 in class CLASS, return the class of reg to actually use.
22156 In general this is just CLASS; but on some machines
22157 in some cases it is preferable to use a more restrictive class.
22159 On the RS/6000, we have to return NO_REGS when we want to reload a
22160 floating-point CONST_DOUBLE to force it to be copied to memory.
22162 We also don't want to reload integer values into floating-point
22163 registers if we can at all help it. In fact, this can
22164 cause reload to die, if it tries to generate a reload of CTR
22165 into a FP register and discovers it doesn't have the memory location
22166 required.
22168 ??? Would it be a good idea to have reload do the converse, that is
22169 try to reload floating modes into FP registers if possible?
22172 static enum reg_class
22173 rs6000_preferred_reload_class (rtx x, enum reg_class rclass)
22175 machine_mode mode = GET_MODE (x);
22176 bool is_constant = CONSTANT_P (x);
22178 /* If a mode can't go in FPR/ALTIVEC/VSX registers, don't return a preferred
22179 reload class for it. */
22180 if ((rclass == ALTIVEC_REGS || rclass == VSX_REGS)
22181 && (reg_addr[mode].addr_mask[RELOAD_REG_VMX] & RELOAD_REG_VALID) == 0)
22182 return NO_REGS;
22184 if ((rclass == FLOAT_REGS || rclass == VSX_REGS)
22185 && (reg_addr[mode].addr_mask[RELOAD_REG_FPR] & RELOAD_REG_VALID) == 0)
22186 return NO_REGS;
22188 /* For VSX, see if we should prefer FLOAT_REGS or ALTIVEC_REGS. Do not allow
22189 the reloading of address expressions using PLUS into floating point
22190 registers. */
22191 if (TARGET_VSX && VSX_REG_CLASS_P (rclass) && GET_CODE (x) != PLUS)
22193 if (is_constant)
22195 /* Zero is always allowed in all VSX registers. */
22196 if (x == CONST0_RTX (mode))
22197 return rclass;
22199 /* If this is a vector constant that can be formed with a few Altivec
22200 instructions, we want altivec registers. */
22201 if (GET_CODE (x) == CONST_VECTOR && easy_vector_constant (x, mode))
22202 return ALTIVEC_REGS;
22204 /* If this is an integer constant that can easily be loaded into
22205 vector registers, allow it. */
22206 if (CONST_INT_P (x))
22208 HOST_WIDE_INT value = INTVAL (x);
22210 /* ISA 2.07 can generate -1 in all registers with XXLORC. ISA
22211 2.06 can generate it in the Altivec registers with
22212 VSPLTI<x>. */
22213 if (value == -1)
22215 if (TARGET_P8_VECTOR)
22216 return rclass;
22217 else if (rclass == ALTIVEC_REGS || rclass == VSX_REGS)
22218 return ALTIVEC_REGS;
22219 else
22220 return NO_REGS;
22223 /* ISA 3.0 can load -128..127 using the XXSPLTIB instruction and
22224 a sign extend in the Altivec registers. */
22225 if (IN_RANGE (value, -128, 127) && TARGET_P9_VECTOR
22226 && TARGET_VSX_SMALL_INTEGER
22227 && (rclass == ALTIVEC_REGS || rclass == VSX_REGS))
22228 return ALTIVEC_REGS;
22231 /* Force constant to memory. */
22232 return NO_REGS;
22235 /* D-form addressing can easily reload the value. */
22236 if (mode_supports_vmx_dform (mode)
22237 || mode_supports_vsx_dform_quad (mode))
22238 return rclass;
22240 /* If this is a scalar floating point value and we don't have D-form
22241 addressing, prefer the traditional floating point registers so that we
22242 can use D-form (register+offset) addressing. */
22243 if (rclass == VSX_REGS
22244 && (mode == SFmode || GET_MODE_SIZE (mode) == 8))
22245 return FLOAT_REGS;
22247 /* Prefer the Altivec registers if Altivec is handling the vector
22248 operations (i.e. V16QI, V8HI, and V4SI), or if we prefer Altivec
22249 loads. */
22250 if (VECTOR_UNIT_ALTIVEC_P (mode) || VECTOR_MEM_ALTIVEC_P (mode)
22251 || mode == V1TImode)
22252 return ALTIVEC_REGS;
22254 return rclass;
22257 if (is_constant || GET_CODE (x) == PLUS)
22259 if (reg_class_subset_p (GENERAL_REGS, rclass))
22260 return GENERAL_REGS;
22261 if (reg_class_subset_p (BASE_REGS, rclass))
22262 return BASE_REGS;
22263 return NO_REGS;
22266 if (GET_MODE_CLASS (mode) == MODE_INT && rclass == NON_SPECIAL_REGS)
22267 return GENERAL_REGS;
22269 return rclass;
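/* Concrete outcomes of the branches above for a power8 VSX target
   (TARGET_P8_VECTOR, no ISA 3.0); these restate the code and add no
   new rules:

     x == CONST0_RTX (V2DFmode), rclass == VSX_REGS  => VSX_REGS
     x == (const_int -1), rclass == VSX_REGS	     => VSX_REGS (xxlorc)
     x is an easy CONST_VECTOR splat		     => ALTIVEC_REGS
     x is a non-zero DFmode CONST_DOUBLE	     => NO_REGS, i.e. the
	 constant is forced to memory
     x is a non-constant DFmode value, rclass == VSX_REGS, and VMX
	 D-form addressing is unavailable	     => FLOAT_REGS  */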
22272 /* Debug version of rs6000_preferred_reload_class. */
22273 static enum reg_class
22274 rs6000_debug_preferred_reload_class (rtx x, enum reg_class rclass)
22276 enum reg_class ret = rs6000_preferred_reload_class (x, rclass);
22278 fprintf (stderr,
22279 "\nrs6000_preferred_reload_class, return %s, rclass = %s, "
22280 "mode = %s, x:\n",
22281 reg_class_names[ret], reg_class_names[rclass],
22282 GET_MODE_NAME (GET_MODE (x)));
22283 debug_rtx (x);
22285 return ret;
22288 /* If we are copying between FP or AltiVec registers and anything else, we need
22289 a memory location. The exception is when we are targeting ppc64 and the
22290 fpr-to-gpr and gpr-to-fpr move instructions are available. Also, under VSX, you
22291 can copy vector registers from the FP register set to the Altivec register
22292 set and vice versa. */
22294 static bool
22295 rs6000_secondary_memory_needed (enum reg_class from_class,
22296 enum reg_class to_class,
22297 machine_mode mode)
22299 enum rs6000_reg_type from_type, to_type;
22300 bool altivec_p = ((from_class == ALTIVEC_REGS)
22301 || (to_class == ALTIVEC_REGS));
22303 /* If a simple/direct move is available, we don't need secondary memory. */
22304 from_type = reg_class_to_reg_type[(int)from_class];
22305 to_type = reg_class_to_reg_type[(int)to_class];
22307 if (rs6000_secondary_reload_move (to_type, from_type, mode,
22308 (secondary_reload_info *)0, altivec_p))
22309 return false;
22311 /* If we have a floating point or vector register class, we need to use
22312 memory to transfer the data. */
22313 if (IS_FP_VECT_REG_TYPE (from_type) || IS_FP_VECT_REG_TYPE (to_type))
22314 return true;
22316 return false;
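/* For instance (illustration only), copying a DFmode value between
   FLOAT_REGS and GENERAL_REGS on a 32-bit target without direct moves
   returns true: the value must bounce through memory, e.g. an stfd
   followed by two lwz insns.  A GPR to GPR copy of any mode is a simple
   move and returns false.  */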
22319 /* Debug version of rs6000_secondary_memory_needed. */
22320 static bool
22321 rs6000_debug_secondary_memory_needed (enum reg_class from_class,
22322 enum reg_class to_class,
22323 machine_mode mode)
22325 bool ret = rs6000_secondary_memory_needed (from_class, to_class, mode);
22327 fprintf (stderr,
22328 "rs6000_secondary_memory_needed, return: %s, from_class = %s, "
22329 "to_class = %s, mode = %s\n",
22330 ret ? "true" : "false",
22331 reg_class_names[from_class],
22332 reg_class_names[to_class],
22333 GET_MODE_NAME (mode));
22335 return ret;
22338 /* Return the register class of a scratch register needed to copy IN into
22339 or out of a register in RCLASS in MODE. If it can be done directly,
22340 NO_REGS is returned. */
22342 static enum reg_class
22343 rs6000_secondary_reload_class (enum reg_class rclass, machine_mode mode,
22344 rtx in)
22346 int regno;
22348 if (TARGET_ELF || (DEFAULT_ABI == ABI_DARWIN
22349 #if TARGET_MACHO
22350 && MACHOPIC_INDIRECT
22351 #endif
22354 /* We cannot copy a symbolic operand directly into anything
22355 other than BASE_REGS for TARGET_ELF. So indicate that a
22356 register from BASE_REGS is needed as an intermediate
22357 register.
22359 On Darwin, pic addresses require a load from memory, which
22360 needs a base register. */
22361 if (rclass != BASE_REGS
22362 && (GET_CODE (in) == SYMBOL_REF
22363 || GET_CODE (in) == HIGH
22364 || GET_CODE (in) == LABEL_REF
22365 || GET_CODE (in) == CONST))
22366 return BASE_REGS;
22369 if (GET_CODE (in) == REG)
22371 regno = REGNO (in);
22372 if (regno >= FIRST_PSEUDO_REGISTER)
22374 regno = true_regnum (in);
22375 if (regno >= FIRST_PSEUDO_REGISTER)
22376 regno = -1;
22379 else if (GET_CODE (in) == SUBREG)
22381 regno = true_regnum (in);
22382 if (regno >= FIRST_PSEUDO_REGISTER)
22383 regno = -1;
22385 else
22386 regno = -1;
22388 /* If we have VSX register moves, prefer moving scalar values between
22389 Altivec registers and GPRs by going via an FPR (and then via memory)
22390 instead of reloading the secondary memory address for Altivec moves. */
22391 if (TARGET_VSX
22392 && GET_MODE_SIZE (mode) < 16
22393 && !mode_supports_vmx_dform (mode)
22394 && (((rclass == GENERAL_REGS || rclass == BASE_REGS)
22395 && (regno >= 0 && ALTIVEC_REGNO_P (regno)))
22396 || ((rclass == VSX_REGS || rclass == ALTIVEC_REGS)
22397 && (regno >= 0 && INT_REGNO_P (regno)))))
22398 return FLOAT_REGS;
22400 /* We can place anything into GENERAL_REGS and can put GENERAL_REGS
22401 into anything. */
22402 if (rclass == GENERAL_REGS || rclass == BASE_REGS
22403 || (regno >= 0 && INT_REGNO_P (regno)))
22404 return NO_REGS;
22406 /* Constants, memory, and VSX registers can go into VSX registers (both the
22407 traditional floating point and the altivec registers). */
22408 if (rclass == VSX_REGS
22409 && (regno == -1 || VSX_REGNO_P (regno)))
22410 return NO_REGS;
22412 /* Constants, memory, and FP registers can go into FP registers. */
22413 if ((regno == -1 || FP_REGNO_P (regno))
22414 && (rclass == FLOAT_REGS || rclass == NON_SPECIAL_REGS))
22415 return (mode != SDmode || lra_in_progress) ? NO_REGS : GENERAL_REGS;
22417 /* Memory and AltiVec registers can go into AltiVec registers. */
22418 if ((regno == -1 || ALTIVEC_REGNO_P (regno))
22419 && rclass == ALTIVEC_REGS)
22420 return NO_REGS;
22422 /* We can copy among the CR registers. */
22423 if ((rclass == CR_REGS || rclass == CR0_REGS)
22424 && regno >= 0 && CR_REGNO_P (regno))
22425 return NO_REGS;
22427 /* Otherwise, we need GENERAL_REGS. */
22428 return GENERAL_REGS;
22431 /* Debug version of rs6000_secondary_reload_class. */
22432 static enum reg_class
22433 rs6000_debug_secondary_reload_class (enum reg_class rclass,
22434 machine_mode mode, rtx in)
22436 enum reg_class ret = rs6000_secondary_reload_class (rclass, mode, in);
22437 fprintf (stderr,
22438 "\nrs6000_secondary_reload_class, return %s, rclass = %s, "
22439 "mode = %s, input rtx:\n",
22440 reg_class_names[ret], reg_class_names[rclass],
22441 GET_MODE_NAME (mode));
22442 debug_rtx (in);
22444 return ret;
22447 /* Return nonzero if for CLASS a mode change from FROM to TO is invalid. */
22449 static bool
22450 rs6000_cannot_change_mode_class (machine_mode from,
22451 machine_mode to,
22452 enum reg_class rclass)
22454 unsigned from_size = GET_MODE_SIZE (from);
22455 unsigned to_size = GET_MODE_SIZE (to);
22457 if (from_size != to_size)
22459 enum reg_class xclass = (TARGET_VSX) ? VSX_REGS : FLOAT_REGS;
22461 if (reg_classes_intersect_p (xclass, rclass))
22463 unsigned to_nregs = hard_regno_nregs[FIRST_FPR_REGNO][to];
22464 unsigned from_nregs = hard_regno_nregs[FIRST_FPR_REGNO][from];
22465 bool to_float128_vector_p = FLOAT128_VECTOR_P (to);
22466 bool from_float128_vector_p = FLOAT128_VECTOR_P (from);
22468 /* Don't allow 64-bit types to overlap with 128-bit types that take a
22469 single register under VSX because the scalar part of the register
22470 is in the upper 64-bits, and not the lower 64-bits. Types like
22471 TFmode/TDmode that take 2 scalar registers can overlap. 128-bit
22472 IEEE floating point can't overlap, and neither can small
22473 values. */
22475 if (to_float128_vector_p && from_float128_vector_p)
22476 return false;
22478 else if (to_float128_vector_p || from_float128_vector_p)
22479 return true;
22481 /* TDmode in floating-mode registers must always go into a register
22482 pair with the most significant word in the even-numbered register
22483 to match ISA requirements. In little-endian mode, this does not
22484 match subreg numbering, so we cannot allow subregs. */
22485 if (!BYTES_BIG_ENDIAN && (to == TDmode || from == TDmode))
22486 return true;
22488 if (from_size < 8 || to_size < 8)
22489 return true;
22491 if (from_size == 8 && (8 * to_nregs) != to_size)
22492 return true;
22494 if (to_size == 8 && (8 * from_nregs) != from_size)
22495 return true;
22497 return false;
22499 else
22500 return false;
22503 if (TARGET_E500_DOUBLE
22504 && ((((to) == DFmode) + ((from) == DFmode)) == 1
22505 || (((to) == TFmode) + ((from) == TFmode)) == 1
22506 || (((to) == IFmode) + ((from) == IFmode)) == 1
22507 || (((to) == KFmode) + ((from) == KFmode)) == 1
22508 || (((to) == DDmode) + ((from) == DDmode)) == 1
22509 || (((to) == TDmode) + ((from) == TDmode)) == 1
22510 || (((to) == DImode) + ((from) == DImode)) == 1))
22511 return true;
22513 /* Since the VSX register set includes traditional floating point registers
22514 and altivec registers, just check for the size being different instead of
22515 trying to check whether the modes are vector modes. Otherwise it won't
22516 allow say DF and DI to change classes. For types like TFmode and TDmode
22517 that take 2 64-bit registers, rather than a single 128-bit register, don't
22518 allow subregs of those types to other 128-bit types. */
22519 if (TARGET_VSX && VSX_REG_CLASS_P (rclass))
22521 unsigned num_regs = (from_size + 15) / 16;
22522 if (hard_regno_nregs[FIRST_FPR_REGNO][to] > num_regs
22523 || hard_regno_nregs[FIRST_FPR_REGNO][from] > num_regs)
22524 return true;
22526 return (from_size != 8 && from_size != 16);
22529 if (TARGET_ALTIVEC && rclass == ALTIVEC_REGS
22530 && (ALTIVEC_VECTOR_MODE (from) + ALTIVEC_VECTOR_MODE (to)) == 1)
22531 return true;
22533 if (TARGET_SPE && (SPE_VECTOR_MODE (from) + SPE_VECTOR_MODE (to)) == 1
22534 && reg_classes_intersect_p (GENERAL_REGS, rclass))
22535 return true;
22537 return false;
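/* Two worked cases for the size-change checks above (VSX target,
   rclass == VSX_REGS; illustration only):

     DImode <-> DFmode: equal 8-byte sizes and one register each, so
       control reaches the final VSX branch, which returns
       (from_size != 8 && from_size != 16), i.e. false: allowed.

     DFmode <-> V2DFmode: 8 vs. 16 bytes and 8 * to_nregs != to_size,
       so the function returns true: disallowed, because the scalar
       lives in the upper 64 bits of the VSX register and a subreg
       would name the wrong half.  */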
22540 /* Debug version of rs6000_cannot_change_mode_class. */
22541 static bool
22542 rs6000_debug_cannot_change_mode_class (machine_mode from,
22543 machine_mode to,
22544 enum reg_class rclass)
22546 bool ret = rs6000_cannot_change_mode_class (from, to, rclass);
22548 fprintf (stderr,
22549 "rs6000_cannot_change_mode_class, return %s, from = %s, "
22550 "to = %s, rclass = %s\n",
22551 ret ? "true" : "false",
22552 GET_MODE_NAME (from), GET_MODE_NAME (to),
22553 reg_class_names[rclass]);
22555 return ret;
22558 /* Return a string to do a move operation of 128 bits of data. */
22560 const char *
22561 rs6000_output_move_128bit (rtx operands[])
22563 rtx dest = operands[0];
22564 rtx src = operands[1];
22565 machine_mode mode = GET_MODE (dest);
22566 int dest_regno;
22567 int src_regno;
22568 bool dest_gpr_p, dest_fp_p, dest_vmx_p, dest_vsx_p;
22569 bool src_gpr_p, src_fp_p, src_vmx_p, src_vsx_p;
22571 if (REG_P (dest))
22573 dest_regno = REGNO (dest);
22574 dest_gpr_p = INT_REGNO_P (dest_regno);
22575 dest_fp_p = FP_REGNO_P (dest_regno);
22576 dest_vmx_p = ALTIVEC_REGNO_P (dest_regno);
22577 dest_vsx_p = dest_fp_p | dest_vmx_p;
22579 else
22581 dest_regno = -1;
22582 dest_gpr_p = dest_fp_p = dest_vmx_p = dest_vsx_p = false;
22585 if (REG_P (src))
22587 src_regno = REGNO (src);
22588 src_gpr_p = INT_REGNO_P (src_regno);
22589 src_fp_p = FP_REGNO_P (src_regno);
22590 src_vmx_p = ALTIVEC_REGNO_P (src_regno);
22591 src_vsx_p = src_fp_p | src_vmx_p;
22593 else
22595 src_regno = -1;
22596 src_gpr_p = src_fp_p = src_vmx_p = src_vsx_p = false;
22599 /* Register moves. */
22600 if (dest_regno >= 0 && src_regno >= 0)
22602 if (dest_gpr_p)
22604 if (src_gpr_p)
22605 return "#";
22607 if (TARGET_DIRECT_MOVE_128 && src_vsx_p)
22608 return (WORDS_BIG_ENDIAN
22609 ? "mfvsrd %0,%x1\n\tmfvsrld %L0,%x1"
22610 : "mfvsrd %L0,%x1\n\tmfvsrld %0,%x1");
22612 else if (TARGET_VSX && TARGET_DIRECT_MOVE && src_vsx_p)
22613 return "#";
22616 else if (TARGET_VSX && dest_vsx_p)
22618 if (src_vsx_p)
22619 return "xxlor %x0,%x1,%x1";
22621 else if (TARGET_DIRECT_MOVE_128 && src_gpr_p)
22622 return (WORDS_BIG_ENDIAN
22623 ? "mtvsrdd %x0,%1,%L1"
22624 : "mtvsrdd %x0,%L1,%1");
22626 else if (TARGET_DIRECT_MOVE && src_gpr_p)
22627 return "#";
22630 else if (TARGET_ALTIVEC && dest_vmx_p && src_vmx_p)
22631 return "vor %0,%1,%1";
22633 else if (dest_fp_p && src_fp_p)
22634 return "#";
22637 /* Loads. */
22638 else if (dest_regno >= 0 && MEM_P (src))
22640 if (dest_gpr_p)
22642 if (TARGET_QUAD_MEMORY && quad_load_store_p (dest, src))
22643 return "lq %0,%1";
22644 else
22645 return "#";
22648 else if (TARGET_ALTIVEC && dest_vmx_p
22649 && altivec_indexed_or_indirect_operand (src, mode))
22650 return "lvx %0,%y1";
22652 else if (TARGET_VSX && dest_vsx_p)
22654 if (mode_supports_vsx_dform_quad (mode)
22655 && quad_address_p (XEXP (src, 0), mode, true))
22656 return "lxv %x0,%1";
22658 else if (TARGET_P9_VECTOR)
22659 return "lxvx %x0,%y1";
22661 else if (mode == V16QImode || mode == V8HImode || mode == V4SImode)
22662 return "lxvw4x %x0,%y1";
22664 else
22665 return "lxvd2x %x0,%y1";
22668 else if (TARGET_ALTIVEC && dest_vmx_p)
22669 return "lvx %0,%y1";
22671 else if (dest_fp_p)
22672 return "#";
22675 /* Stores. */
22676 else if (src_regno >= 0 && MEM_P (dest))
22678 if (src_gpr_p)
22680 if (TARGET_QUAD_MEMORY && quad_load_store_p (dest, src))
22681 return "stq %1,%0";
22682 else
22683 return "#";
22686 else if (TARGET_ALTIVEC && src_vmx_p
22687 && altivec_indexed_or_indirect_operand (dest, mode))
22688 return "stvx %1,%y0";
22690 else if (TARGET_VSX && src_vsx_p)
22692 if (mode_supports_vsx_dform_quad (mode)
22693 && quad_address_p (XEXP (dest, 0), mode, true))
22694 return "stxv %x1,%0";
22696 else if (TARGET_P9_VECTOR)
22697 return "stxvx %x1,%y0";
22699 else if (mode == V16QImode || mode == V8HImode || mode == V4SImode)
22700 return "stxvw4x %x1,%y0";
22702 else
22703 return "stxvd2x %x1,%y0";
22706 else if (TARGET_ALTIVEC && src_vmx_p)
22707 return "stvx %1,%y0";
22709 else if (src_fp_p)
22710 return "#";
22713 /* Constants. */
22714 else if (dest_regno >= 0
22715 && (GET_CODE (src) == CONST_INT
22716 || GET_CODE (src) == CONST_WIDE_INT
22717 || GET_CODE (src) == CONST_DOUBLE
22718 || GET_CODE (src) == CONST_VECTOR))
22720 if (dest_gpr_p)
22721 return "#";
22723 else if ((dest_vmx_p && TARGET_ALTIVEC)
22724 || (dest_vsx_p && TARGET_VSX))
22725 return output_vec_const_move (operands);
22728 fatal_insn ("Bad 128-bit move", gen_rtx_SET (dest, src));
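/* Sample templates chosen above, assuming a power9-style target; "#"
   means the move is split into smaller pieces later.  This is an
   illustrative restatement of the branches, not additional cases:

     VSX reg <- VSX reg:		"xxlor %x0,%x1,%x1"
     GPR pair <- GPR pair:		"#"
     VSX reg <- quad d-form memory:	"lxv %x0,%1"
     Altivec reg <- indexed memory:	"lvx %0,%y1"  */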
22731 /* Validate a 128-bit move. */
22732 bool
22733 rs6000_move_128bit_ok_p (rtx operands[])
22735 machine_mode mode = GET_MODE (operands[0]);
22736 return (gpc_reg_operand (operands[0], mode)
22737 || gpc_reg_operand (operands[1], mode));
22740 /* Return true if a 128-bit move needs to be split. */
22741 bool
22742 rs6000_split_128bit_ok_p (rtx operands[])
22744 if (!reload_completed)
22745 return false;
22747 if (!gpr_or_gpr_p (operands[0], operands[1]))
22748 return false;
22750 if (quad_load_store_p (operands[0], operands[1]))
22751 return false;
22753 return true;
22757 /* Given a comparison operation, return the bit number in CCR to test. We
22758 know this is a valid comparison.
22760 SCC_P is 1 if this is for an scc. That means that %D will have been
22761 used instead of %C, so the bits will be in different places.
22763 Return -1 if OP isn't a valid comparison for some reason. */
22765 int
22766 ccr_bit (rtx op, int scc_p)
22768 enum rtx_code code = GET_CODE (op);
22769 machine_mode cc_mode;
22770 int cc_regnum;
22771 int base_bit;
22772 rtx reg;
22774 if (!COMPARISON_P (op))
22775 return -1;
22777 reg = XEXP (op, 0);
22779 gcc_assert (GET_CODE (reg) == REG && CR_REGNO_P (REGNO (reg)));
22781 cc_mode = GET_MODE (reg);
22782 cc_regnum = REGNO (reg);
22783 base_bit = 4 * (cc_regnum - CR0_REGNO);
22785 validate_condition_mode (code, cc_mode);
22787 /* When generating a sCOND operation, only positive conditions are
22788 allowed. */
22789 gcc_assert (!scc_p
22790 || code == EQ || code == GT || code == LT || code == UNORDERED
22791 || code == GTU || code == LTU);
22793 switch (code)
22795 case NE:
22796 return scc_p ? base_bit + 3 : base_bit + 2;
22797 case EQ:
22798 return base_bit + 2;
22799 case GT: case GTU: case UNLE:
22800 return base_bit + 1;
22801 case LT: case LTU: case UNGE:
22802 return base_bit;
22803 case ORDERED: case UNORDERED:
22804 return base_bit + 3;
22806 case GE: case GEU:
22807 /* If scc, we will have done a cror to put the bit in the
22808 unordered position. So test that bit. For integer, this is ! LT
22809 unless this is an scc insn. */
22810 return scc_p ? base_bit + 3 : base_bit;
22812 case LE: case LEU:
22813 return scc_p ? base_bit + 3 : base_bit + 1;
22815 default:
22816 gcc_unreachable ();
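/* Worked example (illustrative): for (gt (reg:CC <CR6>) (const_int 0))
   with SCC_P zero, base_bit is 4 * (CR6_REGNO - CR0_REGNO) == 24, and
   the GT case returns base_bit + 1 == 25.  Each CR field is four bits
   wide, ordered LT, GT, EQ, SO/UNORDERED, which is why LT maps to
   base_bit and ORDERED/UNORDERED to base_bit + 3.  */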
22820 /* Return the GOT register. */
22822 rtx
22823 rs6000_got_register (rtx value ATTRIBUTE_UNUSED)
22825 /* The second flow pass currently (June 1999) can't update
22826 regs_ever_live without disturbing other parts of the compiler, so
22827 update it here to make the prolog/epilogue code happy. */
22828 if (!can_create_pseudo_p ()
22829 && !df_regs_ever_live_p (RS6000_PIC_OFFSET_TABLE_REGNUM))
22830 df_set_regs_ever_live (RS6000_PIC_OFFSET_TABLE_REGNUM, true);
22832 crtl->uses_pic_offset_table = 1;
22834 return pic_offset_table_rtx;
22837 static rs6000_stack_t stack_info;
22839 /* Function to init struct machine_function.
22840 This will be called, via a pointer variable,
22841 from push_function_context. */
22843 static struct machine_function *
22844 rs6000_init_machine_status (void)
22846 stack_info.reload_completed = 0;
22847 return ggc_cleared_alloc<machine_function> ();
22850 #define INT_P(X) (GET_CODE (X) == CONST_INT && GET_MODE (X) == VOIDmode)
22852 /* Write out a function code label. */
22854 void
22855 rs6000_output_function_entry (FILE *file, const char *fname)
22857 if (fname[0] != '.')
22859 switch (DEFAULT_ABI)
22861 default:
22862 gcc_unreachable ();
22864 case ABI_AIX:
22865 if (DOT_SYMBOLS)
22866 putc ('.', file);
22867 else
22868 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "L.");
22869 break;
22871 case ABI_ELFv2:
22872 case ABI_V4:
22873 case ABI_DARWIN:
22874 break;
22878 RS6000_OUTPUT_BASENAME (file, fname);
22881 /* Print an operand. Recognize special options, documented below. */
22883 #if TARGET_ELF
22884 #define SMALL_DATA_RELOC ((rs6000_sdata == SDATA_EABI) ? "sda21" : "sdarel")
22885 #define SMALL_DATA_REG ((rs6000_sdata == SDATA_EABI) ? 0 : 13)
22886 #else
22887 #define SMALL_DATA_RELOC "sda21"
22888 #define SMALL_DATA_REG 0
22889 #endif
22891 void
22892 print_operand (FILE *file, rtx x, int code)
22894 int i;
22895 unsigned HOST_WIDE_INT uval;
22897 switch (code)
22899 /* %a is output_address. */
22901 /* %c is output_addr_const if a CONSTANT_ADDRESS_P, otherwise
22902 output_operand. */
22904 case 'D':
22905 /* Like 'J' but get to the GT bit only. */
22906 gcc_assert (REG_P (x));
22908 /* Bit 1 is GT bit. */
22909 i = 4 * (REGNO (x) - CR0_REGNO) + 1;
22911 /* Add one for shift count in rlinm for scc. */
22912 fprintf (file, "%d", i + 1);
22913 return;
22915 case 'e':
22916 /* If the low 16 bits are 0, but some other bit is set, write 's'. */
22917 if (! INT_P (x))
22919 output_operand_lossage ("invalid %%e value");
22920 return;
22923 uval = INTVAL (x);
22924 if ((uval & 0xffff) == 0 && uval != 0)
22925 putc ('s', file);
22926 return;
22928 case 'E':
22929 /* X is a CR register. Print the number of the EQ bit of the CR. */
22930 if (GET_CODE (x) != REG || ! CR_REGNO_P (REGNO (x)))
22931 output_operand_lossage ("invalid %%E value");
22932 else
22933 fprintf (file, "%d", 4 * (REGNO (x) - CR0_REGNO) + 2);
22934 return;
22936 case 'f':
22937 /* X is a CR register. Print the shift count needed to move it
22938 to the high-order four bits. */
22939 if (GET_CODE (x) != REG || ! CR_REGNO_P (REGNO (x)))
22940 output_operand_lossage ("invalid %%f value");
22941 else
22942 fprintf (file, "%d", 4 * (REGNO (x) - CR0_REGNO));
22943 return;
22945 case 'F':
22946 /* Similar, but print the count for the rotate in the opposite
22947 direction. */
22948 if (GET_CODE (x) != REG || ! CR_REGNO_P (REGNO (x)))
22949 output_operand_lossage ("invalid %%F value");
22950 else
22951 fprintf (file, "%d", 32 - 4 * (REGNO (x) - CR0_REGNO));
22952 return;
22954 case 'G':
22955 /* X is a constant integer. If it is negative, print "m",
22956 otherwise print "z". This is to make an aze or ame insn. */
22957 if (GET_CODE (x) != CONST_INT)
22958 output_operand_lossage ("invalid %%G value");
22959 else if (INTVAL (x) >= 0)
22960 putc ('z', file);
22961 else
22962 putc ('m', file);
22963 return;
22965 case 'h':
22966 /* If constant, output low-order five bits. Otherwise, write
22967 normally. */
22968 if (INT_P (x))
22969 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x) & 31);
22970 else
22971 print_operand (file, x, 0);
22972 return;
22974 case 'H':
22975 /* If constant, output low-order six bits. Otherwise, write
22976 normally. */
22977 if (INT_P (x))
22978 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x) & 63);
22979 else
22980 print_operand (file, x, 0);
22981 return;
22983 case 'I':
22984 /* Print `i' if this is a constant, else nothing. */
22985 if (INT_P (x))
22986 putc ('i', file);
22987 return;
22989 case 'j':
22990 /* Write the bit number in CCR for jump. */
22991 i = ccr_bit (x, 0);
22992 if (i == -1)
22993 output_operand_lossage ("invalid %%j code");
22994 else
22995 fprintf (file, "%d", i);
22996 return;
22998 case 'J':
22999 /* Similar, but add one for shift count in rlinm for scc and pass
23000 scc flag to `ccr_bit'. */
23001 i = ccr_bit (x, 1);
23002 if (i == -1)
23003 output_operand_lossage ("invalid %%J code");
23004 else
23005 /* If we want bit 31, write a shift count of zero, not 32. */
23006 fprintf (file, "%d", i == 31 ? 0 : i + 1);
23007 return;
23009 case 'k':
23010 /* X must be a constant. Write the 1's complement of the
23011 constant. */
23012 if (! INT_P (x))
23013 output_operand_lossage ("invalid %%k value");
23014 else
23015 fprintf (file, HOST_WIDE_INT_PRINT_DEC, ~ INTVAL (x));
23016 return;
23018 case 'K':
23019 /* X must be a symbolic constant on ELF. Write an
23020 expression suitable for an 'addi' that adds in the low 16
23021 bits of the MEM. */
23022 if (GET_CODE (x) == CONST)
23024 if (GET_CODE (XEXP (x, 0)) != PLUS
23025 || (GET_CODE (XEXP (XEXP (x, 0), 0)) != SYMBOL_REF
23026 && GET_CODE (XEXP (XEXP (x, 0), 0)) != LABEL_REF)
23027 || GET_CODE (XEXP (XEXP (x, 0), 1)) != CONST_INT)
23028 output_operand_lossage ("invalid %%K value");
23030 print_operand_address (file, x);
23031 fputs ("@l", file);
23032 return;
23034 /* %l is output_asm_label. */
23036 case 'L':
23037 /* Write second word of DImode or DFmode reference. Works on register
23038 or non-indexed memory only. */
23039 if (REG_P (x))
23040 fputs (reg_names[REGNO (x) + 1], file);
23041 else if (MEM_P (x))
23043 machine_mode mode = GET_MODE (x);
23044 /* Handle possible auto-increment. Since it is pre-increment and
23045 we have already done it, we can just use an offset of one word.
23046 if (GET_CODE (XEXP (x, 0)) == PRE_INC
23047 || GET_CODE (XEXP (x, 0)) == PRE_DEC)
23048 output_address (mode, plus_constant (Pmode, XEXP (XEXP (x, 0), 0),
23049 UNITS_PER_WORD));
23050 else if (GET_CODE (XEXP (x, 0)) == PRE_MODIFY)
23051 output_address (mode, plus_constant (Pmode, XEXP (XEXP (x, 0), 0),
23052 UNITS_PER_WORD));
23053 else
23054 output_address (mode, XEXP (adjust_address_nv (x, SImode,
23055 UNITS_PER_WORD),
23056 0));
23058 if (small_data_operand (x, GET_MODE (x)))
23059 fprintf (file, "@%s(%s)", SMALL_DATA_RELOC,
23060 reg_names[SMALL_DATA_REG]);
23062 return;
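/* Illustrative use of 'L': for a DImode operand held in the register
   pair r5/r6 on a 32-bit target, "%L1" prints the name of r6, the
   second word of operand 1; for a non-indexed memory operand it
   prints the address one word past the operand's address.  */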
23064 case 'N':
23065 /* Write the number of elements in the vector times 4. */
23066 if (GET_CODE (x) != PARALLEL)
23067 output_operand_lossage ("invalid %%N value");
23068 else
23069 fprintf (file, "%d", XVECLEN (x, 0) * 4);
23070 return;
23072 case 'O':
23073 /* Similar, but subtract 1 first. */
23074 if (GET_CODE (x) != PARALLEL)
23075 output_operand_lossage ("invalid %%O value");
23076 else
23077 fprintf (file, "%d", (XVECLEN (x, 0) - 1) * 4);
23078 return;
23080 case 'p':
23081 /* X is a CONST_INT that is a power of two. Output the logarithm. */
23082 if (! INT_P (x)
23083 || INTVAL (x) < 0
23084 || (i = exact_log2 (INTVAL (x))) < 0)
23085 output_operand_lossage ("invalid %%p value");
23086 else
23087 fprintf (file, "%d", i);
23088 return;
23090 case 'P':
23091 /* The operand must be an indirect memory reference. The result
23092 is the register name. */
23093 if (GET_CODE (x) != MEM || GET_CODE (XEXP (x, 0)) != REG
23094 || REGNO (XEXP (x, 0)) >= 32)
23095 output_operand_lossage ("invalid %%P value");
23096 else
23097 fputs (reg_names[REGNO (XEXP (x, 0))], file);
23098 return;
23100 case 'q':
23101 /* This outputs the logical code corresponding to a boolean
23102 expression. The expression may have one or both operands
23103 negated (if one, only the first one). For condition register
23104 logical operations, it will also treat the negated
23105 CR codes as NOTs, but not handle NOTs of them. */
23107 const char *const *t = 0;
23108 const char *s;
23109 enum rtx_code code = GET_CODE (x);
23110 static const char * const tbl[3][3] = {
23111 { "and", "andc", "nor" },
23112 { "or", "orc", "nand" },
23113 { "xor", "eqv", "xor" } };
23115 if (code == AND)
23116 t = tbl[0];
23117 else if (code == IOR)
23118 t = tbl[1];
23119 else if (code == XOR)
23120 t = tbl[2];
23121 else
23122 output_operand_lossage ("invalid %%q value");
23124 if (GET_CODE (XEXP (x, 0)) != NOT)
23125 s = t[0];
23126 else
23128 if (GET_CODE (XEXP (x, 1)) == NOT)
23129 s = t[2];
23130 else
23131 s = t[1];
23134 fputs (s, file);
23136 return;
23138 case 'Q':
23139 if (! TARGET_MFCRF)
23140 return;
23141 fputc (',', file);
23142 /* FALLTHRU */
23144 case 'R':
23145 /* X is a CR register. Print the mask for `mtcrf'. */
23146 if (GET_CODE (x) != REG || ! CR_REGNO_P (REGNO (x)))
23147 output_operand_lossage ("invalid %%R value");
23148 else
23149 fprintf (file, "%d", 128 >> (REGNO (x) - CR0_REGNO));
23150 return;
23152 case 's':
23153 /* Low 5 bits of 32 - value */
23154 if (! INT_P (x))
23155 output_operand_lossage ("invalid %%s value");
23156 else
23157 fprintf (file, HOST_WIDE_INT_PRINT_DEC, (32 - INTVAL (x)) & 31);
23158 return;
23160 case 't':
23161 /* Like 'J' but get to the OVERFLOW/UNORDERED bit. */
23162 gcc_assert (REG_P (x) && GET_MODE (x) == CCmode);
23164 /* Bit 3 is OV bit. */
23165 i = 4 * (REGNO (x) - CR0_REGNO) + 3;
23167 /* If we want bit 31, write a shift count of zero, not 32. */
23168 fprintf (file, "%d", i == 31 ? 0 : i + 1);
23169 return;
23171 case 'T':
23172 /* Print the symbolic name of a branch target register. */
23173 if (GET_CODE (x) != REG || (REGNO (x) != LR_REGNO
23174 && REGNO (x) != CTR_REGNO))
23175 output_operand_lossage ("invalid %%T value");
23176 else if (REGNO (x) == LR_REGNO)
23177 fputs ("lr", file);
23178 else
23179 fputs ("ctr", file);
23180 return;
23182 case 'u':
23183 /* High-order or low-order 16 bits of constant, whichever is non-zero,
23184 for use in unsigned operand. */
23185 if (! INT_P (x))
23187 output_operand_lossage ("invalid %%u value");
23188 return;
23191 uval = INTVAL (x);
23192 if ((uval & 0xffff) == 0)
23193 uval >>= 16;
23195 fprintf (file, HOST_WIDE_INT_PRINT_HEX, uval & 0xffff);
23196 return;
23198 case 'v':
23199 /* High-order 16 bits of constant for use in signed operand. */
23200 if (! INT_P (x))
23201 output_operand_lossage ("invalid %%v value");
23202 else
23203 fprintf (file, HOST_WIDE_INT_PRINT_HEX,
23204 (INTVAL (x) >> 16) & 0xffff);
23205 return;
23207 case 'U':
23208 /* Print `u' if this has an auto-increment or auto-decrement. */
23209 if (MEM_P (x)
23210 && (GET_CODE (XEXP (x, 0)) == PRE_INC
23211 || GET_CODE (XEXP (x, 0)) == PRE_DEC
23212 || GET_CODE (XEXP (x, 0)) == PRE_MODIFY))
23213 putc ('u', file);
23214 return;
23216 case 'V':
23217 /* Print the trap code for this operand. */
23218 switch (GET_CODE (x))
23220 case EQ:
23221 fputs ("eq", file); /* 4 */
23222 break;
23223 case NE:
23224 fputs ("ne", file); /* 24 */
23225 break;
23226 case LT:
23227 fputs ("lt", file); /* 16 */
23228 break;
23229 case LE:
23230 fputs ("le", file); /* 20 */
23231 break;
23232 case GT:
23233 fputs ("gt", file); /* 8 */
23234 break;
23235 case GE:
23236 fputs ("ge", file); /* 12 */
23237 break;
23238 case LTU:
23239 fputs ("llt", file); /* 2 */
23240 break;
23241 case LEU:
23242 fputs ("lle", file); /* 6 */
23243 break;
23244 case GTU:
23245 fputs ("lgt", file); /* 1 */
23246 break;
23247 case GEU:
23248 fputs ("lge", file); /* 5 */
23249 break;
23250 default:
23251 gcc_unreachable ();
23253 break;
23255 case 'w':
23256 /* If constant, low-order 16 bits of constant, signed. Otherwise, write
23257 normally. */
23258 if (INT_P (x))
23259 fprintf (file, HOST_WIDE_INT_PRINT_DEC,
23260 ((INTVAL (x) & 0xffff) ^ 0x8000) - 0x8000);
23261 else
23262 print_operand (file, x, 0);
23263 return;
23265 case 'x':
23266 /* X is a FPR or Altivec register used in a VSX context. */
23267 if (GET_CODE (x) != REG || !VSX_REGNO_P (REGNO (x)))
23268 output_operand_lossage ("invalid %%x value");
23269 else
23271 int reg = REGNO (x);
23272 int vsx_reg = (FP_REGNO_P (reg)
23273 ? reg - 32
23274 : reg - FIRST_ALTIVEC_REGNO + 32);
23276 #ifdef TARGET_REGNAMES
23277 if (TARGET_REGNAMES)
23278 fprintf (file, "%%vs%d", vsx_reg);
23279 else
23280 #endif
23281 fprintf (file, "%d", vsx_reg);
23283 return;
23285 case 'X':
23286 if (MEM_P (x)
23287 && (legitimate_indexed_address_p (XEXP (x, 0), 0)
23288 || (GET_CODE (XEXP (x, 0)) == PRE_MODIFY
23289 && legitimate_indexed_address_p (XEXP (XEXP (x, 0), 1), 0))))
23290 putc ('x', file);
23291 return;
23293 case 'Y':
23294 /* Like 'L', for third word of TImode/PTImode. */
23295 if (REG_P (x))
23296 fputs (reg_names[REGNO (x) + 2], file);
23297 else if (MEM_P (x))
23299 machine_mode mode = GET_MODE (x);
23300 if (GET_CODE (XEXP (x, 0)) == PRE_INC
23301 || GET_CODE (XEXP (x, 0)) == PRE_DEC)
23302 output_address (mode, plus_constant (Pmode,
23303 XEXP (XEXP (x, 0), 0), 8));
23304 else if (GET_CODE (XEXP (x, 0)) == PRE_MODIFY)
23305 output_address (mode, plus_constant (Pmode,
23306 XEXP (XEXP (x, 0), 0), 8));
23307 else
23308 output_address (mode, XEXP (adjust_address_nv (x, SImode, 8), 0));
23309 if (small_data_operand (x, GET_MODE (x)))
23310 fprintf (file, "@%s(%s)", SMALL_DATA_RELOC,
23311 reg_names[SMALL_DATA_REG]);
23313 return;
23315 case 'z':
23316 /* X is a SYMBOL_REF. Write out the name preceded by a
23317 period and without any trailing data in brackets. Used for function
23318 names. If we are configured for System V (or the embedded ABI) on
23319 the PowerPC, do not emit the period, since those systems do not use
23320 TOCs and the like. */
23321 gcc_assert (GET_CODE (x) == SYMBOL_REF);
23323 /* For macho, check to see if we need a stub. */
23324 if (TARGET_MACHO)
23326 const char *name = XSTR (x, 0);
23327 #if TARGET_MACHO
23328 if (darwin_emit_branch_islands
23329 && MACHOPIC_INDIRECT
23330 && machopic_classify_symbol (x) == MACHOPIC_UNDEFINED_FUNCTION)
23331 name = machopic_indirection_name (x, /*stub_p=*/true);
23332 #endif
23333 assemble_name (file, name);
23335 else if (!DOT_SYMBOLS)
23336 assemble_name (file, XSTR (x, 0));
23337 else
23338 rs6000_output_function_entry (file, XSTR (x, 0));
23339 return;
23341 case 'Z':
23342 /* Like 'L', for last word of TImode/PTImode. */
23343 if (REG_P (x))
23344 fputs (reg_names[REGNO (x) + 3], file);
23345 else if (MEM_P (x))
23347 machine_mode mode = GET_MODE (x);
23348 if (GET_CODE (XEXP (x, 0)) == PRE_INC
23349 || GET_CODE (XEXP (x, 0)) == PRE_DEC)
23350 output_address (mode, plus_constant (Pmode,
23351 XEXP (XEXP (x, 0), 0), 12));
23352 else if (GET_CODE (XEXP (x, 0)) == PRE_MODIFY)
23353 output_address (mode, plus_constant (Pmode,
23354 XEXP (XEXP (x, 0), 0), 12));
23355 else
23356 output_address (mode, XEXP (adjust_address_nv (x, SImode, 12), 0));
23357 if (small_data_operand (x, GET_MODE (x)))
23358 fprintf (file, "@%s(%s)", SMALL_DATA_RELOC,
23359 reg_names[SMALL_DATA_REG]);
23361 return;
23363 /* Print AltiVec or SPE memory operand. */
23364 case 'y':
23366 rtx tmp;
23368 gcc_assert (MEM_P (x));
23370 tmp = XEXP (x, 0);
23372 /* Ugly hack because %y is overloaded. */
23373 if ((TARGET_SPE || TARGET_E500_DOUBLE)
23374 && (GET_MODE_SIZE (GET_MODE (x)) == 8
23375 || FLOAT128_2REG_P (GET_MODE (x))
23376 || GET_MODE (x) == TImode
23377 || GET_MODE (x) == PTImode))
23379 /* Handle [reg]. */
23380 if (REG_P (tmp))
23382 fprintf (file, "0(%s)", reg_names[REGNO (tmp)]);
23383 break;
23385 /* Handle [reg+UIMM]. */
23386 else if (GET_CODE (tmp) == PLUS &&
23387 GET_CODE (XEXP (tmp, 1)) == CONST_INT)
23389 int x;
23391 gcc_assert (REG_P (XEXP (tmp, 0)));
23393 x = INTVAL (XEXP (tmp, 1));
23394 fprintf (file, "%d(%s)", x, reg_names[REGNO (XEXP (tmp, 0))]);
23395 break;
23398 /* Fall through. Must be [reg+reg]. */
23400 if (VECTOR_MEM_ALTIVEC_P (GET_MODE (x))
23401 && GET_CODE (tmp) == AND
23402 && GET_CODE (XEXP (tmp, 1)) == CONST_INT
23403 && INTVAL (XEXP (tmp, 1)) == -16)
23404 tmp = XEXP (tmp, 0);
23405 else if (VECTOR_MEM_VSX_P (GET_MODE (x))
23406 && GET_CODE (tmp) == PRE_MODIFY)
23407 tmp = XEXP (tmp, 1);
23408 if (REG_P (tmp))
23409 fprintf (file, "0,%s", reg_names[REGNO (tmp)]);
23410 else
23412 if (GET_CODE (tmp) != PLUS
23413 || !REG_P (XEXP (tmp, 0))
23414 || !REG_P (XEXP (tmp, 1)))
23416 output_operand_lossage ("invalid %%y value, try using the 'Z' constraint");
23417 break;
23420 if (REGNO (XEXP (tmp, 0)) == 0)
23421 fprintf (file, "%s,%s", reg_names[ REGNO (XEXP (tmp, 1)) ],
23422 reg_names[ REGNO (XEXP (tmp, 0)) ]);
23423 else
23424 fprintf (file, "%s,%s", reg_names[ REGNO (XEXP (tmp, 0)) ],
23425 reg_names[ REGNO (XEXP (tmp, 1)) ]);
23427 break;
23430 case 0:
23431 if (REG_P (x))
23432 fprintf (file, "%s", reg_names[REGNO (x)]);
23433 else if (MEM_P (x))
23435 /* We need to handle PRE_INC and PRE_DEC here, since we need to
23436 know the width from the mode. */
23437 if (GET_CODE (XEXP (x, 0)) == PRE_INC)
23438 fprintf (file, "%d(%s)", GET_MODE_SIZE (GET_MODE (x)),
23439 reg_names[REGNO (XEXP (XEXP (x, 0), 0))]);
23440 else if (GET_CODE (XEXP (x, 0)) == PRE_DEC)
23441 fprintf (file, "%d(%s)", - GET_MODE_SIZE (GET_MODE (x)),
23442 reg_names[REGNO (XEXP (XEXP (x, 0), 0))]);
23443 else if (GET_CODE (XEXP (x, 0)) == PRE_MODIFY)
23444 output_address (GET_MODE (x), XEXP (XEXP (x, 0), 1));
23445 else
23446 output_address (GET_MODE (x), XEXP (x, 0));
23448 else
23450 if (toc_relative_expr_p (x, false))
23451 /* This hack along with a corresponding hack in
23452 rs6000_output_addr_const_extra arranges to output addends
23453 where the assembler expects to find them. eg.
23454 (plus (unspec [(symbol_ref ("x")) (reg 2)] tocrel) 4)
23455 without this hack would be output as "x@toc+4". We
23456 want "x+4@toc". */
23457 output_addr_const (file, CONST_CAST_RTX (tocrel_base));
23458 else
23459 output_addr_const (file, x);
23461 return;
23463 case '&':
23464 if (const char *name = get_some_local_dynamic_name ())
23465 assemble_name (file, name);
23466 else
23467 output_operand_lossage ("'%%&' used without any "
23468 "local dynamic TLS references");
23469 return;
23471 default:
23472 output_operand_lossage ("invalid %%xn code");
23476 /* Print the address of an operand. */
23478 void
23479 print_operand_address (FILE *file, rtx x)
23481 if (REG_P (x))
23482 fprintf (file, "0(%s)", reg_names[ REGNO (x) ]);
23483 else if (GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == CONST
23484 || GET_CODE (x) == LABEL_REF)
23486 output_addr_const (file, x);
23487 if (small_data_operand (x, GET_MODE (x)))
23488 fprintf (file, "@%s(%s)", SMALL_DATA_RELOC,
23489 reg_names[SMALL_DATA_REG]);
23490 else
23491 gcc_assert (!TARGET_TOC);
23493 else if (GET_CODE (x) == PLUS && REG_P (XEXP (x, 0))
23494 && REG_P (XEXP (x, 1)))
23496 if (REGNO (XEXP (x, 0)) == 0)
23497 fprintf (file, "%s,%s", reg_names[ REGNO (XEXP (x, 1)) ],
23498 reg_names[ REGNO (XEXP (x, 0)) ]);
23499 else
23500 fprintf (file, "%s,%s", reg_names[ REGNO (XEXP (x, 0)) ],
23501 reg_names[ REGNO (XEXP (x, 1)) ]);
23503 else if (GET_CODE (x) == PLUS && REG_P (XEXP (x, 0))
23504 && GET_CODE (XEXP (x, 1)) == CONST_INT)
23505 fprintf (file, HOST_WIDE_INT_PRINT_DEC "(%s)",
23506 INTVAL (XEXP (x, 1)), reg_names[ REGNO (XEXP (x, 0)) ]);
23507 #if TARGET_MACHO
23508 else if (GET_CODE (x) == LO_SUM && REG_P (XEXP (x, 0))
23509 && CONSTANT_P (XEXP (x, 1)))
23511 fprintf (file, "lo16(");
23512 output_addr_const (file, XEXP (x, 1));
23513 fprintf (file, ")(%s)", reg_names[ REGNO (XEXP (x, 0)) ]);
23515 #endif
23516 #if TARGET_ELF
23517 else if (GET_CODE (x) == LO_SUM && REG_P (XEXP (x, 0))
23518 && CONSTANT_P (XEXP (x, 1)))
23520 output_addr_const (file, XEXP (x, 1));
23521 fprintf (file, "@l(%s)", reg_names[ REGNO (XEXP (x, 0)) ]);
23523 #endif
23524 else if (toc_relative_expr_p (x, false))
23526 /* This hack along with a corresponding hack in
23527 rs6000_output_addr_const_extra arranges to output addends
23528 where the assembler expects to find them. eg.
23529 (lo_sum (reg 9)
23530 . (plus (unspec [(symbol_ref ("x")) (reg 2)] tocrel) 8))
23531 without this hack would be output as "x@toc+8@l(9)". We
23532 want "x+8@toc@l(9)". */
23533 output_addr_const (file, CONST_CAST_RTX (tocrel_base));
23534 if (GET_CODE (x) == LO_SUM)
23535 fprintf (file, "@l(%s)", reg_names[REGNO (XEXP (x, 0))]);
23536 else
23537 fprintf (file, "(%s)", reg_names[REGNO (XVECEXP (tocrel_base, 0, 1))]);
23539 else
23540 gcc_unreachable ();
23543 /* Implement TARGET_OUTPUT_ADDR_CONST_EXTRA. */
23545 static bool
23546 rs6000_output_addr_const_extra (FILE *file, rtx x)
23548 if (GET_CODE (x) == UNSPEC)
23549 switch (XINT (x, 1))
23551 case UNSPEC_TOCREL:
23552 gcc_checking_assert (GET_CODE (XVECEXP (x, 0, 0)) == SYMBOL_REF
23553 && REG_P (XVECEXP (x, 0, 1))
23554 && REGNO (XVECEXP (x, 0, 1)) == TOC_REGISTER);
23555 output_addr_const (file, XVECEXP (x, 0, 0));
23556 if (x == tocrel_base && tocrel_offset != const0_rtx)
23558 if (INTVAL (tocrel_offset) >= 0)
23559 fprintf (file, "+");
23560 output_addr_const (file, CONST_CAST_RTX (tocrel_offset));
23562 if (!TARGET_AIX || (TARGET_ELF && TARGET_MINIMAL_TOC))
23564 putc ('-', file);
23565 assemble_name (file, toc_label_name);
23566 need_toc_init = 1;
23568 else if (TARGET_ELF)
23569 fputs ("@toc", file);
23570 return true;
23572 #if TARGET_MACHO
23573 case UNSPEC_MACHOPIC_OFFSET:
23574 output_addr_const (file, XVECEXP (x, 0, 0));
23575 putc ('-', file);
23576 machopic_output_function_base_name (file);
23577 return true;
23578 #endif
23580 return false;
23583 /* Target hook for assembling integer objects. The PowerPC version has
23584 to handle fixup entries for relocatable code if RELOCATABLE_NEEDS_FIXUP
23585 is defined. It also needs to handle DI-mode objects on 64-bit
23586 targets. */
23588 static bool
23589 rs6000_assemble_integer (rtx x, unsigned int size, int aligned_p)
23591 #ifdef RELOCATABLE_NEEDS_FIXUP
23592 /* Special handling for SI values. */
23593 if (RELOCATABLE_NEEDS_FIXUP && size == 4 && aligned_p)
23595 static int recurse = 0;
23597 /* For -mrelocatable, we mark all addresses that need to be fixed up in
23598 the .fixup section. Since the TOC section is already relocated, we
23599 don't need to mark it here. We used to skip the text section, but it
23600 should never be valid for relocated addresses to be placed in the text
23601 section. */
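/* The emitted sequence looks roughly like this (illustrative, for some
   constant address "sym"):
	.LCP0:	.long	(sym)@fixup
		.section ".fixup","aw"
		.align	2
		.long	.LCP0
		.previous  */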
23602 if (DEFAULT_ABI == ABI_V4
23603 && (TARGET_RELOCATABLE || flag_pic > 1)
23604 && in_section != toc_section
23605 && !recurse
23606 && !CONST_SCALAR_INT_P (x)
23607 && CONSTANT_P (x))
23609 char buf[256];
23611 recurse = 1;
23612 ASM_GENERATE_INTERNAL_LABEL (buf, "LCP", fixuplabelno);
23613 fixuplabelno++;
23614 ASM_OUTPUT_LABEL (asm_out_file, buf);
23615 fprintf (asm_out_file, "\t.long\t(");
23616 output_addr_const (asm_out_file, x);
23617 fprintf (asm_out_file, ")@fixup\n");
23618 fprintf (asm_out_file, "\t.section\t\".fixup\",\"aw\"\n");
23619 ASM_OUTPUT_ALIGN (asm_out_file, 2);
23620 fprintf (asm_out_file, "\t.long\t");
23621 assemble_name (asm_out_file, buf);
23622 fprintf (asm_out_file, "\n\t.previous\n");
23623 recurse = 0;
23624 return true;
23626 /* Remove initial .'s to turn a -mcall-aixdesc function
23627 address into the address of the descriptor, not the function
23628 itself. */
23629 else if (GET_CODE (x) == SYMBOL_REF
23630 && XSTR (x, 0)[0] == '.'
23631 && DEFAULT_ABI == ABI_AIX)
23633 const char *name = XSTR (x, 0);
23634 while (*name == '.')
23635 name++;
23637 fprintf (asm_out_file, "\t.long\t%s\n", name);
23638 return true;
23641 #endif /* RELOCATABLE_NEEDS_FIXUP */
23642 return default_assemble_integer (x, size, aligned_p);
23645 #if defined (HAVE_GAS_HIDDEN) && !TARGET_MACHO
23646 /* Emit an assembler directive to set symbol visibility for DECL to
23647 VISIBILITY_TYPE. */
23649 static void
23650 rs6000_assemble_visibility (tree decl, int vis)
23652 if (TARGET_XCOFF)
23653 return;
23655 /* Functions need to have their entry point symbol visibility set as
23656 well as their descriptor symbol visibility. */
23657 if (DEFAULT_ABI == ABI_AIX
23658 && DOT_SYMBOLS
23659 && TREE_CODE (decl) == FUNCTION_DECL)
23661 static const char * const visibility_types[] = {
23662 NULL, "protected", "hidden", "internal"
23665 const char *name, *type;
23667 name = ((* targetm.strip_name_encoding)
23668 (IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl))));
23669 type = visibility_types[vis];
23671 fprintf (asm_out_file, "\t.%s\t%s\n", type, name);
23672 fprintf (asm_out_file, "\t.%s\t.%s\n", type, name);
23674 else
23675 default_assemble_visibility (decl, vis);
23677 #endif
23679 enum rtx_code
23680 rs6000_reverse_condition (machine_mode mode, enum rtx_code code)
23682 /* Reversal of FP compares takes care -- an ordered compare
23683 becomes an unordered compare and vice versa. */
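/* For example, reversing LT must yield UNGE rather than GE: if either
   operand is a NaN, LT is false, so its reverse has to be true, which
   only UNGE guarantees.  */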
23684 if (mode == CCFPmode
23685 && (!flag_finite_math_only
23686 || code == UNLT || code == UNLE || code == UNGT || code == UNGE
23687 || code == UNEQ || code == LTGT))
23688 return reverse_condition_maybe_unordered (code);
23689 else
23690 return reverse_condition (code);
23693 /* Generate a compare for CODE. Return a brand-new rtx that
23694 represents the result of the compare. */
23696 static rtx
23697 rs6000_generate_compare (rtx cmp, machine_mode mode)
23699 machine_mode comp_mode;
23700 rtx compare_result;
23701 enum rtx_code code = GET_CODE (cmp);
23702 rtx op0 = XEXP (cmp, 0);
23703 rtx op1 = XEXP (cmp, 1);
23705 if (!TARGET_FLOAT128_HW && FLOAT128_VECTOR_P (mode))
23706 comp_mode = CCmode;
23707 else if (FLOAT_MODE_P (mode))
23708 comp_mode = CCFPmode;
23709 else if (code == GTU || code == LTU
23710 || code == GEU || code == LEU)
23711 comp_mode = CCUNSmode;
23712 else if ((code == EQ || code == NE)
23713 && unsigned_reg_p (op0)
23714 && (unsigned_reg_p (op1)
23715 || (CONST_INT_P (op1) && INTVAL (op1) != 0)))
23716 /* These are unsigned values, perhaps there will be a later
23717 ordering compare that can be shared with this one. */
23718 comp_mode = CCUNSmode;
23719 else
23720 comp_mode = CCmode;
23722 /* If we have an unsigned compare, make sure we don't have a signed value as
23723 an immediate. */
23724 if (comp_mode == CCUNSmode && GET_CODE (op1) == CONST_INT
23725 && INTVAL (op1) < 0)
23727 op0 = copy_rtx_if_shared (op0);
23728 op1 = force_reg (GET_MODE (op0), op1);
23729 cmp = gen_rtx_fmt_ee (code, GET_MODE (cmp), op0, op1);
23732 /* First, the compare. */
23733 compare_result = gen_reg_rtx (comp_mode);
23735 /* E500 FP compare instructions on the GPRs. Yuck! */
23736 if ((!TARGET_FPRS && TARGET_HARD_FLOAT)
23737 && FLOAT_MODE_P (mode))
23739 rtx cmp, or_result, compare_result2;
23740 machine_mode op_mode = GET_MODE (op0);
23741 bool reverse_p;
23743 if (op_mode == VOIDmode)
23744 op_mode = GET_MODE (op1);
23746 /* First reverse the condition codes that aren't directly supported. */
23747 switch (code)
23749 case NE:
23750 case UNLT:
23751 case UNLE:
23752 case UNGT:
23753 case UNGE:
23754 code = reverse_condition_maybe_unordered (code);
23755 reverse_p = true;
23756 break;
23758 case EQ:
23759 case LT:
23760 case LE:
23761 case GT:
23762 case GE:
23763 reverse_p = false;
23764 break;
23766 default:
23767 gcc_unreachable ();
23770 /* The E500 FP compare instructions toggle the GT bit (CR bit 1) only.
23771 This explains the following mess. */
23773 switch (code)
23775 case EQ:
23776 switch (op_mode)
23778 case SFmode:
23779 cmp = (flag_finite_math_only && !flag_trapping_math)
23780 ? gen_tstsfeq_gpr (compare_result, op0, op1)
23781 : gen_cmpsfeq_gpr (compare_result, op0, op1);
23782 break;
23784 case DFmode:
23785 cmp = (flag_finite_math_only && !flag_trapping_math)
23786 ? gen_tstdfeq_gpr (compare_result, op0, op1)
23787 : gen_cmpdfeq_gpr (compare_result, op0, op1);
23788 break;
23790 case TFmode:
23791 case IFmode:
23792 case KFmode:
23793 cmp = (flag_finite_math_only && !flag_trapping_math)
23794 ? gen_tsttfeq_gpr (compare_result, op0, op1)
23795 : gen_cmptfeq_gpr (compare_result, op0, op1);
23796 break;
23798 default:
23799 gcc_unreachable ();
23801 break;
23803 case GT:
23804 case GE:
23805 switch (op_mode)
23807 case SFmode:
23808 cmp = (flag_finite_math_only && !flag_trapping_math)
23809 ? gen_tstsfgt_gpr (compare_result, op0, op1)
23810 : gen_cmpsfgt_gpr (compare_result, op0, op1);
23811 break;
23813 case DFmode:
23814 cmp = (flag_finite_math_only && !flag_trapping_math)
23815 ? gen_tstdfgt_gpr (compare_result, op0, op1)
23816 : gen_cmpdfgt_gpr (compare_result, op0, op1);
23817 break;
23819 case TFmode:
23820 case IFmode:
23821 case KFmode:
23822 cmp = (flag_finite_math_only && !flag_trapping_math)
23823 ? gen_tsttfgt_gpr (compare_result, op0, op1)
23824 : gen_cmptfgt_gpr (compare_result, op0, op1);
23825 break;
23827 default:
23828 gcc_unreachable ();
23830 break;
23832 case LT:
23833 case LE:
23834 switch (op_mode)
23836 case SFmode:
23837 cmp = (flag_finite_math_only && !flag_trapping_math)
23838 ? gen_tstsflt_gpr (compare_result, op0, op1)
23839 : gen_cmpsflt_gpr (compare_result, op0, op1);
23840 break;
23842 case DFmode:
23843 cmp = (flag_finite_math_only && !flag_trapping_math)
23844 ? gen_tstdflt_gpr (compare_result, op0, op1)
23845 : gen_cmpdflt_gpr (compare_result, op0, op1);
23846 break;
23848 case TFmode:
23849 case IFmode:
23850 case KFmode:
23851 cmp = (flag_finite_math_only && !flag_trapping_math)
23852 ? gen_tsttflt_gpr (compare_result, op0, op1)
23853 : gen_cmptflt_gpr (compare_result, op0, op1);
23854 break;
23856 default:
23857 gcc_unreachable ();
23859 break;
23861 default:
23862 gcc_unreachable ();
23865 /* Synthesize LE and GE from LT/GT || EQ. */
23866 if (code == LE || code == GE)
23868 emit_insn (cmp);
23870 compare_result2 = gen_reg_rtx (CCFPmode);
23872 /* Do the EQ. */
23873 switch (op_mode)
23875 case SFmode:
23876 cmp = (flag_finite_math_only && !flag_trapping_math)
23877 ? gen_tstsfeq_gpr (compare_result2, op0, op1)
23878 : gen_cmpsfeq_gpr (compare_result2, op0, op1);
23879 break;
23881 case DFmode:
23882 cmp = (flag_finite_math_only && !flag_trapping_math)
23883 ? gen_tstdfeq_gpr (compare_result2, op0, op1)
23884 : gen_cmpdfeq_gpr (compare_result2, op0, op1);
23885 break;
23887 case TFmode:
23888 case IFmode:
23889 case KFmode:
23890 cmp = (flag_finite_math_only && !flag_trapping_math)
23891 ? gen_tsttfeq_gpr (compare_result2, op0, op1)
23892 : gen_cmptfeq_gpr (compare_result2, op0, op1);
23893 break;
23895 default:
23896 gcc_unreachable ();
23899 emit_insn (cmp);
23901 /* OR them together. */
23902 or_result = gen_reg_rtx (CCFPmode);
23903 cmp = gen_e500_cr_ior_compare (or_result, compare_result,
23904 compare_result2);
23905 compare_result = or_result;
23908 code = reverse_p ? NE : EQ;
23910 emit_insn (cmp);
23913 /* IEEE 128-bit support in VSX registers when we do not have hardware
23914 support. */
23915 else if (!TARGET_FLOAT128_HW && FLOAT128_VECTOR_P (mode))
23917 rtx libfunc = NULL_RTX;
23918 bool check_nan = false;
23919 rtx dest;
23921 switch (code)
23923 case EQ:
23924 case NE:
23925 libfunc = optab_libfunc (eq_optab, mode);
23926 break;
23928 case GT:
23929 case GE:
23930 libfunc = optab_libfunc (ge_optab, mode);
23931 break;
23933 case LT:
23934 case LE:
23935 libfunc = optab_libfunc (le_optab, mode);
23936 break;
23938 case UNORDERED:
23939 case ORDERED:
23940 libfunc = optab_libfunc (unord_optab, mode);
23941 code = (code == UNORDERED) ? NE : EQ;
23942 break;
23944 case UNGE:
23945 case UNGT:
23946 check_nan = true;
23947 libfunc = optab_libfunc (ge_optab, mode);
23948 code = (code == UNGE) ? GE : GT;
23949 break;
23951 case UNLE:
23952 case UNLT:
23953 check_nan = true;
23954 libfunc = optab_libfunc (le_optab, mode);
23955 code = (code == UNLE) ? LE : LT;
23956 break;
23958 case UNEQ:
23959 case LTGT:
23960 check_nan = true;
23961 libfunc = optab_libfunc (eq_optab, mode);
23962 code = (code == UNEQ) ? EQ : NE;
23963 break;
23965 default:
23966 gcc_unreachable ();
23969 gcc_assert (libfunc);
23971 if (!check_nan)
23972 dest = emit_library_call_value (libfunc, NULL_RTX, LCT_CONST,
23973 SImode, 2, op0, mode, op1, mode);
23975 /* The library signals an exception for signalling NaNs, so we need to
23976 handle isgreater, etc. by first checking isordered. */
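/* For KFmode these optabs typically resolve to the libgcc soft-fp routines
   __eqkf2, __gekf2, __lekf2 and __unordkf2 (assumed names; the exact set
   depends on the library configuration).  */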
23977 else
23979 rtx ne_rtx, normal_dest, unord_dest;
23980 rtx unord_func = optab_libfunc (unord_optab, mode);
23981 rtx join_label = gen_label_rtx ();
23982 rtx join_ref = gen_rtx_LABEL_REF (VOIDmode, join_label);
23983 rtx unord_cmp = gen_reg_rtx (comp_mode);
23986 /* Test for either value being a NaN. */
23987 gcc_assert (unord_func);
23988 unord_dest = emit_library_call_value (unord_func, NULL_RTX, LCT_CONST,
23989 SImode, 2, op0, mode, op1,
23990 mode);
23992 /* Set the result to 1 if either value is a NaN, and jump to the
23993 join label.  */
23994 dest = gen_reg_rtx (SImode);
23995 emit_move_insn (dest, const1_rtx);
23996 emit_insn (gen_rtx_SET (unord_cmp,
23997 gen_rtx_COMPARE (comp_mode, unord_dest,
23998 const0_rtx)));
24000 ne_rtx = gen_rtx_NE (comp_mode, unord_cmp, const0_rtx);
24001 emit_jump_insn (gen_rtx_SET (pc_rtx,
24002 gen_rtx_IF_THEN_ELSE (VOIDmode, ne_rtx,
24003 join_ref,
24004 pc_rtx)));
24006 /* Do the normal comparison, knowing that the values are not
24007 NaNs. */
24008 normal_dest = emit_library_call_value (libfunc, NULL_RTX, LCT_CONST,
24009 SImode, 2, op0, mode, op1,
24010 mode);
24012 emit_insn (gen_cstoresi4 (dest,
24013 gen_rtx_fmt_ee (code, SImode, normal_dest,
24014 const0_rtx),
24015 normal_dest, const0_rtx));
24017 /* Join NaN and non-NaN paths.  Compare dest against 0.  */
24018 emit_label (join_label);
24019 code = NE;
24022 emit_insn (gen_rtx_SET (compare_result,
24023 gen_rtx_COMPARE (comp_mode, dest, const0_rtx)));
24026 else
24028 /* Generate XLC-compatible TFmode compare as PARALLEL with extra
24029 CLOBBERs to match cmptf_internal2 pattern. */
24030 if (comp_mode == CCFPmode && TARGET_XL_COMPAT
24031 && FLOAT128_IBM_P (GET_MODE (op0))
24032 && TARGET_HARD_FLOAT && TARGET_FPRS)
24033 emit_insn (gen_rtx_PARALLEL (VOIDmode,
24034 gen_rtvec (10,
24035 gen_rtx_SET (compare_result,
24036 gen_rtx_COMPARE (comp_mode, op0, op1)),
24037 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
24038 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
24039 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
24040 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
24041 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
24042 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
24043 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
24044 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
24045 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (Pmode)))));
24046 else if (GET_CODE (op1) == UNSPEC
24047 && XINT (op1, 1) == UNSPEC_SP_TEST)
24049 rtx op1b = XVECEXP (op1, 0, 0);
24050 comp_mode = CCEQmode;
24051 compare_result = gen_reg_rtx (CCEQmode);
24052 if (TARGET_64BIT)
24053 emit_insn (gen_stack_protect_testdi (compare_result, op0, op1b));
24054 else
24055 emit_insn (gen_stack_protect_testsi (compare_result, op0, op1b));
24057 else
24058 emit_insn (gen_rtx_SET (compare_result,
24059 gen_rtx_COMPARE (comp_mode, op0, op1)));
24062 /* Some kinds of FP comparisons need an OR operation;
24063 under flag_finite_math_only we don't bother. */
24064 if (FLOAT_MODE_P (mode)
24065 && (!FLOAT128_IEEE_P (mode) || TARGET_FLOAT128_HW)
24066 && !flag_finite_math_only
24067 && !(TARGET_HARD_FLOAT && !TARGET_FPRS)
24068 && (code == LE || code == GE
24069 || code == UNEQ || code == LTGT
24070 || code == UNGT || code == UNLT))
24072 enum rtx_code or1, or2;
24073 rtx or1_rtx, or2_rtx, compare2_rtx;
24074 rtx or_result = gen_reg_rtx (CCEQmode);
24076 switch (code)
24078 case LE: or1 = LT; or2 = EQ; break;
24079 case GE: or1 = GT; or2 = EQ; break;
24080 case UNEQ: or1 = UNORDERED; or2 = EQ; break;
24081 case LTGT: or1 = LT; or2 = GT; break;
24082 case UNGT: or1 = UNORDERED; or2 = GT; break;
24083 case UNLT: or1 = UNORDERED; or2 = LT; break;
24084 default: gcc_unreachable ();
24086 validate_condition_mode (or1, comp_mode);
24087 validate_condition_mode (or2, comp_mode);
24088 or1_rtx = gen_rtx_fmt_ee (or1, SImode, compare_result, const0_rtx);
24089 or2_rtx = gen_rtx_fmt_ee (or2, SImode, compare_result, const0_rtx);
24090 compare2_rtx = gen_rtx_COMPARE (CCEQmode,
24091 gen_rtx_IOR (SImode, or1_rtx, or2_rtx),
24092 const_true_rtx);
24093 emit_insn (gen_rtx_SET (or_result, compare2_rtx));
24095 compare_result = or_result;
24096 code = EQ;
24099 validate_condition_mode (code, GET_MODE (compare_result));
24101 return gen_rtx_fmt_ee (code, VOIDmode, compare_result, const0_rtx);
24105 /* Return the diagnostic message string if the binary operation OP is
24106 not permitted on TYPE1 and TYPE2, NULL otherwise. */
24108 static const char*
24109 rs6000_invalid_binary_op (int op ATTRIBUTE_UNUSED,
24110 const_tree type1,
24111 const_tree type2)
24113 enum machine_mode mode1 = TYPE_MODE (type1);
24114 enum machine_mode mode2 = TYPE_MODE (type2);
24116 /* For complex modes, use the inner type. */
24117 if (COMPLEX_MODE_P (mode1))
24118 mode1 = GET_MODE_INNER (mode1);
24120 if (COMPLEX_MODE_P (mode2))
24121 mode2 = GET_MODE_INNER (mode2);
24123 /* Don't allow IEEE 754R 128-bit binary floating point and IBM extended
24124 double to intermix unless -mfloat128-convert. */
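/* E.g. (illustrative): given "__float128 a; __ibm128 b;", the expression
   "a + b" is rejected with the first message below unless
   -mfloat128-convert is in effect.  */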
24125 if (mode1 == mode2)
24126 return NULL;
24128 if (!TARGET_FLOAT128_CVT)
24130 if ((mode1 == KFmode && mode2 == IFmode)
24131 || (mode1 == IFmode && mode2 == KFmode))
24132 return N_("__float128 and __ibm128 cannot be used in the same "
24133 "expression");
24135 if (TARGET_IEEEQUAD
24136 && ((mode1 == IFmode && mode2 == TFmode)
24137 || (mode1 == TFmode && mode2 == IFmode)))
24138 return N_("__ibm128 and long double cannot be used in the same "
24139 "expression");
24141 if (!TARGET_IEEEQUAD
24142 && ((mode1 == KFmode && mode2 == TFmode)
24143 || (mode1 == TFmode && mode2 == KFmode)))
24144 return N_("__float128 and long double cannot be used in the same "
24145 "expression");
24148 return NULL;
24152 /* Expand floating point conversion to/from __float128 and __ibm128. */
24154 void
24155 rs6000_expand_float128_convert (rtx dest, rtx src, bool unsigned_p)
24157 machine_mode dest_mode = GET_MODE (dest);
24158 machine_mode src_mode = GET_MODE (src);
24159 convert_optab cvt = unknown_optab;
24160 bool do_move = false;
24161 rtx libfunc = NULL_RTX;
24162 rtx dest2;
24163 typedef rtx (*rtx_2func_t) (rtx, rtx);
24164 rtx_2func_t hw_convert = (rtx_2func_t)0;
24165 size_t kf_or_tf;
24167 struct hw_conv_t {
24168 rtx_2func_t from_df;
24169 rtx_2func_t from_sf;
24170 rtx_2func_t from_si_sign;
24171 rtx_2func_t from_si_uns;
24172 rtx_2func_t from_di_sign;
24173 rtx_2func_t from_di_uns;
24174 rtx_2func_t to_df;
24175 rtx_2func_t to_sf;
24176 rtx_2func_t to_si_sign;
24177 rtx_2func_t to_si_uns;
24178 rtx_2func_t to_di_sign;
24179 rtx_2func_t to_di_uns;
24180 } hw_conversions[2] = {
24181 /* conversions to/from KFmode */
24183 gen_extenddfkf2_hw, /* KFmode <- DFmode. */
24184 gen_extendsfkf2_hw, /* KFmode <- SFmode. */
24185 gen_float_kfsi2_hw, /* KFmode <- SImode (signed). */
24186 gen_floatuns_kfsi2_hw, /* KFmode <- SImode (unsigned). */
24187 gen_float_kfdi2_hw, /* KFmode <- DImode (signed). */
24188 gen_floatuns_kfdi2_hw, /* KFmode <- DImode (unsigned). */
24189 gen_trunckfdf2_hw, /* DFmode <- KFmode. */
24190 gen_trunckfsf2_hw, /* SFmode <- KFmode. */
24191 gen_fix_kfsi2_hw, /* SImode <- KFmode (signed). */
24192 gen_fixuns_kfsi2_hw, /* SImode <- KFmode (unsigned). */
24193 gen_fix_kfdi2_hw, /* DImode <- KFmode (signed). */
24194 gen_fixuns_kfdi2_hw, /* DImode <- KFmode (unsigned). */
24197 /* conversions to/from TFmode */
24199 gen_extenddftf2_hw, /* TFmode <- DFmode. */
24200 gen_extendsftf2_hw, /* TFmode <- SFmode. */
24201 gen_float_tfsi2_hw, /* TFmode <- SImode (signed). */
24202 gen_floatuns_tfsi2_hw, /* TFmode <- SImode (unsigned). */
24203 gen_float_tfdi2_hw, /* TFmode <- DImode (signed). */
24204 gen_floatuns_tfdi2_hw, /* TFmode <- DImode (unsigned). */
24205 gen_trunctfdf2_hw, /* DFmode <- TFmode. */
24206 gen_trunctfsf2_hw, /* SFmode <- TFmode. */
24207 gen_fix_tfsi2_hw, /* SImode <- TFmode (signed). */
24208 gen_fixuns_tfsi2_hw, /* SImode <- TFmode (unsigned). */
24209 gen_fix_tfdi2_hw, /* DImode <- TFmode (signed). */
24210 gen_fixuns_tfdi2_hw, /* DImode <- TFmode (unsigned). */
24214 if (dest_mode == src_mode)
24215 gcc_unreachable ();
24217 /* Eliminate memory operations. */
24218 if (MEM_P (src))
24219 src = force_reg (src_mode, src);
24221 if (MEM_P (dest))
24223 rtx tmp = gen_reg_rtx (dest_mode);
24224 rs6000_expand_float128_convert (tmp, src, unsigned_p);
24225 rs6000_emit_move (dest, tmp, dest_mode);
24226 return;
24229 /* Convert to IEEE 128-bit floating point. */
24230 if (FLOAT128_IEEE_P (dest_mode))
24232 if (dest_mode == KFmode)
24233 kf_or_tf = 0;
24234 else if (dest_mode == TFmode)
24235 kf_or_tf = 1;
24236 else
24237 gcc_unreachable ();
24239 switch (src_mode)
24241 case DFmode:
24242 cvt = sext_optab;
24243 hw_convert = hw_conversions[kf_or_tf].from_df;
24244 break;
24246 case SFmode:
24247 cvt = sext_optab;
24248 hw_convert = hw_conversions[kf_or_tf].from_sf;
24249 break;
24251 case KFmode:
24252 case IFmode:
24253 case TFmode:
24254 if (FLOAT128_IBM_P (src_mode))
24255 cvt = sext_optab;
24256 else
24257 do_move = true;
24258 break;
24260 case SImode:
24261 if (unsigned_p)
24263 cvt = ufloat_optab;
24264 hw_convert = hw_conversions[kf_or_tf].from_si_uns;
24266 else
24268 cvt = sfloat_optab;
24269 hw_convert = hw_conversions[kf_or_tf].from_si_sign;
24271 break;
24273 case DImode:
24274 if (unsigned_p)
24276 cvt = ufloat_optab;
24277 hw_convert = hw_conversions[kf_or_tf].from_di_uns;
24279 else
24281 cvt = sfloat_optab;
24282 hw_convert = hw_conversions[kf_or_tf].from_di_sign;
24284 break;
24286 default:
24287 gcc_unreachable ();
24291 /* Convert from IEEE 128-bit floating point. */
24292 else if (FLOAT128_IEEE_P (src_mode))
24294 if (src_mode == KFmode)
24295 kf_or_tf = 0;
24296 else if (src_mode == TFmode)
24297 kf_or_tf = 1;
24298 else
24299 gcc_unreachable ();
24301 switch (dest_mode)
24303 case DFmode:
24304 cvt = trunc_optab;
24305 hw_convert = hw_conversions[kf_or_tf].to_df;
24306 break;
24308 case SFmode:
24309 cvt = trunc_optab;
24310 hw_convert = hw_conversions[kf_or_tf].to_sf;
24311 break;
24313 case KFmode:
24314 case IFmode:
24315 case TFmode:
24316 if (FLOAT128_IBM_P (dest_mode))
24317 cvt = trunc_optab;
24318 else
24319 do_move = true;
24320 break;
24322 case SImode:
24323 if (unsigned_p)
24325 cvt = ufix_optab;
24326 hw_convert = hw_conversions[kf_or_tf].to_si_uns;
24328 else
24330 cvt = sfix_optab;
24331 hw_convert = hw_conversions[kf_or_tf].to_si_sign;
24333 break;
24335 case DImode:
24336 if (unsigned_p)
24338 cvt = ufix_optab;
24339 hw_convert = hw_conversions[kf_or_tf].to_di_uns;
24341 else
24343 cvt = sfix_optab;
24344 hw_convert = hw_conversions[kf_or_tf].to_di_sign;
24346 break;
24348 default:
24349 gcc_unreachable ();
24353 /* Both IBM format. */
24354 else if (FLOAT128_IBM_P (dest_mode) && FLOAT128_IBM_P (src_mode))
24355 do_move = true;
24357 else
24358 gcc_unreachable ();
24360 /* Handle conversion between TFmode/KFmode. */
24361 if (do_move)
24362 emit_move_insn (dest, gen_lowpart (dest_mode, src));
24364 /* Handle conversion if we have hardware support. */
24365 else if (TARGET_FLOAT128_HW && hw_convert)
24366 emit_insn ((hw_convert) (dest, src));
24368 /* Call an external function to do the conversion. */
24369 else if (cvt != unknown_optab)
24371 libfunc = convert_optab_libfunc (cvt, dest_mode, src_mode);
24372 gcc_assert (libfunc != NULL_RTX);
24374 dest2 = emit_library_call_value (libfunc, dest, LCT_CONST, dest_mode, 1, src,
24375 src_mode);
24377 gcc_assert (dest2 != NULL_RTX);
24378 if (!rtx_equal_p (dest, dest2))
24379 emit_move_insn (dest, dest2);
24382 else
24383 gcc_unreachable ();
24385 return;
24388 /* Split a conversion from __float128 to an integer type into separate insns.
24389 OPERANDS points to the destination, source, and V2DI temporary
24390 register. CODE is either FIX or UNSIGNED_FIX. */
24392 void
24393 convert_float128_to_int (rtx *operands, enum rtx_code code)
24395 rtx dest = operands[0];
24396 rtx src = operands[1];
24397 rtx tmp = operands[2];
24398 rtx cvt;
24399 rtvec cvt_vec;
24400 rtx cvt_unspec;
24401 rtvec move_vec;
24402 rtx move_unspec;
24404 if (GET_CODE (tmp) == SCRATCH)
24405 tmp = gen_reg_rtx (V2DImode);
24407 if (MEM_P (dest))
24408 dest = rs6000_address_for_fpconvert (dest);
24410 /* Generate the actual convert insn of the form:
24411 (set (tmp) (unspec:V2DI [(fix:SI (reg:KF))] UNSPEC_IEEE128_CONVERT)). */
24412 cvt = gen_rtx_fmt_e (code, GET_MODE (dest), src);
24413 cvt_vec = gen_rtvec (1, cvt);
24414 cvt_unspec = gen_rtx_UNSPEC (V2DImode, cvt_vec, UNSPEC_IEEE128_CONVERT);
24415 emit_insn (gen_rtx_SET (tmp, cvt_unspec));
24417 /* Generate the move insn of the form:
24418 (set (dest:SI) (unspec:SI [(tmp:V2DI)] UNSPEC_IEEE128_MOVE)). */
24419 move_vec = gen_rtvec (1, tmp);
24420 move_unspec = gen_rtx_UNSPEC (GET_MODE (dest), move_vec, UNSPEC_IEEE128_MOVE);
24421 emit_insn (gen_rtx_SET (dest, move_unspec));
24424 /* Split a conversion from an integer type to __float128 into separate insns.
24425 OPERANDS points to the destination, source, and V2DI temporary
24426 register. CODE is either FLOAT or UNSIGNED_FLOAT. */
24428 void
24429 convert_int_to_float128 (rtx *operands, enum rtx_code code)
24431 rtx dest = operands[0];
24432 rtx src = operands[1];
24433 rtx tmp = operands[2];
24434 rtx cvt;
24435 rtvec cvt_vec;
24436 rtx cvt_unspec;
24437 rtvec move_vec;
24438 rtx move_unspec;
24439 rtx unsigned_flag;
24441 if (GET_CODE (tmp) == SCRATCH)
24442 tmp = gen_reg_rtx (V2DImode);
24444 if (MEM_P (src))
24445 src = rs6000_address_for_fpconvert (src);
24447 /* Generate the move of the integer into the Altivec register of the form:
24448 (set (tmp:V2DI) (unspec:V2DI [(src:SI)
24449 (const_int 0)] UNSPEC_IEEE128_MOVE)).
24451 or:
24452 (set (tmp:V2DI) (unspec:V2DI [(src:DI)] UNSPEC_IEEE128_MOVE)). */
24454 if (GET_MODE (src) == SImode)
24456 unsigned_flag = (code == UNSIGNED_FLOAT) ? const1_rtx : const0_rtx;
24457 move_vec = gen_rtvec (2, src, unsigned_flag);
24459 else
24460 move_vec = gen_rtvec (1, src);
24462 move_unspec = gen_rtx_UNSPEC (V2DImode, move_vec, UNSPEC_IEEE128_MOVE);
24463 emit_insn (gen_rtx_SET (tmp, move_unspec));
24465 /* Generate the actual convert insn of the form:
24466 (set (dest:KF) (float:KF (unspec:DI [(tmp:V2DI)]
24467 UNSPEC_IEEE128_CONVERT))). */
24468 cvt_vec = gen_rtvec (1, tmp);
24469 cvt_unspec = gen_rtx_UNSPEC (DImode, cvt_vec, UNSPEC_IEEE128_CONVERT);
24470 cvt = gen_rtx_fmt_e (code, GET_MODE (dest), cvt_unspec);
24471 emit_insn (gen_rtx_SET (dest, cvt));
24475 /* Emit the RTL for an sISEL pattern. */
24477 void
24478 rs6000_emit_sISEL (machine_mode mode ATTRIBUTE_UNUSED, rtx operands[])
24480 rs6000_emit_int_cmove (operands[0], operands[1], const1_rtx, const0_rtx);
24483 /* Emit RTL that sets a register to zero if OP1 and OP2 are equal. SCRATCH
24484 can be used as that dest register. Return the dest register. */
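/* For a nonzero OP2 this computes OP1 ^ OP2 when OP2 is usable as a
   logical-instruction immediate, and OP1 - OP2 otherwise; either form
   is zero exactly when OP1 == OP2.  */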
24486 rtx
24487 rs6000_emit_eqne (machine_mode mode, rtx op1, rtx op2, rtx scratch)
24489 if (op2 == const0_rtx)
24490 return op1;
24492 if (GET_CODE (scratch) == SCRATCH)
24493 scratch = gen_reg_rtx (mode);
24495 if (logical_operand (op2, mode))
24496 emit_insn (gen_rtx_SET (scratch, gen_rtx_XOR (mode, op1, op2)));
24497 else
24498 emit_insn (gen_rtx_SET (scratch,
24499 gen_rtx_PLUS (mode, op1, negate_rtx (mode, op2))));
24501 return scratch;
24504 void
24505 rs6000_emit_sCOND (machine_mode mode, rtx operands[])
24507 rtx condition_rtx;
24508 machine_mode op_mode;
24509 enum rtx_code cond_code;
24510 rtx result = operands[0];
24512 condition_rtx = rs6000_generate_compare (operands[1], mode);
24513 cond_code = GET_CODE (condition_rtx);
24515 if (FLOAT_MODE_P (mode)
24516 && !TARGET_FPRS && TARGET_HARD_FLOAT)
24518 rtx t;
24520 PUT_MODE (condition_rtx, SImode);
24521 t = XEXP (condition_rtx, 0);
24523 gcc_assert (cond_code == NE || cond_code == EQ);
24525 if (cond_code == NE)
24526 emit_insn (gen_e500_flip_gt_bit (t, t));
24528 emit_insn (gen_move_from_CR_gt_bit (result, t));
24529 return;
24532 if (cond_code == NE
24533 || cond_code == GE || cond_code == LE
24534 || cond_code == GEU || cond_code == LEU
24535 || cond_code == ORDERED || cond_code == UNGE || cond_code == UNLE)
24537 rtx not_result = gen_reg_rtx (CCEQmode);
24538 rtx not_op, rev_cond_rtx;
24539 machine_mode cc_mode;
24541 cc_mode = GET_MODE (XEXP (condition_rtx, 0));
24543 rev_cond_rtx = gen_rtx_fmt_ee (rs6000_reverse_condition (cc_mode, cond_code),
24544 SImode, XEXP (condition_rtx, 0), const0_rtx);
24545 not_op = gen_rtx_COMPARE (CCEQmode, rev_cond_rtx, const0_rtx);
24546 emit_insn (gen_rtx_SET (not_result, not_op));
24547 condition_rtx = gen_rtx_EQ (VOIDmode, not_result, const0_rtx);
24550 op_mode = GET_MODE (XEXP (operands[1], 0));
24551 if (op_mode == VOIDmode)
24552 op_mode = GET_MODE (XEXP (operands[1], 1));
24554 if (TARGET_POWERPC64 && (op_mode == DImode || FLOAT_MODE_P (mode)))
24556 PUT_MODE (condition_rtx, DImode);
24557 convert_move (result, condition_rtx, 0);
24559 else
24561 PUT_MODE (condition_rtx, SImode);
24562 emit_insn (gen_rtx_SET (result, condition_rtx));
24566 /* Emit a branch of kind CODE to location LOC. */
24568 void
24569 rs6000_emit_cbranch (machine_mode mode, rtx operands[])
24571 rtx condition_rtx, loc_ref;
24573 condition_rtx = rs6000_generate_compare (operands[0], mode);
24574 loc_ref = gen_rtx_LABEL_REF (VOIDmode, operands[3]);
24575 emit_jump_insn (gen_rtx_SET (pc_rtx,
24576 gen_rtx_IF_THEN_ELSE (VOIDmode, condition_rtx,
24577 loc_ref, pc_rtx)));
24580 /* Return the string to output a conditional branch to LABEL, which is
24581 the operand template of the label, or NULL if the branch is really a
24582 conditional return.
24584 OP is the conditional expression. XEXP (OP, 0) is assumed to be a
24585 condition code register and its mode specifies what kind of
24586 comparison we made.
24588 REVERSED is nonzero if we should reverse the sense of the comparison.
24590 INSN is the insn. */
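/* For example, a likely equality branch within range might come out as
   "beq+ cr0,L42" (or "beq+ 0,L42" without symbolic register names), while
   an out-of-range branch becomes "bne cr0,$+8" followed by "b L42"
   (illustrative; hints and register names vary by target).  */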
24592 char *
24593 output_cbranch (rtx op, const char *label, int reversed, rtx_insn *insn)
24595 static char string[64];
24596 enum rtx_code code = GET_CODE (op);
24597 rtx cc_reg = XEXP (op, 0);
24598 machine_mode mode = GET_MODE (cc_reg);
24599 int cc_regno = REGNO (cc_reg) - CR0_REGNO;
24600 int need_longbranch = label != NULL && get_attr_length (insn) == 8;
24601 int really_reversed = reversed ^ need_longbranch;
24602 char *s = string;
24603 const char *ccode;
24604 const char *pred;
24605 rtx note;
24607 validate_condition_mode (code, mode);
24609 /* Work out which way this really branches. We could use
24610 reverse_condition_maybe_unordered here always but this
24611 makes the resulting assembler clearer. */
24612 if (really_reversed)
24614 /* Reversal of FP compares takes care -- an ordered compare
24615 becomes an unordered compare and vice versa. */
24616 if (mode == CCFPmode)
24617 code = reverse_condition_maybe_unordered (code);
24618 else
24619 code = reverse_condition (code);
24622 if ((!TARGET_FPRS && TARGET_HARD_FLOAT) && mode == CCFPmode)
24624 /* The efscmp/tst* instructions twiddle bit 2, which maps nicely
24625 to the GT bit. */
24626 switch (code)
24628 case EQ:
24629 /* Opposite of GT. */
24630 code = GT;
24631 break;
24633 case NE:
24634 code = UNLE;
24635 break;
24637 default:
24638 gcc_unreachable ();
24642 switch (code)
24644 /* Not all of these are actually distinct opcodes, but
24645 we distinguish them for clarity of the resulting assembler. */
24646 case NE: case LTGT:
24647 ccode = "ne"; break;
24648 case EQ: case UNEQ:
24649 ccode = "eq"; break;
24650 case GE: case GEU:
24651 ccode = "ge"; break;
24652 case GT: case GTU: case UNGT:
24653 ccode = "gt"; break;
24654 case LE: case LEU:
24655 ccode = "le"; break;
24656 case LT: case LTU: case UNLT:
24657 ccode = "lt"; break;
24658 case UNORDERED: ccode = "un"; break;
24659 case ORDERED: ccode = "nu"; break;
24660 case UNGE: ccode = "nl"; break;
24661 case UNLE: ccode = "ng"; break;
24662 default:
24663 gcc_unreachable ();
24666 /* Maybe we have a guess as to how likely the branch is. */
24667 pred = "";
24668 note = find_reg_note (insn, REG_BR_PROB, NULL_RTX);
24669 if (note != NULL_RTX)
24671 /* PROB is the difference from 50%. */
24672 int prob = XINT (note, 0) - REG_BR_PROB_BASE / 2;
24674 /* Only hint for highly probable/improbable branches on newer cpus when
24675 we have real profile data, as static prediction overrides processor
24676 dynamic prediction. For older cpus we may as well always hint, but
24677 assume not taken for branches that are very close to 50% as a
24678 mispredicted taken branch is more expensive than a
24679 mispredicted not-taken branch. */
24680 if (rs6000_always_hint
24681 || (abs (prob) > REG_BR_PROB_BASE / 100 * 48
24682 && (profile_status_for_fn (cfun) != PROFILE_GUESSED)
24683 && br_prob_note_reliable_p (note)))
24685 if (abs (prob) > REG_BR_PROB_BASE / 20
24686 && ((prob > 0) ^ need_longbranch))
24687 pred = "+";
24688 else
24689 pred = "-";
24693 if (label == NULL)
24694 s += sprintf (s, "b%slr%s ", ccode, pred);
24695 else
24696 s += sprintf (s, "b%s%s ", ccode, pred);
24698 /* We need to escape any '%' characters in the reg_names string.
24699 Assume they'd only be the first character.... */
24700 if (reg_names[cc_regno + CR0_REGNO][0] == '%')
24701 *s++ = '%';
24702 s += sprintf (s, "%s", reg_names[cc_regno + CR0_REGNO]);
24704 if (label != NULL)
24706 /* If the branch distance was too far, we may have to use an
24707 unconditional branch to go the distance. */
24708 if (need_longbranch)
24709 s += sprintf (s, ",$+8\n\tb %s", label);
24710 else
24711 s += sprintf (s, ",%s", label);
24714 return string;
24717 /* Return the string to flip the GT bit on a CR. */
24718 char *
24719 output_e500_flip_gt_bit (rtx dst, rtx src)
24721 static char string[64];
24722 int a, b;
24724 gcc_assert (GET_CODE (dst) == REG && CR_REGNO_P (REGNO (dst))
24725 && GET_CODE (src) == REG && CR_REGNO_P (REGNO (src)));
24727 /* GT bit. */
24728 a = 4 * (REGNO (dst) - CR0_REGNO) + 1;
24729 b = 4 * (REGNO (src) - CR0_REGNO) + 1;
24731 sprintf (string, "crnot %d,%d", a, b);
24732 return string;
24735 /* Return insn for VSX or Altivec comparisons. */
24737 static rtx
24738 rs6000_emit_vector_compare_inner (enum rtx_code code, rtx op0, rtx op1)
24740 rtx mask;
24741 machine_mode mode = GET_MODE (op0);
24743 switch (code)
24745 default:
24746 break;
24748 case GE:
24749 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
24750 return NULL_RTX;
24751 /* FALLTHRU */
24753 case EQ:
24754 case GT:
24755 case GTU:
24756 case ORDERED:
24757 case UNORDERED:
24758 case UNEQ:
24759 case LTGT:
24760 mask = gen_reg_rtx (mode);
24761 emit_insn (gen_rtx_SET (mask, gen_rtx_fmt_ee (code, mode, op0, op1)));
24762 return mask;
24765 return NULL_RTX;
24768 /* Emit vector compare for operands OP0 and OP1 using code RCODE.
24769 DMODE is expected destination mode. This is a recursive function. */
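/* E.g. an integer LE compare has no single AltiVec instruction; it is built
   below as (LT || EQ), and LT in turn retries as GT with the operands
   swapped, so a V4SI LE ends up roughly as vcmpgtsw (operands swapped)
   + vcmpequw + vor (illustrative).  */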
24771 static rtx
24772 rs6000_emit_vector_compare (enum rtx_code rcode,
24773 rtx op0, rtx op1,
24774 machine_mode dmode)
24776 rtx mask;
24777 bool swap_operands = false;
24778 bool try_again = false;
24780 gcc_assert (VECTOR_UNIT_ALTIVEC_OR_VSX_P (dmode));
24781 gcc_assert (GET_MODE (op0) == GET_MODE (op1));
24783 /* See if the comparison works as is. */
24784 mask = rs6000_emit_vector_compare_inner (rcode, op0, op1);
24785 if (mask)
24786 return mask;
24788 switch (rcode)
24790 case LT:
24791 rcode = GT;
24792 swap_operands = true;
24793 try_again = true;
24794 break;
24795 case LTU:
24796 rcode = GTU;
24797 swap_operands = true;
24798 try_again = true;
24799 break;
24800 case NE:
24801 case UNLE:
24802 case UNLT:
24803 case UNGE:
24804 case UNGT:
24805 /* Invert condition and try again.
24806 e.g., A != B becomes ~(A==B). */
24808 enum rtx_code rev_code;
24809 enum insn_code nor_code;
24810 rtx mask2;
24812 rev_code = reverse_condition_maybe_unordered (rcode);
24813 if (rev_code == UNKNOWN)
24814 return NULL_RTX;
24816 nor_code = optab_handler (one_cmpl_optab, dmode);
24817 if (nor_code == CODE_FOR_nothing)
24818 return NULL_RTX;
24820 mask2 = rs6000_emit_vector_compare (rev_code, op0, op1, dmode);
24821 if (!mask2)
24822 return NULL_RTX;
24824 mask = gen_reg_rtx (dmode);
24825 emit_insn (GEN_FCN (nor_code) (mask, mask2));
24826 return mask;
24828 break;
24829 case GE:
24830 case GEU:
24831 case LE:
24832 case LEU:
24833 /* Try GT/GTU/LT/LTU OR EQ */
24835 rtx c_rtx, eq_rtx;
24836 enum insn_code ior_code;
24837 enum rtx_code new_code;
24839 switch (rcode)
24841 case GE:
24842 new_code = GT;
24843 break;
24845 case GEU:
24846 new_code = GTU;
24847 break;
24849 case LE:
24850 new_code = LT;
24851 break;
24853 case LEU:
24854 new_code = LTU;
24855 break;
24857 default:
24858 gcc_unreachable ();
24861 ior_code = optab_handler (ior_optab, dmode);
24862 if (ior_code == CODE_FOR_nothing)
24863 return NULL_RTX;
24865 c_rtx = rs6000_emit_vector_compare (new_code, op0, op1, dmode);
24866 if (!c_rtx)
24867 return NULL_RTX;
24869 eq_rtx = rs6000_emit_vector_compare (EQ, op0, op1, dmode);
24870 if (!eq_rtx)
24871 return NULL_RTX;
24873 mask = gen_reg_rtx (dmode);
24874 emit_insn (GEN_FCN (ior_code) (mask, c_rtx, eq_rtx));
24875 return mask;
24877 break;
24878 default:
24879 return NULL_RTX;
24882 if (try_again)
24884 if (swap_operands)
24885 std::swap (op0, op1);
24887 mask = rs6000_emit_vector_compare_inner (rcode, op0, op1);
24888 if (mask)
24889 return mask;
24892 /* You only get two chances. */
24893 return NULL_RTX;
24896 /* Emit vector conditional expression. DEST is destination. OP_TRUE and
24897 OP_FALSE are two VEC_COND_EXPR operands. CC_OP0 and CC_OP1 are the two
24898 operands for the relation operation COND. */
24900 int
24901 rs6000_emit_vector_cond_expr (rtx dest, rtx op_true, rtx op_false,
24902 rtx cond, rtx cc_op0, rtx cc_op1)
24904 machine_mode dest_mode = GET_MODE (dest);
24905 machine_mode mask_mode = GET_MODE (cc_op0);
24906 enum rtx_code rcode = GET_CODE (cond);
24907 machine_mode cc_mode = CCmode;
24908 rtx mask;
24909 rtx cond2;
24910 rtx tmp;
24911 bool invert_move = false;
24913 if (VECTOR_UNIT_NONE_P (dest_mode))
24914 return 0;
24916 gcc_assert (GET_MODE_SIZE (dest_mode) == GET_MODE_SIZE (mask_mode)
24917 && GET_MODE_NUNITS (dest_mode) == GET_MODE_NUNITS (mask_mode));
24919 switch (rcode)
24921 /* Swap operands if we can, and fall back to doing the operation as
24922 specified, and doing a NOR to invert the test. */
24923 case NE:
24924 case UNLE:
24925 case UNLT:
24926 case UNGE:
24927 case UNGT:
24928 /* Invert condition and try again.
24929 e.g., A = (B != C) ? D : E becomes A = (B == C) ? E : D. */
24930 invert_move = true;
24931 rcode = reverse_condition_maybe_unordered (rcode);
24932 if (rcode == UNKNOWN)
24933 return 0;
24934 break;
24936 /* Mark unsigned tests with CCUNSmode. */
24937 case GTU:
24938 case GEU:
24939 case LTU:
24940 case LEU:
24941 cc_mode = CCUNSmode;
24942 break;
24944 default:
24945 break;
24948 /* Get the vector mask for the given relational operations. */
24949 mask = rs6000_emit_vector_compare (rcode, cc_op0, cc_op1, mask_mode);
24951 if (!mask)
24952 return 0;
24954 if (invert_move)
24956 tmp = op_true;
24957 op_true = op_false;
24958 op_false = tmp;
24961 cond2 = gen_rtx_fmt_ee (NE, cc_mode, gen_lowpart (dest_mode, mask),
24962 CONST0_RTX (dest_mode));
24963 emit_insn (gen_rtx_SET (dest,
24964 gen_rtx_IF_THEN_ELSE (dest_mode,
24965 cond2,
24966 op_true,
24967 op_false)));
24968 return 1;
24971 /* ISA 3.0 (power9) minmax subcase to emit an XSMAXCDP or XSMINCDP instruction
24972 for SF/DF scalars. Move TRUE_COND to DEST if OP of the operands of the last
24973 comparison is nonzero/true, FALSE_COND if it is zero/false. Return 0 if the
24974 hardware has no such operation. */
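/* E.g. (a >= b) ? a : b on DFmode collapses to a single xsmaxcdp, and
   (a >= b) ? b : a to a single xsmincdp (mnemonics per the comment
   above).  */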
24976 static int
24977 rs6000_emit_p9_fp_minmax (rtx dest, rtx op, rtx true_cond, rtx false_cond)
24979 enum rtx_code code = GET_CODE (op);
24980 rtx op0 = XEXP (op, 0);
24981 rtx op1 = XEXP (op, 1);
24982 machine_mode compare_mode = GET_MODE (op0);
24983 machine_mode result_mode = GET_MODE (dest);
24984 bool max_p = false;
24986 if (result_mode != compare_mode)
24987 return 0;
24989 if (code == GE || code == GT)
24990 max_p = true;
24991 else if (code == LE || code == LT)
24992 max_p = false;
24993 else
24994 return 0;
24996 if (rtx_equal_p (op0, true_cond) && rtx_equal_p (op1, false_cond))
24997 ;
24999 else if (rtx_equal_p (op1, true_cond) && rtx_equal_p (op0, false_cond))
25000 max_p = !max_p;
25002 else
25003 return 0;
25005 rs6000_emit_minmax (dest, max_p ? SMAX : SMIN, op0, op1);
25006 return 1;
25009 /* ISA 3.0 (power9) conditional move subcase to emit XSCMP{EQ,GE,GT,NE}DP and
25010 XXSEL instructions for SF/DF scalars. Move TRUE_COND to DEST if OP of the
25011 operands of the last comparison is nonzero/true, FALSE_COND if it is
25012 zero/false. Return 0 if the hardware has no such operation. */
25014 static int
25015 rs6000_emit_p9_fp_cmove (rtx dest, rtx op, rtx true_cond, rtx false_cond)
25017 enum rtx_code code = GET_CODE (op);
25018 rtx op0 = XEXP (op, 0);
25019 rtx op1 = XEXP (op, 1);
25020 machine_mode result_mode = GET_MODE (dest);
25021 rtx compare_rtx;
25022 rtx cmove_rtx;
25023 rtx clobber_rtx;
25025 if (!can_create_pseudo_p ())
25026 return 0;
25028 switch (code)
25030 case EQ:
25031 case GE:
25032 case GT:
25033 break;
25035 case NE:
25036 case LT:
25037 case LE:
25038 code = swap_condition (code);
25039 std::swap (op0, op1);
25040 break;
25042 default:
25043 return 0;
25046 /* Generate: [(parallel [(set (dest)
25047 (if_then_else (op (cmp1) (cmp2))
25048 (true)
25049 (false)))
25050 (clobber (scratch))])]. */
25052 compare_rtx = gen_rtx_fmt_ee (code, CCFPmode, op0, op1);
25053 cmove_rtx = gen_rtx_SET (dest,
25054 gen_rtx_IF_THEN_ELSE (result_mode,
25055 compare_rtx,
25056 true_cond,
25057 false_cond));
25059 clobber_rtx = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (V2DImode));
25060 emit_insn (gen_rtx_PARALLEL (VOIDmode,
25061 gen_rtvec (2, cmove_rtx, clobber_rtx)));
25063 return 1;
25066 /* Emit a conditional move: move TRUE_COND to DEST if OP of the
25067 operands of the last comparison is nonzero/true, FALSE_COND if it
25068 is zero/false. Return 0 if the hardware has no such operation. */
25070 int
25071 rs6000_emit_cmove (rtx dest, rtx op, rtx true_cond, rtx false_cond)
25073 enum rtx_code code = GET_CODE (op);
25074 rtx op0 = XEXP (op, 0);
25075 rtx op1 = XEXP (op, 1);
25076 machine_mode compare_mode = GET_MODE (op0);
25077 machine_mode result_mode = GET_MODE (dest);
25078 rtx temp;
25079 bool is_against_zero;
25081 /* These modes should always match. */
25082 if (GET_MODE (op1) != compare_mode
25083 /* In the isel case however, we can use a compare immediate, so
25084 op1 may be a small constant. */
25085 && (!TARGET_ISEL || !short_cint_operand (op1, VOIDmode)))
25086 return 0;
25087 if (GET_MODE (true_cond) != result_mode)
25088 return 0;
25089 if (GET_MODE (false_cond) != result_mode)
25090 return 0;
25092 /* See if we can use the ISA 3.0 (power9) min/max/compare functions. */
25093 if (TARGET_P9_MINMAX
25094 && (compare_mode == SFmode || compare_mode == DFmode)
25095 && (result_mode == SFmode || result_mode == DFmode))
25097 if (rs6000_emit_p9_fp_minmax (dest, op, true_cond, false_cond))
25098 return 1;
25100 if (rs6000_emit_p9_fp_cmove (dest, op, true_cond, false_cond))
25101 return 1;
25104 /* Don't allow using floating point comparisons for integer results for
25105 now. */
25106 if (FLOAT_MODE_P (compare_mode) && !FLOAT_MODE_P (result_mode))
25107 return 0;
25109 /* First, work out if the hardware can do this at all, or
25110 if it's too slow.... */
25111 if (!FLOAT_MODE_P (compare_mode))
25113 if (TARGET_ISEL)
25114 return rs6000_emit_int_cmove (dest, op, true_cond, false_cond);
25115 return 0;
25117 else if (TARGET_HARD_FLOAT && !TARGET_FPRS
25118 && SCALAR_FLOAT_MODE_P (compare_mode))
25119 return 0;
25121 is_against_zero = op1 == CONST0_RTX (compare_mode);
25123 /* A floating-point subtract might overflow, underflow, or produce
25124 an inexact result, thus changing the floating-point flags, so it
25125 can't be generated if we care about that. It's safe if one side
25126 of the construct is zero, since then no subtract will be
25127 generated. */
25128 if (SCALAR_FLOAT_MODE_P (compare_mode)
25129 && flag_trapping_math && ! is_against_zero)
25130 return 0;
25132 /* Eliminate half of the comparisons by switching operands, this
25133 makes the remaining code simpler. */
25134 if (code == UNLT || code == UNGT || code == UNORDERED || code == NE
25135 || code == LTGT || code == LT || code == UNLE)
25137 code = reverse_condition_maybe_unordered (code);
25138 temp = true_cond;
25139 true_cond = false_cond;
25140 false_cond = temp;
25143 /* UNEQ and LTGT take four instructions for a comparison with zero,
25144 it'll probably be faster to use a branch here too. */
25145 if (code == UNEQ && HONOR_NANS (compare_mode))
25146 return 0;
25148 /* We're going to try to implement comparisons by performing
25149 a subtract, then comparing against zero. Unfortunately,
25150 Inf - Inf is NaN which is not zero, and so if we don't
25151 know that the operand is finite and the comparison
25152 would treat EQ different to UNORDERED, we can't do it. */
25153 if (HONOR_INFINITIES (compare_mode)
25154 && code != GT && code != UNGE
25155 && (GET_CODE (op1) != CONST_DOUBLE
25156 || real_isinf (CONST_DOUBLE_REAL_VALUE (op1)))
25157 /* Constructs of the form (a OP b ? a : b) are safe. */
25158 && ((! rtx_equal_p (op0, false_cond) && ! rtx_equal_p (op1, false_cond))
25159 || (! rtx_equal_p (op0, true_cond)
25160 && ! rtx_equal_p (op1, true_cond))))
25161 return 0;
25163 /* At this point we know we can use fsel. */
25165 /* Reduce the comparison to a comparison against zero. */
25166 if (! is_against_zero)
25168 temp = gen_reg_rtx (compare_mode);
25169 emit_insn (gen_rtx_SET (temp, gen_rtx_MINUS (compare_mode, op0, op1)));
25170 op0 = temp;
25171 op1 = CONST0_RTX (compare_mode);
25174 /* If we don't care about NaNs we can reduce some of the comparisons
25175 down to faster ones. */
25176 if (! HONOR_NANS (compare_mode))
25177 switch (code)
25179 case GT:
25180 code = LE;
25181 temp = true_cond;
25182 true_cond = false_cond;
25183 false_cond = temp;
25184 break;
25185 case UNGE:
25186 code = GE;
25187 break;
25188 case UNEQ:
25189 code = EQ;
25190 break;
25191 default:
25192 break;
25195 /* Now, reduce everything down to a GE. */
25196 switch (code)
25198 case GE:
25199 break;
25201 case LE:
25202 temp = gen_reg_rtx (compare_mode);
25203 emit_insn (gen_rtx_SET (temp, gen_rtx_NEG (compare_mode, op0)));
25204 op0 = temp;
25205 break;
25207 case ORDERED:
25208 temp = gen_reg_rtx (compare_mode);
25209 emit_insn (gen_rtx_SET (temp, gen_rtx_ABS (compare_mode, op0)));
25210 op0 = temp;
25211 break;
25213 case EQ:
25214 temp = gen_reg_rtx (compare_mode);
25215 emit_insn (gen_rtx_SET (temp,
25216 gen_rtx_NEG (compare_mode,
25217 gen_rtx_ABS (compare_mode, op0))));
25218 op0 = temp;
25219 break;
25221 case UNGE:
25222 /* a UNGE 0 <-> (a GE 0 || -a UNLT 0) */
25223 temp = gen_reg_rtx (result_mode);
25224 emit_insn (gen_rtx_SET (temp,
25225 gen_rtx_IF_THEN_ELSE (result_mode,
25226 gen_rtx_GE (VOIDmode,
25227 op0, op1),
25228 true_cond, false_cond)));
25229 false_cond = true_cond;
25230 true_cond = temp;
25232 temp = gen_reg_rtx (compare_mode);
25233 emit_insn (gen_rtx_SET (temp, gen_rtx_NEG (compare_mode, op0)));
25234 op0 = temp;
25235 break;
25237 case GT:
25238 /* a GT 0 <-> (a GE 0 && -a UNLT 0) */
25239 temp = gen_reg_rtx (result_mode);
25240 emit_insn (gen_rtx_SET (temp,
25241 gen_rtx_IF_THEN_ELSE (result_mode,
25242 gen_rtx_GE (VOIDmode,
25243 op0, op1),
25244 true_cond, false_cond)));
25245 true_cond = false_cond;
25246 false_cond = temp;
25248 temp = gen_reg_rtx (compare_mode);
25249 emit_insn (gen_rtx_SET (temp, gen_rtx_NEG (compare_mode, op0)));
25250 op0 = temp;
25251 break;
25253 default:
25254 gcc_unreachable ();
25257 emit_insn (gen_rtx_SET (dest,
25258 gen_rtx_IF_THEN_ELSE (result_mode,
25259 gen_rtx_GE (VOIDmode,
25260 op0, op1),
25261 true_cond, false_cond)));
25262 return 1;
25265 /* Same as above, but for ints (isel). */
25267 static int
25268 rs6000_emit_int_cmove (rtx dest, rtx op, rtx true_cond, rtx false_cond)
25270 rtx condition_rtx, cr;
25271 machine_mode mode = GET_MODE (dest);
25272 enum rtx_code cond_code;
25273 rtx (*isel_func) (rtx, rtx, rtx, rtx, rtx);
25274 bool signedp;
25276 if (mode != SImode && (!TARGET_POWERPC64 || mode != DImode))
25277 return 0;
25279 /* We still have to do the compare, because isel doesn't do a
25280 compare, it just looks at the CRx bits set by a previous compare
25281 instruction. */
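/* E.g. a signed SImode (a < b) ? x : y becomes roughly
	cmpw	cr0,ra,rb
	isel	rd,rx,ry,0
   where CR-bit operand 0 is the LT bit of cr0 (illustrative).  */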
25282 condition_rtx = rs6000_generate_compare (op, mode);
25283 cond_code = GET_CODE (condition_rtx);
25284 cr = XEXP (condition_rtx, 0);
25285 signedp = GET_MODE (cr) == CCmode;
25287 isel_func = (mode == SImode
25288 ? (signedp ? gen_isel_signed_si : gen_isel_unsigned_si)
25289 : (signedp ? gen_isel_signed_di : gen_isel_unsigned_di));
25291 switch (cond_code)
25293 case LT: case GT: case LTU: case GTU: case EQ:
25294 /* isel handles these directly. */
25295 break;
25297 default:
25298 /* We need to swap the sense of the comparison. */
25300 std::swap (false_cond, true_cond);
25301 PUT_CODE (condition_rtx, reverse_condition (cond_code));
25303 break;
25306 false_cond = force_reg (mode, false_cond);
25307 if (true_cond != const0_rtx)
25308 true_cond = force_reg (mode, true_cond);
25310 emit_insn (isel_func (dest, condition_rtx, true_cond, false_cond, cr));
25312 return 1;
25315 const char *
25316 output_isel (rtx *operands)
25318 enum rtx_code code;
25320 code = GET_CODE (operands[1]);
25322 if (code == GE || code == GEU || code == LE || code == LEU || code == NE)
25324 gcc_assert (GET_CODE (operands[2]) == REG
25325 && GET_CODE (operands[3]) == REG);
25326 PUT_CODE (operands[1], reverse_condition (code));
25327 return "isel %0,%3,%2,%j1";
25330 return "isel %0,%2,%3,%j1";
25333 void
25334 rs6000_emit_minmax (rtx dest, enum rtx_code code, rtx op0, rtx op1)
25336 machine_mode mode = GET_MODE (op0);
25337 enum rtx_code c;
25338 rtx target;
25340 /* VSX/altivec have direct min/max insns. */
25341 if ((code == SMAX || code == SMIN)
25342 && (VECTOR_UNIT_ALTIVEC_OR_VSX_P (mode)
25343 || (mode == SFmode && VECTOR_UNIT_VSX_P (DFmode))))
25345 emit_insn (gen_rtx_SET (dest, gen_rtx_fmt_ee (code, mode, op0, op1)));
25346 return;
25349 if (code == SMAX || code == SMIN)
25350 c = GE;
25351 else
25352 c = GEU;
25354 if (code == SMAX || code == UMAX)
25355 target = emit_conditional_move (dest, c, op0, op1, mode,
25356 op0, op1, mode, 0);
25357 else
25358 target = emit_conditional_move (dest, c, op0, op1, mode,
25359 op1, op0, mode, 0);
25360 gcc_assert (target);
25361 if (target != dest)
25362 emit_move_insn (dest, target);
25365 /* Split a signbit operation on 64-bit machines with direct move. Also allow
25366 for the value to come from memory or if it is already loaded into a GPR. */
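/* The sign bit of an IEEE 128-bit value is the most-significant bit of its
   high doubleword, so it suffices to get that doubleword into a GPR and
   shift it right by 63 bits.  */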
25368 void
25369 rs6000_split_signbit (rtx dest, rtx src)
25371 machine_mode d_mode = GET_MODE (dest);
25372 machine_mode s_mode = GET_MODE (src);
25373 rtx dest_di = (d_mode == DImode) ? dest : gen_lowpart (DImode, dest);
25374 rtx shift_reg = dest_di;
25376 gcc_assert (FLOAT128_IEEE_P (s_mode) && TARGET_POWERPC64);
25378 if (MEM_P (src))
25380 rtx mem = (WORDS_BIG_ENDIAN
25381 ? adjust_address (src, DImode, 0)
25382 : adjust_address (src, DImode, 8));
25383 emit_insn (gen_rtx_SET (dest_di, mem));
25386 else
25388 unsigned int r = reg_or_subregno (src);
25390 if (INT_REGNO_P (r))
25391 shift_reg = gen_rtx_REG (DImode, r + (BYTES_BIG_ENDIAN == 0));
25393 else
25395 /* Generate the special mfvsrd instruction to get it in a GPR. */
25396 gcc_assert (VSX_REGNO_P (r));
25397 if (s_mode == KFmode)
25398 emit_insn (gen_signbitkf2_dm2 (dest_di, src));
25399 else
25400 emit_insn (gen_signbittf2_dm2 (dest_di, src));
25404 emit_insn (gen_lshrdi3 (dest_di, shift_reg, GEN_INT (63)));
25405 return;
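/* Concretely: the sign of an IEEE 128-bit value is the MSB of its
   16-byte image, so the doubleword holding it sits at byte offset 0
   in big-endian memory and offset 8 in little-endian memory; in a
   GPR pair it is the second register on little-endian, hence the
   "r + 1" above.  The final shift right by 63 isolates that bit.  */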
25408 /* A subroutine of the atomic operation splitters. Jump to LABEL if
25409 COND is true. Mark the jump as unlikely to be taken. */
25411 static void
25412 emit_unlikely_jump (rtx cond, rtx label)
25414 int very_unlikely = REG_BR_PROB_BASE / 100 - 1;
25415 rtx x = gen_rtx_IF_THEN_ELSE (VOIDmode, cond, label, pc_rtx);
25416 rtx_insn *insn = emit_jump_insn (gen_rtx_SET (pc_rtx, x));
25417 add_int_reg_note (insn, REG_BR_PROB, very_unlikely);
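/* The probability attached above is just under 1% (REG_BR_PROB_BASE
   / 100 - 1 parts of REG_BR_PROB_BASE), enough for block layout and
   static branch prediction to treat the target as a cold path.  */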
25420 /* A subroutine of the atomic operation splitters. Emit a load-locked
25421 instruction in MODE. For QI/HImode, possibly use a pattern that includes
25422 the zero_extend operation. */
25424 static void
25425 emit_load_locked (machine_mode mode, rtx reg, rtx mem)
25427 rtx (*fn) (rtx, rtx) = NULL;
25429 switch (mode)
25431 case QImode:
25432 fn = gen_load_lockedqi;
25433 break;
25434 case HImode:
25435 fn = gen_load_lockedhi;
25436 break;
25437 case SImode:
25438 if (GET_MODE (mem) == QImode)
25439 fn = gen_load_lockedqi_si;
25440 else if (GET_MODE (mem) == HImode)
25441 fn = gen_load_lockedhi_si;
25442 else
25443 fn = gen_load_lockedsi;
25444 break;
25445 case DImode:
25446 fn = gen_load_lockeddi;
25447 break;
25448 case TImode:
25449 fn = gen_load_lockedti;
25450 break;
25451 default:
25452 gcc_unreachable ();
25454 emit_insn (fn (reg, mem));
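/* As a sketch of what the generators above emit, assuming the usual
   PowerPC mnemonics: QImode -> lbarx, HImode -> lharx, SImode ->
   lwarx, DImode -> ldarx, TImode -> lqarx.  The *_si variants load
   via lbarx/lharx but leave the zero-extended result in an SImode
   register, as described in the comment above.  */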
25457 /* A subroutine of the atomic operation splitters. Emit a store-conditional
25458 instruction in MODE. */
25460 static void
25461 emit_store_conditional (machine_mode mode, rtx res, rtx mem, rtx val)
25463 rtx (*fn) (rtx, rtx, rtx) = NULL;
25465 switch (mode)
25467 case QImode:
25468 fn = gen_store_conditionalqi;
25469 break;
25470 case HImode:
25471 fn = gen_store_conditionalhi;
25472 break;
25473 case SImode:
25474 fn = gen_store_conditionalsi;
25475 break;
25476 case DImode:
25477 fn = gen_store_conditionaldi;
25478 break;
25479 case TImode:
25480 fn = gen_store_conditionalti;
25481 break;
25482 default:
25483 gcc_unreachable ();
25486 /* Emit sync before stwcx. to address PPC405 Erratum 77. */
25487 if (PPC405_ERRATUM77)
25488 emit_insn (gen_hwsync ());
25490 emit_insn (fn (res, mem, val));
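/* The matching store-conditional mnemonics, again as a sketch:
   QImode -> stbcx., HImode -> sthcx., SImode -> stwcx., DImode ->
   stdcx., TImode -> stqcx.  Each sets the EQ bit of CR0 when the
   reservation from the paired load-locked still held and the store
   went through.  */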
25493 /* Expand barriers before and after a load_locked/store_cond sequence. */
25495 static rtx
25496 rs6000_pre_atomic_barrier (rtx mem, enum memmodel model)
25498 rtx addr = XEXP (mem, 0);
25499 int strict_p = (reload_in_progress || reload_completed);
25501 if (!legitimate_indirect_address_p (addr, strict_p)
25502 && !legitimate_indexed_address_p (addr, strict_p))
25504 addr = force_reg (Pmode, addr);
25505 mem = replace_equiv_address_nv (mem, addr);
25508 switch (model)
25510 case MEMMODEL_RELAXED:
25511 case MEMMODEL_CONSUME:
25512 case MEMMODEL_ACQUIRE:
25513 break;
25514 case MEMMODEL_RELEASE:
25515 case MEMMODEL_ACQ_REL:
25516 emit_insn (gen_lwsync ());
25517 break;
25518 case MEMMODEL_SEQ_CST:
25519 emit_insn (gen_hwsync ());
25520 break;
25521 default:
25522 gcc_unreachable ();
25524 return mem;
25527 static void
25528 rs6000_post_atomic_barrier (enum memmodel model)
25530 switch (model)
25532 case MEMMODEL_RELAXED:
25533 case MEMMODEL_CONSUME:
25534 case MEMMODEL_RELEASE:
25535 break;
25536 case MEMMODEL_ACQUIRE:
25537 case MEMMODEL_ACQ_REL:
25538 case MEMMODEL_SEQ_CST:
25539 emit_insn (gen_isync ());
25540 break;
25541 default:
25542 gcc_unreachable ();
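/* Taken together, the pre/post barrier routines above implement this
   mapping, sketched here for reference:

       model            before    after
       relaxed/consume  (none)    (none)
       acquire          (none)    isync
       release          lwsync    (none)
       acq_rel          lwsync    isync
       seq_cst          hwsync    isync  */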
25546 /* A subroutine of the various atomic expanders. For sub-word operations,
25547 we must adjust things to operate on SImode. Given the original MEM,
25548 return a new aligned memory. Also build and return the quantities by
25549 which to shift and mask. */
25551 static rtx
25552 rs6000_adjust_atomic_subword (rtx orig_mem, rtx *pshift, rtx *pmask)
25554 rtx addr, align, shift, mask, mem;
25555 HOST_WIDE_INT shift_mask;
25556 machine_mode mode = GET_MODE (orig_mem);
25558 /* For smaller modes, we have to implement this via SImode. */
25559 shift_mask = (mode == QImode ? 0x18 : 0x10);
25561 addr = XEXP (orig_mem, 0);
25562 addr = force_reg (GET_MODE (addr), addr);
25564 /* Aligned memory containing subword. Generate a new memory. We
25565 do not want any of the existing MEM_ATTR data, as we're now
25566 accessing memory outside the original object. */
25567 align = expand_simple_binop (Pmode, AND, addr, GEN_INT (-4),
25568 NULL_RTX, 1, OPTAB_LIB_WIDEN);
25569 mem = gen_rtx_MEM (SImode, align);
25570 MEM_VOLATILE_P (mem) = MEM_VOLATILE_P (orig_mem);
25571 if (MEM_ALIAS_SET (orig_mem) == ALIAS_SET_MEMORY_BARRIER)
25572 set_mem_alias_set (mem, ALIAS_SET_MEMORY_BARRIER);
25574 /* Shift amount for subword relative to aligned word. */
25575 shift = gen_reg_rtx (SImode);
25576 addr = gen_lowpart (SImode, addr);
25577 rtx tmp = gen_reg_rtx (SImode);
25578 emit_insn (gen_ashlsi3 (tmp, addr, GEN_INT (3)));
25579 emit_insn (gen_andsi3 (shift, tmp, GEN_INT (shift_mask)));
25580 if (BYTES_BIG_ENDIAN)
25581 shift = expand_simple_binop (SImode, XOR, shift, GEN_INT (shift_mask),
25582 shift, 1, OPTAB_LIB_WIDEN);
25583 *pshift = shift;
25585 /* Mask for insertion. */
25586 mask = expand_simple_binop (SImode, ASHIFT, GEN_INT (GET_MODE_MASK (mode)),
25587 shift, NULL_RTX, 1, OPTAB_LIB_WIDEN);
25588 *pmask = mask;
25590 return mem;
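/* Worked example (little-endian, a QImode byte at address A with
   A % 4 == 2): the AND above yields the containing word at A & -4,
   SHIFT becomes (A << 3) & 0x18 == 16, and MASK becomes
   0xff << 16 == 0x00ff0000.  On big-endian the final XOR flips the
   shift to 8, since byte 2 then sits one byte above the LSB.  */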
25593 /* A subroutine of the various atomic expanders. For sub-word operands,
25594 combine OLDVAL and NEWVAL via MASK. Returns a new pseudo. */
25596 static rtx
25597 rs6000_mask_atomic_subword (rtx oldval, rtx newval, rtx mask)
25599 rtx x;
25601 x = gen_reg_rtx (SImode);
25602 emit_insn (gen_rtx_SET (x, gen_rtx_AND (SImode,
25603 gen_rtx_NOT (SImode, mask),
25604 oldval)));
25606 x = expand_simple_binop (SImode, IOR, newval, x, x, 1, OPTAB_LIB_WIDEN);
25608 return x;
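/* For instance, with MASK == 0x00ff0000, OLDVAL == 0x11223344, and
   NEWVAL already shifted into place as 0x00ee0000, this computes
   (~MASK & OLDVAL) | NEWVAL == 0x11003344 | 0x00ee0000 == 0x11ee3344,
   i.e. the new byte spliced into the old word.  */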
25611 /* A subroutine of the various atomic expanders. For sub-word operands,
25612 extract WIDE to NARROW via SHIFT. */
25614 static void
25615 rs6000_finish_atomic_subword (rtx narrow, rtx wide, rtx shift)
25617 wide = expand_simple_binop (SImode, LSHIFTRT, wide, shift,
25618 wide, 1, OPTAB_LIB_WIDEN);
25619 emit_move_insn (narrow, gen_lowpart (GET_MODE (narrow), wide));
25622 /* Expand an atomic compare and swap operation. */
25624 void
25625 rs6000_expand_atomic_compare_and_swap (rtx operands[])
25627 rtx boolval, retval, mem, oldval, newval, cond;
25628 rtx label1, label2, x, mask, shift;
25629 machine_mode mode, orig_mode;
25630 enum memmodel mod_s, mod_f;
25631 bool is_weak;
25633 boolval = operands[0];
25634 retval = operands[1];
25635 mem = operands[2];
25636 oldval = operands[3];
25637 newval = operands[4];
25638 is_weak = (INTVAL (operands[5]) != 0);
25639 mod_s = memmodel_base (INTVAL (operands[6]));
25640 mod_f = memmodel_base (INTVAL (operands[7]));
25641 orig_mode = mode = GET_MODE (mem);
25643 mask = shift = NULL_RTX;
25644 if (mode == QImode || mode == HImode)
25646 /* Before power8, we didn't have access to lbarx/lharx, so we generate
25647 lwarx and shift/mask operations. With power8, we need to do the
25648 comparison in SImode, but the store is still done in QI/HImode. */
25649 oldval = convert_modes (SImode, mode, oldval, 1);
25651 if (!TARGET_SYNC_HI_QI)
25653 mem = rs6000_adjust_atomic_subword (mem, &shift, &mask);
25655 /* Shift and mask OLDVAL into position within the word. */
25656 oldval = expand_simple_binop (SImode, ASHIFT, oldval, shift,
25657 NULL_RTX, 1, OPTAB_LIB_WIDEN);
25659 /* Shift and mask NEWVAL into position within the word. */
25660 newval = convert_modes (SImode, mode, newval, 1);
25661 newval = expand_simple_binop (SImode, ASHIFT, newval, shift,
25662 NULL_RTX, 1, OPTAB_LIB_WIDEN);
25665 /* Prepare to adjust the return value. */
25666 retval = gen_reg_rtx (SImode);
25667 mode = SImode;
25669 else if (reg_overlap_mentioned_p (retval, oldval))
25670 oldval = copy_to_reg (oldval);
25672 if (mode != TImode && !reg_or_short_operand (oldval, mode))
25673 oldval = copy_to_mode_reg (mode, oldval);
25675 if (reg_overlap_mentioned_p (retval, newval))
25676 newval = copy_to_reg (newval);
25678 mem = rs6000_pre_atomic_barrier (mem, mod_s);
25680 label1 = NULL_RTX;
25681 if (!is_weak)
25683 label1 = gen_rtx_LABEL_REF (VOIDmode, gen_label_rtx ());
25684 emit_label (XEXP (label1, 0));
25686 label2 = gen_rtx_LABEL_REF (VOIDmode, gen_label_rtx ());
25688 emit_load_locked (mode, retval, mem);
25690 x = retval;
25691 if (mask)
25692 x = expand_simple_binop (SImode, AND, retval, mask,
25693 NULL_RTX, 1, OPTAB_LIB_WIDEN);
25695 cond = gen_reg_rtx (CCmode);
25696 /* If we have TImode, synthesize a comparison. */
25697 if (mode != TImode)
25698 x = gen_rtx_COMPARE (CCmode, x, oldval);
25699 else
25701 rtx xor1_result = gen_reg_rtx (DImode);
25702 rtx xor2_result = gen_reg_rtx (DImode);
25703 rtx or_result = gen_reg_rtx (DImode);
25704 rtx new_word0 = simplify_gen_subreg (DImode, x, TImode, 0);
25705 rtx new_word1 = simplify_gen_subreg (DImode, x, TImode, 8);
25706 rtx old_word0 = simplify_gen_subreg (DImode, oldval, TImode, 0);
25707 rtx old_word1 = simplify_gen_subreg (DImode, oldval, TImode, 8);
25709 emit_insn (gen_xordi3 (xor1_result, new_word0, old_word0));
25710 emit_insn (gen_xordi3 (xor2_result, new_word1, old_word1));
25711 emit_insn (gen_iordi3 (or_result, xor1_result, xor2_result));
25712 x = gen_rtx_COMPARE (CCmode, or_result, const0_rtx);
25715 emit_insn (gen_rtx_SET (cond, x));
25717 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
25718 emit_unlikely_jump (x, label2);
25720 x = newval;
25721 if (mask)
25722 x = rs6000_mask_atomic_subword (retval, newval, mask);
25724 emit_store_conditional (orig_mode, cond, mem, x);
25726 if (!is_weak)
25728 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
25729 emit_unlikely_jump (x, label1);
25732 if (!is_mm_relaxed (mod_f))
25733 emit_label (XEXP (label2, 0));
25735 rs6000_post_atomic_barrier (mod_s);
25737 if (is_mm_relaxed (mod_f))
25738 emit_label (XEXP (label2, 0));
25740 if (shift)
25741 rs6000_finish_atomic_subword (operands[1], retval, shift);
25742 else if (mode != GET_MODE (operands[1]))
25743 convert_move (operands[1], retval, 1);
25745 /* In all cases, CR0 contains EQ on success, and NE on failure. */
25746 x = gen_rtx_EQ (SImode, cond, const0_rtx);
25747 emit_insn (gen_rtx_SET (boolval, x));
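/* For an SImode strong compare-and-swap, the emitted sequence is thus
   roughly (a sketch; register names are illustrative):

      .L1:  lwarx   rR,0,rMEM       # load-locked
            cmpw    cr0,rR,rOLD
            bne-    cr0,.L2         # values differ: fail
            stwcx.  rNEW,0,rMEM     # store-conditional
            bne-    cr0,.L1         # reservation lost: retry
      .L2:                          # CR0 holds EQ on success

   A weak CAS simply omits the backward branch to .L1.  */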
25750 /* Expand an atomic exchange operation. */
25752 void
25753 rs6000_expand_atomic_exchange (rtx operands[])
25755 rtx retval, mem, val, cond;
25756 machine_mode mode;
25757 enum memmodel model;
25758 rtx label, x, mask, shift;
25760 retval = operands[0];
25761 mem = operands[1];
25762 val = operands[2];
25763 model = memmodel_base (INTVAL (operands[3]));
25764 mode = GET_MODE (mem);
25766 mask = shift = NULL_RTX;
25767 if (!TARGET_SYNC_HI_QI && (mode == QImode || mode == HImode))
25769 mem = rs6000_adjust_atomic_subword (mem, &shift, &mask);
25771 /* Shift and mask VAL into position within the word. */
25772 val = convert_modes (SImode, mode, val, 1);
25773 val = expand_simple_binop (SImode, ASHIFT, val, shift,
25774 NULL_RTX, 1, OPTAB_LIB_WIDEN);
25776 /* Prepare to adjust the return value. */
25777 retval = gen_reg_rtx (SImode);
25778 mode = SImode;
25781 mem = rs6000_pre_atomic_barrier (mem, model);
25783 label = gen_rtx_LABEL_REF (VOIDmode, gen_label_rtx ());
25784 emit_label (XEXP (label, 0));
25786 emit_load_locked (mode, retval, mem);
25788 x = val;
25789 if (mask)
25790 x = rs6000_mask_atomic_subword (retval, val, mask);
25792 cond = gen_reg_rtx (CCmode);
25793 emit_store_conditional (mode, cond, mem, x);
25795 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
25796 emit_unlikely_jump (x, label);
25798 rs6000_post_atomic_barrier (model);
25800 if (shift)
25801 rs6000_finish_atomic_subword (operands[0], retval, shift);
25804 /* Expand an atomic fetch-and-operate pattern. CODE is the binary operation
25805 to perform. MEM is the memory on which to operate. VAL is the second
25806 operand of the binary operator. BEFORE and AFTER are optional locations to
25807 return the value of MEM either before or after the operation. MODEL_RTX
25808 is a CONST_INT containing the memory model to use. */
25810 void
25811 rs6000_expand_atomic_op (enum rtx_code code, rtx mem, rtx val,
25812 rtx orig_before, rtx orig_after, rtx model_rtx)
25814 enum memmodel model = memmodel_base (INTVAL (model_rtx));
25815 machine_mode mode = GET_MODE (mem);
25816 machine_mode store_mode = mode;
25817 rtx label, x, cond, mask, shift;
25818 rtx before = orig_before, after = orig_after;
25820 mask = shift = NULL_RTX;
25821 /* On power8, we want to use SImode for the operation. On previous systems,
25822 use the operation in a subword and shift/mask to get the proper byte or
25823 halfword. */
25824 if (mode == QImode || mode == HImode)
25826 if (TARGET_SYNC_HI_QI)
25828 val = convert_modes (SImode, mode, val, 1);
25830 /* Prepare to adjust the return value. */
25831 before = gen_reg_rtx (SImode);
25832 if (after)
25833 after = gen_reg_rtx (SImode);
25834 mode = SImode;
25836 else
25838 mem = rs6000_adjust_atomic_subword (mem, &shift, &mask);
25840 /* Shift and mask VAL into position within the word. */
25841 val = convert_modes (SImode, mode, val, 1);
25842 val = expand_simple_binop (SImode, ASHIFT, val, shift,
25843 NULL_RTX, 1, OPTAB_LIB_WIDEN);
25845 switch (code)
25847 case IOR:
25848 case XOR:
25849 /* We've already zero-extended VAL. That is sufficient to
25850 make certain that it does not affect other bits. */
25851 mask = NULL;
25852 break;
25854 case AND:
25855 /* If we make certain that all of the other bits in VAL are
25856 set, that will be sufficient to not affect other bits. */
25857 x = gen_rtx_NOT (SImode, mask);
25858 x = gen_rtx_IOR (SImode, x, val);
25859 emit_insn (gen_rtx_SET (val, x));
25860 mask = NULL;
25861 break;
25863 case NOT:
25864 case PLUS:
25865 case MINUS:
25866 /* These will all affect bits outside the field and need
25867 adjustment via MASK within the loop. */
25868 break;
25870 default:
25871 gcc_unreachable ();
25874 /* Prepare to adjust the return value. */
25875 before = gen_reg_rtx (SImode);
25876 if (after)
25877 after = gen_reg_rtx (SImode);
25878 store_mode = mode = SImode;
25882 mem = rs6000_pre_atomic_barrier (mem, model);
25884 label = gen_label_rtx ();
25885 emit_label (label);
25886 label = gen_rtx_LABEL_REF (VOIDmode, label);
25888 if (before == NULL_RTX)
25889 before = gen_reg_rtx (mode);
25891 emit_load_locked (mode, before, mem);
25893 if (code == NOT)
25895 x = expand_simple_binop (mode, AND, before, val,
25896 NULL_RTX, 1, OPTAB_LIB_WIDEN);
25897 after = expand_simple_unop (mode, NOT, x, after, 1);
25899 else
25901 after = expand_simple_binop (mode, code, before, val,
25902 after, 1, OPTAB_LIB_WIDEN);
25905 x = after;
25906 if (mask)
25908 x = expand_simple_binop (SImode, AND, after, mask,
25909 NULL_RTX, 1, OPTAB_LIB_WIDEN);
25910 x = rs6000_mask_atomic_subword (before, x, mask);
25912 else if (store_mode != mode)
25913 x = convert_modes (store_mode, mode, x, 1);
25915 cond = gen_reg_rtx (CCmode);
25916 emit_store_conditional (store_mode, cond, mem, x);
25918 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
25919 emit_unlikely_jump (x, label);
25921 rs6000_post_atomic_barrier (model);
25923 if (shift)
25925 /* QImode/HImode on machines without lbarx/lharx where we do a lwarx and
25926 then do the calculations in a SImode register. */
25927 if (orig_before)
25928 rs6000_finish_atomic_subword (orig_before, before, shift);
25929 if (orig_after)
25930 rs6000_finish_atomic_subword (orig_after, after, shift);
25932 else if (store_mode != mode)
25934 /* QImode/HImode on machines with lbarx/lharx where we do the native
25935 operation and then do the calculations in a SImode register. */
25936 if (orig_before)
25937 convert_move (orig_before, before, 1);
25938 if (orig_after)
25939 convert_move (orig_after, after, 1);
25941 else if (orig_after && after != orig_after)
25942 emit_move_insn (orig_after, after);
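/* Note that the CODE == NOT path above implements fetch-nand with the
   GCC/C11 semantics: the value stored back is ~(BEFORE & VAL), not
   (~BEFORE & VAL), matching __atomic_fetch_nand.  */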
25945 /* Emit instructions to move SRC to DST. Called by splitters for
25946 multi-register moves. It will emit at most one instruction for
25947 each register that is accessed; that is, it won't emit li/lis pairs
25948 (or equivalent for 64-bit code). One of SRC or DST must be a hard
25949 register. */
25951 void
25952 rs6000_split_multireg_move (rtx dst, rtx src)
25954 /* The register number of the first register being moved. */
25955 int reg;
25956 /* The mode that is to be moved. */
25957 machine_mode mode;
25958 /* The mode that the move is being done in, and its size. */
25959 machine_mode reg_mode;
25960 int reg_mode_size;
25961 /* The number of registers that will be moved. */
25962 int nregs;
25964 reg = REG_P (dst) ? REGNO (dst) : REGNO (src);
25965 mode = GET_MODE (dst);
25966 nregs = hard_regno_nregs[reg][mode];
25967 if (FP_REGNO_P (reg))
25968 reg_mode = DECIMAL_FLOAT_MODE_P (mode) ? DDmode :
25969 ((TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT) ? DFmode : SFmode);
25970 else if (ALTIVEC_REGNO_P (reg))
25971 reg_mode = V16QImode;
25972 else if (TARGET_E500_DOUBLE && FLOAT128_2REG_P (mode))
25973 reg_mode = DFmode;
25974 else
25975 reg_mode = word_mode;
25976 reg_mode_size = GET_MODE_SIZE (reg_mode);
25978 gcc_assert (reg_mode_size * nregs == GET_MODE_SIZE (mode));
25980 /* TDmode residing in FP registers is special, since the ISA requires that
25981 the lower-numbered word of a register pair is always the most significant
25982 word, even in little-endian mode. This does not match the usual subreg
25983 semantics, so we cannot use simplify_gen_subreg in those cases. Access
25984 the appropriate constituent registers "by hand" in little-endian mode.
25986 Note we do not need to check for destructive overlap here since TDmode
25987 can only reside in even/odd register pairs. */
25988 if (FP_REGNO_P (reg) && DECIMAL_FLOAT_MODE_P (mode) && !BYTES_BIG_ENDIAN)
25990 rtx p_src, p_dst;
25991 int i;
25993 for (i = 0; i < nregs; i++)
25995 if (REG_P (src) && FP_REGNO_P (REGNO (src)))
25996 p_src = gen_rtx_REG (reg_mode, REGNO (src) + nregs - 1 - i);
25997 else
25998 p_src = simplify_gen_subreg (reg_mode, src, mode,
25999 i * reg_mode_size);
26001 if (REG_P (dst) && FP_REGNO_P (REGNO (dst)))
26002 p_dst = gen_rtx_REG (reg_mode, REGNO (dst) + nregs - 1 - i);
26003 else
26004 p_dst = simplify_gen_subreg (reg_mode, dst, mode,
26005 i * reg_mode_size);
26007 emit_insn (gen_rtx_SET (p_dst, p_src));
26010 return;
26013 if (REG_P (src) && REG_P (dst) && (REGNO (src) < REGNO (dst)))
26015 /* Move register range backwards, if we might have destructive
26016 overlap. */
26017 int i;
26018 for (i = nregs - 1; i >= 0; i--)
26019 emit_insn (gen_rtx_SET (simplify_gen_subreg (reg_mode, dst, mode,
26020 i * reg_mode_size),
26021 simplify_gen_subreg (reg_mode, src, mode,
26022 i * reg_mode_size)));
26024 else
26026 int i;
26027 int j = -1;
26028 bool used_update = false;
26029 rtx restore_basereg = NULL_RTX;
26031 if (MEM_P (src) && INT_REGNO_P (reg))
26033 rtx breg;
26035 if (GET_CODE (XEXP (src, 0)) == PRE_INC
26036 || GET_CODE (XEXP (src, 0)) == PRE_DEC)
26038 rtx delta_rtx;
26039 breg = XEXP (XEXP (src, 0), 0);
26040 delta_rtx = (GET_CODE (XEXP (src, 0)) == PRE_INC
26041 ? GEN_INT (GET_MODE_SIZE (GET_MODE (src)))
26042 : GEN_INT (-GET_MODE_SIZE (GET_MODE (src))));
26043 emit_insn (gen_add3_insn (breg, breg, delta_rtx));
26044 src = replace_equiv_address (src, breg);
26046 else if (! rs6000_offsettable_memref_p (src, reg_mode))
26048 if (GET_CODE (XEXP (src, 0)) == PRE_MODIFY)
26050 rtx basereg = XEXP (XEXP (src, 0), 0);
26051 if (TARGET_UPDATE)
26053 rtx ndst = simplify_gen_subreg (reg_mode, dst, mode, 0);
26054 emit_insn (gen_rtx_SET (ndst,
26055 gen_rtx_MEM (reg_mode,
26056 XEXP (src, 0))));
26057 used_update = true;
26059 else
26060 emit_insn (gen_rtx_SET (basereg,
26061 XEXP (XEXP (src, 0), 1)));
26062 src = replace_equiv_address (src, basereg);
26064 else
26066 rtx basereg = gen_rtx_REG (Pmode, reg);
26067 emit_insn (gen_rtx_SET (basereg, XEXP (src, 0)));
26068 src = replace_equiv_address (src, basereg);
26072 breg = XEXP (src, 0);
26073 if (GET_CODE (breg) == PLUS || GET_CODE (breg) == LO_SUM)
26074 breg = XEXP (breg, 0);
26076 /* If the base register we are using to address memory is
26077 also a destination reg, then change that register last. */
26078 if (REG_P (breg)
26079 && REGNO (breg) >= REGNO (dst)
26080 && REGNO (breg) < REGNO (dst) + nregs)
26081 j = REGNO (breg) - REGNO (dst);
26083 else if (MEM_P (dst) && INT_REGNO_P (reg))
26085 rtx breg;
26087 if (GET_CODE (XEXP (dst, 0)) == PRE_INC
26088 || GET_CODE (XEXP (dst, 0)) == PRE_DEC)
26090 rtx delta_rtx;
26091 breg = XEXP (XEXP (dst, 0), 0);
26092 delta_rtx = (GET_CODE (XEXP (dst, 0)) == PRE_INC
26093 ? GEN_INT (GET_MODE_SIZE (GET_MODE (dst)))
26094 : GEN_INT (-GET_MODE_SIZE (GET_MODE (dst))));
26096 /* We have to update the breg before doing the store.
26097 Use store with update, if available. */
26099 if (TARGET_UPDATE)
26101 rtx nsrc = simplify_gen_subreg (reg_mode, src, mode, 0);
26102 emit_insn (TARGET_32BIT
26103 ? (TARGET_POWERPC64
26104 ? gen_movdi_si_update (breg, breg, delta_rtx, nsrc)
26105 : gen_movsi_update (breg, breg, delta_rtx, nsrc))
26106 : gen_movdi_di_update (breg, breg, delta_rtx, nsrc));
26107 used_update = true;
26109 else
26110 emit_insn (gen_add3_insn (breg, breg, delta_rtx));
26111 dst = replace_equiv_address (dst, breg);
26113 else if (!rs6000_offsettable_memref_p (dst, reg_mode)
26114 && GET_CODE (XEXP (dst, 0)) != LO_SUM)
26116 if (GET_CODE (XEXP (dst, 0)) == PRE_MODIFY)
26118 rtx basereg = XEXP (XEXP (dst, 0), 0);
26119 if (TARGET_UPDATE)
26121 rtx nsrc = simplify_gen_subreg (reg_mode, src, mode, 0);
26122 emit_insn (gen_rtx_SET (gen_rtx_MEM (reg_mode,
26123 XEXP (dst, 0)),
26124 nsrc));
26125 used_update = true;
26127 else
26128 emit_insn (gen_rtx_SET (basereg,
26129 XEXP (XEXP (dst, 0), 1)));
26130 dst = replace_equiv_address (dst, basereg);
26132 else
26134 rtx basereg = XEXP (XEXP (dst, 0), 0);
26135 rtx offsetreg = XEXP (XEXP (dst, 0), 1);
26136 gcc_assert (GET_CODE (XEXP (dst, 0)) == PLUS
26137 && REG_P (basereg)
26138 && REG_P (offsetreg)
26139 && REGNO (basereg) != REGNO (offsetreg));
26140 if (REGNO (basereg) == 0)
26142 rtx tmp = offsetreg;
26143 offsetreg = basereg;
26144 basereg = tmp;
26146 emit_insn (gen_add3_insn (basereg, basereg, offsetreg));
26147 restore_basereg = gen_sub3_insn (basereg, basereg, offsetreg);
26148 dst = replace_equiv_address (dst, basereg);
26151 else if (GET_CODE (XEXP (dst, 0)) != LO_SUM)
26152 gcc_assert (rs6000_offsettable_memref_p (dst, reg_mode));
26155 for (i = 0; i < nregs; i++)
26157 /* Calculate index to next subword. */
26158 ++j;
26159 if (j == nregs)
26160 j = 0;
26162 /* If the compiler already emitted the move of the first word by a
26163 store with update, there is no need to do anything. */
26164 if (j == 0 && used_update)
26165 continue;
26167 emit_insn (gen_rtx_SET (simplify_gen_subreg (reg_mode, dst, mode,
26168 j * reg_mode_size),
26169 simplify_gen_subreg (reg_mode, src, mode,
26170 j * reg_mode_size)));
26172 if (restore_basereg != NULL_RTX)
26173 emit_insn (restore_basereg);
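/* Example of the overlap handling: splitting a 32-bit DImode move
   from r3:r4 into r4:r5 (REGNO (src) < REGNO (dst)) walks backwards,
   emitting "mr r5,r4" before "mr r4,r3" so that r4 is read before it
   is overwritten.  */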
26178 /* This page contains routines that are used to determine what the
26179 function prologue and epilogue code will do and write them out. */
26181 static inline bool
26182 save_reg_p (int r)
26184 return !call_used_regs[r] && df_regs_ever_live_p (r);
26187 /* Determine whether the gp REG is really used. */
26189 static bool
26190 rs6000_reg_live_or_pic_offset_p (int reg)
26192 /* We need to mark the PIC offset register live for the same conditions
26193 as it is set up; otherwise it won't be saved before we clobber it. */
26195 if (reg == RS6000_PIC_OFFSET_TABLE_REGNUM && !TARGET_SINGLE_PIC_BASE)
26197 if (TARGET_TOC && TARGET_MINIMAL_TOC
26198 && (crtl->calls_eh_return
26199 || df_regs_ever_live_p (reg)
26200 || !constant_pool_empty_p ()))
26201 return true;
26203 if ((DEFAULT_ABI == ABI_V4 || DEFAULT_ABI == ABI_DARWIN)
26204 && flag_pic)
26205 return true;
26208 /* If the function calls eh_return, claim that all the registers that
26209 would otherwise be checked for liveness are used. */
26211 return ((crtl->calls_eh_return || df_regs_ever_live_p (reg))
26212 && !call_used_regs[reg]);
26215 /* Return the first fixed-point register that is required to be
26216 saved. 32 if none. */
26218 static int
26219 first_reg_to_save (void)
26221 int first_reg;
26223 /* Find lowest numbered live register. */
26224 for (first_reg = 13; first_reg <= 31; first_reg++)
26225 if (save_reg_p (first_reg))
26226 break;
26228 if (first_reg > RS6000_PIC_OFFSET_TABLE_REGNUM
26229 && ((DEFAULT_ABI == ABI_V4 && flag_pic != 0)
26230 || (DEFAULT_ABI == ABI_DARWIN && flag_pic)
26231 || (TARGET_TOC && TARGET_MINIMAL_TOC))
26232 && rs6000_reg_live_or_pic_offset_p (RS6000_PIC_OFFSET_TABLE_REGNUM))
26233 first_reg = RS6000_PIC_OFFSET_TABLE_REGNUM;
26235 #if TARGET_MACHO
26236 if (flag_pic
26237 && crtl->uses_pic_offset_table
26238 && first_reg > RS6000_PIC_OFFSET_TABLE_REGNUM)
26239 return RS6000_PIC_OFFSET_TABLE_REGNUM;
26240 #endif
26242 return first_reg;
26245 /* Similar, for FP regs. */
26247 static int
26248 first_fp_reg_to_save (void)
26250 int first_reg;
26252 /* Find lowest numbered live register. */
26253 for (first_reg = 14 + 32; first_reg <= 63; first_reg++)
26254 if (save_reg_p (first_reg))
26255 break;
26257 return first_reg;
26260 /* Similar, for AltiVec regs. */
26262 static int
26263 first_altivec_reg_to_save (void)
26265 int i;
26267 /* Stack frame remains as is unless we are in the AltiVec ABI. */
26268 if (! TARGET_ALTIVEC_ABI)
26269 return LAST_ALTIVEC_REGNO + 1;
26271 /* On Darwin, the unwind routines are compiled without
26272 TARGET_ALTIVEC, and use save_world to save/restore the
26273 altivec registers when necessary. */
26274 if (DEFAULT_ABI == ABI_DARWIN && crtl->calls_eh_return
26275 && ! TARGET_ALTIVEC)
26276 return FIRST_ALTIVEC_REGNO + 20;
26278 /* Find lowest numbered live register. */
26279 for (i = FIRST_ALTIVEC_REGNO + 20; i <= LAST_ALTIVEC_REGNO; ++i)
26280 if (save_reg_p (i))
26281 break;
26283 return i;
26286 /* Return a 32-bit mask of the AltiVec registers we need to set in
26287 VRSAVE. Bit n of the return value is 1 if Vn is live. The MSB in
26288 the 32-bit word is 0. */
26290 static unsigned int
26291 compute_vrsave_mask (void)
26293 unsigned int i, mask = 0;
26295 /* On Darwin, the unwind routines are compiled without
26296 TARGET_ALTIVEC, and use save_world to save/restore the
26297 call-saved altivec registers when necessary. */
26298 if (DEFAULT_ABI == ABI_DARWIN && crtl->calls_eh_return
26299 && ! TARGET_ALTIVEC)
26300 mask |= 0xFFF;
26302 /* First, find out if we use _any_ altivec registers. */
26303 for (i = FIRST_ALTIVEC_REGNO; i <= LAST_ALTIVEC_REGNO; ++i)
26304 if (df_regs_ever_live_p (i))
26305 mask |= ALTIVEC_REG_BIT (i);
26307 if (mask == 0)
26308 return mask;
26310 /* Next, remove the argument registers from the set. These must
26311 be in the VRSAVE mask set by the caller, so we don't need to add
26312 them in again. More importantly, the mask we compute here is
26313 used to generate CLOBBERs in the set_vrsave insn, and we do not
26314 wish the argument registers to die. */
26315 for (i = ALTIVEC_ARG_MIN_REG; i < (unsigned) crtl->args.info.vregno; i++)
26316 mask &= ~ALTIVEC_REG_BIT (i);
26318 /* Similarly, remove the return value from the set. */
26320 bool yes = false;
26321 diddle_return_value (is_altivec_return_reg, &yes);
26322 if (yes)
26323 mask &= ~ALTIVEC_REG_BIT (ALTIVEC_ARG_RETURN);
26326 return mask;
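/* VRSAVE uses IBM bit numbering, so bit 0 is the MSB: assuming the
   usual definition ALTIVEC_REG_BIT (Vn) == 0x80000000 >> n, a live
   V20 contributes 0x00000800, and the Darwin save_world value 0xFFF
   above is exactly the V20..V31 block.  */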
26329 /* For a very restricted set of circumstances, we can cut down the
26330 size of prologues/epilogues by calling our own save/restore-the-world
26331 routines. */
26333 static void
26334 compute_save_world_info (rs6000_stack_t *info)
26336 info->world_save_p = 1;
26337 info->world_save_p
26338 = (WORLD_SAVE_P (info)
26339 && DEFAULT_ABI == ABI_DARWIN
26340 && !cfun->has_nonlocal_label
26341 && info->first_fp_reg_save == FIRST_SAVED_FP_REGNO
26342 && info->first_gp_reg_save == FIRST_SAVED_GP_REGNO
26343 && info->first_altivec_reg_save == FIRST_SAVED_ALTIVEC_REGNO
26344 && info->cr_save_p);
26346 /* This will not work in conjunction with sibcalls. Make sure there
26347 are none. (This check is expensive, but seldom executed.) */
26348 if (WORLD_SAVE_P (info))
26350 rtx_insn *insn;
26351 for (insn = get_last_insn_anywhere (); insn; insn = PREV_INSN (insn))
26352 if (CALL_P (insn) && SIBLING_CALL_P (insn))
26354 info->world_save_p = 0;
26355 break;
26359 if (WORLD_SAVE_P (info))
26361 /* Even if we're not touching VRsave, make sure there's room on the
26362 stack for it, if it looks like we're calling SAVE_WORLD, which
26363 will attempt to save it. */
26364 info->vrsave_size = 4;
26366 /* If we are going to save the world, we need to save the link register too. */
26367 info->lr_save_p = 1;
26369 /* "Save" the VRsave register too if we're saving the world. */
26370 if (info->vrsave_mask == 0)
26371 info->vrsave_mask = compute_vrsave_mask ();
26373 /* Because the Darwin register save/restore routines only handle
26374 F14 .. F31 and V20 .. V31 as per the ABI, perform a consistency
26375 check. */
26376 gcc_assert (info->first_fp_reg_save >= FIRST_SAVED_FP_REGNO
26377 && (info->first_altivec_reg_save
26378 >= FIRST_SAVED_ALTIVEC_REGNO));
26381 return;
26385 static void
26386 is_altivec_return_reg (rtx reg, void *xyes)
26388 bool *yes = (bool *) xyes;
26389 if (REGNO (reg) == ALTIVEC_ARG_RETURN)
26390 *yes = true;
26394 /* Return whether REG is a global user reg or has been specified by
26395 -ffixed-REG. We should not restore these, and so cannot use
26396 lmw or out-of-line restore functions if there are any. We also
26397 can't save them (well, emit frame notes for them), because frame
26398 unwinding during exception handling will restore saved registers. */
26400 static bool
26401 fixed_reg_p (int reg)
26403 /* Ignore fixed_regs[RS6000_PIC_OFFSET_TABLE_REGNUM] when the
26404 backend sets it, overriding anything the user might have given. */
26405 if (reg == RS6000_PIC_OFFSET_TABLE_REGNUM
26406 && ((DEFAULT_ABI == ABI_V4 && flag_pic)
26407 || (DEFAULT_ABI == ABI_DARWIN && flag_pic)
26408 || (TARGET_TOC && TARGET_MINIMAL_TOC)))
26409 return false;
26411 return fixed_regs[reg];
26414 /* Determine the strategy for savings/restoring registers. */
26416 enum {
26417 SAVE_MULTIPLE = 0x1,
26418 SAVE_INLINE_GPRS = 0x2,
26419 SAVE_INLINE_FPRS = 0x4,
26420 SAVE_NOINLINE_GPRS_SAVES_LR = 0x8,
26421 SAVE_NOINLINE_FPRS_SAVES_LR = 0x10,
26422 SAVE_INLINE_VRS = 0x20,
26423 REST_MULTIPLE = 0x100,
26424 REST_INLINE_GPRS = 0x200,
26425 REST_INLINE_FPRS = 0x400,
26426 REST_NOINLINE_FPRS_DOESNT_RESTORE_LR = 0x800,
26427 REST_INLINE_VRS = 0x1000
26430 static int
26431 rs6000_savres_strategy (rs6000_stack_t *info,
26432 bool using_static_chain_p)
26434 int strategy = 0;
26436 /* Select between in-line and out-of-line save and restore of regs.
26437 First, all the obvious cases where we don't use out-of-line. */
26438 if (crtl->calls_eh_return
26439 || cfun->machine->ra_need_lr)
26440 strategy |= (SAVE_INLINE_FPRS | REST_INLINE_FPRS
26441 | SAVE_INLINE_GPRS | REST_INLINE_GPRS
26442 | SAVE_INLINE_VRS | REST_INLINE_VRS);
26444 if (info->first_gp_reg_save == 32)
26445 strategy |= SAVE_INLINE_GPRS | REST_INLINE_GPRS;
26447 if (info->first_fp_reg_save == 64
26448 /* The out-of-line FP routines use double-precision stores;
26449 we can't use those routines if we don't have such stores. */
26450 || (TARGET_HARD_FLOAT && !TARGET_DOUBLE_FLOAT))
26451 strategy |= SAVE_INLINE_FPRS | REST_INLINE_FPRS;
26453 if (info->first_altivec_reg_save == LAST_ALTIVEC_REGNO + 1)
26454 strategy |= SAVE_INLINE_VRS | REST_INLINE_VRS;
26456 /* Define cutoff for using out-of-line functions to save registers. */
26457 if (DEFAULT_ABI == ABI_V4 || TARGET_ELF)
26459 if (!optimize_size)
26461 strategy |= SAVE_INLINE_FPRS | REST_INLINE_FPRS;
26462 strategy |= SAVE_INLINE_GPRS | REST_INLINE_GPRS;
26463 strategy |= SAVE_INLINE_VRS | REST_INLINE_VRS;
26465 else
26467 /* Prefer out-of-line restore if it will exit. */
26468 if (info->first_fp_reg_save > 61)
26469 strategy |= SAVE_INLINE_FPRS;
26470 if (info->first_gp_reg_save > 29)
26472 if (info->first_fp_reg_save == 64)
26473 strategy |= SAVE_INLINE_GPRS;
26474 else
26475 strategy |= SAVE_INLINE_GPRS | REST_INLINE_GPRS;
26477 if (info->first_altivec_reg_save == LAST_ALTIVEC_REGNO)
26478 strategy |= SAVE_INLINE_VRS | REST_INLINE_VRS;
26481 else if (DEFAULT_ABI == ABI_DARWIN)
26483 if (info->first_fp_reg_save > 60)
26484 strategy |= SAVE_INLINE_FPRS | REST_INLINE_FPRS;
26485 if (info->first_gp_reg_save > 29)
26486 strategy |= SAVE_INLINE_GPRS | REST_INLINE_GPRS;
26487 strategy |= SAVE_INLINE_VRS | REST_INLINE_VRS;
26489 else
26491 gcc_checking_assert (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2);
26492 if ((flag_shrink_wrap_separate && optimize_function_for_speed_p (cfun))
26493 || info->first_fp_reg_save > 61)
26494 strategy |= SAVE_INLINE_FPRS | REST_INLINE_FPRS;
26495 strategy |= SAVE_INLINE_GPRS | REST_INLINE_GPRS;
26496 strategy |= SAVE_INLINE_VRS | REST_INLINE_VRS;
26499 /* Don't bother to try to save things out-of-line if r11 is occupied
26500 by the static chain. It would require too much fiddling and the
26501 static chain is rarely used anyway. FPRs are saved w.r.t. the stack
26502 pointer on Darwin, and AIX uses r1 or r12. */
26503 if (using_static_chain_p
26504 && (DEFAULT_ABI == ABI_V4 || DEFAULT_ABI == ABI_DARWIN))
26505 strategy |= ((DEFAULT_ABI == ABI_DARWIN ? 0 : SAVE_INLINE_FPRS)
26506 | SAVE_INLINE_GPRS
26507 | SAVE_INLINE_VRS);
26509 /* Saving CR interferes with the exit routines used on the SPE, so
26510 just punt here. */
26511 if (TARGET_SPE_ABI
26512 && info->spe_64bit_regs_used
26513 && info->cr_save_p)
26514 strategy |= REST_INLINE_GPRS;
26516 /* We can only use the out-of-line routines to restore fprs if we've
26517 saved all the registers from first_fp_reg_save in the prologue.
26518 Otherwise, we risk loading garbage. Of course, if we have saved
26519 out-of-line then we know we haven't skipped any fprs. */
26520 if ((strategy & SAVE_INLINE_FPRS)
26521 && !(strategy & REST_INLINE_FPRS))
26523 int i;
26525 for (i = info->first_fp_reg_save; i < 64; i++)
26526 if (fixed_regs[i] || !save_reg_p (i))
26528 strategy |= REST_INLINE_FPRS;
26529 break;
26533 /* Similarly, for altivec regs. */
26534 if ((strategy & SAVE_INLINE_VRS)
26535 && !(strategy & REST_INLINE_VRS))
26537 int i;
26539 for (i = info->first_altivec_reg_save; i < LAST_ALTIVEC_REGNO + 1; i++)
26540 if (fixed_regs[i] || !save_reg_p (i))
26542 strategy |= REST_INLINE_VRS;
26543 break;
26547 /* info->lr_save_p isn't yet set if the only reason lr needs to be
26548 saved is an out-of-line save or restore. Set up the value for
26549 the next test (excluding out-of-line gprs). */
26550 bool lr_save_p = (info->lr_save_p
26551 || !(strategy & SAVE_INLINE_FPRS)
26552 || !(strategy & SAVE_INLINE_VRS)
26553 || !(strategy & REST_INLINE_FPRS)
26554 || !(strategy & REST_INLINE_VRS));
26556 if (TARGET_MULTIPLE
26557 && !TARGET_POWERPC64
26558 && !(TARGET_SPE_ABI && info->spe_64bit_regs_used)
26559 && info->first_gp_reg_save < 31
26560 && !(flag_shrink_wrap
26561 && flag_shrink_wrap_separate
26562 && optimize_function_for_speed_p (cfun)))
26564 /* Prefer store multiple for saves over out-of-line routines,
26565 since the store-multiple instruction will always be smaller. */
26566 strategy |= SAVE_INLINE_GPRS | SAVE_MULTIPLE;
26568 /* The situation is more complicated with load multiple. We'd
26569 prefer to use the out-of-line routines for restores, since the
26570 "exit" out-of-line routines can handle the restore of LR and the
26571 frame teardown. However it doesn't make sense to use the
26572 out-of-line routine if that is the only reason we'd need to save
26573 LR, and we can't use the "exit" out-of-line gpr restore if we
26574 have saved some fprs; in those cases it is advantageous to use
26575 load multiple when available. */
26576 if (info->first_fp_reg_save != 64 || !lr_save_p)
26577 strategy |= REST_INLINE_GPRS | REST_MULTIPLE;
26580 /* Using the "exit" out-of-line routine does not improve code size
26581 if using it would require lr to be saved and if only saving one
26582 or two gprs. */
26583 else if (!lr_save_p && info->first_gp_reg_save > 29)
26584 strategy |= SAVE_INLINE_GPRS | REST_INLINE_GPRS;
26586 /* We can only use load multiple or the out-of-line routines to
26587 restore gprs if we've saved all the registers from
26588 first_gp_reg_save. Otherwise, we risk loading garbage.
26589 Of course, if we have saved out-of-line or used stmw then we know
26590 we haven't skipped any gprs. */
26591 if ((strategy & (SAVE_INLINE_GPRS | SAVE_MULTIPLE)) == SAVE_INLINE_GPRS
26592 && (strategy & (REST_INLINE_GPRS | REST_MULTIPLE)) != REST_INLINE_GPRS)
26594 int i;
26596 for (i = info->first_gp_reg_save; i < 32; i++)
26597 if (fixed_reg_p (i) || !save_reg_p (i))
26599 strategy |= REST_INLINE_GPRS;
26600 strategy &= ~REST_MULTIPLE;
26601 break;
26605 if (TARGET_ELF && TARGET_64BIT)
26607 if (!(strategy & SAVE_INLINE_FPRS))
26608 strategy |= SAVE_NOINLINE_FPRS_SAVES_LR;
26609 else if (!(strategy & SAVE_INLINE_GPRS)
26610 && info->first_fp_reg_save == 64)
26611 strategy |= SAVE_NOINLINE_GPRS_SAVES_LR;
26613 else if (TARGET_AIX && !(strategy & REST_INLINE_FPRS))
26614 strategy |= REST_NOINLINE_FPRS_DOESNT_RESTORE_LR;
26616 if (TARGET_MACHO && !(strategy & SAVE_INLINE_FPRS))
26617 strategy |= SAVE_NOINLINE_FPRS_SAVES_LR;
26619 return strategy;
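/* When the out-of-line path wins, the prologue and epilogue call
   shared libgcc stubs instead of emitting per-register loads and
   stores; assuming the conventional names, saving r29..r31 on 64-bit
   ELF goes through _savegpr0_29 and is undone by _restgpr0_29.  The
   *_SAVES_LR flags above record that the chosen save stub stores LR
   itself; REST_NOINLINE_FPRS_DOESNT_RESTORE_LR records that the AIX
   FPR restore stub leaves LR alone.  */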
26622 /* Calculate the stack information for the current function. This is
26623 complicated by having two separate calling sequences, the AIX calling
26624 sequence and the V.4 calling sequence.
26626 AIX (and Darwin/Mac OS X) stack frames look like:
26627 32-bit 64-bit
26628 SP----> +---------------------------------------+
26629 | back chain to caller | 0 0
26630 +---------------------------------------+
26631 | saved CR | 4 8 (8-11)
26632 +---------------------------------------+
26633 | saved LR | 8 16
26634 +---------------------------------------+
26635 | reserved for compilers | 12 24
26636 +---------------------------------------+
26637 | reserved for binders | 16 32
26638 +---------------------------------------+
26639 | saved TOC pointer | 20 40
26640 +---------------------------------------+
26641 | Parameter save area (+padding*) (P) | 24 48
26642 +---------------------------------------+
26643 | Alloca space (A) | 24+P etc.
26644 +---------------------------------------+
26645 | Local variable space (L) | 24+P+A
26646 +---------------------------------------+
26647 | Float/int conversion temporary (X) | 24+P+A+L
26648 +---------------------------------------+
26649 | Save area for AltiVec registers (W) | 24+P+A+L+X
26650 +---------------------------------------+
26651 | AltiVec alignment padding (Y) | 24+P+A+L+X+W
26652 +---------------------------------------+
26653 | Save area for VRSAVE register (Z) | 24+P+A+L+X+W+Y
26654 +---------------------------------------+
26655 | Save area for GP registers (G) | 24+P+A+L+X+W+Y+Z
26656 +---------------------------------------+
26657 | Save area for FP registers (F) | 24+P+A+L+X+W+Y+Z+G
26658 +---------------------------------------+
26659 old SP->| back chain to caller's caller |
26660 +---------------------------------------+
26662 * If the alloca area is present, the parameter save area is
26663 padded so that the alloca area starts 16-byte aligned.
26665 The required alignment for AIX configurations is two words (i.e., 8
26666 or 16 bytes).
26668 The ELFv2 ABI is a variant of the AIX ABI. Stack frames look like:
26670 SP----> +---------------------------------------+
26671 | Back chain to caller | 0
26672 +---------------------------------------+
26673 | Save area for CR | 8
26674 +---------------------------------------+
26675 | Saved LR | 16
26676 +---------------------------------------+
26677 | Saved TOC pointer | 24
26678 +---------------------------------------+
26679 | Parameter save area (+padding*) (P) | 32
26680 +---------------------------------------+
26681 | Alloca space (A) | 32+P
26682 +---------------------------------------+
26683 | Local variable space (L) | 32+P+A
26684 +---------------------------------------+
26685 | Save area for AltiVec registers (W) | 32+P+A+L
26686 +---------------------------------------+
26687 | AltiVec alignment padding (Y) | 32+P+A+L+W
26688 +---------------------------------------+
26689 | Save area for GP registers (G) | 32+P+A+L+W+Y
26690 +---------------------------------------+
26691 | Save area for FP registers (F) | 32+P+A+L+W+Y+G
26692 +---------------------------------------+
26693 old SP->| back chain to caller's caller | 32+P+A+L+W+Y+G+F
26694 +---------------------------------------+
26696 * If the alloca area is present, the parameter save area is
26697 padded so that the alloca area starts 16-byte aligned.
26699 V.4 stack frames look like:
26701 SP----> +---------------------------------------+
26702 | back chain to caller | 0
26703 +---------------------------------------+
26704 | caller's saved LR | 4
26705 +---------------------------------------+
26706 | Parameter save area (+padding*) (P) | 8
26707 +---------------------------------------+
26708 | Alloca space (A) | 8+P
26709 +---------------------------------------+
26710 | Varargs save area (V) | 8+P+A
26711 +---------------------------------------+
26712 | Local variable space (L) | 8+P+A+V
26713 +---------------------------------------+
26714 | Float/int conversion temporary (X) | 8+P+A+V+L
26715 +---------------------------------------+
26716 | Save area for AltiVec registers (W) | 8+P+A+V+L+X
26717 +---------------------------------------+
26718 | AltiVec alignment padding (Y) | 8+P+A+V+L+X+W
26719 +---------------------------------------+
26720 | Save area for VRSAVE register (Z) | 8+P+A+V+L+X+W+Y
26721 +---------------------------------------+
26722 | SPE: area for 64-bit GP registers |
26723 +---------------------------------------+
26724 | SPE alignment padding |
26725 +---------------------------------------+
26726 | saved CR (C) | 8+P+A+V+L+X+W+Y+Z
26727 +---------------------------------------+
26728 | Save area for GP registers (G) | 8+P+A+V+L+X+W+Y+Z+C
26729 +---------------------------------------+
26730 | Save area for FP registers (F) | 8+P+A+V+L+X+W+Y+Z+C+G
26731 +---------------------------------------+
26732 old SP->| back chain to caller's caller |
26733 +---------------------------------------+
26735 * If the alloca area is present and the required alignment is
26736 16 bytes, the parameter save area is padded so that the
26737 alloca area starts 16-byte aligned.
26739 The required alignment for V.4 is 16 bytes, or 8 bytes if -meabi is
26740 given. (But note below and in sysv4.h that we require only 8 and
26741 may round up the size of our stack frame anyway. The historical
26742 reason is early versions of powerpc-linux which didn't properly
26743 align the stack at program startup. A happy side-effect is that
26744 -mno-eabi libraries can be used with -meabi programs.)
26746 The EABI configuration defaults to the V.4 layout. However,
26747 the stack alignment requirements may differ. If -mno-eabi is not
26748 given, the required stack alignment is 8 bytes; if -mno-eabi is
26749 given, the required alignment is 16 bytes. (But see V.4 comment
26750 above.) */
26752 #ifndef ABI_STACK_BOUNDARY
26753 #define ABI_STACK_BOUNDARY STACK_BOUNDARY
26754 #endif
26756 static rs6000_stack_t *
26757 rs6000_stack_info (void)
26759 /* We should never be called for thunks; we are not set up for that. */
26760 gcc_assert (!cfun->is_thunk);
26762 rs6000_stack_t *info = &stack_info;
26763 int reg_size = TARGET_32BIT ? 4 : 8;
26764 int ehrd_size;
26765 int ehcr_size;
26766 int save_align;
26767 int first_gp;
26768 HOST_WIDE_INT non_fixed_size;
26769 bool using_static_chain_p;
26771 if (reload_completed && info->reload_completed)
26772 return info;
26774 memset (info, 0, sizeof (*info));
26775 info->reload_completed = reload_completed;
26777 if (TARGET_SPE)
26779 /* Cache value so we don't rescan instruction chain over and over. */
26780 if (cfun->machine->spe_insn_chain_scanned_p == 0)
26781 cfun->machine->spe_insn_chain_scanned_p
26782 = spe_func_has_64bit_regs_p () + 1;
26783 info->spe_64bit_regs_used = cfun->machine->spe_insn_chain_scanned_p - 1;
26786 /* Select which calling sequence. */
26787 info->abi = DEFAULT_ABI;
26789 /* Calculate which registers need to be saved & save area size. */
26790 info->first_gp_reg_save = first_reg_to_save ();
26791 /* Assume that we will have to save RS6000_PIC_OFFSET_TABLE_REGNUM,
26792 even if it currently looks like we won't. Reload may need it to
26793 get at a constant; if so, it will have already created a constant
26794 pool entry for it. */
26795 if (((TARGET_TOC && TARGET_MINIMAL_TOC)
26796 || (flag_pic == 1 && DEFAULT_ABI == ABI_V4)
26797 || (flag_pic && DEFAULT_ABI == ABI_DARWIN))
26798 && crtl->uses_const_pool
26799 && info->first_gp_reg_save > RS6000_PIC_OFFSET_TABLE_REGNUM)
26800 first_gp = RS6000_PIC_OFFSET_TABLE_REGNUM;
26801 else
26802 first_gp = info->first_gp_reg_save;
26804 info->gp_size = reg_size * (32 - first_gp);
26806 /* For the SPE, we have an additional upper 32-bits on each GPR.
26807 Ideally we should save the entire 64-bits only when the upper
26808 half is used in SIMD instructions. Since we only record
26809 registers live (not the size they are used in), this proves
26810 difficult because we'd have to traverse the instruction chain at
26811 the right time, taking reload into account. This is a real pain,
26812 so we opt to always save the GPRs in 64-bits if even one register
26813 gets used in 64-bits. Otherwise, all the registers in the frame
26814 get saved in 32-bits.
26816 So, when we save all GPRs (except the SP) in 64-bits, the
26817 traditional GP save area will be empty. */
26818 if (TARGET_SPE_ABI && info->spe_64bit_regs_used != 0)
26819 info->gp_size = 0;
26821 info->first_fp_reg_save = first_fp_reg_to_save ();
26822 info->fp_size = 8 * (64 - info->first_fp_reg_save);
26824 info->first_altivec_reg_save = first_altivec_reg_to_save ();
26825 info->altivec_size = 16 * (LAST_ALTIVEC_REGNO + 1
26826 - info->first_altivec_reg_save);
26828 /* Does this function call anything? */
26829 info->calls_p = (!crtl->is_leaf || cfun->machine->ra_needs_full_frame);
26831 /* Determine if we need to save the condition code registers. */
26832 if (save_reg_p (CR2_REGNO)
26833 || save_reg_p (CR3_REGNO)
26834 || save_reg_p (CR4_REGNO))
26836 info->cr_save_p = 1;
26837 if (DEFAULT_ABI == ABI_V4)
26838 info->cr_size = reg_size;
26841 /* If the current function calls __builtin_eh_return, then we need
26842 to allocate stack space for registers that will hold data for
26843 the exception handler. */
26844 if (crtl->calls_eh_return)
26846 unsigned int i;
26847 for (i = 0; EH_RETURN_DATA_REGNO (i) != INVALID_REGNUM; ++i)
26848 continue;
26850 /* SPE saves EH registers in 64-bits. */
26851 ehrd_size = i * (TARGET_SPE_ABI && info->spe_64bit_regs_used != 0
26852 ? UNITS_PER_SPE_WORD : UNITS_PER_WORD);
26854 else
26855 ehrd_size = 0;
26857 /* In the ELFv2 ABI, we also need to allocate space for separate
26858 CR field save areas if the function calls __builtin_eh_return. */
26859 if (DEFAULT_ABI == ABI_ELFv2 && crtl->calls_eh_return)
26861 /* This hard-codes that we have three call-saved CR fields. */
26862 ehcr_size = 3 * reg_size;
26863 /* We do *not* use the regular CR save mechanism. */
26864 info->cr_save_p = 0;
26866 else
26867 ehcr_size = 0;
26869 /* Determine various sizes. */
26870 info->reg_size = reg_size;
26871 info->fixed_size = RS6000_SAVE_AREA;
26872 info->vars_size = RS6000_ALIGN (get_frame_size (), 8);
26873 if (cfun->calls_alloca)
26874 info->parm_size =
26875 RS6000_ALIGN (crtl->outgoing_args_size + info->fixed_size,
26876 STACK_BOUNDARY / BITS_PER_UNIT) - info->fixed_size;
26877 else
26878 info->parm_size = RS6000_ALIGN (crtl->outgoing_args_size,
26879 TARGET_ALTIVEC ? 16 : 8);
26880 if (FRAME_GROWS_DOWNWARD)
26881 info->vars_size
26882 += RS6000_ALIGN (info->fixed_size + info->vars_size + info->parm_size,
26883 ABI_STACK_BOUNDARY / BITS_PER_UNIT)
26884 - (info->fixed_size + info->vars_size + info->parm_size);
26886 if (TARGET_SPE_ABI && info->spe_64bit_regs_used != 0)
26887 info->spe_gp_size = 8 * (32 - first_gp);
26889 if (TARGET_ALTIVEC_ABI)
26890 info->vrsave_mask = compute_vrsave_mask ();
26892 if (TARGET_ALTIVEC_VRSAVE && info->vrsave_mask)
26893 info->vrsave_size = 4;
26895 compute_save_world_info (info);
26897 /* Calculate the offsets. */
26898 switch (DEFAULT_ABI)
26900 case ABI_NONE:
26901 default:
26902 gcc_unreachable ();
26904 case ABI_AIX:
26905 case ABI_ELFv2:
26906 case ABI_DARWIN:
26907 info->fp_save_offset = -info->fp_size;
26908 info->gp_save_offset = info->fp_save_offset - info->gp_size;
26910 if (TARGET_ALTIVEC_ABI)
26912 info->vrsave_save_offset = info->gp_save_offset - info->vrsave_size;
26914 /* Align stack so vector save area is on a quadword boundary.
26915 The padding goes above the vectors. */
26916 if (info->altivec_size != 0)
26917 info->altivec_padding_size = info->vrsave_save_offset & 0xF;
26919 info->altivec_save_offset = info->vrsave_save_offset
26920 - info->altivec_padding_size
26921 - info->altivec_size;
26922 gcc_assert (info->altivec_size == 0
26923 || info->altivec_save_offset % 16 == 0);
26925 /* Adjust for AltiVec case. */
26926 info->ehrd_offset = info->altivec_save_offset - ehrd_size;
26928 else
26929 info->ehrd_offset = info->gp_save_offset - ehrd_size;
26931 info->ehcr_offset = info->ehrd_offset - ehcr_size;
26932 info->cr_save_offset = reg_size; /* first word when 64-bit. */
26933 info->lr_save_offset = 2*reg_size;
26934 break;
26936 case ABI_V4:
26937 info->fp_save_offset = -info->fp_size;
26938 info->gp_save_offset = info->fp_save_offset - info->gp_size;
26939 info->cr_save_offset = info->gp_save_offset - info->cr_size;
26941 if (TARGET_SPE_ABI && info->spe_64bit_regs_used != 0)
26943 /* Align stack so SPE GPR save area is aligned on a
26944 double-word boundary. */
26945 if (info->spe_gp_size != 0 && info->cr_save_offset != 0)
26946 info->spe_padding_size = 8 - (-info->cr_save_offset % 8);
26947 else
26948 info->spe_padding_size = 0;
26950 info->spe_gp_save_offset = info->cr_save_offset
26951 - info->spe_padding_size
26952 - info->spe_gp_size;
26954 /* Adjust for SPE case. */
26955 info->ehrd_offset = info->spe_gp_save_offset;
26957 else if (TARGET_ALTIVEC_ABI)
26959 info->vrsave_save_offset = info->cr_save_offset - info->vrsave_size;
26961 /* Align stack so vector save area is on a quadword boundary. */
26962 if (info->altivec_size != 0)
26963 info->altivec_padding_size = 16 - (-info->vrsave_save_offset % 16);
26965 info->altivec_save_offset = info->vrsave_save_offset
26966 - info->altivec_padding_size
26967 - info->altivec_size;
26969 /* Adjust for AltiVec case. */
26970 info->ehrd_offset = info->altivec_save_offset;
26972 else
26973 info->ehrd_offset = info->cr_save_offset;
26975 info->ehrd_offset -= ehrd_size;
26976 info->lr_save_offset = reg_size;
26979 save_align = (TARGET_ALTIVEC_ABI || DEFAULT_ABI == ABI_DARWIN) ? 16 : 8;
26980 info->save_size = RS6000_ALIGN (info->fp_size
26981 + info->gp_size
26982 + info->altivec_size
26983 + info->altivec_padding_size
26984 + info->spe_gp_size
26985 + info->spe_padding_size
26986 + ehrd_size
26987 + ehcr_size
26988 + info->cr_size
26989 + info->vrsave_size,
26990 save_align);
26992 non_fixed_size = info->vars_size + info->parm_size + info->save_size;
26994 info->total_size = RS6000_ALIGN (non_fixed_size + info->fixed_size,
26995 ABI_STACK_BOUNDARY / BITS_PER_UNIT);
26997 /* Determine if we need to save the link register. */
26998 if (info->calls_p
26999 || ((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
27000 && crtl->profile
27001 && !TARGET_PROFILE_KERNEL)
27002 || (DEFAULT_ABI == ABI_V4 && cfun->calls_alloca)
27003 #ifdef TARGET_RELOCATABLE
27004 || (DEFAULT_ABI == ABI_V4
27005 && (TARGET_RELOCATABLE || flag_pic > 1)
27006 && !constant_pool_empty_p ())
27007 #endif
27008 || rs6000_ra_ever_killed ())
27009 info->lr_save_p = 1;
27011 using_static_chain_p = (cfun->static_chain_decl != NULL_TREE
27012 && df_regs_ever_live_p (STATIC_CHAIN_REGNUM)
27013 && call_used_regs[STATIC_CHAIN_REGNUM]);
27014 info->savres_strategy = rs6000_savres_strategy (info, using_static_chain_p);
27016 if (!(info->savres_strategy & SAVE_INLINE_GPRS)
27017 || !(info->savres_strategy & SAVE_INLINE_FPRS)
27018 || !(info->savres_strategy & SAVE_INLINE_VRS)
27019 || !(info->savres_strategy & REST_INLINE_GPRS)
27020 || !(info->savres_strategy & REST_INLINE_FPRS)
27021 || !(info->savres_strategy & REST_INLINE_VRS))
27022 info->lr_save_p = 1;
27024 if (info->lr_save_p)
27025 df_set_regs_ever_live (LR_REGNO, true);
27027 /* Determine if we need to allocate any stack frame:
27029 For AIX we need to push the stack if a frame pointer is needed
27030 (because the stack might be dynamically adjusted), if we are
27031 debugging, if we make calls, or if the sum of fp_save, gp_save,
27032 and local variables is more than the space needed to save all
27033 non-volatile registers: 32-bit: 18*8 + 19*4 = 220 or 64-bit: 18*8
27034 + 18*8 = 288 (GPR13 reserved).
27036 For V.4 we don't have the stack cushion that AIX uses, but assume
27037 that the debugger can handle stackless frames. */
27039 if (info->calls_p)
27040 info->push_p = 1;
27042 else if (DEFAULT_ABI == ABI_V4)
27043 info->push_p = non_fixed_size != 0;
27045 else if (frame_pointer_needed)
27046 info->push_p = 1;
27048 else if (TARGET_XCOFF && write_symbols != NO_DEBUG)
27049 info->push_p = 1;
27051 else
27052 info->push_p = non_fixed_size > (TARGET_32BIT ? 220 : 288);
27054 return info;
27057 /* Return true if the current function uses any GPRs in 64-bit SIMD
27058 mode. */
27060 static bool
27061 spe_func_has_64bit_regs_p (void)
27063 rtx_insn *insns, *insn;
27065 /* Functions that save and restore all the call-saved registers will
27066 need to save/restore the registers in 64-bits. */
27067 if (crtl->calls_eh_return
27068 || cfun->calls_setjmp
27069 || crtl->has_nonlocal_goto)
27070 return true;
27072 insns = get_insns ();
27074 for (insn = NEXT_INSN (insns); insn != NULL_RTX; insn = NEXT_INSN (insn))
27076 if (INSN_P (insn))
27078 rtx i;
27080 /* FIXME: This should be implemented with attributes...
27082 (set_attr "spe64" "true")....then,
27083 if (get_spe64(insn)) return true;
27085 It's the only reliable way to do the stuff below. */
27087 i = PATTERN (insn);
27088 if (GET_CODE (i) == SET)
27090 machine_mode mode = GET_MODE (SET_SRC (i));
27092 if (SPE_VECTOR_MODE (mode))
27093 return true;
27094 if (TARGET_E500_DOUBLE
27095 && (mode == DFmode || FLOAT128_2REG_P (mode)))
27096 return true;
27101 return false;
27104 static void
27105 debug_stack_info (rs6000_stack_t *info)
27107 const char *abi_string;
27109 if (! info)
27110 info = rs6000_stack_info ();
27112 fprintf (stderr, "\nStack information for function %s:\n",
27113 ((current_function_decl && DECL_NAME (current_function_decl))
27114 ? IDENTIFIER_POINTER (DECL_NAME (current_function_decl))
27115 : "<unknown>"));
27117 switch (info->abi)
27119 default: abi_string = "Unknown"; break;
27120 case ABI_NONE: abi_string = "NONE"; break;
27121 case ABI_AIX: abi_string = "AIX"; break;
27122 case ABI_ELFv2: abi_string = "ELFv2"; break;
27123 case ABI_DARWIN: abi_string = "Darwin"; break;
27124 case ABI_V4: abi_string = "V.4"; break;
27127 fprintf (stderr, "\tABI = %5s\n", abi_string);
27129 if (TARGET_ALTIVEC_ABI)
27130 fprintf (stderr, "\tALTIVEC ABI extensions enabled.\n");
27132 if (TARGET_SPE_ABI)
27133 fprintf (stderr, "\tSPE ABI extensions enabled.\n");
27135 if (info->first_gp_reg_save != 32)
27136 fprintf (stderr, "\tfirst_gp_reg_save = %5d\n", info->first_gp_reg_save);
27138 if (info->first_fp_reg_save != 64)
27139 fprintf (stderr, "\tfirst_fp_reg_save = %5d\n", info->first_fp_reg_save);
27141 if (info->first_altivec_reg_save <= LAST_ALTIVEC_REGNO)
27142 fprintf (stderr, "\tfirst_altivec_reg_save = %5d\n",
27143 info->first_altivec_reg_save);
27145 if (info->lr_save_p)
27146 fprintf (stderr, "\tlr_save_p = %5d\n", info->lr_save_p);
27148 if (info->cr_save_p)
27149 fprintf (stderr, "\tcr_save_p = %5d\n", info->cr_save_p);
27151 if (info->vrsave_mask)
27152 fprintf (stderr, "\tvrsave_mask = 0x%x\n", info->vrsave_mask);
27154 if (info->push_p)
27155 fprintf (stderr, "\tpush_p = %5d\n", info->push_p);
27157 if (info->calls_p)
27158 fprintf (stderr, "\tcalls_p = %5d\n", info->calls_p);
27160 if (info->gp_size)
27161 fprintf (stderr, "\tgp_save_offset = %5d\n", info->gp_save_offset);
27163 if (info->fp_size)
27164 fprintf (stderr, "\tfp_save_offset = %5d\n", info->fp_save_offset);
27166 if (info->altivec_size)
27167 fprintf (stderr, "\taltivec_save_offset = %5d\n",
27168 info->altivec_save_offset);
27170 if (info->spe_gp_size)
27171 fprintf (stderr, "\tspe_gp_save_offset = %5d\n",
27172 info->spe_gp_save_offset);
27174 if (info->vrsave_size)
27175 fprintf (stderr, "\tvrsave_save_offset = %5d\n",
27176 info->vrsave_save_offset);
27178 if (info->lr_save_p)
27179 fprintf (stderr, "\tlr_save_offset = %5d\n", info->lr_save_offset);
27181 if (info->cr_save_p)
27182 fprintf (stderr, "\tcr_save_offset = %5d\n", info->cr_save_offset);
27184 if (info->varargs_save_offset)
27185 fprintf (stderr, "\tvarargs_save_offset = %5d\n", info->varargs_save_offset);
27187 if (info->total_size)
27188 fprintf (stderr, "\ttotal_size = " HOST_WIDE_INT_PRINT_DEC"\n",
27189 info->total_size);
27191 if (info->vars_size)
27192 fprintf (stderr, "\tvars_size = " HOST_WIDE_INT_PRINT_DEC"\n",
27193 info->vars_size);
27195 if (info->parm_size)
27196 fprintf (stderr, "\tparm_size = %5d\n", info->parm_size);
27198 if (info->fixed_size)
27199 fprintf (stderr, "\tfixed_size = %5d\n", info->fixed_size);
27201 if (info->gp_size)
27202 fprintf (stderr, "\tgp_size = %5d\n", info->gp_size);
27204 if (info->spe_gp_size)
27205 fprintf (stderr, "\tspe_gp_size = %5d\n", info->spe_gp_size);
27207 if (info->fp_size)
27208 fprintf (stderr, "\tfp_size = %5d\n", info->fp_size);
27210 if (info->altivec_size)
27211 fprintf (stderr, "\taltivec_size = %5d\n", info->altivec_size);
27213 if (info->vrsave_size)
27214 fprintf (stderr, "\tvrsave_size = %5d\n", info->vrsave_size);
27216 if (info->altivec_padding_size)
27217 fprintf (stderr, "\taltivec_padding_size= %5d\n",
27218 info->altivec_padding_size);
27220 if (info->spe_padding_size)
27221 fprintf (stderr, "\tspe_padding_size = %5d\n",
27222 info->spe_padding_size);
27224 if (info->cr_size)
27225 fprintf (stderr, "\tcr_size = %5d\n", info->cr_size);
27227 if (info->save_size)
27228 fprintf (stderr, "\tsave_size = %5d\n", info->save_size);
27230 if (info->reg_size != 4)
27231 fprintf (stderr, "\treg_size = %5d\n", info->reg_size);
27233 fprintf (stderr, "\tsave-strategy = %04x\n", info->savres_strategy);
27235 fprintf (stderr, "\n");
27238 rtx
27239 rs6000_return_addr (int count, rtx frame)
27241 /* Currently we don't optimize very well between prologue and body
27242 code, and for PIC the generated code can actually be quite bad, so
27243 don't try to be too clever here. */
27244 if (count != 0
27245 || ((DEFAULT_ABI == ABI_V4 || DEFAULT_ABI == ABI_DARWIN) && flag_pic))
27247 cfun->machine->ra_needs_full_frame = 1;
27249 return
27250 gen_rtx_MEM
27251 (Pmode,
27252 memory_address
27253 (Pmode,
27254 plus_constant (Pmode,
27255 copy_to_reg
27256 (gen_rtx_MEM (Pmode,
27257 memory_address (Pmode, frame))),
27258 RETURN_ADDRESS_OFFSET)));
27261 cfun->machine->ra_need_lr = 1;
27262 return get_hard_reg_initial_val (Pmode, LR_REGNO);
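/* For illustration: __builtin_return_address (0) normally reduces to the
   saved-LR path above, while COUNT != 0 (or PIC on V.4/Darwin) builds a
   MEM equivalent to

     *(void **) (*(char **) frame + RETURN_ADDRESS_OFFSET)

   i.e. one back-chain dereference followed by a load from the LR save
   slot.  */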
27265 /* Say whether a function is a candidate for sibcall handling or not. */
27267 static bool
27268 rs6000_function_ok_for_sibcall (tree decl, tree exp)
27270 tree fntype;
27272 if (decl)
27273 fntype = TREE_TYPE (decl);
27274 else
27275 fntype = TREE_TYPE (TREE_TYPE (CALL_EXPR_FN (exp)));
27277 /* We can't do it if the called function has more vector parameters
27278 than the current function; there's nowhere to put the VRsave code. */
27279 if (TARGET_ALTIVEC_ABI
27280 && TARGET_ALTIVEC_VRSAVE
27281 && !(decl && decl == current_function_decl))
27283 function_args_iterator args_iter;
27284 tree type;
27285 int nvreg = 0;
27287 /* Functions with vector parameters are required to have a
27288 prototype, so the argument type info must be available
27289 here. */
27290 FOREACH_FUNCTION_ARGS(fntype, type, args_iter)
27291 if (TREE_CODE (type) == VECTOR_TYPE
27292 && ALTIVEC_OR_VSX_VECTOR_MODE (TYPE_MODE (type)))
27293 nvreg++;
27295 FOREACH_FUNCTION_ARGS(TREE_TYPE (current_function_decl), type, args_iter)
27296 if (TREE_CODE (type) == VECTOR_TYPE
27297 && ALTIVEC_OR_VSX_VECTOR_MODE (TYPE_MODE (type)))
27298 nvreg--;
27300 if (nvreg > 0)
27301 return false;
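/* Illustrative example: if the caller takes one AltiVec vector argument
   but the callee takes three, the two loops above leave nvreg == 2, so
   the sibcall is rejected; the callee would need VRSAVE bits the caller
   never set up.  */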
27304 /* Under the AIX or ELFv2 ABIs we can't allow calls to non-local
27305 functions, because the callee may have a different TOC pointer from
27306 the caller's, and there's no way to ensure we restore the TOC when
27307 we return. With the secure-plt SYSV ABI we can't make non-local
27308 calls when -fpic/PIC because the plt call stubs use r30. */
27309 if (DEFAULT_ABI == ABI_DARWIN
27310 || ((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
27311 && decl
27312 && !DECL_EXTERNAL (decl)
27313 && !DECL_WEAK (decl)
27314 && (*targetm.binds_local_p) (decl))
27315 || (DEFAULT_ABI == ABI_V4
27316 && (!TARGET_SECURE_PLT
27317 || !flag_pic
27318 || (decl
27319 && (*targetm.binds_local_p) (decl)))))
27321 tree attr_list = TYPE_ATTRIBUTES (fntype);
27323 if (!lookup_attribute ("longcall", attr_list)
27324 || lookup_attribute ("shortcall", attr_list))
27325 return true;
27328 return false;
27331 static int
27332 rs6000_ra_ever_killed (void)
27334 rtx_insn *top;
27335 rtx reg;
27336 rtx_insn *insn;
27338 if (cfun->is_thunk)
27339 return 0;
27341 if (cfun->machine->lr_save_state)
27342 return cfun->machine->lr_save_state - 1;
27344 /* regs_ever_live has LR marked as used if any sibcalls are present,
27345 but this should not force saving and restoring in the
27346 pro/epilogue. Likewise, reg_set_between_p thinks a sibcall
27347 clobbers LR, so that is inappropriate. */
27349 /* Also, the prologue can generate a store into LR that
27350 doesn't really count, like this:
27352 move LR->R0
27353 bcl to set PIC register
27354 move LR->R31
27355 move R0->LR
27357 When we're called from the epilogue, we need to avoid counting
27358 this as a store. */
27360 push_topmost_sequence ();
27361 top = get_insns ();
27362 pop_topmost_sequence ();
27363 reg = gen_rtx_REG (Pmode, LR_REGNO);
27365 for (insn = NEXT_INSN (top); insn != NULL_RTX; insn = NEXT_INSN (insn))
27367 if (INSN_P (insn))
27369 if (CALL_P (insn))
27371 if (!SIBLING_CALL_P (insn))
27372 return 1;
27374 else if (find_regno_note (insn, REG_INC, LR_REGNO))
27375 return 1;
27376 else if (set_of (reg, insn) != NULL_RTX
27377 && !prologue_epilogue_contains (insn))
27378 return 1;
27381 return 0;
27384 /* Emit instructions needed to load the TOC register.
27385 This is only needed when TARGET_TOC and TARGET_MINIMAL_TOC are set and
27386 there is a constant pool, or for SVR4 -fpic. */
27388 void
27389 rs6000_emit_load_toc_table (int fromprolog)
27391 rtx dest;
27392 dest = gen_rtx_REG (Pmode, RS6000_PIC_OFFSET_TABLE_REGNUM);
27394 if (TARGET_ELF && TARGET_SECURE_PLT && DEFAULT_ABI == ABI_V4 && flag_pic)
27396 char buf[30];
27397 rtx lab, tmp1, tmp2, got;
27399 lab = gen_label_rtx ();
27400 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (lab));
27401 lab = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (buf));
27402 if (flag_pic == 2)
27404 got = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (toc_label_name));
27405 need_toc_init = 1;
27407 else
27408 got = rs6000_got_sym ();
27409 tmp1 = tmp2 = dest;
27410 if (!fromprolog)
27412 tmp1 = gen_reg_rtx (Pmode);
27413 tmp2 = gen_reg_rtx (Pmode);
27415 emit_insn (gen_load_toc_v4_PIC_1 (lab));
27416 emit_move_insn (tmp1, gen_rtx_REG (Pmode, LR_REGNO));
27417 emit_insn (gen_load_toc_v4_PIC_3b (tmp2, tmp1, got, lab));
27418 emit_insn (gen_load_toc_v4_PIC_3c (dest, tmp2, got, lab));
27420 else if (TARGET_ELF && DEFAULT_ABI == ABI_V4 && flag_pic == 1)
27422 emit_insn (gen_load_toc_v4_pic_si ());
27423 emit_move_insn (dest, gen_rtx_REG (Pmode, LR_REGNO));
27425 else if (TARGET_ELF && DEFAULT_ABI == ABI_V4 && flag_pic == 2)
27427 char buf[30];
27428 rtx temp0 = (fromprolog
27429 ? gen_rtx_REG (Pmode, 0)
27430 : gen_reg_rtx (Pmode));
27432 if (fromprolog)
27434 rtx symF, symL;
27436 ASM_GENERATE_INTERNAL_LABEL (buf, "LCF", rs6000_pic_labelno);
27437 symF = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (buf));
27439 ASM_GENERATE_INTERNAL_LABEL (buf, "LCL", rs6000_pic_labelno);
27440 symL = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (buf));
27442 emit_insn (gen_load_toc_v4_PIC_1 (symF));
27443 emit_move_insn (dest, gen_rtx_REG (Pmode, LR_REGNO));
27444 emit_insn (gen_load_toc_v4_PIC_2 (temp0, dest, symL, symF));
27446 else
27448 rtx tocsym, lab;
27450 tocsym = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (toc_label_name));
27451 need_toc_init = 1;
27452 lab = gen_label_rtx ();
27453 emit_insn (gen_load_toc_v4_PIC_1b (tocsym, lab));
27454 emit_move_insn (dest, gen_rtx_REG (Pmode, LR_REGNO));
27455 if (TARGET_LINK_STACK)
27456 emit_insn (gen_addsi3 (dest, dest, GEN_INT (4)));
27457 emit_move_insn (temp0, gen_rtx_MEM (Pmode, dest));
27459 emit_insn (gen_addsi3 (dest, temp0, dest));
27461 else if (TARGET_ELF && !TARGET_AIX && flag_pic == 0 && TARGET_MINIMAL_TOC)
27463 /* This is for AIX code running in non-PIC ELF32. */
27464 rtx realsym = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (toc_label_name));
27466 need_toc_init = 1;
27467 emit_insn (gen_elf_high (dest, realsym));
27468 emit_insn (gen_elf_low (dest, dest, realsym));
27470 else
27472 gcc_assert (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2);
27474 if (TARGET_32BIT)
27475 emit_insn (gen_load_toc_aix_si (dest));
27476 else
27477 emit_insn (gen_load_toc_aix_di (dest));
27481 /* Emit instructions to restore the link register after determining where
27482 its value has been stored. */
27484 void
27485 rs6000_emit_eh_reg_restore (rtx source, rtx scratch)
27487 rs6000_stack_t *info = rs6000_stack_info ();
27488 rtx operands[2];
27490 operands[0] = source;
27491 operands[1] = scratch;
27493 if (info->lr_save_p)
27495 rtx frame_rtx = stack_pointer_rtx;
27496 HOST_WIDE_INT sp_offset = 0;
27497 rtx tmp;
27499 if (frame_pointer_needed
27500 || cfun->calls_alloca
27501 || info->total_size > 32767)
27503 tmp = gen_frame_mem (Pmode, frame_rtx);
27504 emit_move_insn (operands[1], tmp);
27505 frame_rtx = operands[1];
27507 else if (info->push_p)
27508 sp_offset = info->total_size;
27510 tmp = plus_constant (Pmode, frame_rtx,
27511 info->lr_save_offset + sp_offset);
27512 tmp = gen_frame_mem (Pmode, tmp);
27513 emit_move_insn (tmp, operands[0]);
27515 else
27516 emit_move_insn (gen_rtx_REG (Pmode, LR_REGNO), operands[0]);
27518 /* Freeze lr_save_p. We've just emitted rtl that depends on the
27519 state of lr_save_p so any change from here on would be a bug. In
27520 particular, stop rs6000_ra_ever_killed from considering the SET
27521 of lr we may have added just above. */
27522 cfun->machine->lr_save_state = info->lr_save_p + 1;
27525 static GTY(()) alias_set_type set = -1;
27527 alias_set_type
27528 get_TOC_alias_set (void)
27530 if (set == -1)
27531 set = new_alias_set ();
27532 return set;
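/* A hedged usage sketch (illustration, not from the original file):
   callers typically apply this alias set to TOC memory references, e.g.

     rtx mem = gen_rtx_MEM (Pmode, addr);
     set_mem_alias_set (mem, get_TOC_alias_set ());

   where addr is assumed to be a valid TOC address, so that TOC loads are
   known not to alias ordinary frame stores.  */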
27535 /* This returns nonzero if the current function uses the TOC. This is
27536 determined by the presence of (use (unspec ... UNSPEC_TOC)), which
27537 is generated by the ABI_V4 load_toc_* patterns. */
27538 #if TARGET_ELF
27539 static int
27540 uses_TOC (void)
27542 rtx_insn *insn;
27544 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
27545 if (INSN_P (insn))
27547 rtx pat = PATTERN (insn);
27548 int i;
27550 if (GET_CODE (pat) == PARALLEL)
27551 for (i = 0; i < XVECLEN (pat, 0); i++)
27553 rtx sub = XVECEXP (pat, 0, i);
27554 if (GET_CODE (sub) == USE)
27556 sub = XEXP (sub, 0);
27557 if (GET_CODE (sub) == UNSPEC
27558 && XINT (sub, 1) == UNSPEC_TOC)
27559 return 1;
27563 return 0;
27565 #endif
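/* For illustration, the insn being searched for above has the shape

     (parallel [... (use (unspec [(const_int 0)] UNSPEC_TOC)) ...])

   as emitted by the ABI_V4 load_toc_* patterns (a sketch of the shape,
   not literal RTL from a dump).  */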
27567 rtx
27568 create_TOC_reference (rtx symbol, rtx largetoc_reg)
27570 rtx tocrel, tocreg, hi;
27572 if (TARGET_DEBUG_ADDR)
27574 if (GET_CODE (symbol) == SYMBOL_REF)
27575 fprintf (stderr, "\ncreate_TOC_reference, (symbol_ref %s)\n",
27576 XSTR (symbol, 0));
27577 else
27579 fprintf (stderr, "\ncreate_TOC_reference, code %s:\n",
27580 GET_RTX_NAME (GET_CODE (symbol)));
27581 debug_rtx (symbol);
27585 if (!can_create_pseudo_p ())
27586 df_set_regs_ever_live (TOC_REGISTER, true);
27588 tocreg = gen_rtx_REG (Pmode, TOC_REGISTER);
27589 tocrel = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, symbol, tocreg), UNSPEC_TOCREL);
27590 if (TARGET_CMODEL == CMODEL_SMALL || can_create_pseudo_p ())
27591 return tocrel;
27593 hi = gen_rtx_HIGH (Pmode, copy_rtx (tocrel));
27594 if (largetoc_reg != NULL)
27596 emit_move_insn (largetoc_reg, hi);
27597 hi = largetoc_reg;
27599 return gen_rtx_LO_SUM (Pmode, hi, tocrel);
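/* Illustrative results (hedged): for -mcmodel=small, or while pseudos can
   still be created, the bare UNSPEC_TOCREL is returned and prints as a
   simple sym@toc(2) reference; otherwise the returned LO_SUM corresponds
   to the usual two-insn split, roughly

     addis 9,2,sym@toc@ha
     ld    9,sym@toc@l(9)

   where r9 stands in for LARGETOC_REG.  */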
27602 /* Issue assembly directives that create a reference to the given DWARF
27603 FRAME_TABLE_LABEL from the current function section. */
27604 void
27605 rs6000_aix_asm_output_dwarf_table_ref (char * frame_table_label)
27607 fprintf (asm_out_file, "\t.ref %s\n",
27608 (* targetm.strip_name_encoding) (frame_table_label));
27611 /* This ties together stack memory (MEM with an alias set of frame_alias_set)
27612 and the change to the stack pointer. */
27614 static void
27615 rs6000_emit_stack_tie (rtx fp, bool hard_frame_needed)
27617 rtvec p;
27618 int i;
27619 rtx regs[3];
27621 i = 0;
27622 regs[i++] = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
27623 if (hard_frame_needed)
27624 regs[i++] = gen_rtx_REG (Pmode, HARD_FRAME_POINTER_REGNUM);
27625 if (!(REGNO (fp) == STACK_POINTER_REGNUM
27626 || (hard_frame_needed
27627 && REGNO (fp) == HARD_FRAME_POINTER_REGNUM)))
27628 regs[i++] = fp;
27630 p = rtvec_alloc (i);
27631 while (--i >= 0)
27633 rtx mem = gen_frame_mem (BLKmode, regs[i]);
27634 RTVEC_ELT (p, i) = gen_rtx_SET (mem, const0_rtx);
27637 emit_insn (gen_stack_tie (gen_rtx_PARALLEL (VOIDmode, p)));
27640 /* Emit the correct code for allocating stack space, as insns.
27641 If COPY_REG, leave a copy of the old stack pointer, plus COPY_OFF, in it.
27642 The generated code may use hard register 0 as a temporary. */
27644 static rtx_insn *
27645 rs6000_emit_allocate_stack (HOST_WIDE_INT size, rtx copy_reg, int copy_off)
27647 rtx_insn *insn;
27648 rtx stack_reg = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
27649 rtx tmp_reg = gen_rtx_REG (Pmode, 0);
27650 rtx todec = gen_int_mode (-size, Pmode);
27651 rtx par, set, mem;
27653 if (INTVAL (todec) != -size)
27655 warning (0, "stack frame too large");
27656 emit_insn (gen_trap ());
27657 return 0;
27660 if (crtl->limit_stack)
27662 if (REG_P (stack_limit_rtx)
27663 && REGNO (stack_limit_rtx) > 1
27664 && REGNO (stack_limit_rtx) <= 31)
27666 emit_insn (gen_add3_insn (tmp_reg, stack_limit_rtx, GEN_INT (size)));
27667 emit_insn (gen_cond_trap (LTU, stack_reg, tmp_reg,
27668 const0_rtx));
27670 else if (GET_CODE (stack_limit_rtx) == SYMBOL_REF
27671 && TARGET_32BIT
27672 && DEFAULT_ABI == ABI_V4)
27674 rtx toload = gen_rtx_CONST (VOIDmode,
27675 gen_rtx_PLUS (Pmode,
27676 stack_limit_rtx,
27677 GEN_INT (size)));
27679 emit_insn (gen_elf_high (tmp_reg, toload));
27680 emit_insn (gen_elf_low (tmp_reg, tmp_reg, toload));
27681 emit_insn (gen_cond_trap (LTU, stack_reg, tmp_reg,
27682 const0_rtx));
27684 else
27685 warning (0, "stack limit expression is not supported");
27688 if (copy_reg)
27690 if (copy_off != 0)
27691 emit_insn (gen_add3_insn (copy_reg, stack_reg, GEN_INT (copy_off)));
27692 else
27693 emit_move_insn (copy_reg, stack_reg);
27696 if (size > 32767)
27698 /* Need a note here so that try_split doesn't get confused. */
27699 if (get_last_insn () == NULL_RTX)
27700 emit_note (NOTE_INSN_DELETED);
27701 insn = emit_move_insn (tmp_reg, todec);
27702 try_split (PATTERN (insn), insn, 0);
27703 todec = tmp_reg;
27706 insn = emit_insn (TARGET_32BIT
27707 ? gen_movsi_update_stack (stack_reg, stack_reg,
27708 todec, stack_reg)
27709 : gen_movdi_di_update_stack (stack_reg, stack_reg,
27710 todec, stack_reg));
27711 /* Since we didn't use gen_frame_mem to generate the MEM, grab
27712 it now and set the alias set/attributes. The above gen_*_update
27713 calls will generate a PARALLEL with the MEM set being the first
27714 operation. */
27715 par = PATTERN (insn);
27716 gcc_assert (GET_CODE (par) == PARALLEL);
27717 set = XVECEXP (par, 0, 0);
27718 gcc_assert (GET_CODE (set) == SET);
27719 mem = SET_DEST (set);
27720 gcc_assert (MEM_P (mem));
27721 MEM_NOTRAP_P (mem) = 1;
27722 set_mem_alias_set (mem, get_frame_alias_set ());
27724 RTX_FRAME_RELATED_P (insn) = 1;
27725 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
27726 gen_rtx_SET (stack_reg, gen_rtx_PLUS (Pmode, stack_reg,
27727 GEN_INT (-size))));
27728 return insn;
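/* Sketch of the emitted code (illustrative, 32-bit case): a frame of up
   to 32767 bytes becomes a single "stwu 1,-SIZE(1)", allocating the
   frame and storing the back chain in one update; larger frames first
   load -SIZE into r0 and use the indexed form "stwux 1,1,0" instead.  */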
27731 #define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP)
27733 #if PROBE_INTERVAL > 32768
27734 #error Cannot use indexed addressing mode for stack probing
27735 #endif
27737 /* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE,
27738 inclusive. These are offsets from the current stack pointer. */
27740 static void
27741 rs6000_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size)
27743 /* See if we have a constant small number of probes to generate. If so,
27744 that's the easy case. */
27745 if (first + size <= 32768)
27747 HOST_WIDE_INT i;
27749 /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 1 until
27750 it exceeds SIZE. If only one probe is needed, this will not
27751 generate any code. Then probe at FIRST + SIZE. */
27752 for (i = PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
27753 emit_stack_probe (plus_constant (Pmode, stack_pointer_rtx,
27754 -(first + i)));
27756 emit_stack_probe (plus_constant (Pmode, stack_pointer_rtx,
27757 -(first + size)));
27760 /* Otherwise, do the same as above, but in a loop. Note that we must be
27761 extra careful with variables wrapping around because we might be at
27762 the very top (or the very bottom) of the address space and we have
27763 to be able to handle this case properly; in particular, we use an
27764 equality test for the loop condition. */
27765 else
27767 HOST_WIDE_INT rounded_size;
27768 rtx r12 = gen_rtx_REG (Pmode, 12);
27769 rtx r0 = gen_rtx_REG (Pmode, 0);
27771 /* Sanity check for the addressing mode we're going to use. */
27772 gcc_assert (first <= 32768);
27774 /* Step 1: round SIZE to the previous multiple of the interval. */
27776 rounded_size = ROUND_DOWN (size, PROBE_INTERVAL);
27779 /* Step 2: compute initial and final value of the loop counter. */
27781 /* TEST_ADDR = SP + FIRST. */
27782 emit_insn (gen_rtx_SET (r12, plus_constant (Pmode, stack_pointer_rtx,
27783 -first)));
27785 /* LAST_ADDR = SP + FIRST + ROUNDED_SIZE. */
27786 if (rounded_size > 32768)
27788 emit_move_insn (r0, GEN_INT (-rounded_size));
27789 emit_insn (gen_rtx_SET (r0, gen_rtx_PLUS (Pmode, r12, r0)));
27791 else
27792 emit_insn (gen_rtx_SET (r0, plus_constant (Pmode, r12,
27793 -rounded_size)));
27796 /* Step 3: the loop
27798 do
27799 {
27800 TEST_ADDR = TEST_ADDR + PROBE_INTERVAL
27801 probe at TEST_ADDR
27802 }
27803 while (TEST_ADDR != LAST_ADDR)
27805 probes at FIRST + N * PROBE_INTERVAL for values of N from 1
27806 until it is equal to ROUNDED_SIZE. */
27808 if (TARGET_64BIT)
27809 emit_insn (gen_probe_stack_rangedi (r12, r12, r0));
27810 else
27811 emit_insn (gen_probe_stack_rangesi (r12, r12, r0));
27814 /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
27815 that SIZE is equal to ROUNDED_SIZE. */
27817 if (size != rounded_size)
27818 emit_stack_probe (plus_constant (Pmode, r12, rounded_size - size));
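/* Worked example (illustrative): with PROBE_INTERVAL == 4096,
   FIRST == 16384 and SIZE == 10000 we take the constant case above and
   emit probes at sp-20480, sp-24576 and finally sp-26384.  */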
27822 /* Probe a range of stack addresses from REG1 to REG2 inclusive. These are
27823 absolute addresses. */
27825 const char *
27826 output_probe_stack_range (rtx reg1, rtx reg2)
27828 static int labelno = 0;
27829 char loop_lab[32];
27830 rtx xops[2];
27832 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno++);
27834 /* Loop. */
27835 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
27837 /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL. */
27838 xops[0] = reg1;
27839 xops[1] = GEN_INT (-PROBE_INTERVAL);
27840 output_asm_insn ("addi %0,%0,%1", xops);
27842 /* Probe at TEST_ADDR. */
27843 xops[1] = gen_rtx_REG (Pmode, 0);
27844 output_asm_insn ("stw %1,0(%0)", xops);
27846 /* Test if TEST_ADDR == LAST_ADDR. */
27847 xops[1] = reg2;
27848 if (TARGET_64BIT)
27849 output_asm_insn ("cmpd 0,%0,%1", xops);
27850 else
27851 output_asm_insn ("cmpw 0,%0,%1", xops);
27853 /* Branch. */
27854 fputs ("\tbne 0,", asm_out_file);
27855 assemble_name_raw (asm_out_file, loop_lab);
27856 fputc ('\n', asm_out_file);
27858 return "";
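/* For illustration (assuming PROBE_INTERVAL == 4096 and -m32), the loop
   printed above looks like:

	.LPSRL0:
		addi 12,12,-4096
		stw 0,0(12)
		cmpw 0,12,0
		bne 0,.LPSRL0

   with r12 as TEST_ADDR and r0 as LAST_ADDR, matching the registers
   chosen in rs6000_emit_probe_stack_range.  */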
27861 /* Add to 'insn' a note which is PATTERN (INSN) but with REG replaced
27862 with (plus:P (reg 1) VAL), and with REG2 replaced with REPL2 if REG2
27863 is not NULL. It would be nice if dwarf2out_frame_debug_expr could
27864 deduce these equivalences by itself so it wasn't necessary to hold
27865 its hand so much. Don't be tempted to always supply d2_f_d_e with
27866 the actual cfa register, i.e. r31 when we are using a hard frame
27867 pointer. That fails when saving regs off r1, and sched moves the
27868 r31 setup past the reg saves. */
27870 static rtx_insn *
27871 rs6000_frame_related (rtx_insn *insn, rtx reg, HOST_WIDE_INT val,
27872 rtx reg2, rtx repl2)
27874 rtx repl;
27876 if (REGNO (reg) == STACK_POINTER_REGNUM)
27878 gcc_checking_assert (val == 0);
27879 repl = NULL_RTX;
27881 else
27882 repl = gen_rtx_PLUS (Pmode, gen_rtx_REG (Pmode, STACK_POINTER_REGNUM),
27883 GEN_INT (val));
27885 rtx pat = PATTERN (insn);
27886 if (!repl && !reg2)
27888 /* No need for any replacement. Just set RTX_FRAME_RELATED_P. */
27889 if (GET_CODE (pat) == PARALLEL)
27890 for (int i = 0; i < XVECLEN (pat, 0); i++)
27891 if (GET_CODE (XVECEXP (pat, 0, i)) == SET)
27893 rtx set = XVECEXP (pat, 0, i);
27895 /* If this PARALLEL has been emitted for out-of-line
27896 register save functions, or store multiple, then omit
27897 eh_frame info for any user-defined global regs. If
27898 eh_frame info is supplied, frame unwinding will
27899 restore a user reg. */
27900 if (!REG_P (SET_SRC (set))
27901 || !fixed_reg_p (REGNO (SET_SRC (set))))
27902 RTX_FRAME_RELATED_P (set) = 1;
27904 RTX_FRAME_RELATED_P (insn) = 1;
27905 return insn;
27908 /* We expect that 'pat' is either a SET or a PARALLEL containing
27909 SETs (and possibly other stuff). In a PARALLEL, all the SETs
27910 are important so they all have to be marked RTX_FRAME_RELATED_P.
27911 Call simplify_replace_rtx on the SETs rather than the whole insn
27912 so as to leave the other stuff alone (for example USE of r12). */
27914 set_used_flags (pat);
27915 if (GET_CODE (pat) == SET)
27917 if (repl)
27918 pat = simplify_replace_rtx (pat, reg, repl);
27919 if (reg2)
27920 pat = simplify_replace_rtx (pat, reg2, repl2);
27922 else if (GET_CODE (pat) == PARALLEL)
27924 pat = shallow_copy_rtx (pat);
27925 XVEC (pat, 0) = shallow_copy_rtvec (XVEC (pat, 0));
27927 for (int i = 0; i < XVECLEN (pat, 0); i++)
27928 if (GET_CODE (XVECEXP (pat, 0, i)) == SET)
27930 rtx set = XVECEXP (pat, 0, i);
27932 if (repl)
27933 set = simplify_replace_rtx (set, reg, repl);
27934 if (reg2)
27935 set = simplify_replace_rtx (set, reg2, repl2);
27936 XVECEXP (pat, 0, i) = set;
27938 /* Omit eh_frame info for any user-defined global regs. */
27939 if (!REG_P (SET_SRC (set))
27940 || !fixed_reg_p (REGNO (SET_SRC (set))))
27941 RTX_FRAME_RELATED_P (set) = 1;
27944 else
27945 gcc_unreachable ();
27947 RTX_FRAME_RELATED_P (insn) = 1;
27948 add_reg_note (insn, REG_FRAME_RELATED_EXPR, copy_rtx_if_shared (pat));
27950 return insn;
27953 /* Returns an insn that has a vrsave set operation with the
27954 appropriate CLOBBERs. */
27956 static rtx
27957 generate_set_vrsave (rtx reg, rs6000_stack_t *info, int epiloguep)
27959 int nclobs, i;
27960 rtx insn, clobs[TOTAL_ALTIVEC_REGS + 1];
27961 rtx vrsave = gen_rtx_REG (SImode, VRSAVE_REGNO);
27963 clobs[0]
27964 = gen_rtx_SET (vrsave,
27965 gen_rtx_UNSPEC_VOLATILE (SImode,
27966 gen_rtvec (2, reg, vrsave),
27967 UNSPECV_SET_VRSAVE));
27969 nclobs = 1;
27971 /* We need to clobber the registers in the mask so the scheduler
27972 does not move sets to VRSAVE before sets of AltiVec registers.
27974 However, if the function receives nonlocal gotos, reload will set
27975 all call saved registers live. We will end up with:
27977 (set (reg 999) (mem))
27978 (parallel [ (set (reg vrsave) (unspec blah))
27979 (clobber (reg 999))])
27981 The clobber will cause the store into reg 999 to be dead, and
27982 flow will attempt to delete an epilogue insn. In this case, we
27983 need an unspec use/set of the register. */
27985 for (i = FIRST_ALTIVEC_REGNO; i <= LAST_ALTIVEC_REGNO; ++i)
27986 if (info->vrsave_mask & ALTIVEC_REG_BIT (i))
27988 if (!epiloguep || call_used_regs [i])
27989 clobs[nclobs++] = gen_rtx_CLOBBER (VOIDmode,
27990 gen_rtx_REG (V4SImode, i));
27991 else
27993 rtx reg = gen_rtx_REG (V4SImode, i);
27995 clobs[nclobs++]
27996 = gen_rtx_SET (reg,
27997 gen_rtx_UNSPEC (V4SImode,
27998 gen_rtvec (1, reg), 27));
28002 insn = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (nclobs));
28004 for (i = 0; i < nclobs; ++i)
28005 XVECEXP (insn, 0, i) = clobs[i];
28007 return insn;
28010 static rtx
28011 gen_frame_set (rtx reg, rtx frame_reg, int offset, bool store)
28013 rtx addr, mem;
28015 addr = gen_rtx_PLUS (Pmode, frame_reg, GEN_INT (offset));
28016 mem = gen_frame_mem (GET_MODE (reg), addr);
28017 return gen_rtx_SET (store ? mem : reg, store ? reg : mem);
28020 static rtx
28021 gen_frame_load (rtx reg, rtx frame_reg, int offset)
28023 return gen_frame_set (reg, frame_reg, offset, false);
28026 static rtx
28027 gen_frame_store (rtx reg, rtx frame_reg, int offset)
28029 return gen_frame_set (reg, frame_reg, offset, true);
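/* A hedged usage sketch: saving r31 at sp+8 and reloading it later is

     emit_insn (gen_frame_store (gen_rtx_REG (Pmode, 31), sp_reg, 8));
     ...
     emit_insn (gen_frame_load (gen_rtx_REG (Pmode, 31), sp_reg, 8));

   where sp_reg is assumed to already hold the frame base.  */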
28032 /* Save a register into the frame, and emit RTX_FRAME_RELATED_P notes.
28033 Save REGNO into [FRAME_REG + OFFSET] in mode MODE. */
28035 static rtx_insn *
28036 emit_frame_save (rtx frame_reg, machine_mode mode,
28037 unsigned int regno, int offset, HOST_WIDE_INT frame_reg_to_sp)
28039 rtx reg;
28041 /* Some cases that need register indexed addressing. */
28042 gcc_checking_assert (!((TARGET_ALTIVEC_ABI && ALTIVEC_VECTOR_MODE (mode))
28043 || (TARGET_VSX && ALTIVEC_OR_VSX_VECTOR_MODE (mode))
28044 || (TARGET_E500_DOUBLE && mode == DFmode)
28045 || (TARGET_SPE_ABI
28046 && SPE_VECTOR_MODE (mode)
28047 && !SPE_CONST_OFFSET_OK (offset))));
28049 reg = gen_rtx_REG (mode, regno);
28050 rtx_insn *insn = emit_insn (gen_frame_store (reg, frame_reg, offset));
28051 return rs6000_frame_related (insn, frame_reg, frame_reg_to_sp,
28052 NULL_RTX, NULL_RTX);
28055 /* Emit an offset memory reference suitable for a frame store, while
28056 converting to a valid addressing mode. */
28058 static rtx
28059 gen_frame_mem_offset (machine_mode mode, rtx reg, int offset)
28061 rtx int_rtx, offset_rtx;
28063 int_rtx = GEN_INT (offset);
28065 if ((TARGET_SPE_ABI && SPE_VECTOR_MODE (mode) && !SPE_CONST_OFFSET_OK (offset))
28066 || (TARGET_E500_DOUBLE && mode == DFmode))
28068 offset_rtx = gen_rtx_REG (Pmode, FIXED_SCRATCH);
28069 emit_move_insn (offset_rtx, int_rtx);
28071 else
28072 offset_rtx = int_rtx;
28074 return gen_frame_mem (mode, gen_rtx_PLUS (Pmode, reg, offset_rtx));
28077 #ifndef TARGET_FIX_AND_CONTINUE
28078 #define TARGET_FIX_AND_CONTINUE 0
28079 #endif
28081 /* It's really GPR 13 or 14, FPR 14 and VR 20. We need the smallest. */
28082 #define FIRST_SAVRES_REGISTER FIRST_SAVED_GP_REGNO
28083 #define LAST_SAVRES_REGISTER 31
28084 #define N_SAVRES_REGISTERS (LAST_SAVRES_REGISTER - FIRST_SAVRES_REGISTER + 1)
28086 enum {
28087 SAVRES_LR = 0x1,
28088 SAVRES_SAVE = 0x2,
28089 SAVRES_REG = 0x0c,
28090 SAVRES_GPR = 0,
28091 SAVRES_FPR = 4,
28092 SAVRES_VR = 8
28093 };
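/* For example (illustration): (SAVRES_SAVE | SAVRES_FPR | SAVRES_LR)
   == 0x7 selects an out-of-line FPR save routine that also handles LR,
   while a plain 0 (SAVRES_GPR with no SAVRES_SAVE) selects a GPR restore
   with no LR handling.  */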
28095 static GTY(()) rtx savres_routine_syms[N_SAVRES_REGISTERS][12];
28097 /* Temporary holding space for an out-of-line register save/restore
28098 routine name. */
28099 static char savres_routine_name[30];
28101 /* Return the name for an out-of-line register save/restore routine.
28102 SEL selects save vs. restore, the register class, and LR handling. */
28104 static char *
28105 rs6000_savres_routine_name (rs6000_stack_t *info, int regno, int sel)
28107 const char *prefix = "";
28108 const char *suffix = "";
28110 /* Different targets are supposed to define
28111 {SAVE,RESTORE}_FP_{PREFIX,SUFFIX} with the idea that the needed
28112 routine name could be defined with:
28114 sprintf (name, "%s%d%s", SAVE_FP_PREFIX, regno, SAVE_FP_SUFFIX)
28116 This is a nice idea in theory, but in reality, things are
28117 complicated in several ways:
28119 - ELF targets have save/restore routines for GPRs.
28121 - SPE targets use different prefixes for 32/64-bit registers, and
28122 neither of them fits neatly in the FOO_{PREFIX,SUFFIX} regimen.
28124 - PPC64 ELF targets have routines for save/restore of GPRs that
28125 differ in what they do with the link register, so having a set
28126 prefix doesn't work. (We only use one of the save routines at
28127 the moment, though.)
28129 - PPC32 elf targets have "exit" versions of the restore routines
28130 that restore the link register and can save some extra space.
28131 These require an extra suffix. (There are also "tail" versions
28132 of the restore routines and "GOT" versions of the save routines,
28133 but we don't generate those at present. Same problems apply,
28134 though.)
28136 We deal with all this by synthesizing our own prefix/suffix and
28137 using that for the simple sprintf call shown above. */
28138 if (TARGET_SPE)
28140 /* No floating point saves on the SPE. */
28141 gcc_assert ((sel & SAVRES_REG) == SAVRES_GPR);
28143 if ((sel & SAVRES_SAVE))
28144 prefix = info->spe_64bit_regs_used ? "_save64gpr_" : "_save32gpr_";
28145 else
28146 prefix = info->spe_64bit_regs_used ? "_rest64gpr_" : "_rest32gpr_";
28148 if ((sel & SAVRES_LR))
28149 suffix = "_x";
28151 else if (DEFAULT_ABI == ABI_V4)
28153 if (TARGET_64BIT)
28154 goto aix_names;
28156 if ((sel & SAVRES_REG) == SAVRES_GPR)
28157 prefix = (sel & SAVRES_SAVE) ? "_savegpr_" : "_restgpr_";
28158 else if ((sel & SAVRES_REG) == SAVRES_FPR)
28159 prefix = (sel & SAVRES_SAVE) ? "_savefpr_" : "_restfpr_";
28160 else if ((sel & SAVRES_REG) == SAVRES_VR)
28161 prefix = (sel & SAVRES_SAVE) ? "_savevr_" : "_restvr_";
28162 else
28163 abort ();
28165 if ((sel & SAVRES_LR))
28166 suffix = "_x";
28168 else if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
28170 #if !defined (POWERPC_LINUX) && !defined (POWERPC_FREEBSD)
28171 /* No out-of-line save/restore routines for GPRs on AIX. */
28172 gcc_assert (!TARGET_AIX || (sel & SAVRES_REG) != SAVRES_GPR);
28173 #endif
28175 aix_names:
28176 if ((sel & SAVRES_REG) == SAVRES_GPR)
28177 prefix = ((sel & SAVRES_SAVE)
28178 ? ((sel & SAVRES_LR) ? "_savegpr0_" : "_savegpr1_")
28179 : ((sel & SAVRES_LR) ? "_restgpr0_" : "_restgpr1_"));
28180 else if ((sel & SAVRES_REG) == SAVRES_FPR)
28182 #if defined (POWERPC_LINUX) || defined (POWERPC_FREEBSD)
28183 if ((sel & SAVRES_LR))
28184 prefix = ((sel & SAVRES_SAVE) ? "_savefpr_" : "_restfpr_");
28185 else
28186 #endif
28188 prefix = (sel & SAVRES_SAVE) ? SAVE_FP_PREFIX : RESTORE_FP_PREFIX;
28189 suffix = (sel & SAVRES_SAVE) ? SAVE_FP_SUFFIX : RESTORE_FP_SUFFIX;
28192 else if ((sel & SAVRES_REG) == SAVRES_VR)
28193 prefix = (sel & SAVRES_SAVE) ? "_savevr_" : "_restvr_";
28194 else
28195 abort ();
28198 if (DEFAULT_ABI == ABI_DARWIN)
28200 /* The Darwin approach is (slightly) different, in order to be
28201 compatible with code generated by the system toolchain. There is a
28202 single symbol for the start of the save sequence, and the code here
28203 embeds an offset into that code on the basis of the first register
28204 to be saved. */
28205 prefix = (sel & SAVRES_SAVE) ? "save" : "rest" ;
28206 if ((sel & SAVRES_REG) == SAVRES_GPR)
28207 sprintf (savres_routine_name, "*%sGPR%s%s%.0d ; %s r%d-r31", prefix,
28208 ((sel & SAVRES_LR) ? "x" : ""), (regno == 13 ? "" : "+"),
28209 (regno - 13) * 4, prefix, regno);
28210 else if ((sel & SAVRES_REG) == SAVRES_FPR)
28211 sprintf (savres_routine_name, "*%sFP%s%.0d ; %s f%d-f31", prefix,
28212 (regno == 14 ? "" : "+"), (regno - 14) * 4, prefix, regno);
28213 else if ((sel & SAVRES_REG) == SAVRES_VR)
28214 sprintf (savres_routine_name, "*%sVEC%s%.0d ; %s v%d-v31", prefix,
28215 (regno == 20 ? "" : "+"), (regno - 20) * 8, prefix, regno);
28216 else
28217 abort ();
28219 else
28220 sprintf (savres_routine_name, "%s%d%s", prefix, regno, suffix);
28222 return savres_routine_name;
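/* Illustrative outputs (hedged, not exhaustive): on 32-bit ELF V.4,
   regno 29 with (SAVRES_SAVE | SAVRES_GPR) yields "_savegpr_29" and
   adding SAVRES_LR yields "_savegpr_29_x"; on AIX/ELFv2 a GPR save that
   also stores LR yields "_savegpr0_29".  */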
28225 /* Return an RTL SYMBOL_REF for an out-of-line register save/restore routine.
28226 SEL selects save vs. restore, the register class, and LR handling. */
28228 static rtx
28229 rs6000_savres_routine_sym (rs6000_stack_t *info, int sel)
28231 int regno = ((sel & SAVRES_REG) == SAVRES_GPR
28232 ? info->first_gp_reg_save
28233 : (sel & SAVRES_REG) == SAVRES_FPR
28234 ? info->first_fp_reg_save - 32
28235 : (sel & SAVRES_REG) == SAVRES_VR
28236 ? info->first_altivec_reg_save - FIRST_ALTIVEC_REGNO
28237 : -1);
28238 rtx sym;
28239 int select = sel;
28241 /* On the SPE, we never have any FPRs, but we do have 32/64-bit
28242 versions of the gpr routines. */
28243 if (TARGET_SPE_ABI && (sel & SAVRES_REG) == SAVRES_GPR
28244 && info->spe_64bit_regs_used)
28245 select ^= SAVRES_FPR ^ SAVRES_GPR;
28247 /* Don't generate bogus routine names. */
28248 gcc_assert (FIRST_SAVRES_REGISTER <= regno
28249 && regno <= LAST_SAVRES_REGISTER
28250 && select >= 0 && select <= 12);
28252 sym = savres_routine_syms[regno-FIRST_SAVRES_REGISTER][select];
28254 if (sym == NULL)
28256 char *name;
28258 name = rs6000_savres_routine_name (info, regno, sel);
28260 sym = savres_routine_syms[regno-FIRST_SAVRES_REGISTER][select]
28261 = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
28262 SYMBOL_REF_FLAGS (sym) |= SYMBOL_FLAG_FUNCTION;
28265 return sym;
28268 /* Emit a sequence of insns, including a stack tie if needed, for
28269 resetting the stack pointer. If UPDT_REGNO is not 1, then don't
28270 reset the stack pointer, but move the base of the frame into
28271 reg UPDT_REGNO for use by out-of-line register restore routines. */
28273 static rtx
28274 rs6000_emit_stack_reset (rs6000_stack_t *info,
28275 rtx frame_reg_rtx, HOST_WIDE_INT frame_off,
28276 unsigned updt_regno)
28278 /* If there is nothing to do, don't do anything. */
28279 if (frame_off == 0 && REGNO (frame_reg_rtx) == updt_regno)
28280 return NULL_RTX;
28282 rtx updt_reg_rtx = gen_rtx_REG (Pmode, updt_regno);
28284 /* This blockage is needed so that sched doesn't decide to move
28285 the sp change before the register restores. */
28286 if (DEFAULT_ABI == ABI_V4
28287 || (TARGET_SPE_ABI
28288 && info->spe_64bit_regs_used != 0
28289 && info->first_gp_reg_save != 32))
28290 return emit_insn (gen_stack_restore_tie (updt_reg_rtx, frame_reg_rtx,
28291 GEN_INT (frame_off)));
28293 /* If we are restoring registers out-of-line, we will be using the
28294 "exit" variants of the restore routines, which will reset the
28295 stack for us. But we do need to point updt_reg into the
28296 right place for those routines. */
28297 if (frame_off != 0)
28298 return emit_insn (gen_add3_insn (updt_reg_rtx,
28299 frame_reg_rtx, GEN_INT (frame_off)));
28300 else
28301 return emit_move_insn (updt_reg_rtx, frame_reg_rtx);
28303 return NULL_RTX;
28306 /* Return the register number used as a pointer by out-of-line
28307 save/restore functions. */
28309 static inline unsigned
28310 ptr_regno_for_savres (int sel)
28312 if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
28313 return (sel & SAVRES_REG) == SAVRES_FPR || (sel & SAVRES_LR) ? 1 : 12;
28314 return DEFAULT_ABI == ABI_DARWIN && (sel & SAVRES_REG) == SAVRES_FPR ? 1 : 11;
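/* Summarizing the choice above (illustration): AIX/ELFv2 use r1 for FPR
   routines and for any routine that touches LR, and r12 otherwise;
   Darwin uses r1 for FPR routines; the remaining V.4/Darwin cases use
   r11.  */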
28317 /* Construct a parallel rtx describing the effect of a call to an
28318 out-of-line register save/restore routine, and emit the insn
28319 or jump_insn as appropriate. */
28321 static rtx_insn *
28322 rs6000_emit_savres_rtx (rs6000_stack_t *info,
28323 rtx frame_reg_rtx, int save_area_offset, int lr_offset,
28324 machine_mode reg_mode, int sel)
28326 int i;
28327 int offset, start_reg, end_reg, n_regs, use_reg;
28328 int reg_size = GET_MODE_SIZE (reg_mode);
28329 rtx sym;
28330 rtvec p;
28331 rtx par;
28332 rtx_insn *insn;
28334 offset = 0;
28335 start_reg = ((sel & SAVRES_REG) == SAVRES_GPR
28336 ? info->first_gp_reg_save
28337 : (sel & SAVRES_REG) == SAVRES_FPR
28338 ? info->first_fp_reg_save
28339 : (sel & SAVRES_REG) == SAVRES_VR
28340 ? info->first_altivec_reg_save
28341 : -1);
28342 end_reg = ((sel & SAVRES_REG) == SAVRES_GPR
28343 ? 32
28344 : (sel & SAVRES_REG) == SAVRES_FPR
28345 ? 64
28346 : (sel & SAVRES_REG) == SAVRES_VR
28347 ? LAST_ALTIVEC_REGNO + 1
28348 : -1);
28349 n_regs = end_reg - start_reg;
28350 p = rtvec_alloc (3 + ((sel & SAVRES_LR) ? 1 : 0)
28351 + ((sel & SAVRES_REG) == SAVRES_VR ? 1 : 0)
28352 + n_regs);
28354 if (!(sel & SAVRES_SAVE) && (sel & SAVRES_LR))
28355 RTVEC_ELT (p, offset++) = ret_rtx;
28357 RTVEC_ELT (p, offset++)
28358 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (Pmode, LR_REGNO));
28360 sym = rs6000_savres_routine_sym (info, sel);
28361 RTVEC_ELT (p, offset++) = gen_rtx_USE (VOIDmode, sym);
28363 use_reg = ptr_regno_for_savres (sel);
28364 if ((sel & SAVRES_REG) == SAVRES_VR)
28366 /* Vector regs are saved/restored using [reg+reg] addressing. */
28367 RTVEC_ELT (p, offset++)
28368 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (Pmode, use_reg));
28369 RTVEC_ELT (p, offset++)
28370 = gen_rtx_USE (VOIDmode, gen_rtx_REG (Pmode, 0));
28372 else
28373 RTVEC_ELT (p, offset++)
28374 = gen_rtx_USE (VOIDmode, gen_rtx_REG (Pmode, use_reg));
28376 for (i = 0; i < end_reg - start_reg; i++)
28377 RTVEC_ELT (p, i + offset)
28378 = gen_frame_set (gen_rtx_REG (reg_mode, start_reg + i),
28379 frame_reg_rtx, save_area_offset + reg_size * i,
28380 (sel & SAVRES_SAVE) != 0);
28382 if ((sel & SAVRES_SAVE) && (sel & SAVRES_LR))
28383 RTVEC_ELT (p, i + offset)
28384 = gen_frame_store (gen_rtx_REG (Pmode, 0), frame_reg_rtx, lr_offset);
28386 par = gen_rtx_PARALLEL (VOIDmode, p);
28388 if (!(sel & SAVRES_SAVE) && (sel & SAVRES_LR))
28390 insn = emit_jump_insn (par);
28391 JUMP_LABEL (insn) = ret_rtx;
28393 else
28394 insn = emit_insn (par);
28395 return insn;
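/* Sketch of the PARALLEL built above for an AIX-style GPR save that also
   stores LR (shape only, not literal RTL):

     (parallel [(clobber (reg LR_REGNO))
		(use (symbol_ref "_savegpr0_29"))
		(use (reg 1))
		(set (mem (plus (reg 1) off+0)) (reg 29))
		(set (mem (plus (reg 1) off+8)) (reg 30))
		(set (mem (plus (reg 1) off+16)) (reg 31))
		(set (mem (plus (reg 1) lr_off)) (reg 0))])  */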
28398 /* Emit code to store into REG the CR fields that need to be saved. */
28400 static void
28401 rs6000_emit_move_from_cr (rtx reg)
28403 /* Only the ELFv2 ABI allows storing a subset of the CR fields. */
28404 if (DEFAULT_ABI == ABI_ELFv2 && TARGET_MFCRF)
28406 int i, cr_reg[8], count = 0;
28408 /* Collect CR fields that must be saved. */
28409 for (i = 0; i < 8; i++)
28410 if (save_reg_p (CR0_REGNO + i))
28411 cr_reg[count++] = i;
28413 /* If it's just a single one, use mfcrf. */
28414 if (count == 1)
28416 rtvec p = rtvec_alloc (1);
28417 rtvec r = rtvec_alloc (2);
28418 RTVEC_ELT (r, 0) = gen_rtx_REG (CCmode, CR0_REGNO + cr_reg[0]);
28419 RTVEC_ELT (r, 1) = GEN_INT (1 << (7 - cr_reg[0]));
28420 RTVEC_ELT (p, 0)
28421 = gen_rtx_SET (reg,
28422 gen_rtx_UNSPEC (SImode, r, UNSPEC_MOVESI_FROM_CR));
28424 emit_insn (gen_rtx_PARALLEL (VOIDmode, p));
28425 return;
28428 /* ??? It might be better to handle count == 2 / 3 cases here
28429 as well, using logical operations to combine the values. */
28432 emit_insn (gen_movesi_from_cr (reg));
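/* Worked example (illustrative): if CR2 is the only field to save,
   cr_reg[0] == 2 and the mask is 1 << (7 - 2) == 0x20, the FXM value
   mfcrf expects for field 2.  */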
28435 /* Return whether the split-stack arg pointer (r12) is used. */
28437 static bool
28438 split_stack_arg_pointer_used_p (void)
28440 /* If the pseudo holding the arg pointer is no longer a pseudo,
28441 then the arg pointer is used. */
28442 if (cfun->machine->split_stack_arg_pointer != NULL_RTX
28443 && (!REG_P (cfun->machine->split_stack_arg_pointer)
28444 || (REGNO (cfun->machine->split_stack_arg_pointer)
28445 < FIRST_PSEUDO_REGISTER)))
28446 return true;
28448 /* Unfortunately we also need to do some code scanning, since
28449 r12 may have been substituted for the pseudo. */
28450 rtx_insn *insn;
28451 basic_block bb = ENTRY_BLOCK_PTR_FOR_FN (cfun)->next_bb;
28452 FOR_BB_INSNS (bb, insn)
28453 if (NONDEBUG_INSN_P (insn))
28455 /* A call destroys r12. */
28456 if (CALL_P (insn))
28457 return false;
28459 df_ref use;
28460 FOR_EACH_INSN_USE (use, insn)
28462 rtx x = DF_REF_REG (use);
28463 if (REG_P (x) && REGNO (x) == 12)
28464 return true;
28466 df_ref def;
28467 FOR_EACH_INSN_DEF (def, insn)
28469 rtx x = DF_REF_REG (def);
28470 if (REG_P (x) && REGNO (x) == 12)
28471 return false;
28474 return bitmap_bit_p (DF_LR_OUT (bb), 12);
28477 /* Return whether we need to emit an ELFv2 global entry point prologue. */
28479 static bool
28480 rs6000_global_entry_point_needed_p (void)
28482 /* Only needed for the ELFv2 ABI. */
28483 if (DEFAULT_ABI != ABI_ELFv2)
28484 return false;
28486 /* With -msingle-pic-base, we assume the whole program shares the same
28487 TOC, so no global entry point prologues are needed anywhere. */
28488 if (TARGET_SINGLE_PIC_BASE)
28489 return false;
28491 /* Ensure we have a global entry point for thunks. ??? We could
28492 avoid that if the target routine doesn't need a global entry point,
28493 but we do not know whether this is the case at this point. */
28494 if (cfun->is_thunk)
28495 return true;
28497 /* For regular functions, rs6000_emit_prologue sets this flag if the
28498 routine ever uses the TOC pointer. */
28499 return cfun->machine->r2_setup_needed;
28502 /* Implement TARGET_SHRINK_WRAP_GET_SEPARATE_COMPONENTS. */
28503 static sbitmap
28504 rs6000_get_separate_components (void)
28506 rs6000_stack_t *info = rs6000_stack_info ();
28508 if (WORLD_SAVE_P (info))
28509 return NULL;
28511 if (TARGET_SPE_ABI)
28512 return NULL;
28514 sbitmap components = sbitmap_alloc (32);
28515 bitmap_clear (components);
28517 gcc_assert (!(info->savres_strategy & SAVE_MULTIPLE)
28518 && !(info->savres_strategy & REST_MULTIPLE));
28520 /* The GPRs we need saved to the frame. */
28521 if ((info->savres_strategy & SAVE_INLINE_GPRS)
28522 && (info->savres_strategy & REST_INLINE_GPRS))
28524 int reg_size = TARGET_32BIT ? 4 : 8;
28525 int offset = info->gp_save_offset;
28526 if (info->push_p)
28527 offset += info->total_size;
28529 for (unsigned regno = info->first_gp_reg_save; regno < 32; regno++)
28531 if (IN_RANGE (offset, -0x8000, 0x7fff)
28532 && rs6000_reg_live_or_pic_offset_p (regno))
28533 bitmap_set_bit (components, regno);
28535 offset += reg_size;
28539 /* Don't mess with the hard frame pointer. */
28540 if (frame_pointer_needed)
28541 bitmap_clear_bit (components, HARD_FRAME_POINTER_REGNUM);
28543 /* Don't mess with the fixed TOC register. */
28544 if ((TARGET_TOC && TARGET_MINIMAL_TOC)
28545 || (flag_pic == 1 && DEFAULT_ABI == ABI_V4)
28546 || (flag_pic && DEFAULT_ABI == ABI_DARWIN))
28547 bitmap_clear_bit (components, RS6000_PIC_OFFSET_TABLE_REGNUM);
28549 /* Optimize LR save and restore if we can. This is component 0. Any
28550 out-of-line register save/restore routines need LR. */
28551 if (info->lr_save_p
28552 && !(flag_pic && (DEFAULT_ABI == ABI_V4 || DEFAULT_ABI == ABI_DARWIN))
28553 && (info->savres_strategy & SAVE_INLINE_GPRS)
28554 && (info->savres_strategy & REST_INLINE_GPRS)
28555 && (info->savres_strategy & SAVE_INLINE_FPRS)
28556 && (info->savres_strategy & REST_INLINE_FPRS)
28557 && (info->savres_strategy & SAVE_INLINE_VRS)
28558 && (info->savres_strategy & REST_INLINE_VRS))
28560 int offset = info->lr_save_offset;
28561 if (info->push_p)
28562 offset += info->total_size;
28563 if (IN_RANGE (offset, -0x8000, 0x7fff))
28564 bitmap_set_bit (components, 0);
28567 return components;
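/* Illustrative note: the IN_RANGE (offset, -0x8000, 0x7fff) checks above
   keep every separately wrapped save/restore addressable with a single
   D-form store or load, e.g. "std 30,-16(1)" on 64-bit targets.  */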
28570 /* Implement TARGET_SHRINK_WRAP_COMPONENTS_FOR_BB. */
28571 static sbitmap
28572 rs6000_components_for_bb (basic_block bb)
28574 rs6000_stack_t *info = rs6000_stack_info ();
28576 bitmap in = DF_LIVE_IN (bb);
28577 bitmap gen = &DF_LIVE_BB_INFO (bb)->gen;
28578 bitmap kill = &DF_LIVE_BB_INFO (bb)->kill;
28580 sbitmap components = sbitmap_alloc (32);
28581 bitmap_clear (components);
28583 /* GPRs are used in a bb if they are in the IN, GEN, or KILL sets. */
28584 for (unsigned regno = info->first_gp_reg_save; regno < 32; regno++)
28585 if (bitmap_bit_p (in, regno)
28586 || bitmap_bit_p (gen, regno)
28587 || bitmap_bit_p (kill, regno))
28588 bitmap_set_bit (components, regno);
28590 /* LR needs to be saved around a bb if it is killed in that bb. */
28591 if (bitmap_bit_p (in, LR_REGNO)
28592 || bitmap_bit_p (gen, LR_REGNO)
28593 || bitmap_bit_p (kill, LR_REGNO))
28594 bitmap_set_bit (components, 0);
28596 return components;
28599 /* Implement TARGET_SHRINK_WRAP_DISQUALIFY_COMPONENTS. */
28600 static void
28601 rs6000_disqualify_components (sbitmap components, edge e,
28602 sbitmap edge_components, bool /*is_prologue*/)
28604 /* Our LR pro/epilogue code moves LR via R0, so R0 had better not be
28605 live where we want to place that code. */
28606 if (bitmap_bit_p (edge_components, 0)
28607 && bitmap_bit_p (DF_LIVE_IN (e->dest), 0))
28609 if (dump_file)
28610 fprintf (dump_file, "Disqualifying LR because GPR0 is live "
28611 "on entry to bb %d\n", e->dest->index);
28612 bitmap_clear_bit (components, 0);
28616 /* Implement TARGET_SHRINK_WRAP_EMIT_PROLOGUE_COMPONENTS. */
28617 static void
28618 rs6000_emit_prologue_components (sbitmap components)
28620 rs6000_stack_t *info = rs6000_stack_info ();
28621 rtx ptr_reg = gen_rtx_REG (Pmode, frame_pointer_needed
28622 ? HARD_FRAME_POINTER_REGNUM
28623 : STACK_POINTER_REGNUM);
28624 int reg_size = TARGET_32BIT ? 4 : 8;
28626 /* Prologue for LR. */
28627 if (bitmap_bit_p (components, 0))
28629 rtx reg = gen_rtx_REG (Pmode, 0);
28630 rtx_insn *insn = emit_move_insn (reg, gen_rtx_REG (Pmode, LR_REGNO));
28631 RTX_FRAME_RELATED_P (insn) = 1;
28632 add_reg_note (insn, REG_CFA_REGISTER, NULL);
28634 int offset = info->lr_save_offset;
28635 if (info->push_p)
28636 offset += info->total_size;
28638 insn = emit_insn (gen_frame_store (reg, ptr_reg, offset));
28639 RTX_FRAME_RELATED_P (insn) = 1;
28640 rtx lr = gen_rtx_REG (Pmode, LR_REGNO);
28641 rtx mem = copy_rtx (SET_DEST (single_set (insn)));
28642 add_reg_note (insn, REG_CFA_OFFSET, gen_rtx_SET (mem, lr));
28645 /* Prologue for the GPRs. */
28646 int offset = info->gp_save_offset;
28647 if (info->push_p)
28648 offset += info->total_size;
28650 for (int i = info->first_gp_reg_save; i < 32; i++)
28652 if (bitmap_bit_p (components, i))
28654 rtx reg = gen_rtx_REG (Pmode, i);
28655 rtx_insn *insn = emit_insn (gen_frame_store (reg, ptr_reg, offset));
28656 RTX_FRAME_RELATED_P (insn) = 1;
28657 rtx set = copy_rtx (single_set (insn));
28658 add_reg_note (insn, REG_CFA_OFFSET, set);
28661 offset += reg_size;
28665 /* Implement TARGET_SHRINK_WRAP_EMIT_EPILOGUE_COMPONENTS. */
28666 static void
28667 rs6000_emit_epilogue_components (sbitmap components)
28669 rs6000_stack_t *info = rs6000_stack_info ();
28670 rtx ptr_reg = gen_rtx_REG (Pmode, frame_pointer_needed
28671 ? HARD_FRAME_POINTER_REGNUM
28672 : STACK_POINTER_REGNUM);
28673 int reg_size = TARGET_32BIT ? 4 : 8;
28675 /* Epilogue for the GPRs. */
28676 int offset = info->gp_save_offset;
28677 if (info->push_p)
28678 offset += info->total_size;
28680 for (int i = info->first_gp_reg_save; i < 32; i++)
28682 if (bitmap_bit_p (components, i))
28684 rtx reg = gen_rtx_REG (Pmode, i);
28685 rtx_insn *insn = emit_insn (gen_frame_load (reg, ptr_reg, offset));
28686 RTX_FRAME_RELATED_P (insn) = 1;
28687 add_reg_note (insn, REG_CFA_RESTORE, reg);
28690 offset += reg_size;
28693 /* Epilogue for LR. */
28694 if (bitmap_bit_p (components, 0))
28696 int offset = info->lr_save_offset;
28697 if (info->push_p)
28698 offset += info->total_size;
28700 rtx reg = gen_rtx_REG (Pmode, 0);
28701 rtx_insn *insn = emit_insn (gen_frame_load (reg, ptr_reg, offset));
28703 rtx lr = gen_rtx_REG (Pmode, LR_REGNO);
28704 insn = emit_move_insn (lr, reg);
28705 RTX_FRAME_RELATED_P (insn) = 1;
28706 add_reg_note (insn, REG_CFA_RESTORE, lr);
28710 /* Implement TARGET_SHRINK_WRAP_SET_HANDLED_COMPONENTS. */
28711 static void
28712 rs6000_set_handled_components (sbitmap components)
28714 rs6000_stack_t *info = rs6000_stack_info ();
28716 for (int i = info->first_gp_reg_save; i < 32; i++)
28717 if (bitmap_bit_p (components, i))
28718 cfun->machine->gpr_is_wrapped_separately[i] = true;
28720 if (bitmap_bit_p (components, 0))
28721 cfun->machine->lr_is_wrapped_separately = true;
28724 /* Emit function prologue as insns. */
28726 void
28727 rs6000_emit_prologue (void)
28729 rs6000_stack_t *info = rs6000_stack_info ();
28730 machine_mode reg_mode = Pmode;
28731 int reg_size = TARGET_32BIT ? 4 : 8;
28732 rtx sp_reg_rtx = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
28733 rtx frame_reg_rtx = sp_reg_rtx;
28734 unsigned int cr_save_regno;
28735 rtx cr_save_rtx = NULL_RTX;
28736 rtx_insn *insn;
28737 int strategy;
28738 int using_static_chain_p = (cfun->static_chain_decl != NULL_TREE
28739 && df_regs_ever_live_p (STATIC_CHAIN_REGNUM)
28740 && call_used_regs[STATIC_CHAIN_REGNUM]);
28741 int using_split_stack = (flag_split_stack
28742 && (lookup_attribute ("no_split_stack",
28743 DECL_ATTRIBUTES (cfun->decl))
28744 == NULL));
28746 /* Offset to top of frame for frame_reg and sp respectively. */
28747 HOST_WIDE_INT frame_off = 0;
28748 HOST_WIDE_INT sp_off = 0;
28749 /* sp_adjust is the stack adjusting instruction, tracked so that the
28750 insn setting up the split-stack arg pointer can be emitted just
28751 prior to it, when r12 is not used here for other purposes. */
28752 rtx_insn *sp_adjust = 0;
28754 #if CHECKING_P
28755 /* Track and check usage of r0, r11, r12. */
28756 int reg_inuse = using_static_chain_p ? 1 << 11 : 0;
28757 #define START_USE(R) do \
28759 gcc_assert ((reg_inuse & (1 << (R))) == 0); \
28760 reg_inuse |= 1 << (R); \
28761 } while (0)
28762 #define END_USE(R) do \
28764 gcc_assert ((reg_inuse & (1 << (R))) != 0); \
28765 reg_inuse &= ~(1 << (R)); \
28766 } while (0)
28767 #define NOT_INUSE(R) do \
28769 gcc_assert ((reg_inuse & (1 << (R))) == 0); \
28770 } while (0)
28771 #else
28772 #define START_USE(R) do {} while (0)
28773 #define END_USE(R) do {} while (0)
28774 #define NOT_INUSE(R) do {} while (0)
28775 #endif
28777 if (DEFAULT_ABI == ABI_ELFv2
28778 && !TARGET_SINGLE_PIC_BASE)
28780 cfun->machine->r2_setup_needed = df_regs_ever_live_p (TOC_REGNUM);
28782 /* With -mminimal-toc we may generate an extra use of r2 below. */
28783 if (TARGET_TOC && TARGET_MINIMAL_TOC
28784 && !constant_pool_empty_p ())
28785 cfun->machine->r2_setup_needed = true;
28789 if (flag_stack_usage_info)
28790 current_function_static_stack_size = info->total_size;
28792 if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
28794 HOST_WIDE_INT size = info->total_size;
28796 if (crtl->is_leaf && !cfun->calls_alloca)
28798 if (size > PROBE_INTERVAL && size > STACK_CHECK_PROTECT)
28799 rs6000_emit_probe_stack_range (STACK_CHECK_PROTECT,
28800 size - STACK_CHECK_PROTECT);
28802 else if (size > 0)
28803 rs6000_emit_probe_stack_range (STACK_CHECK_PROTECT, size);
28806 if (TARGET_FIX_AND_CONTINUE)
28808 /* gdb on darwin arranges to forward a function from the old
28809 address by modifying the first 5 instructions of the function
28810 to branch to the overriding function. This is necessary to
28811 permit function pointers that point to the old function to
28812 actually forward to the new function. */
28813 emit_insn (gen_nop ());
28814 emit_insn (gen_nop ());
28815 emit_insn (gen_nop ());
28816 emit_insn (gen_nop ());
28817 emit_insn (gen_nop ());
28820 if (TARGET_SPE_ABI && info->spe_64bit_regs_used != 0)
28822 reg_mode = V2SImode;
28823 reg_size = 8;
28826 /* Handle world saves specially here. */
28827 if (WORLD_SAVE_P (info))
28829 int i, j, sz;
28830 rtx treg;
28831 rtvec p;
28832 rtx reg0;
28834 /* save_world expects lr in r0. */
28835 reg0 = gen_rtx_REG (Pmode, 0);
28836 if (info->lr_save_p)
28838 insn = emit_move_insn (reg0,
28839 gen_rtx_REG (Pmode, LR_REGNO));
28840 RTX_FRAME_RELATED_P (insn) = 1;
28843 /* The SAVE_WORLD and RESTORE_WORLD routines make a number of
28844 assumptions about the offsets of various bits of the stack
28845 frame. */
28846 gcc_assert (info->gp_save_offset == -220
28847 && info->fp_save_offset == -144
28848 && info->lr_save_offset == 8
28849 && info->cr_save_offset == 4
28850 && info->push_p
28851 && info->lr_save_p
28852 && (!crtl->calls_eh_return
28853 || info->ehrd_offset == -432)
28854 && info->vrsave_save_offset == -224
28855 && info->altivec_save_offset == -416);
28857 treg = gen_rtx_REG (SImode, 11);
28858 emit_move_insn (treg, GEN_INT (-info->total_size));
28860 /* SAVE_WORLD takes the caller's LR in R0 and the frame size
28861 in R11. It also clobbers R12, so beware! */
28863 /* Preserve CR2 for save_world prologues */
28864 sz = 5;
28865 sz += 32 - info->first_gp_reg_save;
28866 sz += 64 - info->first_fp_reg_save;
28867 sz += LAST_ALTIVEC_REGNO - info->first_altivec_reg_save + 1;
28868 p = rtvec_alloc (sz);
28869 j = 0;
28870 RTVEC_ELT (p, j++) = gen_rtx_CLOBBER (VOIDmode,
28871 gen_rtx_REG (SImode,
28872 LR_REGNO));
28873 RTVEC_ELT (p, j++) = gen_rtx_USE (VOIDmode,
28874 gen_rtx_SYMBOL_REF (Pmode,
28875 "*save_world"));
28876 /* We do floats first so that the instruction pattern matches
28877 properly. */
28878 for (i = 0; i < 64 - info->first_fp_reg_save; i++)
28879 RTVEC_ELT (p, j++)
28880 = gen_frame_store (gen_rtx_REG (TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT
28881 ? DFmode : SFmode,
28882 info->first_fp_reg_save + i),
28883 frame_reg_rtx,
28884 info->fp_save_offset + frame_off + 8 * i);
28885 for (i = 0; info->first_altivec_reg_save + i <= LAST_ALTIVEC_REGNO; i++)
28886 RTVEC_ELT (p, j++)
28887 = gen_frame_store (gen_rtx_REG (V4SImode,
28888 info->first_altivec_reg_save + i),
28889 frame_reg_rtx,
28890 info->altivec_save_offset + frame_off + 16 * i);
28891 for (i = 0; i < 32 - info->first_gp_reg_save; i++)
28892 RTVEC_ELT (p, j++)
28893 = gen_frame_store (gen_rtx_REG (reg_mode, info->first_gp_reg_save + i),
28894 frame_reg_rtx,
28895 info->gp_save_offset + frame_off + reg_size * i);
28897 /* CR register traditionally saved as CR2. */
28898 RTVEC_ELT (p, j++)
28899 = gen_frame_store (gen_rtx_REG (SImode, CR2_REGNO),
28900 frame_reg_rtx, info->cr_save_offset + frame_off);
28901 /* Explain about use of R0. */
28902 if (info->lr_save_p)
28903 RTVEC_ELT (p, j++)
28904 = gen_frame_store (reg0,
28905 frame_reg_rtx, info->lr_save_offset + frame_off);
28906 /* Explain what happens to the stack pointer. */
28908 rtx newval = gen_rtx_PLUS (Pmode, sp_reg_rtx, treg);
28909 RTVEC_ELT (p, j++) = gen_rtx_SET (sp_reg_rtx, newval);
28912 insn = emit_insn (gen_rtx_PARALLEL (VOIDmode, p));
28913 rs6000_frame_related (insn, frame_reg_rtx, sp_off - frame_off,
28914 treg, GEN_INT (-info->total_size));
28915 sp_off = frame_off = info->total_size;
28918 strategy = info->savres_strategy;
28920 /* For V.4, update stack before we do any saving and set back pointer. */
28921 if (! WORLD_SAVE_P (info)
28922 && info->push_p
28923 && (DEFAULT_ABI == ABI_V4
28924 || crtl->calls_eh_return))
28926 bool need_r11 = (TARGET_SPE
28927 ? (!(strategy & SAVE_INLINE_GPRS)
28928 && info->spe_64bit_regs_used == 0)
28929 : (!(strategy & SAVE_INLINE_FPRS)
28930 || !(strategy & SAVE_INLINE_GPRS)
28931 || !(strategy & SAVE_INLINE_VRS)));
28932 int ptr_regno = -1;
28933 rtx ptr_reg = NULL_RTX;
28934 int ptr_off = 0;
28936 if (info->total_size < 32767)
28937 frame_off = info->total_size;
28938 else if (need_r11)
28939 ptr_regno = 11;
28940 else if (info->cr_save_p
28941 || info->lr_save_p
28942 || info->first_fp_reg_save < 64
28943 || info->first_gp_reg_save < 32
28944 || info->altivec_size != 0
28945 || info->vrsave_size != 0
28946 || crtl->calls_eh_return)
28947 ptr_regno = 12;
28948 else
28950 /* The prologue won't be saving any regs so there is no need
28951 to set up a frame register to access any frame save area.
28952 We also won't be using frame_off anywhere below, but set
28953 the correct value anyway to protect against future
28954 changes to this function. */
28955 frame_off = info->total_size;
28957 if (ptr_regno != -1)
28959 /* Set up the frame offset to that needed by the first
28960 out-of-line save function. */
28961 START_USE (ptr_regno);
28962 ptr_reg = gen_rtx_REG (Pmode, ptr_regno);
28963 frame_reg_rtx = ptr_reg;
28964 if (!(strategy & SAVE_INLINE_FPRS) && info->fp_size != 0)
28965 gcc_checking_assert (info->fp_save_offset + info->fp_size == 0);
28966 else if (!(strategy & SAVE_INLINE_GPRS) && info->first_gp_reg_save < 32)
28967 ptr_off = info->gp_save_offset + info->gp_size;
28968 else if (!(strategy & SAVE_INLINE_VRS) && info->altivec_size != 0)
28969 ptr_off = info->altivec_save_offset + info->altivec_size;
28970 frame_off = -ptr_off;
28972 sp_adjust = rs6000_emit_allocate_stack (info->total_size,
28973 ptr_reg, ptr_off);
28974 if (REGNO (frame_reg_rtx) == 12)
28975 sp_adjust = 0;
28976 sp_off = info->total_size;
28977 if (frame_reg_rtx != sp_reg_rtx)
28978 rs6000_emit_stack_tie (frame_reg_rtx, false);
28981 /* If we use the link register, get it into r0. */
28982 if (!WORLD_SAVE_P (info) && info->lr_save_p
28983 && !cfun->machine->lr_is_wrapped_separately)
28985 rtx addr, reg, mem;
28987 reg = gen_rtx_REG (Pmode, 0);
28988 START_USE (0);
28989 insn = emit_move_insn (reg, gen_rtx_REG (Pmode, LR_REGNO));
28990 RTX_FRAME_RELATED_P (insn) = 1;
28992 if (!(strategy & (SAVE_NOINLINE_GPRS_SAVES_LR
28993 | SAVE_NOINLINE_FPRS_SAVES_LR)))
28995 addr = gen_rtx_PLUS (Pmode, frame_reg_rtx,
28996 GEN_INT (info->lr_save_offset + frame_off));
28997 mem = gen_rtx_MEM (Pmode, addr);
28998 /* This should not use rs6000_sr_alias_set, because of
28999 __builtin_return_address. */
29001 insn = emit_move_insn (mem, reg);
29002 rs6000_frame_related (insn, frame_reg_rtx, sp_off - frame_off,
29003 NULL_RTX, NULL_RTX);
29004 END_USE (0);
29008 /* If we need to save CR, put it into r12 or r11. Choose r12 except when
29009 r12 will be needed by the out-of-line gpr save. */
29010 cr_save_regno = ((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
29011 && !(strategy & (SAVE_INLINE_GPRS
29012 | SAVE_NOINLINE_GPRS_SAVES_LR))
29013 ? 11 : 12);
29014 if (!WORLD_SAVE_P (info)
29015 && info->cr_save_p
29016 && REGNO (frame_reg_rtx) != cr_save_regno
29017 && !(using_static_chain_p && cr_save_regno == 11)
29018 && !(using_split_stack && cr_save_regno == 12 && sp_adjust))
29020 cr_save_rtx = gen_rtx_REG (SImode, cr_save_regno);
29021 START_USE (cr_save_regno);
29022 rs6000_emit_move_from_cr (cr_save_rtx);
29025 /* Do any required saving of fpr's. If only one or two to save, do
29026 it ourselves. Otherwise, call function. */
29027 if (!WORLD_SAVE_P (info) && (strategy & SAVE_INLINE_FPRS))
29029 int i;
29030 for (i = 0; i < 64 - info->first_fp_reg_save; i++)
29031 if (save_reg_p (info->first_fp_reg_save + i))
29032 emit_frame_save (frame_reg_rtx,
29033 (TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT
29034 ? DFmode : SFmode),
29035 info->first_fp_reg_save + i,
29036 info->fp_save_offset + frame_off + 8 * i,
29037 sp_off - frame_off);
29039 else if (!WORLD_SAVE_P (info) && info->first_fp_reg_save != 64)
29041 bool lr = (strategy & SAVE_NOINLINE_FPRS_SAVES_LR) != 0;
29042 int sel = SAVRES_SAVE | SAVRES_FPR | (lr ? SAVRES_LR : 0);
29043 unsigned ptr_regno = ptr_regno_for_savres (sel);
29044 rtx ptr_reg = frame_reg_rtx;
29046 if (REGNO (frame_reg_rtx) == ptr_regno)
29047 gcc_checking_assert (frame_off == 0);
29048 else
29050 ptr_reg = gen_rtx_REG (Pmode, ptr_regno);
29051 NOT_INUSE (ptr_regno);
29052 emit_insn (gen_add3_insn (ptr_reg,
29053 frame_reg_rtx, GEN_INT (frame_off)));
29055 insn = rs6000_emit_savres_rtx (info, ptr_reg,
29056 info->fp_save_offset,
29057 info->lr_save_offset,
29058 DFmode, sel);
29059 rs6000_frame_related (insn, ptr_reg, sp_off,
29060 NULL_RTX, NULL_RTX);
29061 if (lr)
29062 END_USE (0);
29065 /* Save GPRs. This is done as a PARALLEL if we are using
29066 the store-multiple instructions. */
29067 if (!WORLD_SAVE_P (info)
29068 && TARGET_SPE_ABI
29069 && info->spe_64bit_regs_used != 0
29070 && info->first_gp_reg_save != 32)
29072 int i;
29073 rtx spe_save_area_ptr;
29074 HOST_WIDE_INT save_off;
29075 int ool_adjust = 0;
29077 /* Determine whether we can address all of the registers that need
29078 to be saved with an offset from frame_reg_rtx that fits in
29079 the small const field for SPE memory instructions. */
29080 int spe_regs_addressable
29081 = (SPE_CONST_OFFSET_OK (info->spe_gp_save_offset + frame_off
29082 + reg_size * (32 - info->first_gp_reg_save - 1))
29083 && (strategy & SAVE_INLINE_GPRS));
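/* For illustration: SPE doubleword memory insns (evldd/evstdd) encode
   a 5-bit immediate scaled by 8, so only offsets 0, 8, ..., 248 are
   directly addressable.  That is the range SPE_CONST_OFFSET_OK checks
   against the offset of the last register to be saved.  */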
29085 if (spe_regs_addressable)
29087 spe_save_area_ptr = frame_reg_rtx;
29088 save_off = frame_off;
29090 else
29092 /* Make r11 point to the start of the SPE save area. We need
29093 to be careful here if r11 is holding the static chain. If
29094 it is, then temporarily save it in r0. */
29095 HOST_WIDE_INT offset;
29097 if (!(strategy & SAVE_INLINE_GPRS))
29098 ool_adjust = 8 * (info->first_gp_reg_save - FIRST_SAVED_GP_REGNO);
29099 offset = info->spe_gp_save_offset + frame_off - ool_adjust;
29100 spe_save_area_ptr = gen_rtx_REG (Pmode, 11);
29101 save_off = frame_off - offset;
29103 if (using_static_chain_p)
29105 rtx r0 = gen_rtx_REG (Pmode, 0);
29107 START_USE (0);
29108 gcc_assert (info->first_gp_reg_save > 11);
29110 emit_move_insn (r0, spe_save_area_ptr);
29112 else if (REGNO (frame_reg_rtx) != 11)
29113 START_USE (11);
29115 emit_insn (gen_addsi3 (spe_save_area_ptr,
29116 frame_reg_rtx, GEN_INT (offset)));
29117 if (!using_static_chain_p && REGNO (frame_reg_rtx) == 11)
29118 frame_off = -info->spe_gp_save_offset + ool_adjust;
29121 if ((strategy & SAVE_INLINE_GPRS))
29123 for (i = 0; i < 32 - info->first_gp_reg_save; i++)
29124 if (rs6000_reg_live_or_pic_offset_p (info->first_gp_reg_save + i))
29125 emit_frame_save (spe_save_area_ptr, reg_mode,
29126 info->first_gp_reg_save + i,
29127 (info->spe_gp_save_offset + save_off
29128 + reg_size * i),
29129 sp_off - save_off);
29131 else
29133 insn = rs6000_emit_savres_rtx (info, spe_save_area_ptr,
29134 info->spe_gp_save_offset + save_off,
29135 0, reg_mode,
29136 SAVRES_SAVE | SAVRES_GPR);
29138 rs6000_frame_related (insn, spe_save_area_ptr, sp_off - save_off,
29139 NULL_RTX, NULL_RTX);
29142 /* Move the static chain pointer back. */
29143 if (!spe_regs_addressable)
29145 if (using_static_chain_p)
29147 emit_move_insn (spe_save_area_ptr, gen_rtx_REG (Pmode, 0));
29148 END_USE (0);
29150 else if (REGNO (frame_reg_rtx) != 11)
29151 END_USE (11);
29154 else if (!WORLD_SAVE_P (info) && !(strategy & SAVE_INLINE_GPRS))
29156 bool lr = (strategy & SAVE_NOINLINE_GPRS_SAVES_LR) != 0;
29157 int sel = SAVRES_SAVE | SAVRES_GPR | (lr ? SAVRES_LR : 0);
29158 unsigned ptr_regno = ptr_regno_for_savres (sel);
29159 rtx ptr_reg = frame_reg_rtx;
29160 bool ptr_set_up = REGNO (ptr_reg) == ptr_regno;
29161 int end_save = info->gp_save_offset + info->gp_size;
29162 int ptr_off;
29164 if (ptr_regno == 12)
29165 sp_adjust = 0;
29166 if (!ptr_set_up)
29167 ptr_reg = gen_rtx_REG (Pmode, ptr_regno);
29169 /* Need to adjust r11 (r12) if we saved any FPRs. */
29170 if (end_save + frame_off != 0)
29172 rtx offset = GEN_INT (end_save + frame_off);
29174 if (ptr_set_up)
29175 frame_off = -end_save;
29176 else
29177 NOT_INUSE (ptr_regno);
29178 emit_insn (gen_add3_insn (ptr_reg, frame_reg_rtx, offset));
29180 else if (!ptr_set_up)
29182 NOT_INUSE (ptr_regno);
29183 emit_move_insn (ptr_reg, frame_reg_rtx);
29185 ptr_off = -end_save;
29186 insn = rs6000_emit_savres_rtx (info, ptr_reg,
29187 info->gp_save_offset + ptr_off,
29188 info->lr_save_offset + ptr_off,
29189 reg_mode, sel);
29190 rs6000_frame_related (insn, ptr_reg, sp_off - ptr_off,
29191 NULL_RTX, NULL_RTX);
29192 if (lr)
29193 END_USE (0);
29195 else if (!WORLD_SAVE_P (info) && (strategy & SAVE_MULTIPLE))
29197 rtvec p;
29198 int i;
29199 p = rtvec_alloc (32 - info->first_gp_reg_save);
29200 for (i = 0; i < 32 - info->first_gp_reg_save; i++)
29201 RTVEC_ELT (p, i)
29202 = gen_frame_store (gen_rtx_REG (reg_mode, info->first_gp_reg_save + i),
29203 frame_reg_rtx,
29204 info->gp_save_offset + frame_off + reg_size * i);
29205 insn = emit_insn (gen_rtx_PARALLEL (VOIDmode, p));
29206 rs6000_frame_related (insn, frame_reg_rtx, sp_off - frame_off,
29207 NULL_RTX, NULL_RTX);
29209 else if (!WORLD_SAVE_P (info))
29211 int offset = info->gp_save_offset + frame_off;
29212 for (int i = info->first_gp_reg_save; i < 32; i++)
29214 if (rs6000_reg_live_or_pic_offset_p (i)
29215 && !cfun->machine->gpr_is_wrapped_separately[i])
29216 emit_frame_save (frame_reg_rtx, reg_mode, i, offset,
29217 sp_off - frame_off);
29219 offset += reg_size;
29223 if (crtl->calls_eh_return)
29225 unsigned int i;
29226 rtvec p;
29228 for (i = 0; ; ++i)
29230 unsigned int regno = EH_RETURN_DATA_REGNO (i);
29231 if (regno == INVALID_REGNUM)
29232 break;
29235 p = rtvec_alloc (i);
29237 for (i = 0; ; ++i)
29239 unsigned int regno = EH_RETURN_DATA_REGNO (i);
29240 if (regno == INVALID_REGNUM)
29241 break;
29243 rtx set
29244 = gen_frame_store (gen_rtx_REG (reg_mode, regno),
29245 sp_reg_rtx,
29246 info->ehrd_offset + sp_off + reg_size * (int) i);
29247 RTVEC_ELT (p, i) = set;
29248 RTX_FRAME_RELATED_P (set) = 1;
29251 insn = emit_insn (gen_blockage ());
29252 RTX_FRAME_RELATED_P (insn) = 1;
29253 add_reg_note (insn, REG_FRAME_RELATED_EXPR, gen_rtx_PARALLEL (VOIDmode, p));
29256 /* In AIX ABI we need to make sure r2 is really saved. */
29257 if (TARGET_AIX && crtl->calls_eh_return)
29259 rtx tmp_reg, tmp_reg_si, hi, lo, compare_result, toc_save_done, jump;
29260 rtx join_insn, note;
29261 rtx_insn *save_insn;
29262 long toc_restore_insn;
29264 tmp_reg = gen_rtx_REG (Pmode, 11);
29265 tmp_reg_si = gen_rtx_REG (SImode, 11);
29266 if (using_static_chain_p)
29268 START_USE (0);
29269 emit_move_insn (gen_rtx_REG (Pmode, 0), tmp_reg);
29271 else
29272 START_USE (11);
29273 emit_move_insn (tmp_reg, gen_rtx_REG (Pmode, LR_REGNO));
29274 /* Peek at instruction to which this function returns. If it's
29275 restoring r2, then we know we've already saved r2. We can't
29276 unconditionally save r2 because the value we have will already
29277 be updated if we arrived at this function via a plt call or
29278 toc adjusting stub. */
29279 emit_move_insn (tmp_reg_si, gen_rtx_MEM (SImode, tmp_reg));
29280 toc_restore_insn = ((TARGET_32BIT ? 0x80410000 : 0xE8410000)
29281 + RS6000_TOC_SAVE_SLOT);
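/* For illustration, assuming the usual TOC save slots (20 bytes for
   32-bit, 40 for 64-bit AIX/ELFv1, 24 for ELFv2), the instruction
   being matched is one of:
	lwz 2,20(1)	= 0x80410014
	ld  2,40(1)	= 0xE8410028
	ld  2,24(1)	= 0xE8410018
   The xor below cancels the high half and the compare tests the low
   half, so the beq to toc_save_done is taken exactly when the return
   address points at such a TOC restore, i.e. r2 is already saved.  */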
29282 hi = gen_int_mode (toc_restore_insn & ~0xffff, SImode);
29283 emit_insn (gen_xorsi3 (tmp_reg_si, tmp_reg_si, hi));
29284 compare_result = gen_rtx_REG (CCUNSmode, CR0_REGNO);
29285 validate_condition_mode (EQ, CCUNSmode);
29286 lo = gen_int_mode (toc_restore_insn & 0xffff, SImode);
29287 emit_insn (gen_rtx_SET (compare_result,
29288 gen_rtx_COMPARE (CCUNSmode, tmp_reg_si, lo)));
29289 toc_save_done = gen_label_rtx ();
29290 jump = gen_rtx_IF_THEN_ELSE (VOIDmode,
29291 gen_rtx_EQ (VOIDmode, compare_result,
29292 const0_rtx),
29293 gen_rtx_LABEL_REF (VOIDmode, toc_save_done),
29294 pc_rtx);
29295 jump = emit_jump_insn (gen_rtx_SET (pc_rtx, jump));
29296 JUMP_LABEL (jump) = toc_save_done;
29297 LABEL_NUSES (toc_save_done) += 1;
29299 save_insn = emit_frame_save (frame_reg_rtx, reg_mode,
29300 TOC_REGNUM, frame_off + RS6000_TOC_SAVE_SLOT,
29301 sp_off - frame_off);
29303 emit_label (toc_save_done);
29305 /* ??? If we leave SAVE_INSN marked as saving R2, then we'll
29306 have a CFG that has different saves along different paths.
29307 Move the note to a dummy blockage insn, which describes that
29308 R2 is unconditionally saved after the label. */
29309 /* ??? An alternate representation might be a special insn pattern
29310 containing both the branch and the store. That might give the
29311 code that minimizes the number of DW_CFA_advance opcodes more
29312 freedom in placing the annotations. */
29313 note = find_reg_note (save_insn, REG_FRAME_RELATED_EXPR, NULL);
29314 if (note)
29315 remove_note (save_insn, note);
29316 else
29317 note = alloc_reg_note (REG_FRAME_RELATED_EXPR,
29318 copy_rtx (PATTERN (save_insn)), NULL_RTX);
29319 RTX_FRAME_RELATED_P (save_insn) = 0;
29321 join_insn = emit_insn (gen_blockage ());
29322 REG_NOTES (join_insn) = note;
29323 RTX_FRAME_RELATED_P (join_insn) = 1;
29325 if (using_static_chain_p)
29327 emit_move_insn (tmp_reg, gen_rtx_REG (Pmode, 0));
29328 END_USE (0);
29330 else
29331 END_USE (11);
29334 /* Save CR if we use any that must be preserved. */
29335 if (!WORLD_SAVE_P (info) && info->cr_save_p)
29337 rtx addr = gen_rtx_PLUS (Pmode, frame_reg_rtx,
29338 GEN_INT (info->cr_save_offset + frame_off));
29339 rtx mem = gen_frame_mem (SImode, addr);
29341 /* If we didn't copy cr before, do so now using r0. */
29342 if (cr_save_rtx == NULL_RTX)
29344 START_USE (0);
29345 cr_save_rtx = gen_rtx_REG (SImode, 0);
29346 rs6000_emit_move_from_cr (cr_save_rtx);
29349 /* Saving CR requires a two-instruction sequence: one instruction
29350 to move the CR to a general-purpose register, and a second
29351 instruction that stores the GPR to memory.
29353 We do not emit any DWARF CFI records for the first of these,
29354 because we cannot properly represent the fact that CR is saved in
29355 a register. One reason is that we cannot express that multiple
29356 CR fields are saved; another reason is that on 64-bit, the size
29357 of the CR register in DWARF (4 bytes) differs from the size of
29358 a general-purpose register.
29360 This means if any intervening instruction were to clobber one of
29361 the call-saved CR fields, we'd have incorrect CFI. To prevent
29362 this from happening, we mark the store to memory as a use of
29363 those CR fields, which prevents any such instruction from being
29364 scheduled in between the two instructions. */
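/* A sketch of the sequence this emits (illustrative):
	mfcr rN			# move all CR fields into a GPR
	stw  rN,off(r1)		# store the GPR in the CR save slot
   The USEs built below ride along in the same PARALLEL as the
   store, which is what keeps the scheduler away.  */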
29365 rtx crsave_v[9];
29366 int n_crsave = 0;
29367 int i;
29369 crsave_v[n_crsave++] = gen_rtx_SET (mem, cr_save_rtx);
29370 for (i = 0; i < 8; i++)
29371 if (save_reg_p (CR0_REGNO + i))
29372 crsave_v[n_crsave++]
29373 = gen_rtx_USE (VOIDmode, gen_rtx_REG (CCmode, CR0_REGNO + i));
29375 insn = emit_insn (gen_rtx_PARALLEL (VOIDmode,
29376 gen_rtvec_v (n_crsave, crsave_v)));
29377 END_USE (REGNO (cr_save_rtx));
29379 /* Now, there's no way that dwarf2out_frame_debug_expr is going to
29380 understand '(unspec:SI [(reg:CC 68) ...] UNSPEC_MOVESI_FROM_CR)',
29381 so we need to construct a frame expression manually. */
29382 RTX_FRAME_RELATED_P (insn) = 1;
29384 /* Update address to be stack-pointer relative, like
29385 rs6000_frame_related would do. */
29386 addr = gen_rtx_PLUS (Pmode, gen_rtx_REG (Pmode, STACK_POINTER_REGNUM),
29387 GEN_INT (info->cr_save_offset + sp_off));
29388 mem = gen_frame_mem (SImode, addr);
29390 if (DEFAULT_ABI == ABI_ELFv2)
29392 /* In the ELFv2 ABI we generate separate CFI records for each
29393 CR field that was actually saved. They all point to the
29394 same 32-bit stack slot. */
29395 rtx crframe[8];
29396 int n_crframe = 0;
29398 for (i = 0; i < 8; i++)
29399 if (save_reg_p (CR0_REGNO + i))
29401 crframe[n_crframe]
29402 = gen_rtx_SET (mem, gen_rtx_REG (SImode, CR0_REGNO + i));
29404 RTX_FRAME_RELATED_P (crframe[n_crframe]) = 1;
29405 n_crframe++;
29408 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
29409 gen_rtx_PARALLEL (VOIDmode,
29410 gen_rtvec_v (n_crframe, crframe)));
29412 else
29414 /* In other ABIs, by convention, we use a single CR regnum to
29415 represent the fact that all call-saved CR fields are saved.
29416 We use CR2_REGNO to be compatible with gcc-2.95 on Linux. */
29417 rtx set = gen_rtx_SET (mem, gen_rtx_REG (SImode, CR2_REGNO));
29418 add_reg_note (insn, REG_FRAME_RELATED_EXPR, set);
29422 /* In the ELFv2 ABI we need to save all call-saved CR fields into
29423 *separate* slots if the routine calls __builtin_eh_return, so
29424 that they can be independently restored by the unwinder. */
29425 if (DEFAULT_ABI == ABI_ELFv2 && crtl->calls_eh_return)
29427 int i, cr_off = info->ehcr_offset;
29428 rtx crsave;
29430 /* ??? We might get better performance by using multiple mfocrf
29431 instructions. */
29432 crsave = gen_rtx_REG (SImode, 0);
29433 emit_insn (gen_movesi_from_cr (crsave));
29435 for (i = 0; i < 8; i++)
29436 if (!call_used_regs[CR0_REGNO + i])
29438 rtvec p = rtvec_alloc (2);
29439 RTVEC_ELT (p, 0)
29440 = gen_frame_store (crsave, frame_reg_rtx, cr_off + frame_off);
29441 RTVEC_ELT (p, 1)
29442 = gen_rtx_USE (VOIDmode, gen_rtx_REG (CCmode, CR0_REGNO + i));
29444 insn = emit_insn (gen_rtx_PARALLEL (VOIDmode, p));
29446 RTX_FRAME_RELATED_P (insn) = 1;
29447 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
29448 gen_frame_store (gen_rtx_REG (SImode, CR0_REGNO + i),
29449 sp_reg_rtx, cr_off + sp_off));
29451 cr_off += reg_size;
29455 /* Update stack and set back pointer unless this is V.4,
29456 for which it was done previously. */
29457 if (!WORLD_SAVE_P (info) && info->push_p
29458 && !(DEFAULT_ABI == ABI_V4 || crtl->calls_eh_return))
29460 rtx ptr_reg = NULL;
29461 int ptr_off = 0;
29463 /* If saving altivec regs we need to be able to address all save
29464 locations using a 16-bit offset. */
29465 if ((strategy & SAVE_INLINE_VRS) == 0
29466 || (info->altivec_size != 0
29467 && (info->altivec_save_offset + info->altivec_size - 16
29468 + info->total_size - frame_off) > 32767)
29469 || (info->vrsave_size != 0
29470 && (info->vrsave_save_offset
29471 + info->total_size - frame_off) > 32767))
29473 int sel = SAVRES_SAVE | SAVRES_VR;
29474 unsigned ptr_regno = ptr_regno_for_savres (sel);
29476 if (using_static_chain_p
29477 && ptr_regno == STATIC_CHAIN_REGNUM)
29478 ptr_regno = 12;
29479 if (REGNO (frame_reg_rtx) != ptr_regno)
29480 START_USE (ptr_regno);
29481 ptr_reg = gen_rtx_REG (Pmode, ptr_regno);
29482 frame_reg_rtx = ptr_reg;
29483 ptr_off = info->altivec_save_offset + info->altivec_size;
29484 frame_off = -ptr_off;
29486 else if (REGNO (frame_reg_rtx) == 1)
29487 frame_off = info->total_size;
29488 sp_adjust = rs6000_emit_allocate_stack (info->total_size,
29489 ptr_reg, ptr_off);
29490 if (REGNO (frame_reg_rtx) == 12)
29491 sp_adjust = 0;
29492 sp_off = info->total_size;
29493 if (frame_reg_rtx != sp_reg_rtx)
29494 rs6000_emit_stack_tie (frame_reg_rtx, false);
29497 /* Set frame pointer, if needed. */
29498 if (frame_pointer_needed)
29500 insn = emit_move_insn (gen_rtx_REG (Pmode, HARD_FRAME_POINTER_REGNUM),
29501 sp_reg_rtx);
29502 RTX_FRAME_RELATED_P (insn) = 1;
29505 /* Save AltiVec registers if needed. Save here because the red zone does
29506 not always include AltiVec registers. */
29507 if (!WORLD_SAVE_P (info)
29508 && info->altivec_size != 0 && (strategy & SAVE_INLINE_VRS) == 0)
29510 int end_save = info->altivec_save_offset + info->altivec_size;
29511 int ptr_off;
29512 /* Oddly, the vector save/restore functions point r0 at the end
29513 of the save area, then use r11 or r12 to load offsets for
29514 [reg+reg] addressing. */
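/* Rough shape of such an out-of-line routine (the name, scratch
   register and offsets are illustrative only):
	_savevr_20:
		li   r12,-192		# back from the end of the area
		stvx v20,r12,r0		# r0 = end of vector save area
		li   r12,-176
		stvx v21,r12,r0
		...
		blr  */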
29515 rtx ptr_reg = gen_rtx_REG (Pmode, 0);
29516 int scratch_regno = ptr_regno_for_savres (SAVRES_SAVE | SAVRES_VR);
29517 rtx scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
29519 gcc_checking_assert (scratch_regno == 11 || scratch_regno == 12);
29520 NOT_INUSE (0);
29521 if (scratch_regno == 12)
29522 sp_adjust = 0;
29523 if (end_save + frame_off != 0)
29525 rtx offset = GEN_INT (end_save + frame_off);
29527 emit_insn (gen_add3_insn (ptr_reg, frame_reg_rtx, offset));
29529 else
29530 emit_move_insn (ptr_reg, frame_reg_rtx);
29532 ptr_off = -end_save;
29533 insn = rs6000_emit_savres_rtx (info, scratch_reg,
29534 info->altivec_save_offset + ptr_off,
29535 0, V4SImode, SAVRES_SAVE | SAVRES_VR);
29536 rs6000_frame_related (insn, scratch_reg, sp_off - ptr_off,
29537 NULL_RTX, NULL_RTX);
29538 if (REGNO (frame_reg_rtx) == REGNO (scratch_reg))
29540 /* The oddity mentioned above clobbered our frame reg. */
29541 emit_move_insn (frame_reg_rtx, ptr_reg);
29542 frame_off = ptr_off;
29545 else if (!WORLD_SAVE_P (info)
29546 && info->altivec_size != 0)
29548 int i;
29550 for (i = info->first_altivec_reg_save; i <= LAST_ALTIVEC_REGNO; ++i)
29551 if (info->vrsave_mask & ALTIVEC_REG_BIT (i))
29553 rtx areg, savereg, mem;
29554 HOST_WIDE_INT offset;
29556 offset = (info->altivec_save_offset + frame_off
29557 + 16 * (i - info->first_altivec_reg_save));
29559 savereg = gen_rtx_REG (V4SImode, i);
29561 if (TARGET_P9_DFORM_VECTOR && quad_address_offset_p (offset))
29563 mem = gen_frame_mem (V4SImode,
29564 gen_rtx_PLUS (Pmode, frame_reg_rtx,
29565 GEN_INT (offset)));
29566 insn = emit_insn (gen_rtx_SET (mem, savereg));
29567 areg = NULL_RTX;
29569 else
29571 NOT_INUSE (0);
29572 areg = gen_rtx_REG (Pmode, 0);
29573 emit_move_insn (areg, GEN_INT (offset));
29575 /* AltiVec addressing mode is [reg+reg]. */
29576 mem = gen_frame_mem (V4SImode,
29577 gen_rtx_PLUS (Pmode, frame_reg_rtx, areg));
29579 /* Rather than emitting a generic move, force use of the stvx
29580 instruction, which we always want on ISA 2.07 (power8) systems.
29581 In particular we don't want xxpermdi/stxvd2x for little
29582 endian. */
29583 insn = emit_insn (gen_altivec_stvx_v4si_internal (mem, savereg));
29586 rs6000_frame_related (insn, frame_reg_rtx, sp_off - frame_off,
29587 areg, GEN_INT (offset));
29591 /* VRSAVE is a bit vector representing which AltiVec registers
29592 are used. The OS uses this to determine which vector
29593 registers to save on a context switch. We need to save
29594 VRSAVE on the stack frame, add whatever AltiVec registers we
29595 used in this function, and do the corresponding magic in the
29596 epilogue. */
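/* A sketch of the sequence emitted below (illustrative; VRSAVE is
   SPR 256):
	mfvrsave rN		# mfspr rN,256
	stw rN,off(r1)		# save the old mask in the frame
	oris/ori rN,rN,mask	# add this function's AltiVec regs
	mtvrsave rN		# mtspr 256,rN  */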
29598 if (!WORLD_SAVE_P (info)
29599 && info->vrsave_size != 0)
29601 rtx reg, vrsave;
29602 int offset;
29603 int save_regno;
29605 /* Get VRSAVE onto a GPR. Note that ABI_V4 and ABI_DARWIN might
29606 be using r12 as frame_reg_rtx and r11 as the static chain
29607 pointer for nested functions. */
29608 save_regno = 12;
29609 if ((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
29610 && !using_static_chain_p)
29611 save_regno = 11;
29612 else if (using_split_stack || REGNO (frame_reg_rtx) == 12)
29614 save_regno = 11;
29615 if (using_static_chain_p)
29616 save_regno = 0;
29619 NOT_INUSE (save_regno);
29620 reg = gen_rtx_REG (SImode, save_regno);
29621 vrsave = gen_rtx_REG (SImode, VRSAVE_REGNO);
29622 if (TARGET_MACHO)
29623 emit_insn (gen_get_vrsave_internal (reg));
29624 else
29625 emit_insn (gen_rtx_SET (reg, vrsave));
29627 /* Save VRSAVE. */
29628 offset = info->vrsave_save_offset + frame_off;
29629 insn = emit_insn (gen_frame_store (reg, frame_reg_rtx, offset));
29631 /* Include the registers in the mask. */
29632 emit_insn (gen_iorsi3 (reg, reg, GEN_INT ((int) info->vrsave_mask)));
29634 insn = emit_insn (generate_set_vrsave (reg, info, 0));
29637 /* If we are using RS6000_PIC_OFFSET_TABLE_REGNUM, we need to set it up. */
29638 if (!TARGET_SINGLE_PIC_BASE
29639 && ((TARGET_TOC && TARGET_MINIMAL_TOC
29640 && !constant_pool_empty_p ())
29641 || (DEFAULT_ABI == ABI_V4
29642 && (flag_pic == 1 || (flag_pic && TARGET_SECURE_PLT))
29643 && df_regs_ever_live_p (RS6000_PIC_OFFSET_TABLE_REGNUM))))
29645 /* If emit_load_toc_table will use the link register, we need to save
29646 it. We use R12 for this purpose because emit_load_toc_table
29647 can use register 0. This allows us to use a plain 'blr' to return
29648 from the procedure more often. */
29649 int save_LR_around_toc_setup = (TARGET_ELF
29650 && DEFAULT_ABI == ABI_V4
29651 && flag_pic
29652 && ! info->lr_save_p
29653 && EDGE_COUNT (EXIT_BLOCK_PTR_FOR_FN (cfun)->preds) > 0);
29654 if (save_LR_around_toc_setup)
29656 rtx lr = gen_rtx_REG (Pmode, LR_REGNO);
29657 rtx tmp = gen_rtx_REG (Pmode, 12);
29659 sp_adjust = 0;
29660 insn = emit_move_insn (tmp, lr);
29661 RTX_FRAME_RELATED_P (insn) = 1;
29663 rs6000_emit_load_toc_table (TRUE);
29665 insn = emit_move_insn (lr, tmp);
29666 add_reg_note (insn, REG_CFA_RESTORE, lr);
29667 RTX_FRAME_RELATED_P (insn) = 1;
29669 else
29670 rs6000_emit_load_toc_table (TRUE);
29673 #if TARGET_MACHO
29674 if (!TARGET_SINGLE_PIC_BASE
29675 && DEFAULT_ABI == ABI_DARWIN
29676 && flag_pic && crtl->uses_pic_offset_table)
29678 rtx lr = gen_rtx_REG (Pmode, LR_REGNO);
29679 rtx src = gen_rtx_SYMBOL_REF (Pmode, MACHOPIC_FUNCTION_BASE_NAME);
29681 /* Save and restore LR locally around this call (in R0). */
29682 if (!info->lr_save_p)
29683 emit_move_insn (gen_rtx_REG (Pmode, 0), lr);
29685 emit_insn (gen_load_macho_picbase (src));
29687 emit_move_insn (gen_rtx_REG (Pmode,
29688 RS6000_PIC_OFFSET_TABLE_REGNUM),
29689 lr);
29691 if (!info->lr_save_p)
29692 emit_move_insn (lr, gen_rtx_REG (Pmode, 0));
29694 #endif
29696 /* If we need to, save the TOC register after doing the stack setup.
29697 Do not emit eh frame info for this save. The unwinder wants info,
29698 conceptually attached to instructions in this function, about
29699 register values in the caller of this function. This R2 may have
29700 already been changed from the value in the caller.
29701 We don't attempt to write accurate DWARF EH frame info for R2
29702 because code emitted by gcc for a (non-pointer) function call
29703 doesn't save and restore R2. Instead, R2 is managed out-of-line
29704 by a linker generated plt call stub when the function resides in
29705 a shared library. This behavior is costly to describe in DWARF,
29706 both in terms of the size of DWARF info and the time taken in the
29707 unwinder to interpret it. R2 changes, apart from the
29708 calls_eh_return case earlier in this function, are handled by
29709 linux-unwind.h frob_update_context. */
29710 if (rs6000_save_toc_in_prologue_p ())
29712 rtx reg = gen_rtx_REG (reg_mode, TOC_REGNUM);
29713 emit_insn (gen_frame_store (reg, sp_reg_rtx, RS6000_TOC_SAVE_SLOT));
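/* For illustration, on ELFv2 this emits "std 2,24(1)"; under 64-bit
   AIX/ELFv1 the slot is at r1+40, and for 32-bit it is a stw at
   r1+20.  */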
29716 if (using_split_stack && split_stack_arg_pointer_used_p ())
29718 /* Set up the arg pointer (r12) for -fsplit-stack code. If
29719 __morestack was called, it left the arg pointer to the old
29720 stack in r29. Otherwise, the arg pointer is the top of the
29721 current frame. */
29722 cfun->machine->split_stack_argp_used = true;
29723 if (sp_adjust)
29725 rtx r12 = gen_rtx_REG (Pmode, 12);
29726 rtx set_r12 = gen_rtx_SET (r12, sp_reg_rtx);
29727 emit_insn_before (set_r12, sp_adjust);
29729 else if (frame_off != 0 || REGNO (frame_reg_rtx) != 12)
29731 rtx r12 = gen_rtx_REG (Pmode, 12);
29732 if (frame_off == 0)
29733 emit_move_insn (r12, frame_reg_rtx);
29734 else
29735 emit_insn (gen_add3_insn (r12, frame_reg_rtx, GEN_INT (frame_off)));
29737 if (info->push_p)
29739 rtx r12 = gen_rtx_REG (Pmode, 12);
29740 rtx r29 = gen_rtx_REG (Pmode, 29);
29741 rtx cr7 = gen_rtx_REG (CCUNSmode, CR7_REGNO);
29742 rtx not_more = gen_label_rtx ();
29743 rtx jump;
29745 jump = gen_rtx_IF_THEN_ELSE (VOIDmode,
29746 gen_rtx_GEU (VOIDmode, cr7, const0_rtx),
29747 gen_rtx_LABEL_REF (VOIDmode, not_more),
29748 pc_rtx);
29749 jump = emit_jump_insn (gen_rtx_SET (pc_rtx, jump));
29750 JUMP_LABEL (jump) = not_more;
29751 LABEL_NUSES (not_more) += 1;
29752 emit_move_insn (r12, r29);
29753 emit_label (not_more);
29758 /* Output .extern statements for the save/restore routines we use. */
29760 static void
29761 rs6000_output_savres_externs (FILE *file)
29763 rs6000_stack_t *info = rs6000_stack_info ();
29765 if (TARGET_DEBUG_STACK)
29766 debug_stack_info (info);
29768 /* Write .extern for any function we will call to save and restore
29769 fp values. */
29770 if (info->first_fp_reg_save < 64
29771 && !TARGET_MACHO
29772 && !TARGET_ELF)
29774 char *name;
29775 int regno = info->first_fp_reg_save - 32;
29777 if ((info->savres_strategy & SAVE_INLINE_FPRS) == 0)
29779 bool lr = (info->savres_strategy & SAVE_NOINLINE_FPRS_SAVES_LR) != 0;
29780 int sel = SAVRES_SAVE | SAVRES_FPR | (lr ? SAVRES_LR : 0);
29781 name = rs6000_savres_routine_name (info, regno, sel);
29782 fprintf (file, "\t.extern %s\n", name);
29784 if ((info->savres_strategy & REST_INLINE_FPRS) == 0)
29786 bool lr = (info->savres_strategy
29787 & REST_NOINLINE_FPRS_DOESNT_RESTORE_LR) == 0;
29788 int sel = SAVRES_FPR | (lr ? SAVRES_LR : 0);
29789 name = rs6000_savres_routine_name (info, regno, sel);
29790 fprintf (file, "\t.extern %s\n", name);
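/* The output is a pair of directives roughly like the following (the
   names shown are hypothetical; rs6000_savres_routine_name supplies
   the real ones):
	.extern _savef14
	.extern _restf14  */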
29795 /* Write function prologue. */
29797 static void
29798 rs6000_output_function_prologue (FILE *file,
29799 HOST_WIDE_INT size ATTRIBUTE_UNUSED)
29801 if (!cfun->is_thunk)
29802 rs6000_output_savres_externs (file);
29804 /* ELFv2 ABI r2 setup code and local entry point. This must follow
29805 immediately after the global entry point label. */
29806 if (rs6000_global_entry_point_needed_p ())
29808 const char *name = XSTR (XEXP (DECL_RTL (current_function_decl), 0), 0);
29810 (*targetm.asm_out.internal_label) (file, "LCF", rs6000_pic_labelno);
29812 if (TARGET_CMODEL != CMODEL_LARGE)
29814 /* In the small and medium code models, we assume the TOC is less
29815 than 2 GB away from the text section, so it can be computed via the
29816 following two-instruction sequence. */
29817 char buf[256];
29819 ASM_GENERATE_INTERNAL_LABEL (buf, "LCF", rs6000_pic_labelno);
29820 fprintf (file, "0:\taddis 2,12,.TOC.-");
29821 assemble_name (file, buf);
29822 fprintf (file, "@ha\n");
29823 fprintf (file, "\taddi 2,2,.TOC.-");
29824 assemble_name (file, buf);
29825 fprintf (file, "@l\n");
29827 else
29829 /* In the large code model, we allow arbitrary offsets between the
29830 TOC and the text section, so we have to load the offset from
29831 memory. The data field is emitted directly before the global
29832 entry point in rs6000_elf_declare_function_name. */
29833 char buf[256];
29835 #ifdef HAVE_AS_ENTRY_MARKERS
29836 /* If supported by the linker, emit a marker relocation. If the
29837 total code size of the final executable or shared library
29838 happens to fit into 2 GB after all, the linker will replace
29839 this code sequence with the sequence for the small or medium
29840 code model. */
29841 fprintf (file, "\t.reloc .,R_PPC64_ENTRY\n");
29842 #endif
29843 fprintf (file, "\tld 2,");
29844 ASM_GENERATE_INTERNAL_LABEL (buf, "LCL", rs6000_pic_labelno);
29845 assemble_name (file, buf);
29846 fprintf (file, "-");
29847 ASM_GENERATE_INTERNAL_LABEL (buf, "LCF", rs6000_pic_labelno);
29848 assemble_name (file, buf);
29849 fprintf (file, "(12)\n");
29850 fprintf (file, "\tadd 2,2,12\n");
29853 fputs ("\t.localentry\t", file);
29854 assemble_name (file, name);
29855 fputs (",.-", file);
29856 assemble_name (file, name);
29857 fputs ("\n", file);
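/* Illustrative output for the small/medium code models, assuming
   rs6000_pic_labelno yields .LCF0 and the function is named foo:
	.LCF0:
	0:	addis 2,12,.TOC.-.LCF0@ha
		addi 2,2,.TOC.-.LCF0@l
		.localentry foo,.-foo  */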
29860 /* Output -mprofile-kernel code. This needs to be done here instead of
29861 in output_function_profile since it must go after the ELFv2 ABI
29862 local entry point. */
29863 if (TARGET_PROFILE_KERNEL && crtl->profile)
29865 gcc_assert (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2);
29866 gcc_assert (!TARGET_32BIT);
29868 asm_fprintf (file, "\tmflr %s\n", reg_names[0]);
29870 /* In the ELFv2 ABI we have no compiler stack word. It must be
29871 the responsibility of _mcount to preserve the static chain
29872 register if required. */
29873 if (DEFAULT_ABI != ABI_ELFv2
29874 && cfun->static_chain_decl != NULL)
29876 asm_fprintf (file, "\tstd %s,24(%s)\n",
29877 reg_names[STATIC_CHAIN_REGNUM], reg_names[1]);
29878 fprintf (file, "\tbl %s\n", RS6000_MCOUNT);
29879 asm_fprintf (file, "\tld %s,24(%s)\n",
29880 reg_names[STATIC_CHAIN_REGNUM], reg_names[1]);
29882 else
29883 fprintf (file, "\tbl %s\n", RS6000_MCOUNT);
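/* Illustrative output with a static chain under ELFv1 (RS6000_MCOUNT
   shown here as "_mcount"; the actual name is target-specific):
	mflr 0
	std 11,24(1)
	bl _mcount
	ld 11,24(1)  */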
29886 rs6000_pic_labelno++;
29889 /* -mprofile-kernel code calls mcount before the function prolog,
29890 so a profiled leaf function should stay a leaf function. */
29891 static bool
29892 rs6000_keep_leaf_when_profiled ()
29894 return TARGET_PROFILE_KERNEL;
29897 /* Non-zero if vmx regs are restored before the frame pop, zero if
29898 we restore after the pop when possible. */
29899 #define ALWAYS_RESTORE_ALTIVEC_BEFORE_POP 0
29901 /* Restoring cr is a two step process: loading a reg from the frame
29902 save, then moving the reg to cr. For ABI_V4 we must let the
29903 unwinder know that the stack location is no longer valid at or
29904 before the stack deallocation, but we can't emit a cfa_restore for
29905 cr at the stack deallocation like we do for other registers.
29906 The trouble is that it is possible for the move to cr to be
29907 scheduled after the stack deallocation. So say exactly where cr
29908 is located on each of the two insns. */
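/* A sketch of the two insns (illustrative; the mask 0x38 corresponds
   to cr2, cr3 and cr4 being the call-saved fields):
	lwz  rN,off(r1)		# load_cr_save
	mtcrf 0x38,rN		# restore_saved_cr  */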
29910 static rtx
29911 load_cr_save (int regno, rtx frame_reg_rtx, int offset, bool exit_func)
29913 rtx mem = gen_frame_mem_offset (SImode, frame_reg_rtx, offset);
29914 rtx reg = gen_rtx_REG (SImode, regno);
29915 rtx_insn *insn = emit_move_insn (reg, mem);
29917 if (!exit_func && DEFAULT_ABI == ABI_V4)
29919 rtx cr = gen_rtx_REG (SImode, CR2_REGNO);
29920 rtx set = gen_rtx_SET (reg, cr);
29922 add_reg_note (insn, REG_CFA_REGISTER, set);
29923 RTX_FRAME_RELATED_P (insn) = 1;
29925 return reg;
29928 /* Reload CR from REG. */
29930 static void
29931 restore_saved_cr (rtx reg, int using_mfcr_multiple, bool exit_func)
29933 int count = 0;
29934 int i;
29936 if (using_mfcr_multiple)
29938 for (i = 0; i < 8; i++)
29939 if (save_reg_p (CR0_REGNO + i))
29940 count++;
29941 gcc_assert (count);
29944 if (using_mfcr_multiple && count > 1)
29946 rtx_insn *insn;
29947 rtvec p;
29948 int ndx;
29950 p = rtvec_alloc (count);
29952 ndx = 0;
29953 for (i = 0; i < 8; i++)
29954 if (save_reg_p (CR0_REGNO + i))
29956 rtvec r = rtvec_alloc (2);
29957 RTVEC_ELT (r, 0) = reg;
29958 RTVEC_ELT (r, 1) = GEN_INT (1 << (7-i));
29959 RTVEC_ELT (p, ndx) =
29960 gen_rtx_SET (gen_rtx_REG (CCmode, CR0_REGNO + i),
29961 gen_rtx_UNSPEC (CCmode, r, UNSPEC_MOVESI_TO_CR));
29962 ndx++;
29964 insn = emit_insn (gen_rtx_PARALLEL (VOIDmode, p));
29965 gcc_assert (ndx == count);
29967 /* For the ELFv2 ABI we generate a CFA_RESTORE for each
29968 CR field separately. */
29969 if (!exit_func && DEFAULT_ABI == ABI_ELFv2 && flag_shrink_wrap)
29971 for (i = 0; i < 8; i++)
29972 if (save_reg_p (CR0_REGNO + i))
29973 add_reg_note (insn, REG_CFA_RESTORE,
29974 gen_rtx_REG (SImode, CR0_REGNO + i));
29976 RTX_FRAME_RELATED_P (insn) = 1;
29979 else
29980 for (i = 0; i < 8; i++)
29981 if (save_reg_p (CR0_REGNO + i))
29983 rtx insn = emit_insn (gen_movsi_to_cr_one
29984 (gen_rtx_REG (CCmode, CR0_REGNO + i), reg));
29986 /* For the ELFv2 ABI we generate a CFA_RESTORE for each
29987 CR field separately, attached to the insn that in fact
29988 restores this particular CR field. */
29989 if (!exit_func && DEFAULT_ABI == ABI_ELFv2 && flag_shrink_wrap)
29991 add_reg_note (insn, REG_CFA_RESTORE,
29992 gen_rtx_REG (SImode, CR0_REGNO + i));
29994 RTX_FRAME_RELATED_P (insn) = 1;
29998 /* For other ABIs, we just generate a single CFA_RESTORE for CR2. */
29999 if (!exit_func && DEFAULT_ABI != ABI_ELFv2
30000 && (DEFAULT_ABI == ABI_V4 || flag_shrink_wrap))
30002 rtx_insn *insn = get_last_insn ();
30003 rtx cr = gen_rtx_REG (SImode, CR2_REGNO);
30005 add_reg_note (insn, REG_CFA_RESTORE, cr);
30006 RTX_FRAME_RELATED_P (insn) = 1;
30010 /* Like cr, the move to lr instruction can be scheduled after the
30011 stack deallocation, but unlike cr, its stack frame save is still
30012 valid. So we only need to emit the cfa_restore on the correct
30013 instruction. */
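/* For illustration (the 64-bit ABIs keep the LR save slot at r1+16;
   32-bit V.4 uses r1+4):
	ld   r0,16(r1)		# load_lr_save
	mtlr r0			# restore_saved_lr; this insn carries
				# the REG_CFA_RESTORE note  */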
30015 static void
30016 load_lr_save (int regno, rtx frame_reg_rtx, int offset)
30018 rtx mem = gen_frame_mem_offset (Pmode, frame_reg_rtx, offset);
30019 rtx reg = gen_rtx_REG (Pmode, regno);
30021 emit_move_insn (reg, mem);
30024 static void
30025 restore_saved_lr (int regno, bool exit_func)
30027 rtx reg = gen_rtx_REG (Pmode, regno);
30028 rtx lr = gen_rtx_REG (Pmode, LR_REGNO);
30029 rtx_insn *insn = emit_move_insn (lr, reg);
30031 if (!exit_func && flag_shrink_wrap)
30033 add_reg_note (insn, REG_CFA_RESTORE, lr);
30034 RTX_FRAME_RELATED_P (insn) = 1;
30038 static rtx
30039 add_crlr_cfa_restore (const rs6000_stack_t *info, rtx cfa_restores)
30041 if (DEFAULT_ABI == ABI_ELFv2)
30043 int i;
30044 for (i = 0; i < 8; i++)
30045 if (save_reg_p (CR0_REGNO + i))
30047 rtx cr = gen_rtx_REG (SImode, CR0_REGNO + i);
30048 cfa_restores = alloc_reg_note (REG_CFA_RESTORE, cr,
30049 cfa_restores);
30052 else if (info->cr_save_p)
30053 cfa_restores = alloc_reg_note (REG_CFA_RESTORE,
30054 gen_rtx_REG (SImode, CR2_REGNO),
30055 cfa_restores);
30057 if (info->lr_save_p)
30058 cfa_restores = alloc_reg_note (REG_CFA_RESTORE,
30059 gen_rtx_REG (Pmode, LR_REGNO),
30060 cfa_restores);
30061 return cfa_restores;
30064 /* Return true if OFFSET from the stack pointer can be clobbered by signals.
30065 V.4 doesn't have any stack cushion; the AIX ABIs have 220 or 288 bytes
30066 below the stack pointer that are not clobbered by signals. */
30068 static inline bool
30069 offset_below_red_zone_p (HOST_WIDE_INT offset)
30071 return offset < (DEFAULT_ABI == ABI_V4
30072 ? 0
30073 : TARGET_32BIT ? -220 : -288);
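/* For example, with the 64-bit AIX/ELF cushion of 288 bytes an offset
   of -288 is still protected while -289 may be clobbered by a signal
   handler; under V.4 every negative offset is unprotected.  */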
30076 /* Append CFA_RESTORES to any existing REG_NOTES on the last insn. */
30078 static void
30079 emit_cfa_restores (rtx cfa_restores)
30081 rtx_insn *insn = get_last_insn ();
30082 rtx *loc = &REG_NOTES (insn);
30084 while (*loc)
30085 loc = &XEXP (*loc, 1);
30086 *loc = cfa_restores;
30087 RTX_FRAME_RELATED_P (insn) = 1;
30090 /* Emit function epilogue as insns. */
30092 void
30093 rs6000_emit_epilogue (int sibcall)
30095 rs6000_stack_t *info;
30096 int restoring_GPRs_inline;
30097 int restoring_FPRs_inline;
30098 int using_load_multiple;
30099 int using_mtcr_multiple;
30100 int use_backchain_to_restore_sp;
30101 int restore_lr;
30102 int strategy;
30103 HOST_WIDE_INT frame_off = 0;
30104 rtx sp_reg_rtx = gen_rtx_REG (Pmode, 1);
30105 rtx frame_reg_rtx = sp_reg_rtx;
30106 rtx cfa_restores = NULL_RTX;
30107 rtx insn;
30108 rtx cr_save_reg = NULL_RTX;
30109 machine_mode reg_mode = Pmode;
30110 int reg_size = TARGET_32BIT ? 4 : 8;
30111 int i;
30112 bool exit_func;
30113 unsigned ptr_regno;
30115 info = rs6000_stack_info ();
30117 if (TARGET_SPE_ABI && info->spe_64bit_regs_used != 0)
30119 reg_mode = V2SImode;
30120 reg_size = 8;
30123 strategy = info->savres_strategy;
30124 using_load_multiple = strategy & REST_MULTIPLE;
30125 restoring_FPRs_inline = sibcall || (strategy & REST_INLINE_FPRS);
30126 restoring_GPRs_inline = sibcall || (strategy & REST_INLINE_GPRS);
30127 using_mtcr_multiple = (rs6000_cpu == PROCESSOR_PPC601
30128 || rs6000_cpu == PROCESSOR_PPC603
30129 || rs6000_cpu == PROCESSOR_PPC750
30130 || optimize_size);
30131 /* Restore via the backchain when we have a large frame, since this
30132 is more efficient than an addis, addi pair. The second condition
30133 here will not trigger at the moment; we don't actually need a
30134 frame pointer for alloca, but the generic parts of the compiler
30135 give us one anyway. */
30136 use_backchain_to_restore_sp = (info->total_size + (info->lr_save_p
30137 ? info->lr_save_offset
30138 : 0) > 32767
30139 || (cfun->calls_alloca
30140 && !frame_pointer_needed));
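/* For illustration: the backchain word at 0(r1) holds the previous
   stack pointer, so a large frame can be popped with a single load,
	ld 1,0(1)	# or lwz 1,0(1) on 32-bit
   instead of materializing the frame size with an addis/addi pair.  */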
30141 restore_lr = (info->lr_save_p
30142 && (restoring_FPRs_inline
30143 || (strategy & REST_NOINLINE_FPRS_DOESNT_RESTORE_LR))
30144 && (restoring_GPRs_inline
30145 || info->first_fp_reg_save < 64)
30146 && !cfun->machine->lr_is_wrapped_separately);
30149 if (WORLD_SAVE_P (info))
30151 int i, j;
30152 char rname[30];
30153 const char *alloc_rname;
30154 rtvec p;
30156 /* eh_rest_world_r10 will return to the location saved in the LR
30157 stack slot (which is not likely to be our caller.)
30158 Input: R10 -- stack adjustment. Clobbers R0, R11, R12, R7, R8.
30159 rest_world is similar, except any R10 parameter is ignored.
30160 The exception-handling stuff that was here in 2.95 is no
30161 longer necessary. */
30163 p = rtvec_alloc (9
30164 + 32 - info->first_gp_reg_save
30165 + LAST_ALTIVEC_REGNO + 1 - info->first_altivec_reg_save
30166 + 63 + 1 - info->first_fp_reg_save);
30168 strcpy (rname, ((crtl->calls_eh_return) ?
30169 "*eh_rest_world_r10" : "*rest_world"));
30170 alloc_rname = ggc_strdup (rname);
30172 j = 0;
30173 RTVEC_ELT (p, j++) = ret_rtx;
30174 RTVEC_ELT (p, j++)
30175 = gen_rtx_USE (VOIDmode, gen_rtx_SYMBOL_REF (Pmode, alloc_rname));
30176 /* The instruction pattern requires a clobber here;
30177 it is shared with the restVEC helper. */
30178 RTVEC_ELT (p, j++)
30179 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (Pmode, 11));
30182 /* CR register traditionally saved as CR2. */
30183 rtx reg = gen_rtx_REG (SImode, CR2_REGNO);
30184 RTVEC_ELT (p, j++)
30185 = gen_frame_load (reg, frame_reg_rtx, info->cr_save_offset);
30186 if (flag_shrink_wrap)
30188 cfa_restores = alloc_reg_note (REG_CFA_RESTORE,
30189 gen_rtx_REG (Pmode, LR_REGNO),
30190 cfa_restores);
30191 cfa_restores = alloc_reg_note (REG_CFA_RESTORE, reg, cfa_restores);
30195 for (i = 0; i < 32 - info->first_gp_reg_save; i++)
30197 rtx reg = gen_rtx_REG (reg_mode, info->first_gp_reg_save + i);
30198 RTVEC_ELT (p, j++)
30199 = gen_frame_load (reg,
30200 frame_reg_rtx, info->gp_save_offset + reg_size * i);
30201 if (flag_shrink_wrap)
30202 cfa_restores = alloc_reg_note (REG_CFA_RESTORE, reg, cfa_restores);
30204 for (i = 0; info->first_altivec_reg_save + i <= LAST_ALTIVEC_REGNO; i++)
30206 rtx reg = gen_rtx_REG (V4SImode, info->first_altivec_reg_save + i);
30207 RTVEC_ELT (p, j++)
30208 = gen_frame_load (reg,
30209 frame_reg_rtx, info->altivec_save_offset + 16 * i);
30210 if (flag_shrink_wrap)
30211 cfa_restores = alloc_reg_note (REG_CFA_RESTORE, reg, cfa_restores);
30213 for (i = 0; info->first_fp_reg_save + i <= 63; i++)
30215 rtx reg = gen_rtx_REG ((TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT
30216 ? DFmode : SFmode),
30217 info->first_fp_reg_save + i);
30218 RTVEC_ELT (p, j++)
30219 = gen_frame_load (reg, frame_reg_rtx, info->fp_save_offset + 8 * i);
30220 if (flag_shrink_wrap)
30221 cfa_restores = alloc_reg_note (REG_CFA_RESTORE, reg, cfa_restores);
30223 RTVEC_ELT (p, j++)
30224 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (Pmode, 0));
30225 RTVEC_ELT (p, j++)
30226 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (SImode, 12));
30227 RTVEC_ELT (p, j++)
30228 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (SImode, 7));
30229 RTVEC_ELT (p, j++)
30230 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (SImode, 8));
30231 RTVEC_ELT (p, j++)
30232 = gen_rtx_USE (VOIDmode, gen_rtx_REG (SImode, 10));
30233 insn = emit_jump_insn (gen_rtx_PARALLEL (VOIDmode, p));
30235 if (flag_shrink_wrap)
30237 REG_NOTES (insn) = cfa_restores;
30238 add_reg_note (insn, REG_CFA_DEF_CFA, sp_reg_rtx);
30239 RTX_FRAME_RELATED_P (insn) = 1;
30241 return;
30244 /* frame_reg_rtx + frame_off points to the top of this stack frame. */
30245 if (info->push_p)
30246 frame_off = info->total_size;
30248 /* Restore AltiVec registers if we must do so before adjusting the
30249 stack. */
30250 if (info->altivec_size != 0
30251 && (ALWAYS_RESTORE_ALTIVEC_BEFORE_POP
30252 || (DEFAULT_ABI != ABI_V4
30253 && offset_below_red_zone_p (info->altivec_save_offset))))
30255 int i;
30256 int scratch_regno = ptr_regno_for_savres (SAVRES_VR);
30258 gcc_checking_assert (scratch_regno == 11 || scratch_regno == 12);
30259 if (use_backchain_to_restore_sp)
30261 int frame_regno = 11;
30263 if ((strategy & REST_INLINE_VRS) == 0)
30265 /* Of r11 and r12, select the one not clobbered by an
30266 out-of-line restore function for the frame register. */
30267 frame_regno = 11 + 12 - scratch_regno;
30269 frame_reg_rtx = gen_rtx_REG (Pmode, frame_regno);
30270 emit_move_insn (frame_reg_rtx,
30271 gen_rtx_MEM (Pmode, sp_reg_rtx));
30272 frame_off = 0;
30274 else if (frame_pointer_needed)
30275 frame_reg_rtx = hard_frame_pointer_rtx;
30277 if ((strategy & REST_INLINE_VRS) == 0)
30279 int end_save = info->altivec_save_offset + info->altivec_size;
30280 int ptr_off;
30281 rtx ptr_reg = gen_rtx_REG (Pmode, 0);
30282 rtx scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
30284 if (end_save + frame_off != 0)
30286 rtx offset = GEN_INT (end_save + frame_off);
30288 emit_insn (gen_add3_insn (ptr_reg, frame_reg_rtx, offset));
30290 else
30291 emit_move_insn (ptr_reg, frame_reg_rtx);
30293 ptr_off = -end_save;
30294 insn = rs6000_emit_savres_rtx (info, scratch_reg,
30295 info->altivec_save_offset + ptr_off,
30296 0, V4SImode, SAVRES_VR);
30298 else
30300 for (i = info->first_altivec_reg_save; i <= LAST_ALTIVEC_REGNO; ++i)
30301 if (info->vrsave_mask & ALTIVEC_REG_BIT (i))
30303 rtx addr, areg, mem, insn;
30304 rtx reg = gen_rtx_REG (V4SImode, i);
30305 HOST_WIDE_INT offset
30306 = (info->altivec_save_offset + frame_off
30307 + 16 * (i - info->first_altivec_reg_save));
30309 if (TARGET_P9_DFORM_VECTOR && quad_address_offset_p (offset))
30311 mem = gen_frame_mem (V4SImode,
30312 gen_rtx_PLUS (Pmode, frame_reg_rtx,
30313 GEN_INT (offset)));
30314 insn = gen_rtx_SET (reg, mem);
30316 else
30318 areg = gen_rtx_REG (Pmode, 0);
30319 emit_move_insn (areg, GEN_INT (offset));
30321 /* AltiVec addressing mode is [reg+reg]. */
30322 addr = gen_rtx_PLUS (Pmode, frame_reg_rtx, areg);
30323 mem = gen_frame_mem (V4SImode, addr);
30325 /* Rather than emitting a generic move, force use of the
30326 lvx instruction, which we always want. In particular we
30327 don't want lxvd2x/xxpermdi for little endian. */
30328 insn = gen_altivec_lvx_v4si_internal (reg, mem);
30331 (void) emit_insn (insn);
30335 for (i = info->first_altivec_reg_save; i <= LAST_ALTIVEC_REGNO; ++i)
30336 if (((strategy & REST_INLINE_VRS) == 0
30337 || (info->vrsave_mask & ALTIVEC_REG_BIT (i)) != 0)
30338 && (flag_shrink_wrap
30339 || (offset_below_red_zone_p
30340 (info->altivec_save_offset
30341 + 16 * (i - info->first_altivec_reg_save)))))
30343 rtx reg = gen_rtx_REG (V4SImode, i);
30344 cfa_restores = alloc_reg_note (REG_CFA_RESTORE, reg, cfa_restores);
30348 /* Restore VRSAVE if we must do so before adjusting the stack. */
30349 if (info->vrsave_size != 0
30350 && (ALWAYS_RESTORE_ALTIVEC_BEFORE_POP
30351 || (DEFAULT_ABI != ABI_V4
30352 && offset_below_red_zone_p (info->vrsave_save_offset))))
30354 rtx reg;
30356 if (frame_reg_rtx == sp_reg_rtx)
30358 if (use_backchain_to_restore_sp)
30360 frame_reg_rtx = gen_rtx_REG (Pmode, 11);
30361 emit_move_insn (frame_reg_rtx,
30362 gen_rtx_MEM (Pmode, sp_reg_rtx));
30363 frame_off = 0;
30365 else if (frame_pointer_needed)
30366 frame_reg_rtx = hard_frame_pointer_rtx;
30369 reg = gen_rtx_REG (SImode, 12);
30370 emit_insn (gen_frame_load (reg, frame_reg_rtx,
30371 info->vrsave_save_offset + frame_off));
30373 emit_insn (generate_set_vrsave (reg, info, 1));
30376 insn = NULL_RTX;
30377 /* If we have a large stack frame, restore the old stack pointer
30378 using the backchain. */
30379 if (use_backchain_to_restore_sp)
30381 if (frame_reg_rtx == sp_reg_rtx)
30383 /* Under V.4, don't reset the stack pointer until after we're done
30384 loading the saved registers. */
30385 if (DEFAULT_ABI == ABI_V4)
30386 frame_reg_rtx = gen_rtx_REG (Pmode, 11);
30388 insn = emit_move_insn (frame_reg_rtx,
30389 gen_rtx_MEM (Pmode, sp_reg_rtx));
30390 frame_off = 0;
30392 else if (ALWAYS_RESTORE_ALTIVEC_BEFORE_POP
30393 && DEFAULT_ABI == ABI_V4)
30394 /* frame_reg_rtx has been set up by the altivec restore. */
30395 ;
30396 else
30398 insn = emit_move_insn (sp_reg_rtx, frame_reg_rtx);
30399 frame_reg_rtx = sp_reg_rtx;
30402 /* If we have a frame pointer, we can restore the old stack pointer
30403 from it. */
30404 else if (frame_pointer_needed)
30406 frame_reg_rtx = sp_reg_rtx;
30407 if (DEFAULT_ABI == ABI_V4)
30408 frame_reg_rtx = gen_rtx_REG (Pmode, 11);
30409 /* Prevent reordering memory accesses against stack pointer restore. */
30410 else if (cfun->calls_alloca
30411 || offset_below_red_zone_p (-info->total_size))
30412 rs6000_emit_stack_tie (frame_reg_rtx, true);
30414 insn = emit_insn (gen_add3_insn (frame_reg_rtx, hard_frame_pointer_rtx,
30415 GEN_INT (info->total_size)));
30416 frame_off = 0;
30418 else if (info->push_p
30419 && DEFAULT_ABI != ABI_V4
30420 && !crtl->calls_eh_return)
30422 /* Prevent reordering memory accesses against stack pointer restore. */
30423 if (cfun->calls_alloca
30424 || offset_below_red_zone_p (-info->total_size))
30425 rs6000_emit_stack_tie (frame_reg_rtx, false);
30426 insn = emit_insn (gen_add3_insn (sp_reg_rtx, sp_reg_rtx,
30427 GEN_INT (info->total_size)));
30428 frame_off = 0;
30430 if (insn && frame_reg_rtx == sp_reg_rtx)
30432 if (cfa_restores)
30434 REG_NOTES (insn) = cfa_restores;
30435 cfa_restores = NULL_RTX;
30437 add_reg_note (insn, REG_CFA_DEF_CFA, sp_reg_rtx);
30438 RTX_FRAME_RELATED_P (insn) = 1;
30441 /* Restore AltiVec registers if we have not done so already. */
30442 if (!ALWAYS_RESTORE_ALTIVEC_BEFORE_POP
30443 && info->altivec_size != 0
30444 && (DEFAULT_ABI == ABI_V4
30445 || !offset_below_red_zone_p (info->altivec_save_offset)))
30447 int i;
30449 if ((strategy & REST_INLINE_VRS) == 0)
30451 int end_save = info->altivec_save_offset + info->altivec_size;
30452 int ptr_off;
30453 rtx ptr_reg = gen_rtx_REG (Pmode, 0);
30454 int scratch_regno = ptr_regno_for_savres (SAVRES_VR);
30455 rtx scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
30457 if (end_save + frame_off != 0)
30459 rtx offset = GEN_INT (end_save + frame_off);
30461 emit_insn (gen_add3_insn (ptr_reg, frame_reg_rtx, offset));
30463 else
30464 emit_move_insn (ptr_reg, frame_reg_rtx);
30466 ptr_off = -end_save;
30467 insn = rs6000_emit_savres_rtx (info, scratch_reg,
30468 info->altivec_save_offset + ptr_off,
30469 0, V4SImode, SAVRES_VR);
30470 if (REGNO (frame_reg_rtx) == REGNO (scratch_reg))
30472 /* Frame reg was clobbered by out-of-line save. Restore it
30473 from ptr_reg, and if we are calling out-of-line gpr or
30474 fpr restore set up the correct pointer and offset. */
30475 unsigned newptr_regno = 1;
30476 if (!restoring_GPRs_inline)
30478 bool lr = info->gp_save_offset + info->gp_size == 0;
30479 int sel = SAVRES_GPR | (lr ? SAVRES_LR : 0);
30480 newptr_regno = ptr_regno_for_savres (sel);
30481 end_save = info->gp_save_offset + info->gp_size;
30483 else if (!restoring_FPRs_inline)
30485 bool lr = !(strategy & REST_NOINLINE_FPRS_DOESNT_RESTORE_LR);
30486 int sel = SAVRES_FPR | (lr ? SAVRES_LR : 0);
30487 newptr_regno = ptr_regno_for_savres (sel);
30488 end_save = info->fp_save_offset + info->fp_size;
30491 if (newptr_regno != 1 && REGNO (frame_reg_rtx) != newptr_regno)
30492 frame_reg_rtx = gen_rtx_REG (Pmode, newptr_regno);
30494 if (end_save + ptr_off != 0)
30496 rtx offset = GEN_INT (end_save + ptr_off);
30498 frame_off = -end_save;
30499 if (TARGET_32BIT)
30500 emit_insn (gen_addsi3_carry (frame_reg_rtx,
30501 ptr_reg, offset));
30502 else
30503 emit_insn (gen_adddi3_carry (frame_reg_rtx,
30504 ptr_reg, offset));
30506 else
30508 frame_off = ptr_off;
30509 emit_move_insn (frame_reg_rtx, ptr_reg);
30513 else
30515 for (i = info->first_altivec_reg_save; i <= LAST_ALTIVEC_REGNO; ++i)
30516 if (info->vrsave_mask & ALTIVEC_REG_BIT (i))
30518 rtx addr, areg, mem, insn;
30519 rtx reg = gen_rtx_REG (V4SImode, i);
30520 HOST_WIDE_INT offset
30521 = (info->altivec_save_offset + frame_off
30522 + 16 * (i - info->first_altivec_reg_save));
30524 if (TARGET_P9_DFORM_VECTOR && quad_address_offset_p (offset))
30526 mem = gen_frame_mem (V4SImode,
30527 gen_rtx_PLUS (Pmode, frame_reg_rtx,
30528 GEN_INT (offset)));
30529 insn = gen_rtx_SET (reg, mem);
30531 else
30533 areg = gen_rtx_REG (Pmode, 0);
30534 emit_move_insn (areg, GEN_INT (offset));
30536 /* AltiVec addressing mode is [reg+reg]. */
30537 addr = gen_rtx_PLUS (Pmode, frame_reg_rtx, areg);
30538 mem = gen_frame_mem (V4SImode, addr);
30540 /* Rather than emitting a generic move, force use of the
30541 lvx instruction, which we always want. In particular we
30542 don't want lxvd2x/xxpermdi for little endian. */
30543 insn = gen_altivec_lvx_v4si_internal (reg, mem);
30546 (void) emit_insn (insn);
30550 for (i = info->first_altivec_reg_save; i <= LAST_ALTIVEC_REGNO; ++i)
30551 if (((strategy & REST_INLINE_VRS) == 0
30552 || (info->vrsave_mask & ALTIVEC_REG_BIT (i)) != 0)
30553 && (DEFAULT_ABI == ABI_V4 || flag_shrink_wrap))
30555 rtx reg = gen_rtx_REG (V4SImode, i);
30556 cfa_restores = alloc_reg_note (REG_CFA_RESTORE, reg, cfa_restores);
30560 /* Restore VRSAVE if we have not done so already. */
30561 if (!ALWAYS_RESTORE_ALTIVEC_BEFORE_POP
30562 && info->vrsave_size != 0
30563 && (DEFAULT_ABI == ABI_V4
30564 || !offset_below_red_zone_p (info->vrsave_save_offset)))
30566 rtx reg;
30568 reg = gen_rtx_REG (SImode, 12);
30569 emit_insn (gen_frame_load (reg, frame_reg_rtx,
30570 info->vrsave_save_offset + frame_off));
30572 emit_insn (generate_set_vrsave (reg, info, 1));
30575 /* If we exit by an out-of-line restore function on ABI_V4 then that
30576 function will deallocate the stack, so we don't need to worry
30577 about the unwinder restoring cr from an invalid stack frame
30578 location. */
30579 exit_func = (!restoring_FPRs_inline
30580 || (!restoring_GPRs_inline
30581 && info->first_fp_reg_save == 64));
30583 /* In the ELFv2 ABI we need to restore all call-saved CR fields from
30584 *separate* slots if the routine calls __builtin_eh_return, so
30585 that they can be independently restored by the unwinder. */
30586 if (DEFAULT_ABI == ABI_ELFv2 && crtl->calls_eh_return)
30588 int i, cr_off = info->ehcr_offset;
30590 for (i = 0; i < 8; i++)
30591 if (!call_used_regs[CR0_REGNO + i])
30593 rtx reg = gen_rtx_REG (SImode, 0);
30594 emit_insn (gen_frame_load (reg, frame_reg_rtx,
30595 cr_off + frame_off));
30597 insn = emit_insn (gen_movsi_to_cr_one
30598 (gen_rtx_REG (CCmode, CR0_REGNO + i), reg));
30600 if (!exit_func && flag_shrink_wrap)
30602 add_reg_note (insn, REG_CFA_RESTORE,
30603 gen_rtx_REG (SImode, CR0_REGNO + i));
30605 RTX_FRAME_RELATED_P (insn) = 1;
30608 cr_off += reg_size;
30612 /* Get the old lr if we saved it. If we are restoring registers
30613 out-of-line, then the out-of-line routines can do this for us. */
30614 if (restore_lr && restoring_GPRs_inline)
30615 load_lr_save (0, frame_reg_rtx, info->lr_save_offset + frame_off);
30617 /* Get the old cr if we saved it. */
30618 if (info->cr_save_p)
30620 unsigned cr_save_regno = 12;
30622 if (!restoring_GPRs_inline)
30624 /* Ensure we don't use the register used by the out-of-line
30625 gpr register restore below. */
30626 bool lr = info->gp_save_offset + info->gp_size == 0;
30627 int sel = SAVRES_GPR | (lr ? SAVRES_LR : 0);
30628 int gpr_ptr_regno = ptr_regno_for_savres (sel);
30630 if (gpr_ptr_regno == 12)
30631 cr_save_regno = 11;
30632 gcc_checking_assert (REGNO (frame_reg_rtx) != cr_save_regno);
30634 else if (REGNO (frame_reg_rtx) == 12)
30635 cr_save_regno = 11;
30637 cr_save_reg = load_cr_save (cr_save_regno, frame_reg_rtx,
30638 info->cr_save_offset + frame_off,
30639 exit_func);
30642 /* Set LR here to try to overlap restores below. */
30643 if (restore_lr && restoring_GPRs_inline)
30644 restore_saved_lr (0, exit_func);
30646 /* Load exception handler data registers, if needed. */
30647 if (crtl->calls_eh_return)
30649 unsigned int i, regno;
30651 if (TARGET_AIX)
30653 rtx reg = gen_rtx_REG (reg_mode, 2);
30654 emit_insn (gen_frame_load (reg, frame_reg_rtx,
30655 frame_off + RS6000_TOC_SAVE_SLOT));
30658 for (i = 0; ; ++i)
30660 rtx mem;
30662 regno = EH_RETURN_DATA_REGNO (i);
30663 if (regno == INVALID_REGNUM)
30664 break;
30666 /* Note: possible use of r0 here to address SPE regs. */
30667 mem = gen_frame_mem_offset (reg_mode, frame_reg_rtx,
30668 info->ehrd_offset + frame_off
30669 + reg_size * (int) i);
30671 emit_move_insn (gen_rtx_REG (reg_mode, regno), mem);
30675 /* Restore GPRs. This is done as a PARALLEL if we are using
30676 the load-multiple instructions. */
30677 if (TARGET_SPE_ABI
30678 && info->spe_64bit_regs_used
30679 && info->first_gp_reg_save != 32)
30681 /* Determine whether we can address all of the registers that need
30682 to be saved with an offset from frame_reg_rtx that fits in
30683 the small const field for SPE memory instructions. */
30684 int spe_regs_addressable
30685 = (SPE_CONST_OFFSET_OK (info->spe_gp_save_offset + frame_off
30686 + reg_size * (32 - info->first_gp_reg_save - 1))
30687 && restoring_GPRs_inline);
30689 if (!spe_regs_addressable)
30691 int ool_adjust = 0;
30692 rtx old_frame_reg_rtx = frame_reg_rtx;
30693 /* Make r11 point to the start of the SPE save area. We worried about
30694 not clobbering it when we were saving registers in the prologue.
30695 There's no need to worry here because the static chain is passed
30696 anew to every function. */
30698 if (!restoring_GPRs_inline)
30699 ool_adjust = 8 * (info->first_gp_reg_save - FIRST_SAVED_GP_REGNO);
30700 frame_reg_rtx = gen_rtx_REG (Pmode, 11);
30701 emit_insn (gen_addsi3 (frame_reg_rtx, old_frame_reg_rtx,
30702 GEN_INT (info->spe_gp_save_offset
30703 + frame_off
30704 - ool_adjust)));
30705 /* Keep the invariant that frame_reg_rtx + frame_off points
30706 at the top of the stack frame. */
30707 frame_off = -info->spe_gp_save_offset + ool_adjust;
30710 if (restoring_GPRs_inline)
30712 HOST_WIDE_INT spe_offset = info->spe_gp_save_offset + frame_off;
30714 for (i = 0; i < 32 - info->first_gp_reg_save; i++)
30715 if (rs6000_reg_live_or_pic_offset_p (info->first_gp_reg_save + i))
30717 rtx offset, addr, mem, reg;
30719 /* We're doing all this to ensure that the immediate offset
30720 fits into the immediate field of 'evldd'. */
30721 gcc_assert (SPE_CONST_OFFSET_OK (spe_offset + reg_size * i));
30723 offset = GEN_INT (spe_offset + reg_size * i);
30724 addr = gen_rtx_PLUS (Pmode, frame_reg_rtx, offset);
30725 mem = gen_rtx_MEM (V2SImode, addr);
30726 reg = gen_rtx_REG (reg_mode, info->first_gp_reg_save + i);
30728 emit_move_insn (reg, mem);
30731 else
30732 rs6000_emit_savres_rtx (info, frame_reg_rtx,
30733 info->spe_gp_save_offset + frame_off,
30734 info->lr_save_offset + frame_off,
30735 reg_mode,
30736 SAVRES_GPR | SAVRES_LR);
30738 else if (!restoring_GPRs_inline)
30740 /* We are jumping to an out-of-line function. */
30741 rtx ptr_reg;
30742 int end_save = info->gp_save_offset + info->gp_size;
30743 bool can_use_exit = end_save == 0;
30744 int sel = SAVRES_GPR | (can_use_exit ? SAVRES_LR : 0);
30745 int ptr_off;
30747 /* Emit stack reset code if we need it. */
30748 ptr_regno = ptr_regno_for_savres (sel);
30749 ptr_reg = gen_rtx_REG (Pmode, ptr_regno);
30750 if (can_use_exit)
30751 rs6000_emit_stack_reset (info, frame_reg_rtx, frame_off, ptr_regno);
30752 else if (end_save + frame_off != 0)
30753 emit_insn (gen_add3_insn (ptr_reg, frame_reg_rtx,
30754 GEN_INT (end_save + frame_off)));
30755 else if (REGNO (frame_reg_rtx) != ptr_regno)
30756 emit_move_insn (ptr_reg, frame_reg_rtx);
30757 if (REGNO (frame_reg_rtx) == ptr_regno)
30758 frame_off = -end_save;
30760 if (can_use_exit && info->cr_save_p)
30761 restore_saved_cr (cr_save_reg, using_mtcr_multiple, true);
30763 ptr_off = -end_save;
30764 rs6000_emit_savres_rtx (info, ptr_reg,
30765 info->gp_save_offset + ptr_off,
30766 info->lr_save_offset + ptr_off,
30767 reg_mode, sel);
30769 else if (using_load_multiple)
30771 rtvec p;
30772 p = rtvec_alloc (32 - info->first_gp_reg_save);
30773 for (i = 0; i < 32 - info->first_gp_reg_save; i++)
30774 RTVEC_ELT (p, i)
30775 = gen_frame_load (gen_rtx_REG (reg_mode, info->first_gp_reg_save + i),
30776 frame_reg_rtx,
30777 info->gp_save_offset + frame_off + reg_size * i);
30778 emit_insn (gen_rtx_PARALLEL (VOIDmode, p));
30780 else
30782 int offset = info->gp_save_offset + frame_off;
30783 for (i = info->first_gp_reg_save; i < 32; i++)
30785 if (rs6000_reg_live_or_pic_offset_p (i)
30786 && !cfun->machine->gpr_is_wrapped_separately[i])
30788 rtx reg = gen_rtx_REG (reg_mode, i);
30789 emit_insn (gen_frame_load (reg, frame_reg_rtx, offset));
30792 offset += reg_size;
30796 if (DEFAULT_ABI == ABI_V4 || flag_shrink_wrap)
30798 /* If the frame pointer was used then we can't delay emitting
30799 a REG_CFA_DEF_CFA note. This must happen on the insn that
30800 restores the frame pointer, r31. We may have already emitted
30801 a REG_CFA_DEF_CFA note, but that's OK; a duplicate is
30802 discarded by dwarf2cfi.c/dwarf2out.c, and in any case would
30803 be harmless if emitted. */
30804 if (frame_pointer_needed)
30806 insn = get_last_insn ();
30807 add_reg_note (insn, REG_CFA_DEF_CFA,
30808 plus_constant (Pmode, frame_reg_rtx, frame_off));
30809 RTX_FRAME_RELATED_P (insn) = 1;
30812 /* Set up cfa_restores. We always need these when
30813 shrink-wrapping. If not shrink-wrapping then we only need
30814 the cfa_restore when the stack location is no longer valid.
30815 The cfa_restores must be emitted on or before the insn that
30816 invalidates the stack, and of course must not be emitted
30817 before the insn that actually does the restore. The latter
30818 is why it is a bad idea to emit the cfa_restores as a group
30819 on the last instruction here that actually does a restore:
30820 That insn may be reordered with respect to others doing
30821 restores. */
30822 if (flag_shrink_wrap
30823 && !restoring_GPRs_inline
30824 && info->first_fp_reg_save == 64)
30825 cfa_restores = add_crlr_cfa_restore (info, cfa_restores);
30827 for (i = info->first_gp_reg_save; i < 32; i++)
30828 if (!restoring_GPRs_inline
30829 || using_load_multiple
30830 || rs6000_reg_live_or_pic_offset_p (i))
30832 if (cfun->machine->gpr_is_wrapped_separately[i])
30833 continue;
30835 rtx reg = gen_rtx_REG (reg_mode, i);
30836 cfa_restores = alloc_reg_note (REG_CFA_RESTORE, reg, cfa_restores);
30840 if (!restoring_GPRs_inline
30841 && info->first_fp_reg_save == 64)
30843 /* We are jumping to an out-of-line function. */
30844 if (cfa_restores)
30845 emit_cfa_restores (cfa_restores);
30846 return;
30849 if (restore_lr && !restoring_GPRs_inline)
30851 load_lr_save (0, frame_reg_rtx, info->lr_save_offset + frame_off);
30852 restore_saved_lr (0, exit_func);
30855 /* Restore fpr's if we need to do it without calling a function. */
30856 if (restoring_FPRs_inline)
30857 for (i = 0; i < 64 - info->first_fp_reg_save; i++)
30858 if (save_reg_p (info->first_fp_reg_save + i))
30860 rtx reg = gen_rtx_REG ((TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT
30861 ? DFmode : SFmode),
30862 info->first_fp_reg_save + i);
30863 emit_insn (gen_frame_load (reg, frame_reg_rtx,
30864 info->fp_save_offset + frame_off + 8 * i));
30865 if (DEFAULT_ABI == ABI_V4 || flag_shrink_wrap)
30866 cfa_restores = alloc_reg_note (REG_CFA_RESTORE, reg, cfa_restores);
30869 /* If we saved cr, restore it here. Just those that were used. */
30870 if (info->cr_save_p)
30871 restore_saved_cr (cr_save_reg, using_mtcr_multiple, exit_func);
30873 /* If this is V.4, unwind the stack pointer after all of the loads
30874 have been done, or set up r11 if we are restoring fp out of line. */
30875 ptr_regno = 1;
30876 if (!restoring_FPRs_inline)
30878 bool lr = (strategy & REST_NOINLINE_FPRS_DOESNT_RESTORE_LR) == 0;
30879 int sel = SAVRES_FPR | (lr ? SAVRES_LR : 0);
30880 ptr_regno = ptr_regno_for_savres (sel);
30883 insn = rs6000_emit_stack_reset (info, frame_reg_rtx, frame_off, ptr_regno);
30884 if (REGNO (frame_reg_rtx) == ptr_regno)
30885 frame_off = 0;
30887 if (insn && restoring_FPRs_inline)
30889 if (cfa_restores)
30891 REG_NOTES (insn) = cfa_restores;
30892 cfa_restores = NULL_RTX;
30894 add_reg_note (insn, REG_CFA_DEF_CFA, sp_reg_rtx);
30895 RTX_FRAME_RELATED_P (insn) = 1;
30898 if (crtl->calls_eh_return)
30900 rtx sa = EH_RETURN_STACKADJ_RTX;
30901 emit_insn (gen_add3_insn (sp_reg_rtx, sp_reg_rtx, sa));
30904 if (!sibcall && restoring_FPRs_inline)
30906 if (cfa_restores)
30908 /* We can't hang the cfa_restores off a simple return,
30909 since the shrink-wrap code sometimes uses an existing
30910 return. This means there might be a path from
30911 pre-prologue code to this return, and dwarf2cfi code
30912 wants the eh_frame unwinder state to be the same on
30913 all paths to any point. So we need to emit the
30914 cfa_restores before the return. For -m64 we really
30915 don't need epilogue cfa_restores at all, except for
30916 this irritating dwarf2cfi-with-shrink-wrap
30917 requirement; the stack red-zone means eh_frame info
30918 from the prologue telling the unwinder to restore
30919 from the stack is perfectly good right to the end of
30920 the function. */
30921 emit_insn (gen_blockage ());
30922 emit_cfa_restores (cfa_restores);
30923 cfa_restores = NULL_RTX;
30926 emit_jump_insn (targetm.gen_simple_return ());
30929 if (!sibcall && !restoring_FPRs_inline)
30931 bool lr = (strategy & REST_NOINLINE_FPRS_DOESNT_RESTORE_LR) == 0;
30932 rtvec p = rtvec_alloc (3 + !!lr + 64 - info->first_fp_reg_save);
30933 int elt = 0;
30934 RTVEC_ELT (p, elt++) = ret_rtx;
30935 if (lr)
30936 RTVEC_ELT (p, elt++)
30937 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (Pmode, LR_REGNO));
30939 /* We have to restore more than two FP registers, so branch to the
30940 restore function. It will return to our caller. */
30941 int i;
30942 int reg;
30943 rtx sym;
30945 if (flag_shrink_wrap)
30946 cfa_restores = add_crlr_cfa_restore (info, cfa_restores);
30948 sym = rs6000_savres_routine_sym (info, SAVRES_FPR | (lr ? SAVRES_LR : 0));
30949 RTVEC_ELT (p, elt++) = gen_rtx_USE (VOIDmode, sym);
30950 reg = (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2) ? 1 : 11;
30951 RTVEC_ELT (p, elt++) = gen_rtx_USE (VOIDmode, gen_rtx_REG (Pmode, reg));
30953 for (i = 0; i < 64 - info->first_fp_reg_save; i++)
30955 rtx reg = gen_rtx_REG (DFmode, info->first_fp_reg_save + i);
30957 RTVEC_ELT (p, elt++)
30958 = gen_frame_load (reg, sp_reg_rtx, info->fp_save_offset + 8 * i);
30959 if (flag_shrink_wrap)
30960 cfa_restores = alloc_reg_note (REG_CFA_RESTORE, reg, cfa_restores);
30963 emit_jump_insn (gen_rtx_PARALLEL (VOIDmode, p));
30966 if (cfa_restores)
30968 if (sibcall)
30969 /* Ensure the cfa_restores are hung off an insn that won't
30970 be reordered above other restores. */
30971 emit_insn (gen_blockage ());
30973 emit_cfa_restores (cfa_restores);
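/* Illustrative only: when using_load_multiple was taken above, the
   PARALLEL typically assembles to a single load-multiple on 32-bit
   targets, e.g. the hypothetical

	lmw 25,-28(1)

   which reloads r25..r31 from seven consecutive stack words in one
   instruction.  */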
30977 /* Write function epilogue. */
30979 static void
30980 rs6000_output_function_epilogue (FILE *file,
30981 HOST_WIDE_INT size ATTRIBUTE_UNUSED)
30983 #if TARGET_MACHO
30984 macho_branch_islands ();
30987 rtx_insn *insn = get_last_insn ();
30988 rtx_insn *deleted_debug_label = NULL;
30990 /* Mach-O doesn't support labels at the end of objects, so if
30991 it looks like we might want one, take special action.
30993 First, collect any sequence of deleted debug labels. */
30994 while (insn
30995 && NOTE_P (insn)
30996 && NOTE_KIND (insn) != NOTE_INSN_DELETED_LABEL)
30998 /* Don't insert a nop for NOTE_INSN_DELETED_DEBUG_LABEL
30999 notes; instead set their CODE_LABEL_NUMBER to -1,
31000 otherwise there would be code generation differences
31001 between -g and -g0. */
31002 if (NOTE_P (insn) && NOTE_KIND (insn) == NOTE_INSN_DELETED_DEBUG_LABEL)
31003 deleted_debug_label = insn;
31004 insn = PREV_INSN (insn);
31007 /* Second, if we have:
31008 label:
31009 barrier
31010 then this needs to be detected, so skip past the barrier. */
31012 if (insn && BARRIER_P (insn))
31013 insn = PREV_INSN (insn);
31015 /* Up to now we've only seen notes or barriers. */
31016 if (insn)
31018 if (LABEL_P (insn)
31019 || (NOTE_P (insn)
31020 && NOTE_KIND (insn) == NOTE_INSN_DELETED_LABEL))
31021 /* Trailing label: <barrier>. */
31022 fputs ("\tnop\n", file);
31023 else
31025 /* Lastly, see if we have a completely empty function body. */
31026 while (insn && ! INSN_P (insn))
31027 insn = PREV_INSN (insn);
31028 /* If we don't find any insns, we've got an empty function body;
31029 i.e. completely empty, without a return or branch. This is
31030 taken as the case where a function body has been removed
31031 because it contains an inline __builtin_unreachable(). GCC
31032 states that reaching __builtin_unreachable() means UB, so we're
31033 not obliged to do anything special; however, we want
31034 non-zero-sized function bodies. To meet this, and help the
31035 user out, let's trap the case. */
31036 if (insn == NULL)
31037 fputs ("\ttrap\n", file);
31040 else if (deleted_debug_label)
31041 for (insn = deleted_debug_label; insn; insn = NEXT_INSN (insn))
31042 if (NOTE_KIND (insn) == NOTE_INSN_DELETED_DEBUG_LABEL)
31043 CODE_LABEL_NUMBER (insn) = -1;
31045 #endif
31047 /* Output a traceback table here. See /usr/include/sys/debug.h for info
31048 on its format.
31050 We don't output a traceback table if -finhibit-size-directive was
31051 used. The documentation for -finhibit-size-directive reads
31052 ``don't output a @code{.size} assembler directive, or anything
31053 else that would cause trouble if the function is split in the
31054 middle, and the two halves are placed at locations far apart in
31055 memory.'' The traceback table has this property, since it
31056 includes the offset from the start of the function to the
31057 traceback table itself.
31059 System V.4 PowerPC (and the embedded ABI derived from it) uses a
31060 different traceback table. */
31061 if ((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
31062 && ! flag_inhibit_size_directive
31063 && rs6000_traceback != traceback_none && !cfun->is_thunk)
31065 const char *fname = NULL;
31066 const char *language_string = lang_hooks.name;
31067 int fixed_parms = 0, float_parms = 0, parm_info = 0;
31068 int i;
31069 int optional_tbtab;
31070 rs6000_stack_t *info = rs6000_stack_info ();
31072 if (rs6000_traceback == traceback_full)
31073 optional_tbtab = 1;
31074 else if (rs6000_traceback == traceback_part)
31075 optional_tbtab = 0;
31076 else
31077 optional_tbtab = !optimize_size && !TARGET_ELF;
31079 if (optional_tbtab)
31081 fname = XSTR (XEXP (DECL_RTL (current_function_decl), 0), 0);
31082 while (*fname == '.') /* V.4 encodes . in the name */
31083 fname++;
31085 /* Need label immediately before tbtab, so we can compute
31086 its offset from the function start. */
31087 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "LT");
31088 ASM_OUTPUT_LABEL (file, fname);
31091 /* The .tbtab pseudo-op can only be used for the first eight
31092 expressions, since it can't handle the possibly variable
31093 length fields that follow. However, if you omit the optional
31094 fields, the assembler outputs zeros for all optional fields
31095 anyway, giving each variable length field its minimum length
31096 (as defined in sys/debug.h). Thus we cannot use the .tbtab
31097 pseudo-op at all. */
31099 /* An all-zero word flags the start of the tbtab, for debuggers
31100 that have to find it by searching forward from the entry
31101 point or from the current pc. */
31102 fputs ("\t.long 0\n", file);
31104 /* Tbtab format type. Use format type 0. */
31105 fputs ("\t.byte 0,", file);
31107 /* Language type. Unfortunately, there does not seem to be any
31108 official way to discover the language being compiled, so we
31109 use language_string.
31110 C is 0. Fortran is 1. Pascal is 2. Ada is 3. C++ is 9.
31111 Java is 13. Objective-C is 14. Objective-C++ isn't assigned
31112 a number, so for now use 9. LTO, Go and JIT aren't assigned numbers
31113 either, so for now use 0. */
31114 if (lang_GNU_C ()
31115 || ! strcmp (language_string, "GNU GIMPLE")
31116 || ! strcmp (language_string, "GNU Go")
31117 || ! strcmp (language_string, "libgccjit"))
31118 i = 0;
31119 else if (! strcmp (language_string, "GNU F77")
31120 || lang_GNU_Fortran ())
31121 i = 1;
31122 else if (! strcmp (language_string, "GNU Pascal"))
31123 i = 2;
31124 else if (! strcmp (language_string, "GNU Ada"))
31125 i = 3;
31126 else if (lang_GNU_CXX ()
31127 || ! strcmp (language_string, "GNU Objective-C++"))
31128 i = 9;
31129 else if (! strcmp (language_string, "GNU Java"))
31130 i = 13;
31131 else if (! strcmp (language_string, "GNU Objective-C"))
31132 i = 14;
31133 else
31134 gcc_unreachable ();
31135 fprintf (file, "%d,", i);
31137 /* 8 single bit fields: global linkage (not set for C extern linkage,
31138 apparently a PL/I convention?), out-of-line epilogue/prologue, offset
31139 from start of procedure stored in tbtab, internal function, function
31140 has controlled storage, function has no toc, function uses fp,
31141 function logs/aborts fp operations. */
31142 /* Assume that fp operations are used if any fp reg must be saved. */
31143 fprintf (file, "%d,",
31144 (optional_tbtab << 5) | ((info->first_fp_reg_save != 64) << 1));
31146 /* 6 bitfields: function is interrupt handler, name present in
31147 proc table, function calls alloca, on condition directives
31148 (controls stack walks, 3 bits), saves condition reg, saves
31149 link reg. */
31150 /* The `function calls alloca' bit seems to be set whenever reg 31 is
31151 set up as a frame pointer, even when there is no alloca call. */
31152 fprintf (file, "%d,",
31153 ((optional_tbtab << 6)
31154 | ((optional_tbtab & frame_pointer_needed) << 5)
31155 | (info->cr_save_p << 1)
31156 | (info->lr_save_p)));
31158 /* 3 bitfields: saves backchain, fixup code, number of fpr saved
31159 (6 bits). */
31160 fprintf (file, "%d,",
31161 (info->push_p << 7) | (64 - info->first_fp_reg_save));
31163 /* 2 bitfields: spare bits (2 bits), number of gpr saved (6 bits). */
31164 fprintf (file, "%d,", (32 - first_reg_to_save ()));
31166 if (optional_tbtab)
31168 /* Compute the parameter info from the function decl argument
31169 list. */
31170 tree decl;
31171 int next_parm_info_bit = 31;
31173 for (decl = DECL_ARGUMENTS (current_function_decl);
31174 decl; decl = DECL_CHAIN (decl))
31176 rtx parameter = DECL_INCOMING_RTL (decl);
31177 machine_mode mode = GET_MODE (parameter);
31179 if (GET_CODE (parameter) == REG)
31181 if (SCALAR_FLOAT_MODE_P (mode))
31183 int bits;
31185 float_parms++;
31187 switch (mode)
31189 case SFmode:
31190 case SDmode:
31191 bits = 0x2;
31192 break;
31194 case DFmode:
31195 case DDmode:
31196 case TFmode:
31197 case TDmode:
31198 case IFmode:
31199 case KFmode:
31200 bits = 0x3;
31201 break;
31203 default:
31204 gcc_unreachable ();
31207 /* If only one bit will fit, don't or in this entry. */
31208 if (next_parm_info_bit > 0)
31209 parm_info |= (bits << (next_parm_info_bit - 1));
31210 next_parm_info_bit -= 2;
31212 else
31214 fixed_parms += ((GET_MODE_SIZE (mode)
31215 + (UNITS_PER_WORD - 1))
31216 / UNITS_PER_WORD);
31217 next_parm_info_bit -= 1;
31223 /* Number of fixed point parameters. */
31224 /* This is actually the number of words of fixed point parameters; thus
31225 an 8-byte struct counts as 2, and thus the maximum value is 8. */
31226 fprintf (file, "%d,", fixed_parms);
31228 /* 2 bitfields: number of floating point parameters (7 bits), parameters
31229 all on stack. */
31230 /* This is actually the number of fp registers that hold parameters;
31231 and thus the maximum value is 13. */
31232 /* Set parameters on stack bit if parameters are not in their original
31233 registers, regardless of whether they are on the stack? Xlc
31234 seems to set the bit when not optimizing. */
31235 fprintf (file, "%d\n", ((float_parms << 1) | (! optimize)));
31237 if (optional_tbtab)
31239 /* Optional fields follow. Some are variable length. */
31241 /* Parameter types, left adjusted bit fields: 0 fixed, 10 single
31242 float, 11 double float. */
31243 /* There is an entry for each parameter in a register, in the order
31244 that they occur in the parameter list. Any intervening arguments
31245 on the stack are ignored. If the list overflows a long (max
31246 possible length 34 bits) then completely leave off all elements
31247 that don't fit. */
31248 /* Only emit this long if there was at least one parameter. */
31249 if (fixed_parms || float_parms)
31250 fprintf (file, "\t.long %d\n", parm_info);
31252 /* Offset from start of code to tb table. */
31253 fputs ("\t.long ", file);
31254 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "LT");
31255 RS6000_OUTPUT_BASENAME (file, fname);
31256 putc ('-', file);
31257 rs6000_output_function_entry (file, fname);
31258 putc ('\n', file);
31260 /* Interrupt handler mask. */
31261 /* Omit this long, since we never set the interrupt handler bit
31262 above. */
31264 /* Number of CTL (controlled storage) anchors. */
31265 /* Omit this long, since the has_ctl bit is never set above. */
31267 /* Displacement into stack of each CTL anchor. */
31268 /* Omit this list of longs, because there are no CTL anchors. */
31270 /* Length of function name. */
31271 if (*fname == '*')
31272 ++fname;
31273 fprintf (file, "\t.short %d\n", (int) strlen (fname));
31275 /* Function name. */
31276 assemble_string (fname, strlen (fname));
31278 /* Register for alloca automatic storage; this is always reg 31.
31279 Only emit this if the alloca bit was set above. */
31280 if (frame_pointer_needed)
31281 fputs ("\t.byte 31\n", file);
31283 fputs ("\t.align 2\n", file);
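/* Illustrative only (all field values hypothetical): with
   -mtraceback=part, an optimized C function that saves LR and
   pushes a frame might get

	.long 0
	.byte 0,0,0,1,128,0,0,0

   from the mandatory part above; the LT label and the optional
   fields are emitted only when optional_tbtab is set.  */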
31287 /* Arrange to define .LCTOC1 label, if not already done. */
31288 if (need_toc_init)
31290 need_toc_init = 0;
31291 if (!toc_initialized)
31293 switch_to_section (toc_section);
31294 switch_to_section (current_function_section ());
31299 /* -fsplit-stack support. */
31301 /* A SYMBOL_REF for __morestack. */
31302 static GTY(()) rtx morestack_ref;
31304 static rtx
31305 gen_add3_const (rtx rt, rtx ra, long c)
31307 if (TARGET_64BIT)
31308 return gen_adddi3 (rt, ra, GEN_INT (c));
31309 else
31310 return gen_addsi3 (rt, ra, GEN_INT (c));
31313 /* Emit -fsplit-stack prologue, which goes before the regular function
31314 prologue (at local entry point in the case of ELFv2). */
31316 void
31317 rs6000_expand_split_stack_prologue (void)
31319 rs6000_stack_t *info = rs6000_stack_info ();
31320 unsigned HOST_WIDE_INT allocate;
31321 long alloc_hi, alloc_lo;
31322 rtx r0, r1, r12, lr, ok_label, compare, jump, call_fusage;
31323 rtx_insn *insn;
31325 gcc_assert (flag_split_stack && reload_completed);
31327 if (!info->push_p)
31328 return;
31330 if (global_regs[29])
31332 error ("-fsplit-stack uses register r29");
31333 inform (DECL_SOURCE_LOCATION (global_regs_decl[29]),
31334 "conflicts with %qD", global_regs_decl[29]);
31337 allocate = info->total_size;
31338 if (allocate > (unsigned HOST_WIDE_INT) 1 << 31)
31340 sorry ("Stack frame larger than 2G is not supported for -fsplit-stack");
31341 return;
31343 if (morestack_ref == NULL_RTX)
31345 morestack_ref = gen_rtx_SYMBOL_REF (Pmode, "__morestack");
31346 SYMBOL_REF_FLAGS (morestack_ref) |= (SYMBOL_FLAG_LOCAL
31347 | SYMBOL_FLAG_FUNCTION);
31350 r0 = gen_rtx_REG (Pmode, 0);
31351 r1 = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
31352 r12 = gen_rtx_REG (Pmode, 12);
31353 emit_insn (gen_load_split_stack_limit (r0));
31354 /* Always emit two insns here to calculate the requested stack pointer,
31355 so that the linker can edit them when adjusting size for calling
31356 non-split-stack code. */
31357 alloc_hi = (-allocate + 0x8000) & ~0xffffL;
31358 alloc_lo = -allocate - alloc_hi;
31359 if (alloc_hi != 0)
31361 emit_insn (gen_add3_const (r12, r1, alloc_hi));
31362 if (alloc_lo != 0)
31363 emit_insn (gen_add3_const (r12, r12, alloc_lo));
31364 else
31365 emit_insn (gen_nop ());
31367 else
31369 emit_insn (gen_add3_const (r12, r1, alloc_lo));
31370 emit_insn (gen_nop ());
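/* Illustrative only: for a hypothetical allocate of 0x12345 bytes,
   alloc_hi is -0x10000 and alloc_lo is -0x2345, so the pair above
   typically assembles as

	addis 12,1,-1
	addi 12,12,-9029

   while a small frame of 0x120 bytes gives "addi 12,1,-288" plus a
   nop, keeping the linker's edit point exactly two insns long.  */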
31373 compare = gen_rtx_REG (CCUNSmode, CR7_REGNO);
31374 emit_insn (gen_rtx_SET (compare, gen_rtx_COMPARE (CCUNSmode, r12, r0)));
31375 ok_label = gen_label_rtx ();
31376 jump = gen_rtx_IF_THEN_ELSE (VOIDmode,
31377 gen_rtx_GEU (VOIDmode, compare, const0_rtx),
31378 gen_rtx_LABEL_REF (VOIDmode, ok_label),
31379 pc_rtx);
31380 insn = emit_jump_insn (gen_rtx_SET (pc_rtx, jump));
31381 JUMP_LABEL (insn) = ok_label;
31382 /* Mark the jump as very likely to be taken. */
31383 add_int_reg_note (insn, REG_BR_PROB,
31384 REG_BR_PROB_BASE - REG_BR_PROB_BASE / 100);
31386 lr = gen_rtx_REG (Pmode, LR_REGNO);
31387 insn = emit_move_insn (r0, lr);
31388 RTX_FRAME_RELATED_P (insn) = 1;
31389 insn = emit_insn (gen_frame_store (r0, r1, info->lr_save_offset));
31390 RTX_FRAME_RELATED_P (insn) = 1;
31392 insn = emit_call_insn (gen_call (gen_rtx_MEM (SImode, morestack_ref),
31393 const0_rtx, const0_rtx));
31394 call_fusage = NULL_RTX;
31395 use_reg (&call_fusage, r12);
31396 /* Say the call uses r0, even though it doesn't, to stop regrename
31397 from twiddling with the insns saving lr, trashing args for cfun.
31398 The insns restoring lr are similarly protected by making
31399 split_stack_return use r0. */
31400 use_reg (&call_fusage, r0);
31401 add_function_usage_to (insn, call_fusage);
31402 emit_insn (gen_frame_load (r0, r1, info->lr_save_offset));
31403 insn = emit_move_insn (lr, r0);
31404 add_reg_note (insn, REG_CFA_RESTORE, lr);
31405 RTX_FRAME_RELATED_P (insn) = 1;
31406 emit_insn (gen_split_stack_return ());
31408 emit_label (ok_label);
31409 LABEL_NUSES (ok_label) = 1;
31412 /* Return the internal arg pointer used for function incoming
31413 arguments. With -fsplit-stack, the arg pointer is r12, so we need
31414 to copy it to a pseudo in order for it to be preserved over calls
31415 and suchlike. We'd really like to use a pseudo here for the
31416 internal arg pointer but data-flow analysis is not prepared to
31417 accept pseudos as live at the beginning of a function. */
31419 static rtx
31420 rs6000_internal_arg_pointer (void)
31422 if (flag_split_stack
31423 && (lookup_attribute ("no_split_stack", DECL_ATTRIBUTES (cfun->decl))
31424 == NULL))
31427 if (cfun->machine->split_stack_arg_pointer == NULL_RTX)
31429 rtx pat;
31431 cfun->machine->split_stack_arg_pointer = gen_reg_rtx (Pmode);
31432 REG_POINTER (cfun->machine->split_stack_arg_pointer) = 1;
31434 /* Put the pseudo initialization right after the note at the
31435 beginning of the function. */
31436 pat = gen_rtx_SET (cfun->machine->split_stack_arg_pointer,
31437 gen_rtx_REG (Pmode, 12));
31438 push_topmost_sequence ();
31439 emit_insn_after (pat, get_insns ());
31440 pop_topmost_sequence ();
31442 return plus_constant (Pmode, cfun->machine->split_stack_arg_pointer,
31443 FIRST_PARM_OFFSET (current_function_decl));
31445 return virtual_incoming_args_rtx;
31448 /* We may have to tell the dataflow pass that the split stack prologue
31449 is initializing a register. */
31451 static void
31452 rs6000_live_on_entry (bitmap regs)
31454 if (flag_split_stack)
31455 bitmap_set_bit (regs, 12);
31458 /* Emit -fsplit-stack dynamic stack allocation space check. */
31460 void
31461 rs6000_split_stack_space_check (rtx size, rtx label)
31463 rtx sp = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
31464 rtx limit = gen_reg_rtx (Pmode);
31465 rtx requested = gen_reg_rtx (Pmode);
31466 rtx cmp = gen_reg_rtx (CCUNSmode);
31467 rtx jump;
31469 emit_insn (gen_load_split_stack_limit (limit));
31470 if (CONST_INT_P (size))
31471 emit_insn (gen_add3_insn (requested, sp, GEN_INT (-INTVAL (size))));
31472 else
31474 size = force_reg (Pmode, size);
31475 emit_move_insn (requested, gen_rtx_MINUS (Pmode, sp, size));
31477 emit_insn (gen_rtx_SET (cmp, gen_rtx_COMPARE (CCUNSmode, requested, limit)));
31478 jump = gen_rtx_IF_THEN_ELSE (VOIDmode,
31479 gen_rtx_GEU (VOIDmode, cmp, const0_rtx),
31480 gen_rtx_LABEL_REF (VOIDmode, label),
31481 pc_rtx);
31482 jump = emit_jump_insn (gen_rtx_SET (pc_rtx, jump));
31483 JUMP_LABEL (jump) = label;
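/* A minimal sketch of the sequence emitted above, assuming SIZE is
   the constant 4096 (hypothetical): load the split-stack limit from
   the TCB, compute requested = r1 - 4096, compare the two as
   unsigned values, and branch to LABEL when requested >= limit,
   i.e. when the new stack pointer would still sit above the
   limit.  */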
31486 /* A C compound statement that outputs the assembler code for a thunk
31487 function, used to implement C++ virtual function calls with
31488 multiple inheritance. The thunk acts as a wrapper around a virtual
31489 function, adjusting the implicit object parameter before handing
31490 control off to the real function.
31492 First, emit code to add the integer DELTA to the location that
31493 contains the incoming first argument. Assume that this argument
31494 contains a pointer, and is the one used to pass the `this' pointer
31495 in C++. This is the incoming argument *before* the function
31496 prologue, e.g. `%o0' on a sparc. The addition must preserve the
31497 values of all other incoming arguments.
31499 After the addition, emit code to jump to FUNCTION, which is a
31500 `FUNCTION_DECL'. This is a direct pure jump, not a call, and does
31501 not touch the return address. Hence returning from FUNCTION will
31502 return to whoever called the current `thunk'.
31504 The effect must be as if FUNCTION had been called directly with the
31505 adjusted first argument. This macro is responsible for emitting
31506 all of the code for a thunk function; output_function_prologue()
31507 and output_function_epilogue() are not invoked.
31509 The THUNK_FNDECL is redundant. (DELTA and FUNCTION have already
31510 been extracted from it.) It might possibly be useful on some
31511 targets, but probably not.
31513 If you do not define this macro, the target-independent code in the
31514 C++ frontend will generate a less efficient heavyweight thunk that
31515 calls FUNCTION instead of jumping to it. The generic approach does
31516 not support varargs. */
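/* A rough sketch of what gets emitted below, in illustrative
   pseudo-C (DELTA, VCALL_OFFSET and FUNCTION as above; `this'
   arrives in r3, or r4 for aggregate returns):

	this += DELTA;
	if (VCALL_OFFSET)
	  this += *(ptrdiff_t *) (*(char **) this + VCALL_OFFSET);
	goto *FUNCTION;
*/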
31518 static void
31519 rs6000_output_mi_thunk (FILE *file, tree thunk_fndecl ATTRIBUTE_UNUSED,
31520 HOST_WIDE_INT delta, HOST_WIDE_INT vcall_offset,
31521 tree function)
31523 rtx this_rtx, funexp;
31524 rtx_insn *insn;
31526 reload_completed = 1;
31527 epilogue_completed = 1;
31529 /* Mark the end of the (empty) prologue. */
31530 emit_note (NOTE_INSN_PROLOGUE_END);
31532 /* Find the "this" pointer. If the function returns a structure,
31533 the structure return pointer is in r3. */
31534 if (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function))
31535 this_rtx = gen_rtx_REG (Pmode, 4);
31536 else
31537 this_rtx = gen_rtx_REG (Pmode, 3);
31539 /* Apply the constant offset, if required. */
31540 if (delta)
31541 emit_insn (gen_add3_insn (this_rtx, this_rtx, GEN_INT (delta)));
31543 /* Apply the offset from the vtable, if required. */
31544 if (vcall_offset)
31546 rtx vcall_offset_rtx = GEN_INT (vcall_offset);
31547 rtx tmp = gen_rtx_REG (Pmode, 12);
31549 emit_move_insn (tmp, gen_rtx_MEM (Pmode, this_rtx));
31550 if (((unsigned HOST_WIDE_INT) vcall_offset) + 0x8000 >= 0x10000)
31552 emit_insn (gen_add3_insn (tmp, tmp, vcall_offset_rtx));
31553 emit_move_insn (tmp, gen_rtx_MEM (Pmode, tmp));
31555 else
31557 rtx loc = gen_rtx_PLUS (Pmode, tmp, vcall_offset_rtx);
31559 emit_move_insn (tmp, gen_rtx_MEM (Pmode, loc));
31561 emit_insn (gen_add3_insn (this_rtx, this_rtx, tmp));
31564 /* Generate a tail call to the target function. */
31565 if (!TREE_USED (function))
31567 assemble_external (function);
31568 TREE_USED (function) = 1;
31570 funexp = XEXP (DECL_RTL (function), 0);
31571 funexp = gen_rtx_MEM (FUNCTION_MODE, funexp);
31573 #if TARGET_MACHO
31574 if (MACHOPIC_INDIRECT)
31575 funexp = machopic_indirect_call_target (funexp);
31576 #endif
31578 /* gen_sibcall expects reload to convert scratch pseudo to LR so we must
31579 generate sibcall RTL explicitly. */
31580 insn = emit_call_insn (
31581 gen_rtx_PARALLEL (VOIDmode,
31582 gen_rtvec (3,
31583 gen_rtx_CALL (VOIDmode,
31584 funexp, const0_rtx),
31585 gen_rtx_USE (VOIDmode, const0_rtx),
31586 simple_return_rtx)));
31587 SIBLING_CALL_P (insn) = 1;
31588 emit_barrier ();
31590 /* Run just enough of rest_of_compilation to get the insns emitted.
31591 There's not really enough bulk here to make other passes such as
31592 instruction scheduling worthwhile. Note that use_thunk calls
31593 assemble_start_function and assemble_end_function. */
31594 insn = get_insns ();
31595 shorten_branches (insn);
31596 final_start_function (insn, file, 1);
31597 final (insn, file, 1);
31598 final_end_function ();
31600 reload_completed = 0;
31601 epilogue_completed = 0;
31604 /* A quick summary of the various types of 'constant-pool tables'
31605 under PowerPC:
31607 Target       Flags            Name             One table per
31608 AIX          (none)           AIX TOC          object file
31609 AIX          -mfull-toc       AIX TOC          object file
31610 AIX          -mminimal-toc    AIX minimal TOC  translation unit
31611 SVR4/EABI    (none)           SVR4 SDATA       object file
31612 SVR4/EABI    -fpic            SVR4 pic         object file
31613 SVR4/EABI    -fPIC            SVR4 PIC         translation unit
31614 SVR4/EABI    -mrelocatable    EABI TOC         function
31615 SVR4/EABI    -maix            AIX TOC          object file
31616 SVR4/EABI    -maix -mminimal-toc
31617              AIX minimal TOC  translation unit
31619 Name             Reg.  Set by  entries  contains:
31620                                made by  addrs?  fp?     sum?
31622 AIX TOC            2   crt0    as       Y       option  option
31623 AIX minimal TOC   30   prolog  gcc      Y       Y       option
31624 SVR4 SDATA        13   crt0    gcc      N       Y       N
31625 SVR4 pic          30   prolog  ld       Y       not yet N
31626 SVR4 PIC          30   prolog  gcc      Y       option  option
31627 EABI TOC          30   prolog  gcc      Y       option  option
31631 /* Hash functions for the hash table. */
31633 static unsigned
31634 rs6000_hash_constant (rtx k)
31636 enum rtx_code code = GET_CODE (k);
31637 machine_mode mode = GET_MODE (k);
31638 unsigned result = (code << 3) ^ mode;
31639 const char *format;
31640 int flen, fidx;
31642 format = GET_RTX_FORMAT (code);
31643 flen = strlen (format);
31644 fidx = 0;
31646 switch (code)
31648 case LABEL_REF:
31649 return result * 1231 + (unsigned) INSN_UID (XEXP (k, 0));
31651 case CONST_WIDE_INT:
31653 int i;
31654 flen = CONST_WIDE_INT_NUNITS (k);
31655 for (i = 0; i < flen; i++)
31656 result = result * 613 + CONST_WIDE_INT_ELT (k, i);
31657 return result;
31660 case CONST_DOUBLE:
31661 if (mode != VOIDmode)
31662 return real_hash (CONST_DOUBLE_REAL_VALUE (k)) * result;
31663 flen = 2;
31664 break;
31666 case CODE_LABEL:
31667 fidx = 3;
31668 break;
31670 default:
31671 break;
31674 for (; fidx < flen; fidx++)
31675 switch (format[fidx])
31677 case 's':
31679 unsigned i, len;
31680 const char *str = XSTR (k, fidx);
31681 len = strlen (str);
31682 result = result * 613 + len;
31683 for (i = 0; i < len; i++)
31684 result = result * 613 + (unsigned) str[i];
31685 break;
31687 case 'u':
31688 case 'e':
31689 result = result * 1231 + rs6000_hash_constant (XEXP (k, fidx));
31690 break;
31691 case 'i':
31692 case 'n':
31693 result = result * 613 + (unsigned) XINT (k, fidx);
31694 break;
31695 case 'w':
31696 if (sizeof (unsigned) >= sizeof (HOST_WIDE_INT))
31697 result = result * 613 + (unsigned) XWINT (k, fidx);
31698 else
31700 size_t i;
31701 for (i = 0; i < sizeof (HOST_WIDE_INT) / sizeof (unsigned); i++)
31702 result = result * 613 + (unsigned) (XWINT (k, fidx)
31703 >> CHAR_BIT * i);
31705 break;
31706 case '0':
31707 break;
31708 default:
31709 gcc_unreachable ();
31712 return result;
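/* Note, illustrative only: the multipliers 613 and 1231 used above
   are primes, the usual multiplicative-hash choice for spreading
   bits, not a documented requirement.  */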
31715 hashval_t
31716 toc_hasher::hash (toc_hash_struct *thc)
31718 return rs6000_hash_constant (thc->key) ^ thc->key_mode;
31721 /* Compare H1 and H2 for equivalence. */
31723 bool
31724 toc_hasher::equal (toc_hash_struct *h1, toc_hash_struct *h2)
31726 rtx r1 = h1->key;
31727 rtx r2 = h2->key;
31729 if (h1->key_mode != h2->key_mode)
31730 return 0;
31732 return rtx_equal_p (r1, r2);
31735 /* These are the names given by the C++ front-end to vtables and
31736 vtable-like objects. Ideally, this logic should not be here;
31737 instead, there should be some programmatic way of inquiring as
31738 to whether or not an object is a vtable. */
31740 #define VTABLE_NAME_P(NAME) \
31741 (strncmp ("_vt.", name, strlen ("_vt.")) == 0 \
31742 || strncmp ("_ZTV", name, strlen ("_ZTV")) == 0 \
31743 || strncmp ("_ZTT", name, strlen ("_ZTT")) == 0 \
31744 || strncmp ("_ZTI", name, strlen ("_ZTI")) == 0 \
31745 || strncmp ("_ZTC", name, strlen ("_ZTC")) == 0)
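/* For reference: "_vt." is the old g++ 2.x vtable prefix, while the
   "_ZT" prefixes come from the Itanium C++ ABI mangling -- _ZTV
   names a vtable, _ZTT a VTT, _ZTI type info, and _ZTC a
   construction vtable; e.g. a hypothetical "_ZTV4Base" is the
   vtable for Base.  */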
31747 #ifdef NO_DOLLAR_IN_LABEL
31748 /* Return a GGC-allocated character string translating dollar signs in
31749 input NAME to underscores. Used by XCOFF ASM_OUTPUT_LABELREF. */
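/* For example, a hypothetical "foo$bar$baz" comes back as
   "foo_bar_baz"; names without a '$', or starting with one, are
   returned unchanged.  */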
31751 const char *
31752 rs6000_xcoff_strip_dollar (const char *name)
31754 char *strip, *p;
31755 const char *q;
31756 size_t len;
31758 q = (const char *) strchr (name, '$');
31760 if (q == 0 || q == name)
31761 return name;
31763 len = strlen (name);
31764 strip = XALLOCAVEC (char, len + 1);
31765 strcpy (strip, name);
31766 p = strip + (q - name);
31767 while (p)
31769 *p = '_';
31770 p = strchr (p + 1, '$');
31773 return ggc_alloc_string (strip, len);
31775 #endif
31777 void
31778 rs6000_output_symbol_ref (FILE *file, rtx x)
31780 const char *name = XSTR (x, 0);
31782 /* Currently C++ TOC references to vtables can be emitted before it
31783 is decided whether the vtable is public or private. If this is
31784 the case, then the linker will eventually complain that there is
31785 a reference to an unknown section. Thus, for vtables only,
31786 we emit the TOC reference to reference the identifier and not the
31787 symbol. */
31788 if (VTABLE_NAME_P (name))
31790 RS6000_OUTPUT_BASENAME (file, name);
31792 else
31793 assemble_name (file, name);
31796 /* Output a TOC entry. We derive the entry name from what is being
31797 written. */
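/* Illustrative only, since the exact spelling depends on target
   flags: on 64-bit AIX a DFmode constant 1.0 would come out roughly
   as

	.tc FD_3ff00000_0[TC],0x3ff0000000000000
*/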
31799 void
31800 output_toc (FILE *file, rtx x, int labelno, machine_mode mode)
31802 char buf[256];
31803 const char *name = buf;
31804 rtx base = x;
31805 HOST_WIDE_INT offset = 0;
31807 gcc_assert (!TARGET_NO_TOC);
31809 /* When the linker won't eliminate them, don't output duplicate
31810 TOC entries (this happens on AIX if there is any kind of TOC,
31811 and on SVR4 under -fPIC or -mrelocatable). Don't do this for
31812 CODE_LABELs. */
31813 if (TARGET_TOC && GET_CODE (x) != LABEL_REF)
31815 struct toc_hash_struct *h;
31817 /* Create toc_hash_table. This can't be done at TARGET_OPTION_OVERRIDE
31818 time because GGC is not initialized at that point. */
31819 if (toc_hash_table == NULL)
31820 toc_hash_table = hash_table<toc_hasher>::create_ggc (1021);
31822 h = ggc_alloc<toc_hash_struct> ();
31823 h->key = x;
31824 h->key_mode = mode;
31825 h->labelno = labelno;
31827 toc_hash_struct **found = toc_hash_table->find_slot (h, INSERT);
31828 if (*found == NULL)
31829 *found = h;
31830 else /* This is indeed a duplicate.
31831 Set this label equal to that label. */
31833 fputs ("\t.set ", file);
31834 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "LC");
31835 fprintf (file, "%d,", labelno);
31836 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "LC");
31837 fprintf (file, "%d\n", ((*found)->labelno));
31839 #ifdef HAVE_AS_TLS
31840 if (TARGET_XCOFF && GET_CODE (x) == SYMBOL_REF
31841 && (SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_GLOBAL_DYNAMIC
31842 || SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC))
31844 fputs ("\t.set ", file);
31845 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "LCM");
31846 fprintf (file, "%d,", labelno);
31847 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "LCM");
31848 fprintf (file, "%d\n", ((*found)->labelno));
31850 #endif
31851 return;
31855 /* If we're going to put a double constant in the TOC, make sure it's
31856 aligned properly when strict alignment is on. */
31857 if ((CONST_DOUBLE_P (x) || CONST_WIDE_INT_P (x))
31858 && STRICT_ALIGNMENT
31859 && GET_MODE_BITSIZE (mode) >= 64
31860 && ! (TARGET_NO_FP_IN_TOC && ! TARGET_MINIMAL_TOC)) {
31861 ASM_OUTPUT_ALIGN (file, 3);
31864 (*targetm.asm_out.internal_label) (file, "LC", labelno);
31866 /* Handle FP constants specially. Note that if we have a minimal
31867 TOC, things we put here aren't actually in the TOC, so we can allow
31868 FP constants. */
31869 if (GET_CODE (x) == CONST_DOUBLE
31870 && (GET_MODE (x) == TFmode || GET_MODE (x) == TDmode
31871 || GET_MODE (x) == IFmode || GET_MODE (x) == KFmode))
31873 long k[4];
31875 if (DECIMAL_FLOAT_MODE_P (GET_MODE (x)))
31876 REAL_VALUE_TO_TARGET_DECIMAL128 (*CONST_DOUBLE_REAL_VALUE (x), k);
31877 else
31878 REAL_VALUE_TO_TARGET_LONG_DOUBLE (*CONST_DOUBLE_REAL_VALUE (x), k);
31880 if (TARGET_64BIT)
31882 if (TARGET_ELF || TARGET_MINIMAL_TOC)
31883 fputs (DOUBLE_INT_ASM_OP, file);
31884 else
31885 fprintf (file, "\t.tc FT_%lx_%lx_%lx_%lx[TC],",
31886 k[0] & 0xffffffff, k[1] & 0xffffffff,
31887 k[2] & 0xffffffff, k[3] & 0xffffffff);
31888 fprintf (file, "0x%lx%08lx,0x%lx%08lx\n",
31889 k[WORDS_BIG_ENDIAN ? 0 : 1] & 0xffffffff,
31890 k[WORDS_BIG_ENDIAN ? 1 : 0] & 0xffffffff,
31891 k[WORDS_BIG_ENDIAN ? 2 : 3] & 0xffffffff,
31892 k[WORDS_BIG_ENDIAN ? 3 : 2] & 0xffffffff);
31893 return;
31895 else
31897 if (TARGET_ELF || TARGET_MINIMAL_TOC)
31898 fputs ("\t.long ", file);
31899 else
31900 fprintf (file, "\t.tc FT_%lx_%lx_%lx_%lx[TC],",
31901 k[0] & 0xffffffff, k[1] & 0xffffffff,
31902 k[2] & 0xffffffff, k[3] & 0xffffffff);
31903 fprintf (file, "0x%lx,0x%lx,0x%lx,0x%lx\n",
31904 k[0] & 0xffffffff, k[1] & 0xffffffff,
31905 k[2] & 0xffffffff, k[3] & 0xffffffff);
31906 return;
31909 else if (GET_CODE (x) == CONST_DOUBLE
31910 && (GET_MODE (x) == DFmode || GET_MODE (x) == DDmode))
31912 long k[2];
31914 if (DECIMAL_FLOAT_MODE_P (GET_MODE (x)))
31915 REAL_VALUE_TO_TARGET_DECIMAL64 (*CONST_DOUBLE_REAL_VALUE (x), k);
31916 else
31917 REAL_VALUE_TO_TARGET_DOUBLE (*CONST_DOUBLE_REAL_VALUE (x), k);
31919 if (TARGET_64BIT)
31921 if (TARGET_ELF || TARGET_MINIMAL_TOC)
31922 fputs (DOUBLE_INT_ASM_OP, file);
31923 else
31924 fprintf (file, "\t.tc FD_%lx_%lx[TC],",
31925 k[0] & 0xffffffff, k[1] & 0xffffffff);
31926 fprintf (file, "0x%lx%08lx\n",
31927 k[WORDS_BIG_ENDIAN ? 0 : 1] & 0xffffffff,
31928 k[WORDS_BIG_ENDIAN ? 1 : 0] & 0xffffffff);
31929 return;
31931 else
31933 if (TARGET_ELF || TARGET_MINIMAL_TOC)
31934 fputs ("\t.long ", file);
31935 else
31936 fprintf (file, "\t.tc FD_%lx_%lx[TC],",
31937 k[0] & 0xffffffff, k[1] & 0xffffffff);
31938 fprintf (file, "0x%lx,0x%lx\n",
31939 k[0] & 0xffffffff, k[1] & 0xffffffff);
31940 return;
31943 else if (GET_CODE (x) == CONST_DOUBLE
31944 && (GET_MODE (x) == SFmode || GET_MODE (x) == SDmode))
31946 long l;
31948 if (DECIMAL_FLOAT_MODE_P (GET_MODE (x)))
31949 REAL_VALUE_TO_TARGET_DECIMAL32 (*CONST_DOUBLE_REAL_VALUE (x), l);
31950 else
31951 REAL_VALUE_TO_TARGET_SINGLE (*CONST_DOUBLE_REAL_VALUE (x), l);
31953 if (TARGET_64BIT)
31955 if (TARGET_ELF || TARGET_MINIMAL_TOC)
31956 fputs (DOUBLE_INT_ASM_OP, file);
31957 else
31958 fprintf (file, "\t.tc FS_%lx[TC],", l & 0xffffffff);
31959 if (WORDS_BIG_ENDIAN)
31960 fprintf (file, "0x%lx00000000\n", l & 0xffffffff);
31961 else
31962 fprintf (file, "0x%lx\n", l & 0xffffffff);
31963 return;
31965 else
31967 if (TARGET_ELF || TARGET_MINIMAL_TOC)
31968 fputs ("\t.long ", file);
31969 else
31970 fprintf (file, "\t.tc FS_%lx[TC],", l & 0xffffffff);
31971 fprintf (file, "0x%lx\n", l & 0xffffffff);
31972 return;
31975 else if (GET_MODE (x) == VOIDmode && GET_CODE (x) == CONST_INT)
31977 unsigned HOST_WIDE_INT low;
31978 HOST_WIDE_INT high;
31980 low = INTVAL (x) & 0xffffffff;
31981 high = (HOST_WIDE_INT) INTVAL (x) >> 32;
31983 /* TOC entries are always Pmode-sized, so on big-endian targets
31984 smaller integer constants in the TOC need to be padded.
31985 (This is still a win over putting the constants in
31986 a separate constant pool, because then we'd have
31987 to have both a TOC entry _and_ the actual constant.)
31989 For a 32-bit target, CONST_INT values are loaded and shifted
31990 entirely within `low' and can be stored in one TOC entry. */
31992 /* It would be easy to make this work, but it doesn't now. */
31993 gcc_assert (!TARGET_64BIT || POINTER_SIZE >= GET_MODE_BITSIZE (mode));
31995 if (WORDS_BIG_ENDIAN && POINTER_SIZE > GET_MODE_BITSIZE (mode))
31997 low |= high << 32;
31998 low <<= POINTER_SIZE - GET_MODE_BITSIZE (mode);
31999 high = (HOST_WIDE_INT) low >> 32;
32000 low &= 0xffffffff;
32003 if (TARGET_64BIT)
32005 if (TARGET_ELF || TARGET_MINIMAL_TOC)
32006 fputs (DOUBLE_INT_ASM_OP, file);
32007 else
32008 fprintf (file, "\t.tc ID_%lx_%lx[TC],",
32009 (long) high & 0xffffffff, (long) low & 0xffffffff);
32010 fprintf (file, "0x%lx%08lx\n",
32011 (long) high & 0xffffffff, (long) low & 0xffffffff);
32012 return;
32014 else
32016 if (POINTER_SIZE < GET_MODE_BITSIZE (mode))
32018 if (TARGET_ELF || TARGET_MINIMAL_TOC)
32019 fputs ("\t.long ", file);
32020 else
32021 fprintf (file, "\t.tc ID_%lx_%lx[TC],",
32022 (long) high & 0xffffffff, (long) low & 0xffffffff);
32023 fprintf (file, "0x%lx,0x%lx\n",
32024 (long) high & 0xffffffff, (long) low & 0xffffffff);
32026 else
32028 if (TARGET_ELF || TARGET_MINIMAL_TOC)
32029 fputs ("\t.long ", file);
32030 else
32031 fprintf (file, "\t.tc IS_%lx[TC],", (long) low & 0xffffffff);
32032 fprintf (file, "0x%lx\n", (long) low & 0xffffffff);
32034 return;
32038 if (GET_CODE (x) == CONST)
32040 gcc_assert (GET_CODE (XEXP (x, 0)) == PLUS
32041 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT);
32043 base = XEXP (XEXP (x, 0), 0);
32044 offset = INTVAL (XEXP (XEXP (x, 0), 1));
32047 switch (GET_CODE (base))
32049 case SYMBOL_REF:
32050 name = XSTR (base, 0);
32051 break;
32053 case LABEL_REF:
32054 ASM_GENERATE_INTERNAL_LABEL (buf, "L",
32055 CODE_LABEL_NUMBER (XEXP (base, 0)));
32056 break;
32058 case CODE_LABEL:
32059 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (base));
32060 break;
32062 default:
32063 gcc_unreachable ();
32066 if (TARGET_ELF || TARGET_MINIMAL_TOC)
32067 fputs (TARGET_32BIT ? "\t.long " : DOUBLE_INT_ASM_OP, file);
32068 else
32070 fputs ("\t.tc ", file);
32071 RS6000_OUTPUT_BASENAME (file, name);
32073 if (offset < 0)
32074 fprintf (file, ".N" HOST_WIDE_INT_PRINT_UNSIGNED, - offset);
32075 else if (offset)
32076 fprintf (file, ".P" HOST_WIDE_INT_PRINT_UNSIGNED, offset);
32078 /* Mark large TOC symbols on AIX with [TE] so they are mapped
32079 after other TOC symbols, reducing overflow of small TOC access
32080 to [TC] symbols. */
32081 fputs (TARGET_XCOFF && TARGET_CMODEL != CMODEL_SMALL
32082 ? "[TE]," : "[TC],", file);
32085 /* Currently C++ TOC references to vtables can be emitted before it
32086 is decided whether the vtable is public or private. If this is
32087 the case, then the linker will eventually complain that there is
32088 a TOC reference to an unknown section. Thus, for vtables only,
32089 we emit the TOC reference to reference the symbol and not the
32090 section. */
32091 if (VTABLE_NAME_P (name))
32093 RS6000_OUTPUT_BASENAME (file, name);
32094 if (offset < 0)
32095 fprintf (file, HOST_WIDE_INT_PRINT_DEC, offset);
32096 else if (offset > 0)
32097 fprintf (file, "+" HOST_WIDE_INT_PRINT_DEC, offset);
32099 else
32100 output_addr_const (file, x);
32102 #if HAVE_AS_TLS
32103 if (TARGET_XCOFF && GET_CODE (base) == SYMBOL_REF)
32105 switch (SYMBOL_REF_TLS_MODEL (base))
32107 case 0:
32108 break;
32109 case TLS_MODEL_LOCAL_EXEC:
32110 fputs ("@le", file);
32111 break;
32112 case TLS_MODEL_INITIAL_EXEC:
32113 fputs ("@ie", file);
32114 break;
32115 /* Use global-dynamic for local-dynamic. */
32116 case TLS_MODEL_GLOBAL_DYNAMIC:
32117 case TLS_MODEL_LOCAL_DYNAMIC:
32118 putc ('\n', file);
32119 (*targetm.asm_out.internal_label) (file, "LCM", labelno);
32120 fputs ("\t.tc .", file);
32121 RS6000_OUTPUT_BASENAME (file, name);
32122 fputs ("[TC],", file);
32123 output_addr_const (file, x);
32124 fputs ("@m", file);
32125 break;
32126 default:
32127 gcc_unreachable ();
32130 #endif
32132 putc ('\n', file);
32135 /* Output an assembler pseudo-op to write an ASCII string of N characters
32136 starting at P to FILE.
32138 On the RS/6000, we have to do this using the .byte operation and
32139 write out special characters outside the quoted string.
32140 Also, the assembler is broken; very long strings are truncated,
32141 so we must artificially break them up early. */
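/* As an illustration, output_ascii (file, "Hi\n", 3) would emit
   something like:

	.byte "Hi"
	.byte 10
*/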
32143 void
32144 output_ascii (FILE *file, const char *p, int n)
32146 char c;
32147 int i, count_string;
32148 const char *for_string = "\t.byte \"";
32149 const char *for_decimal = "\t.byte ";
32150 const char *to_close = NULL;
32152 count_string = 0;
32153 for (i = 0; i < n; i++)
32155 c = *p++;
32156 if (c >= ' ' && c < 0177)
32158 if (for_string)
32159 fputs (for_string, file);
32160 putc (c, file);
32162 /* Write two quotes to get one. */
32163 if (c == '"')
32165 putc (c, file);
32166 ++count_string;
32169 for_string = NULL;
32170 for_decimal = "\"\n\t.byte ";
32171 to_close = "\"\n";
32172 ++count_string;
32174 if (count_string >= 512)
32176 fputs (to_close, file);
32178 for_string = "\t.byte \"";
32179 for_decimal = "\t.byte ";
32180 to_close = NULL;
32181 count_string = 0;
32184 else
32186 if (for_decimal)
32187 fputs (for_decimal, file);
32188 fprintf (file, "%d", c);
32190 for_string = "\n\t.byte \"";
32191 for_decimal = ", ";
32192 to_close = "\n";
32193 count_string = 0;
32197 /* Now close the string if we have written one. Then end the line. */
32198 if (to_close)
32199 fputs (to_close, file);
32202 /* Generate a unique section name for FILENAME for a section type
32203 represented by SECTION_DESC. Output goes into BUF.
32205 SECTION_DESC can be any string, as long as it is different for each
32206 possible section type.
32208 We name the section in the same manner as xlc. The name begins with an
32209 underscore followed by the filename (after stripping any leading directory
32210 names) with the last period replaced by the string SECTION_DESC. If
32211 FILENAME does not contain a period, SECTION_DESC is appended to the end of
32212 the name. */
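/* As a hypothetical example, FILENAME "src/hello.c" with
   SECTION_DESC "bss" yields "_hellobss".  */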
32214 void
32215 rs6000_gen_section_name (char **buf, const char *filename,
32216 const char *section_desc)
32218 const char *q, *after_last_slash, *last_period = 0;
32219 char *p;
32220 int len;
32222 after_last_slash = filename;
32223 for (q = filename; *q; q++)
32225 if (*q == '/')
32226 after_last_slash = q + 1;
32227 else if (*q == '.')
32228 last_period = q;
32231 len = strlen (after_last_slash) + strlen (section_desc) + 2;
32232 *buf = (char *) xmalloc (len);
32234 p = *buf;
32235 *p++ = '_';
32237 for (q = after_last_slash; *q; q++)
32239 if (q == last_period)
32241 strcpy (p, section_desc);
32242 p += strlen (section_desc);
32243 break;
32246 else if (ISALNUM (*q))
32247 *p++ = *q;
32250 if (last_period == 0)
32251 strcpy (p, section_desc);
32252 else
32253 *p = '\0';
32256 /* Emit profile function. */
32258 void
32259 output_profile_hook (int labelno ATTRIBUTE_UNUSED)
32261 /* Non-standard profiling for kernels, which just saves LR then calls
32262 _mcount without worrying about arg saves. The idea is to change
32263 the function prologue as little as possible as it isn't easy to
32264 account for arg save/restore code added just for _mcount. */
32265 if (TARGET_PROFILE_KERNEL)
32266 return;
32268 if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
32270 #ifndef NO_PROFILE_COUNTERS
32271 # define NO_PROFILE_COUNTERS 0
32272 #endif
32273 if (NO_PROFILE_COUNTERS)
32274 emit_library_call (init_one_libfunc (RS6000_MCOUNT),
32275 LCT_NORMAL, VOIDmode, 0);
32276 else
32278 char buf[30];
32279 const char *label_name;
32280 rtx fun;
32282 ASM_GENERATE_INTERNAL_LABEL (buf, "LP", labelno);
32283 label_name = ggc_strdup ((*targetm.strip_name_encoding) (buf));
32284 fun = gen_rtx_SYMBOL_REF (Pmode, label_name);
32286 emit_library_call (init_one_libfunc (RS6000_MCOUNT),
32287 LCT_NORMAL, VOIDmode, 1, fun, Pmode);
32290 else if (DEFAULT_ABI == ABI_DARWIN)
32292 const char *mcount_name = RS6000_MCOUNT;
32293 int caller_addr_regno = LR_REGNO;
32295 /* Be conservative and always set this, at least for now. */
32296 crtl->uses_pic_offset_table = 1;
32298 #if TARGET_MACHO
32299 /* For PIC code, set up a stub and collect the caller's address
32300 from r0, which is where the prologue puts it. */
32301 if (MACHOPIC_INDIRECT
32302 && crtl->uses_pic_offset_table)
32303 caller_addr_regno = 0;
32304 #endif
32305 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, mcount_name),
32306 LCT_NORMAL, VOIDmode, 1,
32307 gen_rtx_REG (Pmode, caller_addr_regno), Pmode);
32311 /* Write function profiler code. */
32313 void
32314 output_function_profiler (FILE *file, int labelno)
32316 char buf[100];
32318 switch (DEFAULT_ABI)
32320 default:
32321 gcc_unreachable ();
32323 case ABI_V4:
32324 if (!TARGET_32BIT)
32326 warning (0, "no profiling of 64-bit code for this ABI");
32327 return;
32329 ASM_GENERATE_INTERNAL_LABEL (buf, "LP", labelno);
32330 fprintf (file, "\tmflr %s\n", reg_names[0]);
32331 if (NO_PROFILE_COUNTERS)
32333 asm_fprintf (file, "\tstw %s,4(%s)\n",
32334 reg_names[0], reg_names[1]);
32336 else if (TARGET_SECURE_PLT && flag_pic)
32338 if (TARGET_LINK_STACK)
32340 char name[32];
32341 get_ppc476_thunk_name (name);
32342 asm_fprintf (file, "\tbl %s\n", name);
32344 else
32345 asm_fprintf (file, "\tbcl 20,31,1f\n1:\n");
32346 asm_fprintf (file, "\tstw %s,4(%s)\n",
32347 reg_names[0], reg_names[1]);
32348 asm_fprintf (file, "\tmflr %s\n", reg_names[12]);
32349 asm_fprintf (file, "\taddis %s,%s,",
32350 reg_names[12], reg_names[12]);
32351 assemble_name (file, buf);
32352 asm_fprintf (file, "-1b@ha\n\tla %s,", reg_names[0]);
32353 assemble_name (file, buf);
32354 asm_fprintf (file, "-1b@l(%s)\n", reg_names[12]);
32356 else if (flag_pic == 1)
32358 fputs ("\tbl _GLOBAL_OFFSET_TABLE_@local-4\n", file);
32359 asm_fprintf (file, "\tstw %s,4(%s)\n",
32360 reg_names[0], reg_names[1]);
32361 asm_fprintf (file, "\tmflr %s\n", reg_names[12]);
32362 asm_fprintf (file, "\tlwz %s,", reg_names[0]);
32363 assemble_name (file, buf);
32364 asm_fprintf (file, "@got(%s)\n", reg_names[12]);
32366 else if (flag_pic > 1)
32368 asm_fprintf (file, "\tstw %s,4(%s)\n",
32369 reg_names[0], reg_names[1]);
32370 /* Now, we need to get the address of the label. */
32371 if (TARGET_LINK_STACK)
32373 char name[32];
32374 get_ppc476_thunk_name (name);
32375 asm_fprintf (file, "\tbl %s\n\tb 1f\n\t.long ", name);
32376 assemble_name (file, buf);
32377 fputs ("-.\n1:", file);
32378 asm_fprintf (file, "\tmflr %s\n", reg_names[11]);
32379 asm_fprintf (file, "\taddi %s,%s,4\n",
32380 reg_names[11], reg_names[11]);
32382 else
32384 fputs ("\tbcl 20,31,1f\n\t.long ", file);
32385 assemble_name (file, buf);
32386 fputs ("-.\n1:", file);
32387 asm_fprintf (file, "\tmflr %s\n", reg_names[11]);
32389 asm_fprintf (file, "\tlwz %s,0(%s)\n",
32390 reg_names[0], reg_names[11]);
32391 asm_fprintf (file, "\tadd %s,%s,%s\n",
32392 reg_names[0], reg_names[0], reg_names[11]);
32394 else
32396 asm_fprintf (file, "\tlis %s,", reg_names[12]);
32397 assemble_name (file, buf);
32398 fputs ("@ha\n", file);
32399 asm_fprintf (file, "\tstw %s,4(%s)\n",
32400 reg_names[0], reg_names[1]);
32401 asm_fprintf (file, "\tla %s,", reg_names[0]);
32402 assemble_name (file, buf);
32403 asm_fprintf (file, "@l(%s)\n", reg_names[12]);
32406 /* ABI_V4 saves the static chain reg with ASM_OUTPUT_REG_PUSH. */
32407 fprintf (file, "\tbl %s%s\n",
32408 RS6000_MCOUNT, flag_pic ? "@plt" : "");
32409 break;
32411 case ABI_AIX:
32412 case ABI_ELFv2:
32413 case ABI_DARWIN:
32414 /* Don't do anything, done in output_profile_hook (). */
32415 break;
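/* Illustrative only (label number hypothetical): the non-PIC ABI_V4
   path above prints roughly

	mflr 0
	lis 12,.LP0@ha
	stw 0,4(1)
	la 0,.LP0@l(12)
	bl _mcount

   assuming RS6000_MCOUNT expands to "_mcount" on this target.  */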
32421 /* The following variable value is the last issued insn. */
32423 static rtx_insn *last_scheduled_insn;
32425 /* The following variable helps to balance issuing of load and
32426 store instructions. */
32428 static int load_store_pendulum;
32430 /* The following variable helps pair divide insns during scheduling. */
32431 static int divide_cnt;
32432 /* The following variable helps pair and alternate vector and vector load
32433 insns during scheduling. */
32434 static int vec_load_pendulum;
32437 /* Power4 load update and store update instructions are cracked into a
32438 load or store and an integer insn which are executed in the same cycle.
32439 Branches have their own dispatch slot which does not count against the
32440 GCC issue rate, but it changes the program flow so there are no other
32441 instructions to issue in this cycle. */
32443 static int
32444 rs6000_variable_issue_1 (rtx_insn *insn, int more)
32446 last_scheduled_insn = insn;
32447 if (GET_CODE (PATTERN (insn)) == USE
32448 || GET_CODE (PATTERN (insn)) == CLOBBER)
32450 cached_can_issue_more = more;
32451 return cached_can_issue_more;
32454 if (insn_terminates_group_p (insn, current_group))
32456 cached_can_issue_more = 0;
32457 return cached_can_issue_more;
32460 /* If the insn is not recognized, it has no reservation; leave the issue count unchanged. */
32461 if (recog_memoized (insn) < 0)
32462 return more;
32464 if (rs6000_sched_groups)
32466 if (is_microcoded_insn (insn))
32467 cached_can_issue_more = 0;
32468 else if (is_cracked_insn (insn))
32469 cached_can_issue_more = more > 2 ? more - 2 : 0;
32470 else
32471 cached_can_issue_more = more - 1;
32473 return cached_can_issue_more;
32476 if (rs6000_cpu_attr == CPU_CELL && is_nonpipeline_insn (insn))
32477 return 0;
32479 cached_can_issue_more = more - 1;
32480 return cached_can_issue_more;
32483 static int
32484 rs6000_variable_issue (FILE *stream, int verbose, rtx_insn *insn, int more)
32486 int r = rs6000_variable_issue_1 (insn, more);
32487 if (verbose)
32488 fprintf (stream, "// rs6000_variable_issue (more = %d) = %d\n", more, r);
32489 return r;
32492 /* Adjust the cost of a scheduling dependency. Return the new cost of
32493 a dependency LINK or INSN on DEP_INSN. COST is the current cost. */
32495 static int
32496 rs6000_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep_insn, int cost,
32497 unsigned int)
32499 enum attr_type attr_type;
32501 if (recog_memoized (insn) < 0 || recog_memoized (dep_insn) < 0)
32502 return cost;
32504 switch (dep_type)
32506 case REG_DEP_TRUE:
32508 /* Data dependency; DEP_INSN writes a register that INSN reads
32509 some cycles later. */
32511 /* Separate a load from a narrower, dependent store. */
32512 if ((rs6000_sched_groups || rs6000_cpu_attr == CPU_POWER9)
32513 && GET_CODE (PATTERN (insn)) == SET
32514 && GET_CODE (PATTERN (dep_insn)) == SET
32515 && GET_CODE (XEXP (PATTERN (insn), 1)) == MEM
32516 && GET_CODE (XEXP (PATTERN (dep_insn), 0)) == MEM
32517 && (GET_MODE_SIZE (GET_MODE (XEXP (PATTERN (insn), 1)))
32518 > GET_MODE_SIZE (GET_MODE (XEXP (PATTERN (dep_insn), 0)))))
32519 return cost + 14;
32521 attr_type = get_attr_type (insn);
32523 switch (attr_type)
32525 case TYPE_JMPREG:
32526 /* Tell the first scheduling pass about the latency between
32527 a mtctr and bctr (and mtlr and br/blr). The first
32528 scheduling pass will not know about this latency since
32529 the mtctr instruction, which has the latency associated
32530 to it, will be generated by reload. */
32531 return 4;
32532 case TYPE_BRANCH:
32533 /* Leave some extra cycles between a compare and its
32534 dependent branch, to inhibit expensive mispredicts. */
32535 if ((rs6000_cpu_attr == CPU_PPC603
32536 || rs6000_cpu_attr == CPU_PPC604
32537 || rs6000_cpu_attr == CPU_PPC604E
32538 || rs6000_cpu_attr == CPU_PPC620
32539 || rs6000_cpu_attr == CPU_PPC630
32540 || rs6000_cpu_attr == CPU_PPC750
32541 || rs6000_cpu_attr == CPU_PPC7400
32542 || rs6000_cpu_attr == CPU_PPC7450
32543 || rs6000_cpu_attr == CPU_PPCE5500
32544 || rs6000_cpu_attr == CPU_PPCE6500
32545 || rs6000_cpu_attr == CPU_POWER4
32546 || rs6000_cpu_attr == CPU_POWER5
32547 || rs6000_cpu_attr == CPU_POWER7
32548 || rs6000_cpu_attr == CPU_POWER8
32549 || rs6000_cpu_attr == CPU_POWER9
32550 || rs6000_cpu_attr == CPU_CELL)
32551 && recog_memoized (dep_insn)
32552 && (INSN_CODE (dep_insn) >= 0))
32554 switch (get_attr_type (dep_insn))
32556 case TYPE_CMP:
32557 case TYPE_FPCOMPARE:
32558 case TYPE_CR_LOGICAL:
32559 case TYPE_DELAYED_CR:
32560 return cost + 2;
32561 case TYPE_EXTS:
32562 case TYPE_MUL:
32563 if (get_attr_dot (dep_insn) == DOT_YES)
32564 return cost + 2;
32565 else
32566 break;
32567 case TYPE_SHIFT:
32568 if (get_attr_dot (dep_insn) == DOT_YES
32569 && get_attr_var_shift (dep_insn) == VAR_SHIFT_NO)
32570 return cost + 2;
32571 else
32572 break;
32573 default:
32574 break;
32576 break;
32578 case TYPE_STORE:
32579 case TYPE_FPSTORE:
32580 if ((rs6000_cpu == PROCESSOR_POWER6)
32581 && recog_memoized (dep_insn)
32582 && (INSN_CODE (dep_insn) >= 0))
32585 if (GET_CODE (PATTERN (insn)) != SET)
32586 /* If this happens, we have to extend this to schedule
32587 optimally. Return default for now. */
32588 return cost;
32590 /* Adjust the cost for the case where the value written
32591 by a fixed point operation is used as the address
32592 gen value on a store. */
32593 switch (get_attr_type (dep_insn))
32595 case TYPE_LOAD:
32596 case TYPE_CNTLZ:
32598 if (! store_data_bypass_p (dep_insn, insn))
32599 return get_attr_sign_extend (dep_insn)
32600 == SIGN_EXTEND_YES ? 6 : 4;
32601 break;
32603 case TYPE_SHIFT:
32605 if (! store_data_bypass_p (dep_insn, insn))
32606 return get_attr_var_shift (dep_insn) == VAR_SHIFT_YES ?
32607 6 : 3;
32608 break;
32610 case TYPE_INTEGER:
32611 case TYPE_ADD:
32612 case TYPE_LOGICAL:
32613 case TYPE_EXTS:
32614 case TYPE_INSERT:
32616 if (! store_data_bypass_p (dep_insn, insn))
32617 return 3;
32618 break;
32620 case TYPE_STORE:
32621 case TYPE_FPLOAD:
32622 case TYPE_FPSTORE:
32624 if (get_attr_update (dep_insn) == UPDATE_YES
32625 && ! store_data_bypass_p (dep_insn, insn))
32626 return 3;
32627 break;
32629 case TYPE_MUL:
32631 if (! store_data_bypass_p (dep_insn, insn))
32632 return 17;
32633 break;
32635 case TYPE_DIV:
32637 if (! store_data_bypass_p (dep_insn, insn))
32638 return get_attr_size (dep_insn) == SIZE_32 ? 45 : 57;
32639 break;
32641 default:
32642 break;
32645 break;
32647 case TYPE_LOAD:
32648 if ((rs6000_cpu == PROCESSOR_POWER6)
32649 && recog_memoized (dep_insn)
32650 && (INSN_CODE (dep_insn) >= 0))
32653 /* Adjust the cost for the case where the value written
32654 by a fixed point instruction is used within the address
32655 gen portion of a subsequent load(u)(x). */
32656 switch (get_attr_type (dep_insn))
32658 case TYPE_LOAD:
32659 case TYPE_CNTLZ:
32661 if (set_to_load_agen (dep_insn, insn))
32662 return get_attr_sign_extend (dep_insn)
32663 == SIGN_EXTEND_YES ? 6 : 4;
32664 break;
32666 case TYPE_SHIFT:
32668 if (set_to_load_agen (dep_insn, insn))
32669 return get_attr_var_shift (dep_insn) == VAR_SHIFT_YES ?
32670 6 : 3;
32671 break;
32673 case TYPE_INTEGER:
32674 case TYPE_ADD:
32675 case TYPE_LOGICAL:
32676 case TYPE_EXTS:
32677 case TYPE_INSERT:
32679 if (set_to_load_agen (dep_insn, insn))
32680 return 3;
32681 break;
32683 case TYPE_STORE:
32684 case TYPE_FPLOAD:
32685 case TYPE_FPSTORE:
32687 if (get_attr_update (dep_insn) == UPDATE_YES
32688 && set_to_load_agen (dep_insn, insn))
32689 return 3;
32690 break;
32692 case TYPE_MUL:
32694 if (set_to_load_agen (dep_insn, insn))
32695 return 17;
32696 break;
32698 case TYPE_DIV:
32700 if (set_to_load_agen (dep_insn, insn))
32701 return get_attr_size (dep_insn) == SIZE_32 ? 45 : 57;
32702 break;
32704 default:
32705 break;
32708 break;
32710 case TYPE_FPLOAD:
32711 if ((rs6000_cpu == PROCESSOR_POWER6)
32712 && get_attr_update (insn) == UPDATE_NO
32713 && recog_memoized (dep_insn)
32714 && (INSN_CODE (dep_insn) >= 0)
32715 && (get_attr_type (dep_insn) == TYPE_MFFGPR))
32716 return 2;
32718 default:
32719 break;
32722 /* Fall out to return default cost. */
32724 break;
32726 case REG_DEP_OUTPUT:
32727 /* Output dependency; DEP_INSN writes a register that INSN writes some
32728 cycles later. */
32729 if ((rs6000_cpu == PROCESSOR_POWER6)
32730 && recog_memoized (dep_insn)
32731 && (INSN_CODE (dep_insn) >= 0))
32733 attr_type = get_attr_type (insn);
32735 switch (attr_type)
32737 case TYPE_FP:
32738 case TYPE_FPSIMPLE:
32739 if (get_attr_type (dep_insn) == TYPE_FP
32740 || get_attr_type (dep_insn) == TYPE_FPSIMPLE)
32741 return 1;
32742 break;
32743 case TYPE_FPLOAD:
32744 if (get_attr_update (insn) == UPDATE_NO
32745 && get_attr_type (dep_insn) == TYPE_MFFGPR)
32746 return 2;
32747 break;
32748 default:
32749 break;
32752 /* Fall through, no cost for output dependency. */
32753 /* FALLTHRU */
32755 case REG_DEP_ANTI:
32756 /* Anti dependency; DEP_INSN reads a register that INSN writes some
32757 cycles later. */
32758 return 0;
32760 default:
32761 gcc_unreachable ();
32764 return cost;
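/* Illustrative sketch, not part of GCC: the first REG_DEP_TRUE case above
   keeps extra distance between a load and a narrower dependent store,
   since a load wider than the store feeding it cannot pick its data up
   from the store queue on these processors.  The helper below is a
   disabled, hypothetical restatement of just that check.  */
#if 0
static int
toy_wide_load_penalty (int cost, int load_bytes, int store_bytes)
{
  /* Mirror the "cost + 14" adjustment used when the load is wider
     than the dependent store.  */
  return load_bytes > store_bytes ? cost + 14 : cost;
}
#endif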
32767 /* Debug version of rs6000_adjust_cost. */
32769 static int
32770 rs6000_debug_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep_insn,
32771 int cost, unsigned int dw)
32773 int ret = rs6000_adjust_cost (insn, dep_type, dep_insn, cost, dw);
32775 if (ret != cost)
32777 const char *dep;
32779 switch (dep_type)
32781 default: dep = "unknown depencency"; break;
32782 case REG_DEP_TRUE: dep = "data dependency"; break;
32783 case REG_DEP_OUTPUT: dep = "output dependency"; break;
32784 case REG_DEP_ANTI: dep = "anti depencency"; break;
32787 fprintf (stderr,
32788 "\nrs6000_adjust_cost, final cost = %d, orig cost = %d, "
32789 "%s, insn:\n", ret, cost, dep);
32791 debug_rtx (insn);
32794 return ret;
32797 /* The function returns true if INSN is microcoded.
32798 Return false otherwise. */
32800 static bool
32801 is_microcoded_insn (rtx_insn *insn)
32803 if (!insn || !NONDEBUG_INSN_P (insn)
32804 || GET_CODE (PATTERN (insn)) == USE
32805 || GET_CODE (PATTERN (insn)) == CLOBBER)
32806 return false;
32808 if (rs6000_cpu_attr == CPU_CELL)
32809 return get_attr_cell_micro (insn) == CELL_MICRO_ALWAYS;
32811 if (rs6000_sched_groups
32812 && (rs6000_cpu == PROCESSOR_POWER4 || rs6000_cpu == PROCESSOR_POWER5))
32814 enum attr_type type = get_attr_type (insn);
32815 if ((type == TYPE_LOAD
32816 && get_attr_update (insn) == UPDATE_YES
32817 && get_attr_sign_extend (insn) == SIGN_EXTEND_YES)
32818 || ((type == TYPE_LOAD || type == TYPE_STORE)
32819 && get_attr_update (insn) == UPDATE_YES
32820 && get_attr_indexed (insn) == INDEXED_YES)
32821 || type == TYPE_MFCR)
32822 return true;
32825 return false;
32828 /* The function returns true if INSN is cracked into 2 instructions
32829 by the processor (and therefore occupies 2 issue slots). */
32831 static bool
32832 is_cracked_insn (rtx_insn *insn)
32834 if (!insn || !NONDEBUG_INSN_P (insn)
32835 || GET_CODE (PATTERN (insn)) == USE
32836 || GET_CODE (PATTERN (insn)) == CLOBBER)
32837 return false;
32839 if (rs6000_sched_groups
32840 && (rs6000_cpu == PROCESSOR_POWER4 || rs6000_cpu == PROCESSOR_POWER5))
32842 enum attr_type type = get_attr_type (insn);
32843 if ((type == TYPE_LOAD
32844 && get_attr_sign_extend (insn) == SIGN_EXTEND_YES
32845 && get_attr_update (insn) == UPDATE_NO)
32846 || (type == TYPE_LOAD
32847 && get_attr_sign_extend (insn) == SIGN_EXTEND_NO
32848 && get_attr_update (insn) == UPDATE_YES
32849 && get_attr_indexed (insn) == INDEXED_NO)
32850 || (type == TYPE_STORE
32851 && get_attr_update (insn) == UPDATE_YES
32852 && get_attr_indexed (insn) == INDEXED_NO)
32853 || ((type == TYPE_FPLOAD || type == TYPE_FPSTORE)
32854 && get_attr_update (insn) == UPDATE_YES)
32855 || type == TYPE_DELAYED_CR
32856 || (type == TYPE_EXTS
32857 && get_attr_dot (insn) == DOT_YES)
32858 || (type == TYPE_SHIFT
32859 && get_attr_dot (insn) == DOT_YES
32860 && get_attr_var_shift (insn) == VAR_SHIFT_NO)
32861 || (type == TYPE_MUL
32862 && get_attr_dot (insn) == DOT_YES)
32863 || type == TYPE_DIV
32864 || (type == TYPE_INSERT
32865 && get_attr_size (insn) == SIZE_32))
32866 return true;
32869 return false;
32872 /* The function returns true if INSN can be issued only from
32873 the branch slot. */
32875 static bool
32876 is_branch_slot_insn (rtx_insn *insn)
32878 if (!insn || !NONDEBUG_INSN_P (insn)
32879 || GET_CODE (PATTERN (insn)) == USE
32880 || GET_CODE (PATTERN (insn)) == CLOBBER)
32881 return false;
32883 if (rs6000_sched_groups)
32885 enum attr_type type = get_attr_type (insn);
32886 if (type == TYPE_BRANCH || type == TYPE_JMPREG)
32887 return true;
32888 return false;
32891 return false;
32894 /* The function returns true if OUT_INSN sets a value that is
32895 used in the address generation computation of IN_INSN. */
32896 static bool
32897 set_to_load_agen (rtx_insn *out_insn, rtx_insn *in_insn)
32899 rtx out_set, in_set;
32901 /* For performance reasons, only handle the simple case where
32902 both insns are a single_set. */
32903 out_set = single_set (out_insn);
32904 if (out_set)
32906 in_set = single_set (in_insn);
32907 if (in_set)
32908 return reg_mentioned_p (SET_DEST (out_set), SET_SRC (in_set));
32911 return false;
32914 /* Try to determine base/offset/size parts of the given MEM.
32915 Return true if successful, false if the values could not all
32916 be determined.
32918 This function only looks for REG or REG+CONST address forms.
32919 REG+REG address form will return false. */
32921 static bool
32922 get_memref_parts (rtx mem, rtx *base, HOST_WIDE_INT *offset,
32923 HOST_WIDE_INT *size)
32925 rtx addr_rtx;
32926 if (MEM_SIZE_KNOWN_P (mem))
32927 *size = MEM_SIZE (mem);
32928 else
32929 return false;
32931 addr_rtx = (XEXP (mem, 0));
32932 if (GET_CODE (addr_rtx) == PRE_MODIFY)
32933 addr_rtx = XEXP (addr_rtx, 1);
32935 *offset = 0;
32936 while (GET_CODE (addr_rtx) == PLUS
32937 && CONST_INT_P (XEXP (addr_rtx, 1)))
32939 *offset += INTVAL (XEXP (addr_rtx, 1));
32940 addr_rtx = XEXP (addr_rtx, 0);
32942 if (!REG_P (addr_rtx))
32943 return false;
32945 *base = addr_rtx;
32946 return true;
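/* Illustrative sketch, not part of GCC: the loop above peels nested
   PLUS/CONST_INT levels off the address, accumulating the constant
   displacement until only a base register remains.  Disabled toy
   version over a hypothetical address node.  */
#if 0
#include <stdbool.h>

struct toy_addr
{
  bool is_plus;            /* PLUS of BASE and a constant */
  struct toy_addr *base;   /* inner address when IS_PLUS */
  long const_off;          /* the CONST_INT operand when IS_PLUS */
  int regno;               /* register number, or -1 if not a REG */
};

static bool
toy_memref_parts (const struct toy_addr *a, int *base_regno, long *offset)
{
  *offset = 0;
  while (a->is_plus)
    {
      *offset += a->const_off;   /* accumulate nested displacements */
      a = a->base;
    }
  if (a->regno < 0)
    return false;                /* REG+REG and other forms rejected */
  *base_regno = a->regno;
  return true;
}
#endif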
32949 /* Return true if the target storage location of MEM1 is
32950 adjacent to the target storage location of MEM2. */
32953 static bool
32954 adjacent_mem_locations (rtx mem1, rtx mem2)
32956 rtx reg1, reg2;
32957 HOST_WIDE_INT off1, size1, off2, size2;
32959 if (get_memref_parts (mem1, &reg1, &off1, &size1)
32960 && get_memref_parts (mem2, &reg2, &off2, &size2))
32961 return ((REGNO (reg1) == REGNO (reg2))
32962 && ((off1 + size1 == off2)
32963 || (off2 + size2 == off1)));
32965 return false;
32968 /* This function returns true if it can be determined that the two MEM
32969 locations overlap by at least 1 byte based on base reg/offset/size. */
32971 static bool
32972 mem_locations_overlap (rtx mem1, rtx mem2)
32974 rtx reg1, reg2;
32975 HOST_WIDE_INT off1, size1, off2, size2;
32977 if (get_memref_parts (mem1, &reg1, &off1, &size1)
32978 && get_memref_parts (mem2, &reg2, &off2, &size2))
32979 return ((REGNO (reg1) == REGNO (reg2))
32980 && (((off1 <= off2) && (off1 + size1 > off2))
32981 || ((off2 <= off1) && (off2 + size2 > off1))));
32983 return false;
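/* Illustrative sketch, not part of GCC: once get_memref_parts has
   produced (base, offset, size) triples, the two predicates above are
   plain interval arithmetic on accesses off the same base register.
   Disabled toy versions with a few sanity checks; names hypothetical.  */
#if 0
#include <assert.h>
#include <stdbool.h>

static bool
toy_adjacent (long off1, long size1, long off2, long size2)
{
  /* One access must end exactly where the other begins.  */
  return off1 + size1 == off2 || off2 + size2 == off1;
}

static bool
toy_overlap (long off1, long size1, long off2, long size2)
{
  /* Half-open intervals [off, off + size) intersect.  */
  return (off1 <= off2 && off1 + size1 > off2)
	 || (off2 <= off1 && off2 + size2 > off1);
}

static void
toy_mem_checks (void)
{
  assert (toy_adjacent (0, 8, 8, 8));    /* r+0..7 then r+8..15 */
  assert (!toy_adjacent (0, 8, 16, 8));  /* 8-byte gap */
  assert (toy_overlap (0, 8, 4, 8));     /* bytes 4..7 shared */
  assert (!toy_overlap (0, 8, 8, 8));    /* adjacent but disjoint */
}
#endif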
32986 /* A C statement (sans semicolon) to update the integer scheduling
32987 priority INSN_PRIORITY (INSN). Increase the priority to execute the
32988 INSN earlier, reduce the priority to execute INSN later. Do not
32989 define this macro if you do not need to adjust the scheduling
32990 priorities of insns. */
32992 static int
32993 rs6000_adjust_priority (rtx_insn *insn ATTRIBUTE_UNUSED, int priority)
32995 rtx load_mem, str_mem;
32996 /* On machines (like the 750) which have asymmetric integer units,
32997 where one integer unit can do multiply and divides and the other
32998 can't, reduce the priority of multiply/divide so it is scheduled
32999 before other integer operations. */
33001 #if 0
33002 if (! INSN_P (insn))
33003 return priority;
33005 if (GET_CODE (PATTERN (insn)) == USE)
33006 return priority;
33008 switch (rs6000_cpu_attr) {
33009 case CPU_PPC750:
33010 switch (get_attr_type (insn))
33012 default:
33013 break;
33015 case TYPE_MUL:
33016 case TYPE_DIV:
33017 fprintf (stderr, "priority was %#x (%d) before adjustment\n",
33018 priority, priority);
33019 if (priority >= 0 && priority < 0x01000000)
33020 priority >>= 3;
33021 break;
33024 #endif
33026 if (insn_must_be_first_in_group (insn)
33027 && reload_completed
33028 && current_sched_info->sched_max_insns_priority
33029 && rs6000_sched_restricted_insns_priority)
33032 /* Prioritize insns that can be dispatched only in the first
33033 dispatch slot. */
33034 if (rs6000_sched_restricted_insns_priority == 1)
33035 /* Attach highest priority to insn. This means that in
33036 haifa-sched.c:ready_sort(), dispatch-slot restriction considerations
33037 precede 'priority' (critical path) considerations. */
33038 return current_sched_info->sched_max_insns_priority;
33039 else if (rs6000_sched_restricted_insns_priority == 2)
33040 /* Increase priority of insn by a minimal amount. This means that in
33041 haifa-sched.c:ready_sort(), only 'priority' (critical path)
33042 considerations precede dispatch-slot restriction considerations. */
33043 return (priority + 1);
33046 if (rs6000_cpu == PROCESSOR_POWER6
33047 && ((load_store_pendulum == -2 && is_load_insn (insn, &load_mem))
33048 || (load_store_pendulum == 2 && is_store_insn (insn, &str_mem))))
33049 /* Attach highest priority to insn if the scheduler has just issued two
33050 stores and this instruction is a load, or two loads and this instruction
33051 is a store. Power6 wants loads and stores scheduled alternately
33052 when possible. */
33053 return current_sched_info->sched_max_insns_priority;
33055 return priority;
33058 /* Return true if the instruction is nonpipelined on the Cell. */
33059 static bool
33060 is_nonpipeline_insn (rtx_insn *insn)
33062 enum attr_type type;
33063 if (!insn || !NONDEBUG_INSN_P (insn)
33064 || GET_CODE (PATTERN (insn)) == USE
33065 || GET_CODE (PATTERN (insn)) == CLOBBER)
33066 return false;
33068 type = get_attr_type (insn);
33069 if (type == TYPE_MUL
33070 || type == TYPE_DIV
33071 || type == TYPE_SDIV
33072 || type == TYPE_DDIV
33073 || type == TYPE_SSQRT
33074 || type == TYPE_DSQRT
33075 || type == TYPE_MFCR
33076 || type == TYPE_MFCRF
33077 || type == TYPE_MFJMPR)
33079 return true;
33081 return false;
33085 /* Return how many instructions the machine can issue per cycle. */
33087 static int
33088 rs6000_issue_rate (void)
33090 /* Unless scheduling for register pressure, use issue rate of 1 for
33091 first scheduling pass to decrease degradation. */
33092 if (!reload_completed && !flag_sched_pressure)
33093 return 1;
33095 switch (rs6000_cpu_attr) {
33096 case CPU_RS64A:
33097 case CPU_PPC601: /* ? */
33098 case CPU_PPC7450:
33099 return 3;
33100 case CPU_PPC440:
33101 case CPU_PPC603:
33102 case CPU_PPC750:
33103 case CPU_PPC7400:
33104 case CPU_PPC8540:
33105 case CPU_PPC8548:
33106 case CPU_CELL:
33107 case CPU_PPCE300C2:
33108 case CPU_PPCE300C3:
33109 case CPU_PPCE500MC:
33110 case CPU_PPCE500MC64:
33111 case CPU_PPCE5500:
33112 case CPU_PPCE6500:
33113 case CPU_TITAN:
33114 return 2;
33115 case CPU_PPC476:
33116 case CPU_PPC604:
33117 case CPU_PPC604E:
33118 case CPU_PPC620:
33119 case CPU_PPC630:
33120 return 4;
33121 case CPU_POWER4:
33122 case CPU_POWER5:
33123 case CPU_POWER6:
33124 case CPU_POWER7:
33125 return 5;
33126 case CPU_POWER8:
33127 return 7;
33128 case CPU_POWER9:
33129 return 6;
33130 default:
33131 return 1;
33135 /* Return how many instructions to look ahead for better insn
33136 scheduling. */
33138 static int
33139 rs6000_use_sched_lookahead (void)
33141 switch (rs6000_cpu_attr)
33143 case CPU_PPC8540:
33144 case CPU_PPC8548:
33145 return 4;
33147 case CPU_CELL:
33148 return (reload_completed ? 8 : 0);
33150 default:
33151 return 0;
33155 /* We are choosing insn from the ready queue. Return zero if INSN can be
33156 chosen. */
33157 static int
33158 rs6000_use_sched_lookahead_guard (rtx_insn *insn, int ready_index)
33160 if (ready_index == 0)
33161 return 0;
33163 if (rs6000_cpu_attr != CPU_CELL)
33164 return 0;
33166 gcc_assert (insn != NULL_RTX && INSN_P (insn));
33168 if (!reload_completed
33169 || is_nonpipeline_insn (insn)
33170 || is_microcoded_insn (insn))
33171 return 1;
33173 return 0;
33176 /* Determine if PAT refers to memory. If so, set MEM_REF to the MEM rtx
33177 and return true. */
33179 static bool
33180 find_mem_ref (rtx pat, rtx *mem_ref)
33182 const char * fmt;
33183 int i, j;
33185 /* stack_tie does not produce any real memory traffic. */
33186 if (tie_operand (pat, VOIDmode))
33187 return false;
33189 if (GET_CODE (pat) == MEM)
33191 *mem_ref = pat;
33192 return true;
33195 /* Recursively process the pattern. */
33196 fmt = GET_RTX_FORMAT (GET_CODE (pat));
33198 for (i = GET_RTX_LENGTH (GET_CODE (pat)) - 1; i >= 0; i--)
33200 if (fmt[i] == 'e')
33202 if (find_mem_ref (XEXP (pat, i), mem_ref))
33203 return true;
33205 else if (fmt[i] == 'E')
33206 for (j = XVECLEN (pat, i) - 1; j >= 0; j--)
33208 if (find_mem_ref (XVECEXP (pat, i, j), mem_ref))
33209 return true;
33213 return false;
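/* Illustrative sketch, not part of GCC: find_mem_ref is a depth-first
   walk driven by the RTX format string, where 'e' marks a single
   sub-expression and 'E' a vector of them.  The disabled toy below
   mirrors that shape on a hypothetical tree type.  */
#if 0
#include <stdbool.h>

enum toy_kind { TOY_MEM, TOY_EXPR, TOY_VEC };

struct toy_node
{
  enum toy_kind kind;
  struct toy_node *sub;    /* the 'e' operand of a TOY_EXPR */
  struct toy_node **elts;  /* the 'E' operands of a TOY_VEC */
  int n_elts;
};

static bool
toy_find_mem (struct toy_node *n, struct toy_node **mem_ref)
{
  if (n->kind == TOY_MEM)
    {
      *mem_ref = n;
      return true;
    }
  if (n->kind == TOY_EXPR)
    return toy_find_mem (n->sub, mem_ref);
  /* Scan vector elements back to front, as the loop above does.  */
  for (int j = n->n_elts - 1; j >= 0; j--)
    if (toy_find_mem (n->elts[j], mem_ref))
      return true;
  return false;
}
#endif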
33216 /* Determine if PAT is a PATTERN of a load insn. */
33218 static bool
33219 is_load_insn1 (rtx pat, rtx *load_mem)
33221 if (!pat || pat == NULL_RTX)
33222 return false;
33224 if (GET_CODE (pat) == SET)
33225 return find_mem_ref (SET_SRC (pat), load_mem);
33227 if (GET_CODE (pat) == PARALLEL)
33229 int i;
33231 for (i = 0; i < XVECLEN (pat, 0); i++)
33232 if (is_load_insn1 (XVECEXP (pat, 0, i), load_mem))
33233 return true;
33236 return false;
33239 /* Determine if INSN loads from memory. */
33241 static bool
33242 is_load_insn (rtx insn, rtx *load_mem)
33244 if (!insn || !INSN_P (insn))
33245 return false;
33247 if (CALL_P (insn))
33248 return false;
33250 return is_load_insn1 (PATTERN (insn), load_mem);
33253 /* Determine if PAT is a PATTERN of a store insn. */
33255 static bool
33256 is_store_insn1 (rtx pat, rtx *str_mem)
33258 if (!pat || pat == NULL_RTX)
33259 return false;
33261 if (GET_CODE (pat) == SET)
33262 return find_mem_ref (SET_DEST (pat), str_mem);
33264 if (GET_CODE (pat) == PARALLEL)
33266 int i;
33268 for (i = 0; i < XVECLEN (pat, 0); i++)
33269 if (is_store_insn1 (XVECEXP (pat, 0, i), str_mem))
33270 return true;
33273 return false;
33276 /* Determine if INSN stores to memory. */
33278 static bool
33279 is_store_insn (rtx insn, rtx *str_mem)
33281 if (!insn || !INSN_P (insn))
33282 return false;
33284 return is_store_insn1 (PATTERN (insn), str_mem);
33287 /* Return whether TYPE is a Power9 pairable vector instruction type. */
33289 static bool
33290 is_power9_pairable_vec_type (enum attr_type type)
33292 switch (type)
33294 case TYPE_VECSIMPLE:
33295 case TYPE_VECCOMPLEX:
33296 case TYPE_VECDIV:
33297 case TYPE_VECCMP:
33298 case TYPE_VECPERM:
33299 case TYPE_VECFLOAT:
33300 case TYPE_VECFDIV:
33301 case TYPE_VECDOUBLE:
33302 return true;
33303 default:
33304 break;
33306 return false;
33309 /* Returns whether the dependence between INSN and NEXT is considered
33310 costly by the given target. */
33312 static bool
33313 rs6000_is_costly_dependence (dep_t dep, int cost, int distance)
33315 rtx insn;
33316 rtx next;
33317 rtx load_mem, str_mem;
33319 /* If the flag is not enabled - no dependence is considered costly;
33320 allow all dependent insns in the same group.
33321 This is the most aggressive option. */
33322 if (rs6000_sched_costly_dep == no_dep_costly)
33323 return false;
33325 /* If the flag is set to 1 - a dependence is always considered costly;
33326 do not allow dependent instructions in the same group.
33327 This is the most conservative option. */
33328 if (rs6000_sched_costly_dep == all_deps_costly)
33329 return true;
33331 insn = DEP_PRO (dep);
33332 next = DEP_CON (dep);
33334 if (rs6000_sched_costly_dep == store_to_load_dep_costly
33335 && is_load_insn (next, &load_mem)
33336 && is_store_insn (insn, &str_mem))
33337 /* Prevent load after store in the same group. */
33338 return true;
33340 if (rs6000_sched_costly_dep == true_store_to_load_dep_costly
33341 && is_load_insn (next, &load_mem)
33342 && is_store_insn (insn, &str_mem)
33343 && DEP_TYPE (dep) == REG_DEP_TRUE
33344 && mem_locations_overlap(str_mem, load_mem))
33345 /* Prevent load after store in the same group if it is a true
33346 dependence. */
33347 return true;
33349 /* The flag is set to X; dependences with latency >= X are considered costly,
33350 and will not be scheduled in the same group. */
33351 if (rs6000_sched_costly_dep <= max_dep_latency
33352 && ((cost - distance) >= (int)rs6000_sched_costly_dep))
33353 return true;
33355 return false;
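/* Illustrative sketch, not part of GCC: the checks above form a small
   policy ladder over the -msched-costly-dep setting.  Disabled toy
   restatement; the enumerators echo the option keywords, everything
   else is hypothetical.  */
#if 0
#include <stdbool.h>

enum toy_policy { TOY_NO_DEP_COSTLY, TOY_ALL_DEPS_COSTLY,
		  TOY_STORE_TO_LOAD, TOY_TRUE_STORE_TO_LOAD, TOY_LATENCY };

static bool
toy_costly_dep (enum toy_policy policy, int latency_bound,
		bool store_feeds_load, bool true_dep_overlap,
		int cost, int distance)
{
  if (policy == TOY_NO_DEP_COSTLY)
    return false;                       /* nothing is costly */
  if (policy == TOY_ALL_DEPS_COSTLY)
    return true;                        /* everything is costly */
  if (policy == TOY_STORE_TO_LOAD && store_feeds_load)
    return true;                        /* any store->load pairing */
  if (policy == TOY_TRUE_STORE_TO_LOAD && true_dep_overlap)
    return true;                        /* overlapping true dependence */
  /* Numeric setting: costly once the remaining latency reaches the bound.  */
  return policy == TOY_LATENCY && cost - distance >= latency_bound;
}
#endif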
33358 /* Return the next insn after INSN that is found before TAIL is reached,
33359 skipping any "non-active" insns - insns that will not actually occupy
33360 an issue slot. Return NULL_RTX if such an insn is not found. */
33362 static rtx_insn *
33363 get_next_active_insn (rtx_insn *insn, rtx_insn *tail)
33365 if (insn == NULL_RTX || insn == tail)
33366 return NULL;
33368 while (1)
33370 insn = NEXT_INSN (insn);
33371 if (insn == NULL_RTX || insn == tail)
33372 return NULL;
33374 if (CALL_P (insn)
33375 || JUMP_P (insn) || JUMP_TABLE_DATA_P (insn)
33376 || (NONJUMP_INSN_P (insn)
33377 && GET_CODE (PATTERN (insn)) != USE
33378 && GET_CODE (PATTERN (insn)) != CLOBBER
33379 && INSN_CODE (insn) != CODE_FOR_stack_tie))
33380 break;
33382 return insn;
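/* Illustrative sketch, not part of GCC: get_next_active_insn is a plain
   forward scan that skips insns which never occupy an issue slot.
   Disabled toy on a singly linked list; names hypothetical.  */
#if 0
#include <stdbool.h>
#include <stddef.h>

struct toy_insn { struct toy_insn *next; bool active; };

static struct toy_insn *
toy_next_active (struct toy_insn *insn, struct toy_insn *tail)
{
  if (insn == NULL || insn == tail)
    return NULL;
  for (insn = insn->next; insn != NULL && insn != tail; insn = insn->next)
    if (insn->active)
      return insn;
  return NULL;   /* hit the end, or TAIL, first */
}
#endif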
33385 /* Do Power9 specific sched_reorder2 reordering of ready list. */
33387 static int
33388 power9_sched_reorder2 (rtx_insn **ready, int lastpos)
33390 int pos;
33391 int i;
33392 rtx_insn *tmp;
33393 enum attr_type type;
33395 type = get_attr_type (last_scheduled_insn);
33397 /* Try to issue fixed point divides back-to-back in pairs so they will be
33398 routed to separate execution units and execute in parallel. */
33399 if (type == TYPE_DIV && divide_cnt == 0)
33401 /* First divide has been scheduled. */
33402 divide_cnt = 1;
33404 /* Scan the ready list looking for another divide, if found move it
33405 to the end of the list so it is chosen next. */
33406 pos = lastpos;
33407 while (pos >= 0)
33409 if (recog_memoized (ready[pos]) >= 0
33410 && get_attr_type (ready[pos]) == TYPE_DIV)
33412 tmp = ready[pos];
33413 for (i = pos; i < lastpos; i++)
33414 ready[i] = ready[i + 1];
33415 ready[lastpos] = tmp;
33416 break;
33418 pos--;
33421 else
33423 /* Last insn was the 2nd divide or not a divide, reset the counter. */
33424 divide_cnt = 0;
33426 /* Power9 can execute 2 vector operations and 2 vector loads in a single
33427 cycle. So try to pair up and alternate groups of vector and vector
33428 load instructions.
33430 To aid this formation, a counter is maintained to keep track of
33431 vec/vecload insns issued. The value of vec_load_pendulum maintains
33432 the current state with the following values:
33434 0 : Initial state, no vec/vecload group has been started.
33436 -1 : 1 vector load has been issued and another has been found on
33437 the ready list and moved to the end.
33439 -2 : 2 vector loads have been issued and a vector operation has
33440 been found and moved to the end of the ready list.
33442 -3 : 2 vector loads and a vector insn have been issued and a
33443 vector operation has been found and moved to the end of the
33444 ready list.
33446 1 : 1 vector insn has been issued and another has been found and
33447 moved to the end of the ready list.
33449 2 : 2 vector insns have been issued and a vector load has been
33450 found and moved to the end of the ready list.
33452 3 : 2 vector insns and a vector load have been issued and another
33453 vector load has been found and moved to the end of the ready
33454 list. */
33455 if (type == TYPE_VECLOAD)
33457 /* Issued a vecload. */
33458 if (vec_load_pendulum == 0)
33460 /* We issued a single vecload, look for another and move it to
33461 the end of the ready list so it will be scheduled next.
33462 Set pendulum if found. */
33463 pos = lastpos;
33464 while (pos >= 0)
33466 if (recog_memoized (ready[pos]) >= 0
33467 && get_attr_type (ready[pos]) == TYPE_VECLOAD)
33469 tmp = ready[pos];
33470 for (i = pos; i < lastpos; i++)
33471 ready[i] = ready[i + 1];
33472 ready[lastpos] = tmp;
33473 vec_load_pendulum = -1;
33474 return cached_can_issue_more;
33476 pos--;
33479 else if (vec_load_pendulum == -1)
33481 /* This is the second vecload we've issued, search the ready
33482 list for a vector operation so we can try to schedule a
33483 pair of those next. If found move to the end of the ready
33484 list so it is scheduled next and set the pendulum. */
33485 pos = lastpos;
33486 while (pos >= 0)
33488 if (recog_memoized (ready[pos]) >= 0
33489 && is_power9_pairable_vec_type (
33490 get_attr_type (ready[pos])))
33492 tmp = ready[pos];
33493 for (i = pos; i < lastpos; i++)
33494 ready[i] = ready[i + 1];
33495 ready[lastpos] = tmp;
33496 vec_load_pendulum = -2;
33497 return cached_can_issue_more;
33499 pos--;
33502 else if (vec_load_pendulum == 2)
33504 /* Two vector ops have been issued and we've just issued a
33505 vecload, look for another vecload and move to end of ready
33506 list if found. */
33507 pos = lastpos;
33508 while (pos >= 0)
33510 if (recog_memoized (ready[pos]) >= 0
33511 && get_attr_type (ready[pos]) == TYPE_VECLOAD)
33513 tmp = ready[pos];
33514 for (i = pos; i < lastpos; i++)
33515 ready[i] = ready[i + 1];
33516 ready[lastpos] = tmp;
33517 /* Set pendulum so that next vecload will be seen as
33518 finishing a group, not start of one. */
33519 vec_load_pendulum = 3;
33520 return cached_can_issue_more;
33522 pos--;
33526 else if (is_power9_pairable_vec_type (type))
33528 /* Issued a vector operation. */
33529 if (vec_load_pendulum == 0)
33530 /* We issued a single vec op, look for another and move it
33531 to the end of the ready list so it will be scheduled next.
33532 Set pendulum if found. */
33534 pos = lastpos;
33535 while (pos >= 0)
33537 if (recog_memoized (ready[pos]) >= 0
33538 && is_power9_pairable_vec_type (
33539 get_attr_type (ready[pos])))
33541 tmp = ready[pos];
33542 for (i = pos; i < lastpos; i++)
33543 ready[i] = ready[i + 1];
33544 ready[lastpos] = tmp;
33545 vec_load_pendulum = 1;
33546 return cached_can_issue_more;
33548 pos--;
33551 else if (vec_load_pendulum == 1)
33553 /* This is the second vec op we've issued, search the ready
33554 list for a vecload operation so we can try to schedule a
33555 pair of those next. If found move to the end of the ready
33556 list so it is scheduled next and set the pendulum. */
33557 pos = lastpos;
33558 while (pos >= 0)
33560 if (recog_memoized (ready[pos]) >= 0
33561 && get_attr_type (ready[pos]) == TYPE_VECLOAD)
33563 tmp = ready[pos];
33564 for (i = pos; i < lastpos; i++)
33565 ready[i] = ready[i + 1];
33566 ready[lastpos] = tmp;
33567 vec_load_pendulum = 2;
33568 return cached_can_issue_more;
33570 pos--;
33573 else if (vec_load_pendulum == -2)
33575 /* Two vecload ops have been issued and we've just issued a
33576 vec op, look for another vec op and move to end of ready
33577 list if found. */
33578 pos = lastpos;
33579 while (pos >= 0)
33581 if (recog_memoized (ready[pos]) >= 0
33582 && is_power9_pairable_vec_type (
33583 get_attr_type (ready[pos])))
33585 tmp = ready[pos];
33586 for (i = pos; i < lastpos; i++)
33587 ready[i] = ready[i + 1];
33588 ready[lastpos] = tmp;
33589 /* Set pendulum so that next vec op will be seen as
33590 finishing a group, not start of one. */
33591 vec_load_pendulum = -3;
33592 return cached_can_issue_more;
33594 pos--;
33599 /* We've either finished a vec/vecload group, couldn't find an insn to
33600 continue the current group, or the last insn had nothing to do
33601 with a group. In any case, reset the pendulum. */
33602 vec_load_pendulum = 0;
33605 return cached_can_issue_more;
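/* Illustrative sketch, not part of GCC: the vec_load_pendulum values
   documented above form a small state machine.  Disabled toy transition
   function covering only the state changes made when a partner insn is
   found on the ready list; all names are hypothetical.  */
#if 0
/* Return the next pendulum state; 0 means the group finished or no
   partner insn was found, matching the reset above.  */
static int
toy_vec_pendulum_next (int state, int issued_vecload, int found_partner)
{
  if (!found_partner)
    return 0;
  if (issued_vecload)
    switch (state)
      {
      case 0:  return -1;              /* 1st vecload, 2nd one queued */
      case -1: return -2;              /* 2nd vecload, vec op queued */
      case 2:  return 3;               /* vecload closing a vec-op group */
      default: return 0;
      }
  switch (state)                       /* issued a vector operation */
    {
    case 0:  return 1;                 /* 1st vec op, 2nd one queued */
    case 1:  return 2;                 /* 2nd vec op, vecload queued */
    case -2: return -3;                /* vec op closing a vecload group */
    default: return 0;
    }
}
#endif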
33608 /* We are about to begin issuing insns for this clock cycle. */
33610 static int
33611 rs6000_sched_reorder (FILE *dump ATTRIBUTE_UNUSED, int sched_verbose,
33612 rtx_insn **ready ATTRIBUTE_UNUSED,
33613 int *pn_ready ATTRIBUTE_UNUSED,
33614 int clock_var ATTRIBUTE_UNUSED)
33616 int n_ready = *pn_ready;
33618 if (sched_verbose)
33619 fprintf (dump, "// rs6000_sched_reorder :\n");
33621 /* Reorder the ready list, if the second to last ready insn
33622 is a nonpipeline insn. */
33623 if (rs6000_cpu_attr == CPU_CELL && n_ready > 1)
33625 if (is_nonpipeline_insn (ready[n_ready - 1])
33626 && (recog_memoized (ready[n_ready - 2]) > 0))
33627 /* Simply swap first two insns. */
33628 std::swap (ready[n_ready - 1], ready[n_ready - 2]);
33631 if (rs6000_cpu == PROCESSOR_POWER6)
33632 load_store_pendulum = 0;
33634 return rs6000_issue_rate ();
33637 /* Like rs6000_sched_reorder, but called after issuing each insn. */
33639 static int
33640 rs6000_sched_reorder2 (FILE *dump, int sched_verbose, rtx_insn **ready,
33641 int *pn_ready, int clock_var ATTRIBUTE_UNUSED)
33643 if (sched_verbose)
33644 fprintf (dump, "// rs6000_sched_reorder2 :\n");
33646 /* For Power6, we need to handle some special cases to try and keep the
33647 store queue from overflowing and triggering expensive flushes.
33649 This code monitors how load and store instructions are being issued
33650 and skews the ready list one way or the other to increase the likelihood
33651 that a desired instruction is issued at the proper time.
33653 A couple of things are done. First, we maintain a "load_store_pendulum"
33654 to track the current state of load/store issue.
33656 - If the pendulum is at zero, then no loads or stores have been
33657 issued in the current cycle so we do nothing.
33659 - If the pendulum is 1, then a single load has been issued in this
33660 cycle and we attempt to locate another load in the ready list to
33661 issue with it.
33663 - If the pendulum is -2, then two stores have already been
33664 issued in this cycle, so we increase the priority of the first load
33665 in the ready list to increase its likelihood of being chosen first
33666 in the next cycle.
33668 - If the pendulum is -1, then a single store has been issued in this
33669 cycle and we attempt to locate another store in the ready list to
33670 issue with it, preferring a store to an adjacent memory location to
33671 facilitate store pairing in the store queue.
33673 - If the pendulum is 2, then two loads have already been
33674 issued in this cycle, so we increase the priority of the first store
33675 in the ready list to increase its likelihood of being chosen first
33676 in the next cycle.
33678 - If the pendulum < -2 or > 2, then do nothing.
33680 Note: This code covers the most common scenarios. There exist non
33681 load/store instructions which make use of the LSU and which
33682 would need to be accounted for to strictly model the behavior
33683 of the machine. Those instructions are currently unaccounted
33684 for to help minimize compile time overhead of this code. */
33686 if (rs6000_cpu == PROCESSOR_POWER6 && last_scheduled_insn)
33688 int pos;
33689 int i;
33690 rtx_insn *tmp;
33691 rtx load_mem, str_mem;
33693 if (is_store_insn (last_scheduled_insn, &str_mem))
33694 /* Issuing a store, swing the load_store_pendulum to the left */
33695 load_store_pendulum--;
33696 else if (is_load_insn (last_scheduled_insn, &load_mem))
33697 /* Issuing a load, swing the load_store_pendulum to the right */
33698 load_store_pendulum++;
33699 else
33700 return cached_can_issue_more;
33702 /* If the pendulum is balanced, or there is only one instruction on
33703 the ready list, then all is well, so return. */
33704 if ((load_store_pendulum == 0) || (*pn_ready <= 1))
33705 return cached_can_issue_more;
33707 if (load_store_pendulum == 1)
33709 /* A load has been issued in this cycle. Scan the ready list
33710 for another load to issue with it */
33711 pos = *pn_ready-1;
33713 while (pos >= 0)
33715 if (is_load_insn (ready[pos], &load_mem))
33717 /* Found a load. Move it to the head of the ready list,
33718 and adjust its priority so that it is more likely to
33719 stay there */
33720 tmp = ready[pos];
33721 for (i=pos; i<*pn_ready-1; i++)
33722 ready[i] = ready[i + 1];
33723 ready[*pn_ready-1] = tmp;
33725 if (!sel_sched_p () && INSN_PRIORITY_KNOWN (tmp))
33726 INSN_PRIORITY (tmp)++;
33727 break;
33729 pos--;
33732 else if (load_store_pendulum == -2)
33734 /* Two stores have been issued in this cycle. Increase the
33735 priority of the first load in the ready list to favor it for
33736 issuing in the next cycle. */
33737 pos = *pn_ready-1;
33739 while (pos >= 0)
33741 if (is_load_insn (ready[pos], &load_mem)
33742 && !sel_sched_p ()
33743 && INSN_PRIORITY_KNOWN (ready[pos]))
33745 INSN_PRIORITY (ready[pos])++;
33747 /* Adjust the pendulum to account for the fact that a load
33748 was found and increased in priority. This is to prevent
33749 increasing the priority of multiple loads */
33750 load_store_pendulum--;
33752 break;
33754 pos--;
33757 else if (load_store_pendulum == -1)
33759 /* A store has been issued in this cycle. Scan the ready list for
33760 another store to issue with it, preferring a store to an adjacent
33761 memory location */
33762 int first_store_pos = -1;
33764 pos = *pn_ready-1;
33766 while (pos >= 0)
33768 if (is_store_insn (ready[pos], &str_mem))
33770 rtx str_mem2;
33771 /* Maintain the index of the first store found on the
33772 list */
33773 if (first_store_pos == -1)
33774 first_store_pos = pos;
33776 if (is_store_insn (last_scheduled_insn, &str_mem2)
33777 && adjacent_mem_locations (str_mem, str_mem2))
33779 /* Found an adjacent store. Move it to the head of the
33780 ready list, and adjust its priority so that it is
33781 more likely to stay there */
33782 tmp = ready[pos];
33783 for (i=pos; i<*pn_ready-1; i++)
33784 ready[i] = ready[i + 1];
33785 ready[*pn_ready-1] = tmp;
33787 if (!sel_sched_p () && INSN_PRIORITY_KNOWN (tmp))
33788 INSN_PRIORITY (tmp)++;
33790 first_store_pos = -1;
33792 break;
33795 pos--;
33798 if (first_store_pos >= 0)
33800 /* An adjacent store wasn't found, but a non-adjacent store was,
33801 so move the non-adjacent store to the front of the ready
33802 list, and adjust its priority so that it is more likely to
33803 stay there. */
33804 tmp = ready[first_store_pos];
33805 for (i=first_store_pos; i<*pn_ready-1; i++)
33806 ready[i] = ready[i + 1];
33807 ready[*pn_ready-1] = tmp;
33808 if (!sel_sched_p () && INSN_PRIORITY_KNOWN (tmp))
33809 INSN_PRIORITY (tmp)++;
33812 else if (load_store_pendulum == 2)
33814 /* Two loads have been issued in this cycle. Increase the priority
33815 of the first store in the ready list to favor it for issuing in
33816 the next cycle. */
33817 pos = *pn_ready-1;
33819 while (pos >= 0)
33821 if (is_store_insn (ready[pos], &str_mem)
33822 && !sel_sched_p ()
33823 && INSN_PRIORITY_KNOWN (ready[pos]))
33825 INSN_PRIORITY (ready[pos])++;
33827 /* Adjust the pendulum to account for the fact that a store
33828 was found and increased in priority. This is to prevent
33829 increasing the priority of multiple stores */
33830 load_store_pendulum++;
33832 break;
33834 pos--;
33839 /* Do Power9 dependent reordering if necessary. */
33840 if (rs6000_cpu == PROCESSOR_POWER9 && last_scheduled_insn
33841 && recog_memoized (last_scheduled_insn) >= 0)
33842 return power9_sched_reorder2 (ready, *pn_ready - 1);
33844 return cached_can_issue_more;
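/* Illustrative sketch, not part of GCC: a disabled toy restatement of
   the Power6 load_store_pendulum cases handled above, mapping each
   state to the action taken on the ready list.  Names hypothetical.  */
#if 0
enum toy_action { TOY_NONE, TOY_PAIR_LOAD, TOY_PAIR_STORE,
		  TOY_BOOST_LOAD, TOY_BOOST_STORE };

static enum toy_action
toy_pendulum_action (int pendulum)
{
  switch (pendulum)
    {
    case 1:  return TOY_PAIR_LOAD;    /* one load issued: find another */
    case -1: return TOY_PAIR_STORE;   /* one store issued: find another,
					 preferring an adjacent one */
    case 2:  return TOY_BOOST_STORE;  /* two loads issued: favor a store */
    case -2: return TOY_BOOST_LOAD;   /* two stores issued: favor a load */
    default: return TOY_NONE;         /* balanced, or beyond +/-2 */
    }
}
#endif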
33847 /* Return whether the presence of INSN causes a dispatch group termination
33848 of group WHICH_GROUP.
33850 If WHICH_GROUP == current_group, this function will return true if INSN
33851 causes the termination of the current group (i.e., the dispatch group to
33852 which INSN belongs). This means that INSN will be the last insn in the
33853 group it belongs to.
33855 If WHICH_GROUP == previous_group, this function will return true if INSN
33856 causes the termination of the previous group (i.e., the dispatch group that
33857 precedes the group to which INSN belongs). This means that INSN will be
33858 the first insn in the group it belongs to. */
33860 static bool
33861 insn_terminates_group_p (rtx_insn *insn, enum group_termination which_group)
33863 bool first, last;
33865 if (! insn)
33866 return false;
33868 first = insn_must_be_first_in_group (insn);
33869 last = insn_must_be_last_in_group (insn);
33871 if (first && last)
33872 return true;
33874 if (which_group == current_group)
33875 return last;
33876 else if (which_group == previous_group)
33877 return first;
33879 return false;
33883 static bool
33884 insn_must_be_first_in_group (rtx_insn *insn)
33886 enum attr_type type;
33888 if (!insn
33889 || NOTE_P (insn)
33890 || DEBUG_INSN_P (insn)
33891 || GET_CODE (PATTERN (insn)) == USE
33892 || GET_CODE (PATTERN (insn)) == CLOBBER)
33893 return false;
33895 switch (rs6000_cpu)
33897 case PROCESSOR_POWER5:
33898 if (is_cracked_insn (insn))
33899 return true;
33900 /* FALLTHRU */
33901 case PROCESSOR_POWER4:
33902 if (is_microcoded_insn (insn))
33903 return true;
33905 if (!rs6000_sched_groups)
33906 return false;
33908 type = get_attr_type (insn);
33910 switch (type)
33912 case TYPE_MFCR:
33913 case TYPE_MFCRF:
33914 case TYPE_MTCR:
33915 case TYPE_DELAYED_CR:
33916 case TYPE_CR_LOGICAL:
33917 case TYPE_MTJMPR:
33918 case TYPE_MFJMPR:
33919 case TYPE_DIV:
33920 case TYPE_LOAD_L:
33921 case TYPE_STORE_C:
33922 case TYPE_ISYNC:
33923 case TYPE_SYNC:
33924 return true;
33925 default:
33926 break;
33928 break;
33929 case PROCESSOR_POWER6:
33930 type = get_attr_type (insn);
33932 switch (type)
33934 case TYPE_EXTS:
33935 case TYPE_CNTLZ:
33936 case TYPE_TRAP:
33937 case TYPE_MUL:
33938 case TYPE_INSERT:
33939 case TYPE_FPCOMPARE:
33940 case TYPE_MFCR:
33941 case TYPE_MTCR:
33942 case TYPE_MFJMPR:
33943 case TYPE_MTJMPR:
33944 case TYPE_ISYNC:
33945 case TYPE_SYNC:
33946 case TYPE_LOAD_L:
33947 case TYPE_STORE_C:
33948 return true;
33949 case TYPE_SHIFT:
33950 if (get_attr_dot (insn) == DOT_NO
33951 || get_attr_var_shift (insn) == VAR_SHIFT_NO)
33952 return true;
33953 else
33954 break;
33955 case TYPE_DIV:
33956 if (get_attr_size (insn) == SIZE_32)
33957 return true;
33958 else
33959 break;
33960 case TYPE_LOAD:
33961 case TYPE_STORE:
33962 case TYPE_FPLOAD:
33963 case TYPE_FPSTORE:
33964 if (get_attr_update (insn) == UPDATE_YES)
33965 return true;
33966 else
33967 break;
33968 default:
33969 break;
33971 break;
33972 case PROCESSOR_POWER7:
33973 type = get_attr_type (insn);
33975 switch (type)
33977 case TYPE_CR_LOGICAL:
33978 case TYPE_MFCR:
33979 case TYPE_MFCRF:
33980 case TYPE_MTCR:
33981 case TYPE_DIV:
33982 case TYPE_ISYNC:
33983 case TYPE_LOAD_L:
33984 case TYPE_STORE_C:
33985 case TYPE_MFJMPR:
33986 case TYPE_MTJMPR:
33987 return true;
33988 case TYPE_MUL:
33989 case TYPE_SHIFT:
33990 case TYPE_EXTS:
33991 if (get_attr_dot (insn) == DOT_YES)
33992 return true;
33993 else
33994 break;
33995 case TYPE_LOAD:
33996 if (get_attr_sign_extend (insn) == SIGN_EXTEND_YES
33997 || get_attr_update (insn) == UPDATE_YES)
33998 return true;
33999 else
34000 break;
34001 case TYPE_STORE:
34002 case TYPE_FPLOAD:
34003 case TYPE_FPSTORE:
34004 if (get_attr_update (insn) == UPDATE_YES)
34005 return true;
34006 else
34007 break;
34008 default:
34009 break;
34011 break;
34012 case PROCESSOR_POWER8:
34013 type = get_attr_type (insn);
34015 switch (type)
34017 case TYPE_CR_LOGICAL:
34018 case TYPE_DELAYED_CR:
34019 case TYPE_MFCR:
34020 case TYPE_MFCRF:
34021 case TYPE_MTCR:
34022 case TYPE_SYNC:
34023 case TYPE_ISYNC:
34024 case TYPE_LOAD_L:
34025 case TYPE_STORE_C:
34026 case TYPE_VECSTORE:
34027 case TYPE_MFJMPR:
34028 case TYPE_MTJMPR:
34029 return true;
34030 case TYPE_SHIFT:
34031 case TYPE_EXTS:
34032 case TYPE_MUL:
34033 if (get_attr_dot (insn) == DOT_YES)
34034 return true;
34035 else
34036 break;
34037 case TYPE_LOAD:
34038 if (get_attr_sign_extend (insn) == SIGN_EXTEND_YES
34039 || get_attr_update (insn) == UPDATE_YES)
34040 return true;
34041 else
34042 break;
34043 case TYPE_STORE:
34044 if (get_attr_update (insn) == UPDATE_YES
34045 && get_attr_indexed (insn) == INDEXED_YES)
34046 return true;
34047 else
34048 break;
34049 default:
34050 break;
34052 break;
34053 default:
34054 break;
34057 return false;
34060 static bool
34061 insn_must_be_last_in_group (rtx_insn *insn)
34063 enum attr_type type;
34065 if (!insn
34066 || NOTE_P (insn)
34067 || DEBUG_INSN_P (insn)
34068 || GET_CODE (PATTERN (insn)) == USE
34069 || GET_CODE (PATTERN (insn)) == CLOBBER)
34070 return false;
34072 switch (rs6000_cpu) {
34073 case PROCESSOR_POWER4:
34074 case PROCESSOR_POWER5:
34075 if (is_microcoded_insn (insn))
34076 return true;
34078 if (is_branch_slot_insn (insn))
34079 return true;
34081 break;
34082 case PROCESSOR_POWER6:
34083 type = get_attr_type (insn);
34085 switch (type)
34087 case TYPE_EXTS:
34088 case TYPE_CNTLZ:
34089 case TYPE_TRAP:
34090 case TYPE_MUL:
34091 case TYPE_FPCOMPARE:
34092 case TYPE_MFCR:
34093 case TYPE_MTCR:
34094 case TYPE_MFJMPR:
34095 case TYPE_MTJMPR:
34096 case TYPE_ISYNC:
34097 case TYPE_SYNC:
34098 case TYPE_LOAD_L:
34099 case TYPE_STORE_C:
34100 return true;
34101 case TYPE_SHIFT:
34102 if (get_attr_dot (insn) == DOT_NO
34103 || get_attr_var_shift (insn) == VAR_SHIFT_NO)
34104 return true;
34105 else
34106 break;
34107 case TYPE_DIV:
34108 if (get_attr_size (insn) == SIZE_32)
34109 return true;
34110 else
34111 break;
34112 default:
34113 break;
34115 break;
34116 case PROCESSOR_POWER7:
34117 type = get_attr_type (insn);
34119 switch (type)
34121 case TYPE_ISYNC:
34122 case TYPE_SYNC:
34123 case TYPE_LOAD_L:
34124 case TYPE_STORE_C:
34125 return true;
34126 case TYPE_LOAD:
34127 if (get_attr_sign_extend (insn) == SIGN_EXTEND_YES
34128 && get_attr_update (insn) == UPDATE_YES)
34129 return true;
34130 else
34131 break;
34132 case TYPE_STORE:
34133 if (get_attr_update (insn) == UPDATE_YES
34134 && get_attr_indexed (insn) == INDEXED_YES)
34135 return true;
34136 else
34137 break;
34138 default:
34139 break;
34141 break;
34142 case PROCESSOR_POWER8:
34143 type = get_attr_type (insn);
34145 switch (type)
34147 case TYPE_MFCR:
34148 case TYPE_MTCR:
34149 case TYPE_ISYNC:
34150 case TYPE_SYNC:
34151 case TYPE_LOAD_L:
34152 case TYPE_STORE_C:
34153 return true;
34154 case TYPE_LOAD:
34155 if (get_attr_sign_extend (insn) == SIGN_EXTEND_YES
34156 && get_attr_update (insn) == UPDATE_YES)
34157 return true;
34158 else
34159 break;
34160 case TYPE_STORE:
34161 if (get_attr_update (insn) == UPDATE_YES
34162 && get_attr_indexed (insn) == INDEXED_YES)
34163 return true;
34164 else
34165 break;
34166 default:
34167 break;
34169 break;
34170 default:
34171 break;
34174 return false;
34177 /* Return true if it is recommended to keep NEXT_INSN "far" (in a separate
34178 dispatch group) from the insns in GROUP_INSNS. Return false otherwise. */
34180 static bool
34181 is_costly_group (rtx *group_insns, rtx next_insn)
34183 int i;
34184 int issue_rate = rs6000_issue_rate ();
34186 for (i = 0; i < issue_rate; i++)
34188 sd_iterator_def sd_it;
34189 dep_t dep;
34190 rtx insn = group_insns[i];
34192 if (!insn)
34193 continue;
34195 FOR_EACH_DEP (insn, SD_LIST_RES_FORW, sd_it, dep)
34197 rtx next = DEP_CON (dep);
34199 if (next == next_insn
34200 && rs6000_is_costly_dependence (dep, dep_cost (dep), 0))
34201 return true;
34205 return false;
34208 /* Utility of the function redefine_groups.
34209 Check if it is too costly to schedule NEXT_INSN together with GROUP_INSNS
34210 in the same dispatch group. If so, insert nops before NEXT_INSN, in order
34211 to keep it "far" (in a separate group) from GROUP_INSNS, following
34212 one of the following schemes, depending on the value of the flag
34213 -minsert-sched-nops = X:
34214 (1) X == sched_finish_regroup_exact: insert exactly as many nops as needed
34215 in order to force NEXT_INSN into a separate group.
34216 (2) X < sched_finish_regroup_exact: insert exactly X nops.
34217 GROUP_END, CAN_ISSUE_MORE and GROUP_COUNT record the state after nop
34218 insertion (has a group just ended, how many vacant issue slots remain in the
34219 last group, and how many dispatch groups were encountered so far). */
34221 static int
34222 force_new_group (int sched_verbose, FILE *dump, rtx *group_insns,
34223 rtx_insn *next_insn, bool *group_end, int can_issue_more,
34224 int *group_count)
34226 rtx nop;
34227 bool force;
34228 int issue_rate = rs6000_issue_rate ();
34229 bool end = *group_end;
34230 int i;
34232 if (next_insn == NULL_RTX || DEBUG_INSN_P (next_insn))
34233 return can_issue_more;
34235 if (rs6000_sched_insert_nops > sched_finish_regroup_exact)
34236 return can_issue_more;
34238 force = is_costly_group (group_insns, next_insn);
34239 if (!force)
34240 return can_issue_more;
34242 if (sched_verbose > 6)
34243 fprintf (dump,"force: group count = %d, can_issue_more = %d\n",
34244 *group_count ,can_issue_more);
34246 if (rs6000_sched_insert_nops == sched_finish_regroup_exact)
34248 if (*group_end)
34249 can_issue_more = 0;
34251 /* Since only a branch can be issued in the last issue_slot, it is
34252 sufficient to insert 'can_issue_more - 1' nops if next_insn is not
34253 a branch. If next_insn is a branch, we insert 'can_issue_more' nops;
34254 in this case the last nop will start a new group and the branch
34255 will be forced to the new group. */
34256 if (can_issue_more && !is_branch_slot_insn (next_insn))
34257 can_issue_more--;
34259 /* Do we have a special group ending nop? */
34260 if (rs6000_cpu_attr == CPU_POWER6 || rs6000_cpu_attr == CPU_POWER7
34261 || rs6000_cpu_attr == CPU_POWER8)
34263 nop = gen_group_ending_nop ();
34264 emit_insn_before (nop, next_insn);
34265 can_issue_more = 0;
34267 else
34268 while (can_issue_more > 0)
34270 nop = gen_nop ();
34271 emit_insn_before (nop, next_insn);
34272 can_issue_more--;
34275 *group_end = true;
34276 return 0;
34279 if (rs6000_sched_insert_nops < sched_finish_regroup_exact)
34281 int n_nops = rs6000_sched_insert_nops;
34283 /* Nops can't be issued from the branch slot, so the effective
34284 issue_rate for nops is 'issue_rate - 1'. */
34285 if (can_issue_more == 0)
34286 can_issue_more = issue_rate;
34287 can_issue_more--;
34288 if (can_issue_more == 0)
34290 can_issue_more = issue_rate - 1;
34291 (*group_count)++;
34292 end = true;
34293 for (i = 0; i < issue_rate; i++)
34295 group_insns[i] = 0;
34299 while (n_nops > 0)
34301 nop = gen_nop ();
34302 emit_insn_before (nop, next_insn);
34303 if (can_issue_more == issue_rate - 1) /* new group begins */
34304 end = false;
34305 can_issue_more--;
34306 if (can_issue_more == 0)
34308 can_issue_more = issue_rate - 1;
34309 (*group_count)++;
34310 end = true;
34311 for (i = 0; i < issue_rate; i++)
34313 group_insns[i] = 0;
34316 n_nops--;
34319 /* Scale back relative to 'issue_rate' (instead of 'issue_rate - 1'). */
34320 can_issue_more++;
34322 /* Is next_insn going to start a new group? */
34323 *group_end
34324 = (end
34325 || (can_issue_more == 1 && !is_branch_slot_insn (next_insn))
34326 || (can_issue_more <= 2 && is_cracked_insn (next_insn))
34327 || (can_issue_more < issue_rate &&
34328 insn_terminates_group_p (next_insn, previous_group)));
34329 if (*group_end && end)
34330 (*group_count)--;
34332 if (sched_verbose > 6)
34333 fprintf (dump, "done force: group count = %d, can_issue_more = %d\n",
34334 *group_count, can_issue_more);
34335 return can_issue_more;
34338 return can_issue_more;
34341 /* This function tries to synch the dispatch groups that the compiler "sees"
34342 with the dispatch groups that the processor dispatcher is expected to
34343 form in practice. It tries to achieve this synchronization by forcing the
34344 estimated processor grouping on the compiler (as opposed to the function
34345 'pad_groups' which tries to force the scheduler's grouping on the processor).
34347 The function scans the insn sequence between PREV_HEAD_INSN and TAIL and
34348 examines the (estimated) dispatch groups that will be formed by the processor
34349 dispatcher. It marks these group boundaries to reflect the estimated
34350 processor grouping, overriding the grouping that the scheduler had marked.
34351 Depending on the value of the flag '-minsert-sched-nops' this function can
34352 force certain insns into separate groups or force a certain distance between
34353 them by inserting nops, for example, if there exists a "costly dependence"
34354 between the insns.
34356 The function estimates the group boundaries that the processor will form as
34357 follows: It keeps track of how many vacant issue slots are available after
34358 each insn. A subsequent insn will start a new group if one of the following
34359 4 cases applies:
34360 - no more vacant issue slots remain in the current dispatch group.
34361 - only the last issue slot, which is the branch slot, is vacant, but the next
34362 insn is not a branch.
34363 - only the last 2 or fewer issue slots, including the branch slot, are vacant,
34364 which means that a cracked insn (which occupies two issue slots) can't be
34365 issued in this group.
34366 - fewer than 'issue_rate' slots are vacant, and the next insn always needs to
34367 start a new group. */
34369 static int
34370 redefine_groups (FILE *dump, int sched_verbose, rtx_insn *prev_head_insn,
34371 rtx_insn *tail)
34373 rtx_insn *insn, *next_insn;
34374 int issue_rate;
34375 int can_issue_more;
34376 int slot, i;
34377 bool group_end;
34378 int group_count = 0;
34379 rtx *group_insns;
34381 /* Initialize. */
34382 issue_rate = rs6000_issue_rate ();
34383 group_insns = XALLOCAVEC (rtx, issue_rate);
34384 for (i = 0; i < issue_rate; i++)
34386 group_insns[i] = 0;
34388 can_issue_more = issue_rate;
34389 slot = 0;
34390 insn = get_next_active_insn (prev_head_insn, tail);
34391 group_end = false;
34393 while (insn != NULL_RTX)
34395 slot = (issue_rate - can_issue_more);
34396 group_insns[slot] = insn;
34397 can_issue_more =
34398 rs6000_variable_issue (dump, sched_verbose, insn, can_issue_more);
34399 if (insn_terminates_group_p (insn, current_group))
34400 can_issue_more = 0;
34402 next_insn = get_next_active_insn (insn, tail);
34403 if (next_insn == NULL_RTX)
34404 return group_count + 1;
34406 /* Is next_insn going to start a new group? */
34407 group_end
34408 = (can_issue_more == 0
34409 || (can_issue_more == 1 && !is_branch_slot_insn (next_insn))
34410 || (can_issue_more <= 2 && is_cracked_insn (next_insn))
34411 || (can_issue_more < issue_rate &&
34412 insn_terminates_group_p (next_insn, previous_group)));
34414 can_issue_more = force_new_group (sched_verbose, dump, group_insns,
34415 next_insn, &group_end, can_issue_more,
34416 &group_count);
34418 if (group_end)
34420 group_count++;
34421 can_issue_more = 0;
34422 for (i = 0; i < issue_rate; i++)
34424 group_insns[i] = 0;
34428 if (GET_MODE (next_insn) == TImode && can_issue_more)
34429 PUT_MODE (next_insn, VOIDmode);
34430 else if (!can_issue_more && GET_MODE (next_insn) != TImode)
34431 PUT_MODE (next_insn, TImode);
34433 insn = next_insn;
34434 if (can_issue_more == 0)
34435 can_issue_more = issue_rate;
34436 } /* while */
34438 return group_count;
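/* Illustrative sketch, not part of GCC: the four boundary cases listed
   before redefine_groups, distilled into a single disabled predicate.
   Names are hypothetical.  */
#if 0
#include <stdbool.h>

static bool
toy_group_end (int can_issue_more, int issue_rate, bool next_is_branch,
	       bool next_is_cracked, bool next_must_be_first)
{
  return can_issue_more == 0                          /* no slots left */
	 || (can_issue_more == 1 && !next_is_branch)  /* only branch slot */
	 || (can_issue_more <= 2 && next_is_cracked)  /* needs two slots */
	 || (can_issue_more < issue_rate && next_must_be_first);
}
#endif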
34441 /* Scan the insn sequence between PREV_HEAD_INSN and TAIL and examine the
34442 dispatch group boundaries that the scheduler had marked. Pad with nops
34443 any dispatch groups which have vacant issue slots, in order to force the
34444 scheduler's grouping on the processor dispatcher. The function
34445 returns the number of dispatch groups found. */
34447 static int
34448 pad_groups (FILE *dump, int sched_verbose, rtx_insn *prev_head_insn,
34449 rtx_insn *tail)
34451 rtx_insn *insn, *next_insn;
34452 rtx nop;
34453 int issue_rate;
34454 int can_issue_more;
34455 int group_end;
34456 int group_count = 0;
34458 /* Initialize issue_rate. */
34459 issue_rate = rs6000_issue_rate ();
34460 can_issue_more = issue_rate;
34462 insn = get_next_active_insn (prev_head_insn, tail);
34463 next_insn = get_next_active_insn (insn, tail);
34465 while (insn != NULL_RTX)
34467 can_issue_more =
34468 rs6000_variable_issue (dump, sched_verbose, insn, can_issue_more);
34470 group_end = (next_insn == NULL_RTX || GET_MODE (next_insn) == TImode);
34472 if (next_insn == NULL_RTX)
34473 break;
34475 if (group_end)
34477 /* If the scheduler had marked group termination at this location
34478 (between insn and next_insn), and neither insn nor next_insn will
34479 force group termination, pad the group with nops to force group
34480 termination. */
34481 if (can_issue_more
34482 && (rs6000_sched_insert_nops == sched_finish_pad_groups)
34483 && !insn_terminates_group_p (insn, current_group)
34484 && !insn_terminates_group_p (next_insn, previous_group))
34486 if (!is_branch_slot_insn (next_insn))
34487 can_issue_more--;
34489 while (can_issue_more)
34491 nop = gen_nop ();
34492 emit_insn_before (nop, next_insn);
34493 can_issue_more--;
34497 can_issue_more = issue_rate;
34498 group_count++;
34501 insn = next_insn;
34502 next_insn = get_next_active_insn (insn, tail);
34505 return group_count;
34508 /* We're beginning a new block. Initialize data structures as necessary. */
34510 static void
34511 rs6000_sched_init (FILE *dump ATTRIBUTE_UNUSED,
34512 int sched_verbose ATTRIBUTE_UNUSED,
34513 int max_ready ATTRIBUTE_UNUSED)
34515 last_scheduled_insn = NULL;
34516 load_store_pendulum = 0;
34517 divide_cnt = 0;
34518 vec_load_pendulum = 0;
34521 /* The following function is called at the end of scheduling BB.
34522 After reload, it inserts nops to enforce insn group bundling. */
34524 static void
34525 rs6000_sched_finish (FILE *dump, int sched_verbose)
34527 int n_groups;
34529 if (sched_verbose)
34530 fprintf (dump, "=== Finishing schedule.\n");
34532 if (reload_completed && rs6000_sched_groups)
34534 /* Do not run sched_finish hook when selective scheduling is enabled. */
34535 if (sel_sched_p ())
34536 return;
34538 if (rs6000_sched_insert_nops == sched_finish_none)
34539 return;
34541 if (rs6000_sched_insert_nops == sched_finish_pad_groups)
34542 n_groups = pad_groups (dump, sched_verbose,
34543 current_sched_info->prev_head,
34544 current_sched_info->next_tail);
34545 else
34546 n_groups = redefine_groups (dump, sched_verbose,
34547 current_sched_info->prev_head,
34548 current_sched_info->next_tail);
34550 if (sched_verbose >= 6)
34552 fprintf (dump, "ngroups = %d\n", n_groups);
34553 print_rtl (dump, current_sched_info->prev_head);
34554 fprintf (dump, "Done finish_sched\n");
34559 struct rs6000_sched_context
34561 short cached_can_issue_more;
34562 rtx_insn *last_scheduled_insn;
34563 int load_store_pendulum;
34564 int divide_cnt;
34565 int vec_load_pendulum;
34568 typedef struct rs6000_sched_context rs6000_sched_context_def;
34569 typedef rs6000_sched_context_def *rs6000_sched_context_t;
34571 /* Allocate store for new scheduling context. */
34572 static void *
34573 rs6000_alloc_sched_context (void)
34575 return xmalloc (sizeof (rs6000_sched_context_def));
34578 /* If CLEAN_P is true, initialize _SC with clean data;
34579 otherwise initialize it from the global context. */
34580 static void
34581 rs6000_init_sched_context (void *_sc, bool clean_p)
34583 rs6000_sched_context_t sc = (rs6000_sched_context_t) _sc;
34585 if (clean_p)
34587 sc->cached_can_issue_more = 0;
34588 sc->last_scheduled_insn = NULL;
34589 sc->load_store_pendulum = 0;
34590 sc->divide_cnt = 0;
34591 sc->vec_load_pendulum = 0;
34593 else
34595 sc->cached_can_issue_more = cached_can_issue_more;
34596 sc->last_scheduled_insn = last_scheduled_insn;
34597 sc->load_store_pendulum = load_store_pendulum;
34598 sc->divide_cnt = divide_cnt;
34599 sc->vec_load_pendulum = vec_load_pendulum;
34603 /* Sets the global scheduling context to the one pointed to by _SC. */
34604 static void
34605 rs6000_set_sched_context (void *_sc)
34607 rs6000_sched_context_t sc = (rs6000_sched_context_t) _sc;
34609 gcc_assert (sc != NULL);
34611 cached_can_issue_more = sc->cached_can_issue_more;
34612 last_scheduled_insn = sc->last_scheduled_insn;
34613 load_store_pendulum = sc->load_store_pendulum;
34614 divide_cnt = sc->divide_cnt;
34615 vec_load_pendulum = sc->vec_load_pendulum;
34618 /* Free _SC. */
34619 static void
34620 rs6000_free_sched_context (void *_sc)
34622 gcc_assert (_sc != NULL);
34624 free (_sc);
34628 /* Length in units of the trampoline for entering a nested function. */
34630 int
34631 rs6000_trampoline_size (void)
34633 int ret = 0;
34635 switch (DEFAULT_ABI)
34637 default:
34638 gcc_unreachable ();
34640 case ABI_AIX:
34641 ret = (TARGET_32BIT) ? 12 : 24;
34642 break;
34644 case ABI_ELFv2:
34645 gcc_assert (!TARGET_32BIT);
34646 ret = 32;
34647 break;
34649 case ABI_DARWIN:
34650 case ABI_V4:
34651 ret = (TARGET_32BIT) ? 40 : 48;
34652 break;
34655 return ret;
34658 /* Emit RTL insns to initialize the variable parts of a trampoline.
34659 FNADDR is an RTX for the address of the function's pure code.
34660 CXT is an RTX for the static chain value for the function. */
34662 static void
34663 rs6000_trampoline_init (rtx m_tramp, tree fndecl, rtx cxt)
34665 int regsize = (TARGET_32BIT) ? 4 : 8;
34666 rtx fnaddr = XEXP (DECL_RTL (fndecl), 0);
34667 rtx ctx_reg = force_reg (Pmode, cxt);
34668 rtx addr = force_reg (Pmode, XEXP (m_tramp, 0));
34670 switch (DEFAULT_ABI)
34672 default:
34673 gcc_unreachable ();
34675 /* Under AIX, just build the 3 word function descriptor */
34676 case ABI_AIX:
34678 rtx fnmem, fn_reg, toc_reg;
34680 if (!TARGET_POINTERS_TO_NESTED_FUNCTIONS)
34681 error ("You cannot take the address of a nested function if you use "
34682 "the -mno-pointers-to-nested-functions option.");
34684 fnmem = gen_const_mem (Pmode, force_reg (Pmode, fnaddr));
34685 fn_reg = gen_reg_rtx (Pmode);
34686 toc_reg = gen_reg_rtx (Pmode);
34688 /* Macro to shorten the code expansions below. */
34689 # define MEM_PLUS(MEM, OFFSET) adjust_address (MEM, Pmode, OFFSET)
34691 m_tramp = replace_equiv_address (m_tramp, addr);
34693 emit_move_insn (fn_reg, MEM_PLUS (fnmem, 0));
34694 emit_move_insn (toc_reg, MEM_PLUS (fnmem, regsize));
34695 emit_move_insn (MEM_PLUS (m_tramp, 0), fn_reg);
34696 emit_move_insn (MEM_PLUS (m_tramp, regsize), toc_reg);
34697 emit_move_insn (MEM_PLUS (m_tramp, 2*regsize), ctx_reg);
34699 # undef MEM_PLUS
34701 break;
34703 /* Under V.4/eabi/darwin, __trampoline_setup does the real work. */
34704 case ABI_ELFv2:
34705 case ABI_DARWIN:
34706 case ABI_V4:
34707 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__trampoline_setup"),
34708 LCT_NORMAL, VOIDmode, 4,
34709 addr, Pmode,
34710 GEN_INT (rs6000_trampoline_size ()), SImode,
34711 fnaddr, Pmode,
34712 ctx_reg, Pmode);
34713 break;
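/* Illustrative sketch, not part of GCC: the AIX case above copies a
   3-word function descriptor into the trampoline and then overwrites
   the static-chain slot, matching the three emit_move_insn calls at
   offsets 0, regsize and 2*regsize.  Disabled toy layout, assuming
   64-bit (regsize == 8); the struct and field names are hypothetical.  */
#if 0
struct toy_fn_descriptor
{
  void *entry;        /* word 0: code address, copied from FNADDR's descriptor */
  void *toc;          /* word 1: TOC pointer, copied likewise */
  void *static_chain; /* word 2: set to CXT for the nested function */
};
#endif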
34718 /* Returns TRUE iff the target attribute indicated by ATTR_ID takes a plain
34719 identifier as an argument, so the front end shouldn't look it up. */
34721 static bool
34722 rs6000_attribute_takes_identifier_p (const_tree attr_id)
34724 return is_attribute_p ("altivec", attr_id);
34727 /* Handle the "altivec" attribute. The attribute may have
34728 arguments as follows:
34730 __attribute__((altivec(vector__)))
34731 __attribute__((altivec(pixel__))) (always followed by 'unsigned short')
34732 __attribute__((altivec(bool__))) (always followed by 'unsigned')
34734 and may appear more than once (e.g., 'vector bool char') in a
34735 given declaration. */
34737 static tree
34738 rs6000_handle_altivec_attribute (tree *node,
34739 tree name ATTRIBUTE_UNUSED,
34740 tree args,
34741 int flags ATTRIBUTE_UNUSED,
34742 bool *no_add_attrs)
34744 tree type = *node, result = NULL_TREE;
34745 machine_mode mode;
34746 int unsigned_p;
34747 char altivec_type
34748 = ((args && TREE_CODE (args) == TREE_LIST && TREE_VALUE (args)
34749 && TREE_CODE (TREE_VALUE (args)) == IDENTIFIER_NODE)
34750 ? *IDENTIFIER_POINTER (TREE_VALUE (args))
34751 : '?');
34753 while (POINTER_TYPE_P (type)
34754 || TREE_CODE (type) == FUNCTION_TYPE
34755 || TREE_CODE (type) == METHOD_TYPE
34756 || TREE_CODE (type) == ARRAY_TYPE)
34757 type = TREE_TYPE (type);
34759 mode = TYPE_MODE (type);
34761 /* Check for invalid AltiVec type qualifiers. */
34762 if (type == long_double_type_node)
34763 error ("use of %<long double%> in AltiVec types is invalid");
34764 else if (type == boolean_type_node)
34765 error ("use of boolean types in AltiVec types is invalid");
34766 else if (TREE_CODE (type) == COMPLEX_TYPE)
34767 error ("use of %<complex%> in AltiVec types is invalid");
34768 else if (DECIMAL_FLOAT_MODE_P (mode))
34769 error ("use of decimal floating point types in AltiVec types is invalid");
34770 else if (!TARGET_VSX)
34772 if (type == long_unsigned_type_node || type == long_integer_type_node)
34774 if (TARGET_64BIT)
34775 error ("use of %<long%> in AltiVec types is invalid for "
34776 "64-bit code without -mvsx");
34777 else if (rs6000_warn_altivec_long)
34778 warning (0, "use of %<long%> in AltiVec types is deprecated; "
34779 "use %<int%>");
34781 else if (type == long_long_unsigned_type_node
34782 || type == long_long_integer_type_node)
34783 error ("use of %<long long%> in AltiVec types is invalid without "
34784 "-mvsx");
34785 else if (type == double_type_node)
34786 error ("use of %<double%> in AltiVec types is invalid without -mvsx");
34789 switch (altivec_type)
34791 case 'v':
34792 unsigned_p = TYPE_UNSIGNED (type);
34793 switch (mode)
34795 case TImode:
34796 result = (unsigned_p ? unsigned_V1TI_type_node : V1TI_type_node);
34797 break;
34798 case DImode:
34799 result = (unsigned_p ? unsigned_V2DI_type_node : V2DI_type_node);
34800 break;
34801 case SImode:
34802 result = (unsigned_p ? unsigned_V4SI_type_node : V4SI_type_node);
34803 break;
34804 case HImode:
34805 result = (unsigned_p ? unsigned_V8HI_type_node : V8HI_type_node);
34806 break;
34807 case QImode:
34808 result = (unsigned_p ? unsigned_V16QI_type_node : V16QI_type_node);
34809 break;
34810 case SFmode: result = V4SF_type_node; break;
34811 case DFmode: result = V2DF_type_node; break;
34812 /* If the user says 'vector int bool', we may be handed the 'bool'
34813 attribute _before_ the 'vector' attribute, and so select the
34814 proper type in the 'b' case below. */
34815 case V4SImode: case V8HImode: case V16QImode: case V4SFmode:
34816 case V2DImode: case V2DFmode:
34817 result = type;
34818 default: break;
34820 break;
34821 case 'b':
34822 switch (mode)
34824 case DImode: case V2DImode: result = bool_V2DI_type_node; break;
34825 case SImode: case V4SImode: result = bool_V4SI_type_node; break;
34826 case HImode: case V8HImode: result = bool_V8HI_type_node; break;
34827 case QImode: case V16QImode: result = bool_V16QI_type_node;
34828 default: break;
34830 break;
34831 case 'p':
34832 switch (mode)
34834 case V8HImode: result = pixel_V8HI_type_node;
34835 default: break;
34837 default: break;
34840 /* Propagate qualifiers attached to the element type
34841 onto the vector type. */
34842 if (result && result != type && TYPE_QUALS (type))
34843 result = build_qualified_type (result, TYPE_QUALS (type));
34845 *no_add_attrs = true; /* No need to hang on to the attribute. */
34847 if (result)
34848 *node = lang_hooks.types.reconstruct_complex_type (*node, result);
34850 return NULL_TREE;
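/* Illustrative sketch (not part of this file): the spellings that the
   AltiVec keywords expand to, assuming -maltivec; note how 'bool' and
   'pixel' stack a second attribute onto 'vector'.  */
#if 0
typedef __attribute__ ((altivec (vector__))) int v_int;
typedef __attribute__ ((altivec (vector__)))
	__attribute__ ((altivec (bool__))) unsigned int v_bool_int;
typedef __attribute__ ((altivec (vector__)))
	__attribute__ ((altivec (pixel__))) unsigned short v_pixel;
#endif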
34853 /* AltiVec defines four built-in scalar types that serve as vector
34854 elements; we must teach the compiler how to mangle them. */
34856 static const char *
34857 rs6000_mangle_type (const_tree type)
34859 type = TYPE_MAIN_VARIANT (type);
34861 if (TREE_CODE (type) != VOID_TYPE && TREE_CODE (type) != BOOLEAN_TYPE
34862 && TREE_CODE (type) != INTEGER_TYPE && TREE_CODE (type) != REAL_TYPE)
34863 return NULL;
34865 if (type == bool_char_type_node) return "U6__boolc";
34866 if (type == bool_short_type_node) return "U6__bools";
34867 if (type == pixel_type_node) return "u7__pixel";
34868 if (type == bool_int_type_node) return "U6__booli";
34869 if (type == bool_long_type_node) return "U6__booll";
34871 /* Use a unique name for __float128 rather than trying to use "e" or "g". Use
34872 "g" for IBM extended double, no matter whether it is long double (using
34873 -mabi=ibmlongdouble) or the distinct __ibm128 type. */
34874 if (TARGET_FLOAT128_TYPE)
34876 if (type == ieee128_float_type_node)
34877 return "U10__float128";
34879 if (type == ibm128_float_type_node)
34880 return "g";
34882 if (type == long_double_type_node && TARGET_LONG_DOUBLE_128)
34883 return (TARGET_IEEEQUAD) ? "U10__float128" : "g";
34886 /* Mangle IBM extended float long double as `g' (__float128) on
34887 powerpc*-linux where long-double-64 previously was the default. */
34888 if (TYPE_MAIN_VARIANT (type) == long_double_type_node
34889 && TARGET_ELF
34890 && TARGET_LONG_DOUBLE_128
34891 && !TARGET_IEEEQUAD)
34892 return "g";
34894 /* For all other types, use normal C++ mangling. */
34895 return NULL;
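/* Illustrative sketch (not part of this file): combined with the generic
   GNU vector mangling, a parameter of type 'vector bool int' uses the
   "U6__booli" entry above, so 'void f (vector bool int)' mangles
   roughly as _Z1fDv4_U6__booli.  */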
34898 /* Handle a "longcall" or "shortcall" attribute; arguments as in
34899 struct attribute_spec.handler. */
34901 static tree
34902 rs6000_handle_longcall_attribute (tree *node, tree name,
34903 tree args ATTRIBUTE_UNUSED,
34904 int flags ATTRIBUTE_UNUSED,
34905 bool *no_add_attrs)
34907 if (TREE_CODE (*node) != FUNCTION_TYPE
34908 && TREE_CODE (*node) != FIELD_DECL
34909 && TREE_CODE (*node) != TYPE_DECL)
34911 warning (OPT_Wattributes, "%qE attribute only applies to functions",
34912 name);
34913 *no_add_attrs = true;
34916 return NULL_TREE;
34919 /* Set longcall attributes on all functions declared when
34920 rs6000_default_long_calls is true. */
34921 static void
34922 rs6000_set_default_type_attributes (tree type)
34924 if (rs6000_default_long_calls
34925 && (TREE_CODE (type) == FUNCTION_TYPE
34926 || TREE_CODE (type) == METHOD_TYPE))
34927 TYPE_ATTRIBUTES (type) = tree_cons (get_identifier ("longcall"),
34928 NULL_TREE,
34929 TYPE_ATTRIBUTES (type));
34931 #if TARGET_MACHO
34932 darwin_set_default_type_attributes (type);
34933 #endif
34936 /* Return a reference suitable for calling a function with the
34937 longcall attribute. */
34939 rtx
34940 rs6000_longcall_ref (rtx call_ref)
34942 const char *call_name;
34943 tree node;
34945 if (GET_CODE (call_ref) != SYMBOL_REF)
34946 return call_ref;
34948 /* System V adds '.' to the internal name, so skip any leading '.' characters.  */
34949 call_name = XSTR (call_ref, 0);
34950 if (*call_name == '.')
34952 while (*call_name == '.')
34953 call_name++;
34955 node = get_identifier (call_name);
34956 call_ref = gen_rtx_SYMBOL_REF (VOIDmode, IDENTIFIER_POINTER (node));
34959 return force_reg (Pmode, call_ref);
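/* Illustrative sketch (user code, not part of this file): requesting the
   indirect-call sequence above via the attribute.  */
#if 0
extern void far_away (void) __attribute__ ((longcall));   /* via CTR  */
extern void close_by (void) __attribute__ ((shortcall));  /* plain bl */
#endif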
34962 #ifndef TARGET_USE_MS_BITFIELD_LAYOUT
34963 #define TARGET_USE_MS_BITFIELD_LAYOUT 0
34964 #endif
34966 /* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
34967 struct attribute_spec.handler. */
34968 static tree
34969 rs6000_handle_struct_attribute (tree *node, tree name,
34970 tree args ATTRIBUTE_UNUSED,
34971 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
34973 tree *type = NULL;
34974 if (DECL_P (*node))
34976 if (TREE_CODE (*node) == TYPE_DECL)
34977 type = &TREE_TYPE (*node);
34979 else
34980 type = node;
34982 if (!(type && (TREE_CODE (*type) == RECORD_TYPE
34983 || TREE_CODE (*type) == UNION_TYPE)))
34985 warning (OPT_Wattributes, "%qE attribute ignored", name);
34986 *no_add_attrs = true;
34989 else if ((is_attribute_p ("ms_struct", name)
34990 && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type)))
34991 || ((is_attribute_p ("gcc_struct", name)
34992 && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type)))))
34994 warning (OPT_Wattributes, "%qE incompatible attribute ignored",
34995 name);
34996 *no_add_attrs = true;
34999 return NULL_TREE;
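/* Illustrative sketch (user code, not part of this file): the two layout
   attributes apply to a struct or union and exclude each other.  */
#if 0
struct __attribute__ ((ms_struct)) s1 { char c; int i : 7; };
struct __attribute__ ((gcc_struct)) s2 { char c; int i : 7; };
#endif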
35002 static bool
35003 rs6000_ms_bitfield_layout_p (const_tree record_type)
35005 return ((TARGET_USE_MS_BITFIELD_LAYOUT
35006 && !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
35007 || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type)));
35010 #ifdef USING_ELFOS_H
35012 /* A get_unnamed_section callback, used for switching to toc_section. */
35014 static void
35015 rs6000_elf_output_toc_section_asm_op (const void *data ATTRIBUTE_UNUSED)
35017 if ((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
35018 && TARGET_MINIMAL_TOC)
35020 if (!toc_initialized)
35022 fprintf (asm_out_file, "%s\n", TOC_SECTION_ASM_OP);
35023 ASM_OUTPUT_ALIGN (asm_out_file, TARGET_64BIT ? 3 : 2);
35024 (*targetm.asm_out.internal_label) (asm_out_file, "LCTOC", 0);
35025 fprintf (asm_out_file, "\t.tc ");
35026 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (asm_out_file, "LCTOC1[TC],");
35027 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (asm_out_file, "LCTOC1");
35028 fprintf (asm_out_file, "\n");
35030 fprintf (asm_out_file, "%s\n", MINIMAL_TOC_SECTION_ASM_OP);
35031 ASM_OUTPUT_ALIGN (asm_out_file, TARGET_64BIT ? 3 : 2);
35032 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (asm_out_file, "LCTOC1");
35033 fprintf (asm_out_file, " = .+32768\n");
35034 toc_initialized = 1;
35036 else
35037 fprintf (asm_out_file, "%s\n", MINIMAL_TOC_SECTION_ASM_OP);
35039 else if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
35041 fprintf (asm_out_file, "%s\n", TOC_SECTION_ASM_OP);
35042 if (!toc_initialized)
35044 ASM_OUTPUT_ALIGN (asm_out_file, TARGET_64BIT ? 3 : 2);
35045 toc_initialized = 1;
35048 else
35050 fprintf (asm_out_file, "%s\n", MINIMAL_TOC_SECTION_ASM_OP);
35051 if (!toc_initialized)
35053 ASM_OUTPUT_ALIGN (asm_out_file, TARGET_64BIT ? 3 : 2);
35054 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (asm_out_file, "LCTOC1");
35055 fprintf (asm_out_file, " = .+32768\n");
35056 toc_initialized = 1;
35061 /* Implement TARGET_ASM_INIT_SECTIONS. */
35063 static void
35064 rs6000_elf_asm_init_sections (void)
35066 toc_section
35067 = get_unnamed_section (0, rs6000_elf_output_toc_section_asm_op, NULL);
35069 sdata2_section
35070 = get_unnamed_section (SECTION_WRITE, output_section_asm_op,
35071 SDATA2_SECTION_ASM_OP);
35074 /* Implement TARGET_SELECT_RTX_SECTION. */
35076 static section *
35077 rs6000_elf_select_rtx_section (machine_mode mode, rtx x,
35078 unsigned HOST_WIDE_INT align)
35080 if (ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (x, mode))
35081 return toc_section;
35082 else
35083 return default_elf_select_rtx_section (mode, x, align);
35086 /* For a SYMBOL_REF, set generic flags and then perform some
35087 target-specific processing.
35089 When the AIX ABI is requested on a non-AIX system, replace the
35090 function name with the real name (with a leading .) rather than the
35091 function descriptor name. This saves a lot of overriding code to
35092 read the prefixes. */
35094 static void rs6000_elf_encode_section_info (tree, rtx, int) ATTRIBUTE_UNUSED;
35095 static void
35096 rs6000_elf_encode_section_info (tree decl, rtx rtl, int first)
35098 default_encode_section_info (decl, rtl, first);
35100 if (first
35101 && TREE_CODE (decl) == FUNCTION_DECL
35102 && !TARGET_AIX
35103 && DEFAULT_ABI == ABI_AIX)
35105 rtx sym_ref = XEXP (rtl, 0);
35106 size_t len = strlen (XSTR (sym_ref, 0));
35107 char *str = XALLOCAVEC (char, len + 2);
35108 str[0] = '.';
35109 memcpy (str + 1, XSTR (sym_ref, 0), len + 1);
35110 XSTR (sym_ref, 0) = ggc_alloc_string (str, len + 1);
35114 static inline bool
35115 compare_section_name (const char *section, const char *templ)
35117 int len;
35119 len = strlen (templ);
35120 return (strncmp (section, templ, len) == 0
35121 && (section[len] == 0 || section[len] == '.'));
35124 bool
35125 rs6000_elf_in_small_data_p (const_tree decl)
35127 if (rs6000_sdata == SDATA_NONE)
35128 return false;
35130 /* We want to merge strings, so we never consider them small data. */
35131 if (TREE_CODE (decl) == STRING_CST)
35132 return false;
35134 /* Functions are never in the small data area. */
35135 if (TREE_CODE (decl) == FUNCTION_DECL)
35136 return false;
35138 if (TREE_CODE (decl) == VAR_DECL && DECL_SECTION_NAME (decl))
35140 const char *section = DECL_SECTION_NAME (decl);
35141 if (compare_section_name (section, ".sdata")
35142 || compare_section_name (section, ".sdata2")
35143 || compare_section_name (section, ".gnu.linkonce.s")
35144 || compare_section_name (section, ".sbss")
35145 || compare_section_name (section, ".sbss2")
35146 || compare_section_name (section, ".gnu.linkonce.sb")
35147 || strcmp (section, ".PPC.EMB.sdata0") == 0
35148 || strcmp (section, ".PPC.EMB.sbss0") == 0)
35149 return true;
35151 else
35153 HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (decl));
35155 if (size > 0
35156 && size <= g_switch_value
35157 /* If it's not public, and we're not going to reference it there,
35158 there's no need to put it in the small data section. */
35159 && (rs6000_sdata != SDATA_DATA || TREE_PUBLIC (decl)))
35160 return true;
35163 return false;
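/* Illustrative sketch (user code, not part of this file): with
   -msdata=data -G 8 both declarations below are treated as small data;
   the second also matches compare_section_name explicitly.  */
#if 0
int counter;					   /* 4 bytes <= 8  */
int pair[2] __attribute__ ((section (".sdata")));  /* named .sdata  */
#endif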
35166 #endif /* USING_ELFOS_H */
35168 /* Implement TARGET_USE_BLOCKS_FOR_CONSTANT_P. */
35170 static bool
35171 rs6000_use_blocks_for_constant_p (machine_mode mode, const_rtx x)
35173 return !ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (x, mode);
35176 /* Do not place thread-local symbols refs in the object blocks. */
35178 static bool
35179 rs6000_use_blocks_for_decl_p (const_tree decl)
35181 return !DECL_THREAD_LOCAL_P (decl);
35184 /* Return a REG that occurs in ADDR with coefficient 1.
35185 ADDR can be effectively incremented by incrementing REG.
35187 r0 is special and we must not select it as an address
35188 register by this routine since our caller will try to
35189 increment the returned register via an "la" instruction. */
35191 rtx
35192 find_addr_reg (rtx addr)
35194 while (GET_CODE (addr) == PLUS)
35196 if (GET_CODE (XEXP (addr, 0)) == REG
35197 && REGNO (XEXP (addr, 0)) != 0)
35198 addr = XEXP (addr, 0);
35199 else if (GET_CODE (XEXP (addr, 1)) == REG
35200 && REGNO (XEXP (addr, 1)) != 0)
35201 addr = XEXP (addr, 1);
35202 else if (CONSTANT_P (XEXP (addr, 0)))
35203 addr = XEXP (addr, 1);
35204 else if (CONSTANT_P (XEXP (addr, 1)))
35205 addr = XEXP (addr, 0);
35206 else
35207 gcc_unreachable ();
35209 gcc_assert (GET_CODE (addr) == REG && REGNO (addr) != 0);
35210 return addr;
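/* For example (sketch): given ADDR = (plus (plus (reg 9) (reg 0))
   (const_int 8)), the constant is peeled off first and then (reg 9)
   is chosen over r0, so (reg 9) is returned.  */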
35213 void
35214 rs6000_fatal_bad_address (rtx op)
35216 fatal_insn ("bad address", op);
35219 #if TARGET_MACHO
35221 typedef struct branch_island_d {
35222 tree function_name;
35223 tree label_name;
35224 int line_number;
35225 } branch_island;
35228 static vec<branch_island, va_gc> *branch_islands;
35230 /* Remember to generate a branch island for far calls to the given
35231 function. */
35233 static void
35234 add_compiler_branch_island (tree label_name, tree function_name,
35235 int line_number)
35237 branch_island bi = {function_name, label_name, line_number};
35238 vec_safe_push (branch_islands, bi);
35241 /* Generate far-jump branch islands for everything recorded in
35242 branch_islands. Invoked immediately after the last instruction of
35243 the epilogue has been emitted; the branch islands must be appended
35244 to, and contiguous with, the function body. Mach-O stubs are
35245 generated in machopic_output_stub(). */
35247 static void
35248 macho_branch_islands (void)
35250 char tmp_buf[512];
35252 while (!vec_safe_is_empty (branch_islands))
35254 branch_island *bi = &branch_islands->last ();
35255 const char *label = IDENTIFIER_POINTER (bi->label_name);
35256 const char *name = IDENTIFIER_POINTER (bi->function_name);
35257 char name_buf[512];
35258 /* Cheap copy of the details from the Darwin ASM_OUTPUT_LABELREF(). */
35259 if (name[0] == '*' || name[0] == '&')
35260 strcpy (name_buf, name+1);
35261 else
35263 name_buf[0] = '_';
35264 strcpy (name_buf+1, name);
35266 strcpy (tmp_buf, "\n");
35267 strcat (tmp_buf, label);
35268 #if defined (DBX_DEBUGGING_INFO) || defined (XCOFF_DEBUGGING_INFO)
35269 if (write_symbols == DBX_DEBUG || write_symbols == XCOFF_DEBUG)
35270 dbxout_stabd (N_SLINE, bi->line_number);
35271 #endif /* DBX_DEBUGGING_INFO || XCOFF_DEBUGGING_INFO */
35272 if (flag_pic)
35274 if (TARGET_LINK_STACK)
35276 char name[32];
35277 get_ppc476_thunk_name (name);
35278 strcat (tmp_buf, ":\n\tmflr r0\n\tbl ");
35279 strcat (tmp_buf, name);
35280 strcat (tmp_buf, "\n");
35281 strcat (tmp_buf, label);
35282 strcat (tmp_buf, "_pic:\n\tmflr r11\n");
35284 else
35286 strcat (tmp_buf, ":\n\tmflr r0\n\tbcl 20,31,");
35287 strcat (tmp_buf, label);
35288 strcat (tmp_buf, "_pic\n");
35289 strcat (tmp_buf, label);
35290 strcat (tmp_buf, "_pic:\n\tmflr r11\n");
35293 strcat (tmp_buf, "\taddis r11,r11,ha16(");
35294 strcat (tmp_buf, name_buf);
35295 strcat (tmp_buf, " - ");
35296 strcat (tmp_buf, label);
35297 strcat (tmp_buf, "_pic)\n");
35299 strcat (tmp_buf, "\tmtlr r0\n");
35301 strcat (tmp_buf, "\taddi r12,r11,lo16(");
35302 strcat (tmp_buf, name_buf);
35303 strcat (tmp_buf, " - ");
35304 strcat (tmp_buf, label);
35305 strcat (tmp_buf, "_pic)\n");
35307 strcat (tmp_buf, "\tmtctr r12\n\tbctr\n");
35309 else
35311 strcat (tmp_buf, ":\nlis r12,hi16(");
35312 strcat (tmp_buf, name_buf);
35313 strcat (tmp_buf, ")\n\tori r12,r12,lo16(");
35314 strcat (tmp_buf, name_buf);
35315 strcat (tmp_buf, ")\n\tmtctr r12\n\tbctr");
35317 output_asm_insn (tmp_buf, 0);
35318 #if defined (DBX_DEBUGGING_INFO) || defined (XCOFF_DEBUGGING_INFO)
35319 if (write_symbols == DBX_DEBUG || write_symbols == XCOFF_DEBUG)
35320 dbxout_stabd (N_SLINE, bi->line_number);
35321 #endif /* DBX_DEBUGGING_INFO || XCOFF_DEBUGGING_INFO */
35322 branch_islands->pop ();
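/* Illustrative sketch (not part of this file): the non-PIC arm above
   emits an island for function _foo with label L42 roughly as
	L42: lis r12,hi16(_foo)
	     ori r12,r12,lo16(_foo)
	     mtctr r12
	     bctr  */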
35326 /* NO_PREVIOUS_DEF checks whether the function name is already present
35327 in the branch island list.  */
35329 static int
35330 no_previous_def (tree function_name)
35332 branch_island *bi;
35333 unsigned ix;
35335 FOR_EACH_VEC_SAFE_ELT (branch_islands, ix, bi)
35336 if (function_name == bi->function_name)
35337 return 0;
35338 return 1;
35341 /* GET_PREV_LABEL gets the label name from the previous definition of
35342 the function. */
35344 static tree
35345 get_prev_label (tree function_name)
35347 branch_island *bi;
35348 unsigned ix;
35350 FOR_EACH_VEC_SAFE_ELT (branch_islands, ix, bi)
35351 if (function_name == bi->function_name)
35352 return bi->label_name;
35353 return NULL_TREE;
35356 /* INSN is either a function call or a millicode call. It may have an
35357 unconditional jump in its delay slot.
35359 CALL_DEST is the routine we are calling. */
35361 char *
35362 output_call (rtx_insn *insn, rtx *operands, int dest_operand_number,
35363 int cookie_operand_number)
35365 static char buf[256];
35366 if (darwin_emit_branch_islands
35367 && GET_CODE (operands[dest_operand_number]) == SYMBOL_REF
35368 && (INTVAL (operands[cookie_operand_number]) & CALL_LONG))
35370 tree labelname;
35371 tree funname = get_identifier (XSTR (operands[dest_operand_number], 0));
35373 if (no_previous_def (funname))
35375 rtx label_rtx = gen_label_rtx ();
35376 char *label_buf, temp_buf[256];
35377 ASM_GENERATE_INTERNAL_LABEL (temp_buf, "L",
35378 CODE_LABEL_NUMBER (label_rtx));
35379 label_buf = temp_buf[0] == '*' ? temp_buf + 1 : temp_buf;
35380 labelname = get_identifier (label_buf);
35381 add_compiler_branch_island (labelname, funname, insn_line (insn));
35383 else
35384 labelname = get_prev_label (funname);
35386 /* "jbsr foo, L42" is Mach-O for "Link as 'bl foo' if a 'bl'
35387 instruction will reach 'foo', otherwise link as 'bl L42'".
35388 "L42" should be a 'branch island', that will do a far jump to
35389 'foo'. Branch islands are generated in
35390 macho_branch_islands(). */
35391 sprintf (buf, "jbsr %%z%d,%.246s",
35392 dest_operand_number, IDENTIFIER_POINTER (labelname));
35394 else
35395 sprintf (buf, "bl %%z%d", dest_operand_number);
35396 return buf;
35399 /* Generate PIC and indirect symbol stubs. */
35401 void
35402 machopic_output_stub (FILE *file, const char *symb, const char *stub)
35404 unsigned int length;
35405 char *symbol_name, *lazy_ptr_name;
35406 char *local_label_0;
35407 static int label = 0;
35409 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
35410 symb = (*targetm.strip_name_encoding) (symb);
35413 length = strlen (symb);
35414 symbol_name = XALLOCAVEC (char, length + 32);
35415 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);
35417 lazy_ptr_name = XALLOCAVEC (char, length + 32);
35418 GEN_LAZY_PTR_NAME_FOR_SYMBOL (lazy_ptr_name, symb, length);
35420 if (flag_pic == 2)
35421 switch_to_section (darwin_sections[machopic_picsymbol_stub1_section]);
35422 else
35423 switch_to_section (darwin_sections[machopic_symbol_stub1_section]);
35425 if (flag_pic == 2)
35427 fprintf (file, "\t.align 5\n");
35429 fprintf (file, "%s:\n", stub);
35430 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
35432 label++;
35433 local_label_0 = XALLOCAVEC (char, sizeof ("\"L00000000000$spb\""));
35434 sprintf (local_label_0, "\"L%011d$spb\"", label);
35436 fprintf (file, "\tmflr r0\n");
35437 if (TARGET_LINK_STACK)
35439 char name[32];
35440 get_ppc476_thunk_name (name);
35441 fprintf (file, "\tbl %s\n", name);
35442 fprintf (file, "%s:\n\tmflr r11\n", local_label_0);
35444 else
35446 fprintf (file, "\tbcl 20,31,%s\n", local_label_0);
35447 fprintf (file, "%s:\n\tmflr r11\n", local_label_0);
35449 fprintf (file, "\taddis r11,r11,ha16(%s-%s)\n",
35450 lazy_ptr_name, local_label_0);
35451 fprintf (file, "\tmtlr r0\n");
35452 fprintf (file, "\t%s r12,lo16(%s-%s)(r11)\n",
35453 (TARGET_64BIT ? "ldu" : "lwzu"),
35454 lazy_ptr_name, local_label_0);
35455 fprintf (file, "\tmtctr r12\n");
35456 fprintf (file, "\tbctr\n");
35458 else
35460 fprintf (file, "\t.align 4\n");
35462 fprintf (file, "%s:\n", stub);
35463 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
35465 fprintf (file, "\tlis r11,ha16(%s)\n", lazy_ptr_name);
35466 fprintf (file, "\t%s r12,lo16(%s)(r11)\n",
35467 (TARGET_64BIT ? "ldu" : "lwzu"),
35468 lazy_ptr_name);
35469 fprintf (file, "\tmtctr r12\n");
35470 fprintf (file, "\tbctr\n");
35473 switch_to_section (darwin_sections[machopic_lazy_symbol_ptr_section]);
35474 fprintf (file, "%s:\n", lazy_ptr_name);
35475 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
35476 fprintf (file, "%sdyld_stub_binding_helper\n",
35477 (TARGET_64BIT ? DOUBLE_INT_ASM_OP : "\t.long\t"));
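/* Illustrative sketch (not part of this file): the non-PIC stub body
   (flag_pic != 2) for a symbol _foo, with purely hypothetical stub and
   lazy-pointer label names, is roughly
	L_foo$stub:
	     .indirect_symbol _foo
	     lis r11,ha16(L_foo$lazy_ptr)
	     lwzu r12,lo16(L_foo$lazy_ptr)(r11)
	     mtctr r12
	     bctr  */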
35480 /* Legitimize PIC addresses. If the address is already
35481 position-independent, we return ORIG. Newly generated
35482 position-independent addresses go into a reg.  This is REG if
35483 nonzero; otherwise we allocate register(s) as necessary. */
35485 #define SMALL_INT(X) ((UINTVAL (X) + 0x8000) < 0x10000)
35487 rtx
35488 rs6000_machopic_legitimize_pic_address (rtx orig, machine_mode mode,
35489 rtx reg)
35491 rtx base, offset;
35493 if (reg == NULL && ! reload_in_progress && ! reload_completed)
35494 reg = gen_reg_rtx (Pmode);
35496 if (GET_CODE (orig) == CONST)
35498 rtx reg_temp;
35500 if (GET_CODE (XEXP (orig, 0)) == PLUS
35501 && XEXP (XEXP (orig, 0), 0) == pic_offset_table_rtx)
35502 return orig;
35504 gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS);
35506 /* Use a different reg for the intermediate value, as
35507 it will be marked UNCHANGING. */
35508 reg_temp = !can_create_pseudo_p () ? reg : gen_reg_rtx (Pmode);
35509 base = rs6000_machopic_legitimize_pic_address (XEXP (XEXP (orig, 0), 0),
35510 Pmode, reg_temp);
35511 offset =
35512 rs6000_machopic_legitimize_pic_address (XEXP (XEXP (orig, 0), 1),
35513 Pmode, reg);
35515 if (GET_CODE (offset) == CONST_INT)
35517 if (SMALL_INT (offset))
35518 return plus_constant (Pmode, base, INTVAL (offset));
35519 else if (! reload_in_progress && ! reload_completed)
35520 offset = force_reg (Pmode, offset);
35521 else
35523 rtx mem = force_const_mem (Pmode, orig);
35524 return machopic_legitimize_pic_address (mem, Pmode, reg);
35527 return gen_rtx_PLUS (Pmode, base, offset);
35530 /* Fall back on generic machopic code. */
35531 return machopic_legitimize_pic_address (orig, mode, reg);
35534 /* Output a .machine directive for the Darwin assembler, and call
35535 the generic start_file routine. */
35537 static void
35538 rs6000_darwin_file_start (void)
35540 static const struct
35542 const char *arg;
35543 const char *name;
35544 HOST_WIDE_INT if_set;
35545 } mapping[] = {
35546 { "ppc64", "ppc64", MASK_64BIT },
35547 { "970", "ppc970", MASK_PPC_GPOPT | MASK_MFCRF | MASK_POWERPC64 },
35548 { "power4", "ppc970", 0 },
35549 { "G5", "ppc970", 0 },
35550 { "7450", "ppc7450", 0 },
35551 { "7400", "ppc7400", MASK_ALTIVEC },
35552 { "G4", "ppc7400", 0 },
35553 { "750", "ppc750", 0 },
35554 { "740", "ppc750", 0 },
35555 { "G3", "ppc750", 0 },
35556 { "604e", "ppc604e", 0 },
35557 { "604", "ppc604", 0 },
35558 { "603e", "ppc603", 0 },
35559 { "603", "ppc603", 0 },
35560 { "601", "ppc601", 0 },
35561 { NULL, "ppc", 0 } };
35562 const char *cpu_id = "";
35563 size_t i;
35565 rs6000_file_start ();
35566 darwin_file_start ();
35568 /* Determine the argument to -mcpu=. Default to G3 if not specified. */
35570 if (rs6000_default_cpu != 0 && rs6000_default_cpu[0] != '\0')
35571 cpu_id = rs6000_default_cpu;
35573 if (global_options_set.x_rs6000_cpu_index)
35574 cpu_id = processor_target_table[rs6000_cpu_index].name;
35576 /* Look through the mapping array. Pick the first name that either
35577 matches the argument, has a bit set in IF_SET that is also set
35578 in the target flags, or has a NULL name. */
35580 i = 0;
35581 while (mapping[i].arg != NULL
35582 && strcmp (mapping[i].arg, cpu_id) != 0
35583 && (mapping[i].if_set & rs6000_isa_flags) == 0)
35584 i++;
35586 fprintf (asm_out_file, "\t.machine %s\n", mapping[i].name);
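/* For example (sketch): if CPU_ID resolves to "G4", the loop above stops
   at the { "G4", "ppc7400", 0 } row and the file begins with
   "\t.machine ppc7400".  */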
35589 #endif /* TARGET_MACHO */
35591 #if TARGET_ELF
35592 static int
35593 rs6000_elf_reloc_rw_mask (void)
35595 if (flag_pic)
35596 return 3;
35597 else if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
35598 return 2;
35599 else
35600 return 0;
35603 /* Record an element in the table of global constructors. SYMBOL is
35604 a SYMBOL_REF of the function to be called; PRIORITY is a number
35605 between 0 and MAX_INIT_PRIORITY.
35607 This differs from default_named_section_asm_out_constructor in
35608 that we have special handling for -mrelocatable. */
35610 static void rs6000_elf_asm_out_constructor (rtx, int) ATTRIBUTE_UNUSED;
35611 static void
35612 rs6000_elf_asm_out_constructor (rtx symbol, int priority)
35614 const char *section = ".ctors";
35615 char buf[18];
35617 if (priority != DEFAULT_INIT_PRIORITY)
35619 sprintf (buf, ".ctors.%.5u",
35620 /* Invert the numbering so the linker puts us in the proper
35621 order; constructors are run from right to left, and the
35622 linker sorts in increasing order. */
35623 MAX_INIT_PRIORITY - priority);
35624 section = buf;
35627 switch_to_section (get_section (section, SECTION_WRITE, NULL));
35628 assemble_align (POINTER_SIZE);
35630 if (DEFAULT_ABI == ABI_V4
35631 && (TARGET_RELOCATABLE || flag_pic > 1))
35633 fputs ("\t.long (", asm_out_file);
35634 output_addr_const (asm_out_file, symbol);
35635 fputs (")@fixup\n", asm_out_file);
35637 else
35638 assemble_integer (symbol, POINTER_SIZE / BITS_PER_UNIT, POINTER_SIZE, 1);
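/* For example (sketch, with the usual MAX_INIT_PRIORITY of 65535): a
   constructor of priority 101 is recorded in section ".ctors.65434"
   (65535 - 101), so the linker's increasing sort preserves run order.  */
#if 0
void boot (void) __attribute__ ((constructor (101)));
#endif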
35641 static void rs6000_elf_asm_out_destructor (rtx, int) ATTRIBUTE_UNUSED;
35642 static void
35643 rs6000_elf_asm_out_destructor (rtx symbol, int priority)
35645 const char *section = ".dtors";
35646 char buf[18];
35648 if (priority != DEFAULT_INIT_PRIORITY)
35650 sprintf (buf, ".dtors.%.5u",
35651 /* Invert the numbering so the linker puts us in the proper
35652 order; constructors are run from right to left, and the
35653 linker sorts in increasing order. */
35654 MAX_INIT_PRIORITY - priority);
35655 section = buf;
35658 switch_to_section (get_section (section, SECTION_WRITE, NULL));
35659 assemble_align (POINTER_SIZE);
35661 if (DEFAULT_ABI == ABI_V4
35662 && (TARGET_RELOCATABLE || flag_pic > 1))
35664 fputs ("\t.long (", asm_out_file);
35665 output_addr_const (asm_out_file, symbol);
35666 fputs (")@fixup\n", asm_out_file);
35668 else
35669 assemble_integer (symbol, POINTER_SIZE / BITS_PER_UNIT, POINTER_SIZE, 1);
35672 void
35673 rs6000_elf_declare_function_name (FILE *file, const char *name, tree decl)
35675 if (TARGET_64BIT && DEFAULT_ABI != ABI_ELFv2)
35677 fputs ("\t.section\t\".opd\",\"aw\"\n\t.align 3\n", file);
35678 ASM_OUTPUT_LABEL (file, name);
35679 fputs (DOUBLE_INT_ASM_OP, file);
35680 rs6000_output_function_entry (file, name);
35681 fputs (",.TOC.@tocbase,0\n\t.previous\n", file);
35682 if (DOT_SYMBOLS)
35684 fputs ("\t.size\t", file);
35685 assemble_name (file, name);
35686 fputs (",24\n\t.type\t.", file);
35687 assemble_name (file, name);
35688 fputs (",@function\n", file);
35689 if (TREE_PUBLIC (decl) && ! DECL_WEAK (decl))
35691 fputs ("\t.globl\t.", file);
35692 assemble_name (file, name);
35693 putc ('\n', file);
35696 else
35697 ASM_OUTPUT_TYPE_DIRECTIVE (file, name, "function");
35698 ASM_DECLARE_RESULT (file, DECL_RESULT (decl));
35699 rs6000_output_function_entry (file, name);
35700 fputs (":\n", file);
35701 return;
35704 if (DEFAULT_ABI == ABI_V4
35705 && (TARGET_RELOCATABLE || flag_pic > 1)
35706 && !TARGET_SECURE_PLT
35707 && (!constant_pool_empty_p () || crtl->profile)
35708 && uses_TOC ())
35710 char buf[256];
35712 (*targetm.asm_out.internal_label) (file, "LCL", rs6000_pic_labelno);
35714 fprintf (file, "\t.long ");
35715 assemble_name (file, toc_label_name);
35716 need_toc_init = 1;
35717 putc ('-', file);
35718 ASM_GENERATE_INTERNAL_LABEL (buf, "LCF", rs6000_pic_labelno);
35719 assemble_name (file, buf);
35720 putc ('\n', file);
35723 ASM_OUTPUT_TYPE_DIRECTIVE (file, name, "function");
35724 ASM_DECLARE_RESULT (file, DECL_RESULT (decl));
35726 if (TARGET_CMODEL == CMODEL_LARGE && rs6000_global_entry_point_needed_p ())
35728 char buf[256];
35730 (*targetm.asm_out.internal_label) (file, "LCL", rs6000_pic_labelno);
35732 fprintf (file, "\t.quad .TOC.-");
35733 ASM_GENERATE_INTERNAL_LABEL (buf, "LCF", rs6000_pic_labelno);
35734 assemble_name (file, buf);
35735 putc ('\n', file);
35738 if (DEFAULT_ABI == ABI_AIX)
35740 const char *desc_name, *orig_name;
35742 orig_name = (*targetm.strip_name_encoding) (name);
35743 desc_name = orig_name;
35744 while (*desc_name == '.')
35745 desc_name++;
35747 if (TREE_PUBLIC (decl))
35748 fprintf (file, "\t.globl %s\n", desc_name);
35750 fprintf (file, "%s\n", MINIMAL_TOC_SECTION_ASM_OP);
35751 fprintf (file, "%s:\n", desc_name);
35752 fprintf (file, "\t.long %s\n", orig_name);
35753 fputs ("\t.long _GLOBAL_OFFSET_TABLE_\n", file);
35754 fputs ("\t.long 0\n", file);
35755 fprintf (file, "\t.previous\n");
35757 ASM_OUTPUT_LABEL (file, name);
35760 static void rs6000_elf_file_end (void) ATTRIBUTE_UNUSED;
35761 static void
35762 rs6000_elf_file_end (void)
35764 #ifdef HAVE_AS_GNU_ATTRIBUTE
35765 /* ??? The value emitted depends on options active at file end.
35766 Assume anyone using #pragma or attributes that might change
35767 options knows what they are doing. */
35768 if ((TARGET_64BIT || DEFAULT_ABI == ABI_V4)
35769 && rs6000_passes_float)
35771 int fp;
35773 if (TARGET_DF_FPR | TARGET_DF_SPE)
35774 fp = 1;
35775 else if (TARGET_SF_FPR | TARGET_SF_SPE)
35776 fp = 3;
35777 else
35778 fp = 2;
35779 if (rs6000_passes_long_double)
35781 if (!TARGET_LONG_DOUBLE_128)
35782 fp |= 2 * 4;
35783 else if (TARGET_IEEEQUAD)
35784 fp |= 3 * 4;
35785 else
35786 fp |= 1 * 4;
35788 fprintf (asm_out_file, "\t.gnu_attribute 4, %d\n", fp);
35790 if (TARGET_32BIT && DEFAULT_ABI == ABI_V4)
35792 if (rs6000_passes_vector)
35793 fprintf (asm_out_file, "\t.gnu_attribute 8, %d\n",
35794 (TARGET_ALTIVEC_ABI ? 2
35795 : TARGET_SPE_ABI ? 3
35796 : 1));
35797 if (rs6000_returns_struct)
35798 fprintf (asm_out_file, "\t.gnu_attribute 12, %d\n",
35799 aix_struct_return ? 2 : 1);
35801 #endif
35802 #if defined (POWERPC_LINUX) || defined (POWERPC_FREEBSD)
35803 if (TARGET_32BIT || DEFAULT_ABI == ABI_ELFv2)
35804 file_end_indicate_exec_stack ();
35805 #endif
35807 if (flag_split_stack)
35808 file_end_indicate_split_stack ();
35810 if (cpu_builtin_p)
35812 /* We have expanded a CPU builtin, so we need to emit a reference to
35813 the special symbol that LIBC uses to declare that it supports
35814 AT_PLATFORM and AT_HWCAP/AT_HWCAP2 in the TCB.  */
35815 switch_to_section (data_section);
35816 fprintf (asm_out_file, "\t.align %u\n", TARGET_32BIT ? 2 : 3);
35817 fprintf (asm_out_file, "\t%s %s\n",
35818 TARGET_32BIT ? ".long" : ".quad", tcb_verification_symbol);
35821 #endif
35823 #if TARGET_XCOFF
35825 #ifndef HAVE_XCOFF_DWARF_EXTRAS
35826 #define HAVE_XCOFF_DWARF_EXTRAS 0
35827 #endif
35829 static enum unwind_info_type
35830 rs6000_xcoff_debug_unwind_info (void)
35832 return UI_NONE;
35835 static void
35836 rs6000_xcoff_asm_output_anchor (rtx symbol)
35838 char buffer[100];
35840 sprintf (buffer, "$ + " HOST_WIDE_INT_PRINT_DEC,
35841 SYMBOL_REF_BLOCK_OFFSET (symbol));
35842 fprintf (asm_out_file, "%s", SET_ASM_OP);
35843 RS6000_OUTPUT_BASENAME (asm_out_file, XSTR (symbol, 0));
35844 fprintf (asm_out_file, ",");
35845 RS6000_OUTPUT_BASENAME (asm_out_file, buffer);
35846 fprintf (asm_out_file, "\n");
35849 static void
35850 rs6000_xcoff_asm_globalize_label (FILE *stream, const char *name)
35852 fputs (GLOBAL_ASM_OP, stream);
35853 RS6000_OUTPUT_BASENAME (stream, name);
35854 putc ('\n', stream);
35857 /* A get_unnamed_decl callback, used for read-only sections. PTR
35858 points to the section string variable. */
35860 static void
35861 rs6000_xcoff_output_readonly_section_asm_op (const void *directive)
35863 fprintf (asm_out_file, "\t.csect %s[RO],%s\n",
35864 *(const char *const *) directive,
35865 XCOFF_CSECT_DEFAULT_ALIGNMENT_STR);
35868 /* Likewise for read-write sections. */
35870 static void
35871 rs6000_xcoff_output_readwrite_section_asm_op (const void *directive)
35873 fprintf (asm_out_file, "\t.csect %s[RW],%s\n",
35874 *(const char *const *) directive,
35875 XCOFF_CSECT_DEFAULT_ALIGNMENT_STR);
35878 static void
35879 rs6000_xcoff_output_tls_section_asm_op (const void *directive)
35881 fprintf (asm_out_file, "\t.csect %s[TL],%s\n",
35882 *(const char *const *) directive,
35883 XCOFF_CSECT_DEFAULT_ALIGNMENT_STR);
35886 /* A get_unnamed_section callback, used for switching to toc_section. */
35888 static void
35889 rs6000_xcoff_output_toc_section_asm_op (const void *data ATTRIBUTE_UNUSED)
35891 if (TARGET_MINIMAL_TOC)
35893 /* toc_section is always selected at least once from
35894 rs6000_xcoff_file_start, so this is guaranteed to
35895 always be defined once and only once in each file. */
35896 if (!toc_initialized)
35898 fputs ("\t.toc\nLCTOC..1:\n", asm_out_file);
35899 fputs ("\t.tc toc_table[TC],toc_table[RW]\n", asm_out_file);
35900 toc_initialized = 1;
35902 fprintf (asm_out_file, "\t.csect toc_table[RW]%s\n",
35903 (TARGET_32BIT ? "" : ",3"));
35905 else
35906 fputs ("\t.toc\n", asm_out_file);
35909 /* Implement TARGET_ASM_INIT_SECTIONS. */
35911 static void
35912 rs6000_xcoff_asm_init_sections (void)
35914 read_only_data_section
35915 = get_unnamed_section (0, rs6000_xcoff_output_readonly_section_asm_op,
35916 &xcoff_read_only_section_name);
35918 private_data_section
35919 = get_unnamed_section (SECTION_WRITE,
35920 rs6000_xcoff_output_readwrite_section_asm_op,
35921 &xcoff_private_data_section_name);
35923 tls_data_section
35924 = get_unnamed_section (SECTION_TLS,
35925 rs6000_xcoff_output_tls_section_asm_op,
35926 &xcoff_tls_data_section_name);
35928 tls_private_data_section
35929 = get_unnamed_section (SECTION_TLS,
35930 rs6000_xcoff_output_tls_section_asm_op,
35931 &xcoff_private_data_section_name);
35933 read_only_private_data_section
35934 = get_unnamed_section (0, rs6000_xcoff_output_readonly_section_asm_op,
35935 &xcoff_private_data_section_name);
35937 toc_section
35938 = get_unnamed_section (0, rs6000_xcoff_output_toc_section_asm_op, NULL);
35940 readonly_data_section = read_only_data_section;
35943 static int
35944 rs6000_xcoff_reloc_rw_mask (void)
35946 return 3;
35949 static void
35950 rs6000_xcoff_asm_named_section (const char *name, unsigned int flags,
35951 tree decl ATTRIBUTE_UNUSED)
35953 int smclass;
35954 static const char * const suffix[5] = { "PR", "RO", "RW", "TL", "XO" };
35956 if (flags & SECTION_EXCLUDE)
35957 smclass = 4;
35958 else if (flags & SECTION_DEBUG)
35960 fprintf (asm_out_file, "\t.dwsect %s\n", name);
35961 return;
35963 else if (flags & SECTION_CODE)
35964 smclass = 0;
35965 else if (flags & SECTION_TLS)
35966 smclass = 3;
35967 else if (flags & SECTION_WRITE)
35968 smclass = 2;
35969 else
35970 smclass = 1;
35972 fprintf (asm_out_file, "\t.csect %s%s[%s],%u\n",
35973 (flags & SECTION_CODE) ? "." : "",
35974 name, suffix[smclass], flags & SECTION_ENTSIZE);
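/* For example (sketch): a code section named "foo" (SECTION_CODE, zero
   entsize) is emitted as "\t.csect .foo[PR],0", while a writable data
   section comes out as "\t.csect foo[RW],0".  */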
35977 #define IN_NAMED_SECTION(DECL) \
35978 ((TREE_CODE (DECL) == FUNCTION_DECL || TREE_CODE (DECL) == VAR_DECL) \
35979 && DECL_SECTION_NAME (DECL) != NULL)
35981 static section *
35982 rs6000_xcoff_select_section (tree decl, int reloc,
35983 unsigned HOST_WIDE_INT align)
35985 /* Place variables with alignment stricter than BIGGEST_ALIGNMENT into
35986 named section. */
35987 if (align > BIGGEST_ALIGNMENT)
35989 resolve_unique_section (decl, reloc, true);
35990 if (IN_NAMED_SECTION (decl))
35991 return get_named_section (decl, NULL, reloc);
35994 if (decl_readonly_section (decl, reloc))
35996 if (TREE_PUBLIC (decl))
35997 return read_only_data_section;
35998 else
35999 return read_only_private_data_section;
36001 else
36003 #if HAVE_AS_TLS
36004 if (TREE_CODE (decl) == VAR_DECL && DECL_THREAD_LOCAL_P (decl))
36006 if (TREE_PUBLIC (decl))
36007 return tls_data_section;
36008 else if (bss_initializer_p (decl))
36010 /* Convert to COMMON to emit in BSS. */
36011 DECL_COMMON (decl) = 1;
36012 return tls_comm_section;
36014 else
36015 return tls_private_data_section;
36017 else
36018 #endif
36019 if (TREE_PUBLIC (decl))
36020 return data_section;
36021 else
36022 return private_data_section;
36026 static void
36027 rs6000_xcoff_unique_section (tree decl, int reloc ATTRIBUTE_UNUSED)
36029 const char *name;
36031 /* Use select_section for private data and uninitialized data with
36032 alignment <= BIGGEST_ALIGNMENT. */
36033 if (!TREE_PUBLIC (decl)
36034 || DECL_COMMON (decl)
36035 || (DECL_INITIAL (decl) == NULL_TREE
36036 && DECL_ALIGN (decl) <= BIGGEST_ALIGNMENT)
36037 || DECL_INITIAL (decl) == error_mark_node
36038 || (flag_zero_initialized_in_bss
36039 && initializer_zerop (DECL_INITIAL (decl))))
36040 return;
36042 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
36043 name = (*targetm.strip_name_encoding) (name);
36044 set_decl_section_name (decl, name);
36047 /* Select section for constant in constant pool.
36049 On RS/6000, all constants are in the private read-only data area.
36050 However, if this is being placed in the TOC it must be output as a
36051 toc entry. */
36053 static section *
36054 rs6000_xcoff_select_rtx_section (machine_mode mode, rtx x,
36055 unsigned HOST_WIDE_INT align ATTRIBUTE_UNUSED)
36057 if (ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (x, mode))
36058 return toc_section;
36059 else
36060 return read_only_private_data_section;
36063 /* Remove any trailing [DS] or the like from the symbol name. */
36065 static const char *
36066 rs6000_xcoff_strip_name_encoding (const char *name)
36068 size_t len;
36069 if (*name == '*')
36070 name++;
36071 len = strlen (name);
36072 if (name[len - 1] == ']')
36073 return ggc_alloc_string (name, len - 4);
36074 else
36075 return name;
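/* For example (sketch): both "*foo[DS]" and "foo[DS]" strip to "foo";
   a name without a trailing ']' is returned unchanged.  */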
36078 /* Section attributes. AIX is always PIC. */
36080 static unsigned int
36081 rs6000_xcoff_section_type_flags (tree decl, const char *name, int reloc)
36083 unsigned int align;
36084 unsigned int flags = default_section_type_flags (decl, name, reloc);
36086 /* Align to at least UNIT size. */
36087 if ((flags & SECTION_CODE) != 0 || !decl || !DECL_P (decl))
36088 align = MIN_UNITS_PER_WORD;
36089 else
36090 /* Increase alignment of large objects if not already stricter. */
36091 align = MAX ((DECL_ALIGN (decl) / BITS_PER_UNIT),
36092 int_size_in_bytes (TREE_TYPE (decl)) > MIN_UNITS_PER_WORD
36093 ? UNITS_PER_FP_WORD : MIN_UNITS_PER_WORD);
36095 return flags | (exact_log2 (align) & SECTION_ENTSIZE);
36098 /* Output at beginning of assembler file.
36100 Initialize the section names for the RS/6000 at this point.
36102 Specify filename, including full path, to assembler.
36104 We want to go into the TOC section so at least one .toc will be emitted.
36105 Also, in order to output proper .bs/.es pairs, we need at least one static
36106 [RW] section emitted.
36108 Finally, declare mcount when profiling to make the assembler happy. */
36110 static void
36111 rs6000_xcoff_file_start (void)
36113 rs6000_gen_section_name (&xcoff_bss_section_name,
36114 main_input_filename, ".bss_");
36115 rs6000_gen_section_name (&xcoff_private_data_section_name,
36116 main_input_filename, ".rw_");
36117 rs6000_gen_section_name (&xcoff_read_only_section_name,
36118 main_input_filename, ".ro_");
36119 rs6000_gen_section_name (&xcoff_tls_data_section_name,
36120 main_input_filename, ".tls_");
36121 rs6000_gen_section_name (&xcoff_tbss_section_name,
36122 main_input_filename, ".tbss_[UL]");
36124 fputs ("\t.file\t", asm_out_file);
36125 output_quoted_string (asm_out_file, main_input_filename);
36126 fputc ('\n', asm_out_file);
36127 if (write_symbols != NO_DEBUG)
36128 switch_to_section (private_data_section);
36129 switch_to_section (toc_section);
36130 switch_to_section (text_section);
36131 if (profile_flag)
36132 fprintf (asm_out_file, "\t.extern %s\n", RS6000_MCOUNT);
36133 rs6000_file_start ();
36136 /* Output at end of assembler file.
36137 On the RS/6000, referencing data should automatically pull in text. */
36139 static void
36140 rs6000_xcoff_file_end (void)
36142 switch_to_section (text_section);
36143 fputs ("_section_.text:\n", asm_out_file);
36144 switch_to_section (data_section);
36145 fputs (TARGET_32BIT
36146 ? "\t.long _section_.text\n" : "\t.llong _section_.text\n",
36147 asm_out_file);
36150 struct declare_alias_data
36152 FILE *file;
36153 bool function_descriptor;
36156 /* Declare alias N.  A helper function for call_for_symbol_and_aliases.  */
36158 static bool
36159 rs6000_declare_alias (struct symtab_node *n, void *d)
36161 struct declare_alias_data *data = (struct declare_alias_data *)d;
36162 /* Main symbol is output specially, because varasm machinery does part of
36163 the job for us - we do not need to declare .globl/lglobs and such. */
36164 if (!n->alias || n->weakref)
36165 return false;
36167 if (lookup_attribute ("ifunc", DECL_ATTRIBUTES (n->decl)))
36168 return false;
36170 /* Prevent assemble_alias from trying to use .set pseudo operation
36171 that does not behave as expected by the middle-end. */
36172 TREE_ASM_WRITTEN (n->decl) = true;
36174 const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (n->decl));
36175 char *buffer = (char *) alloca (strlen (name) + 2);
36176 char *p;
36177 int dollar_inside = 0;
36179 strcpy (buffer, name);
36180 p = strchr (buffer, '$');
36181 while (p) {
36182 *p = '_';
36183 dollar_inside++;
36184 p = strchr (p + 1, '$');
36186 if (TREE_PUBLIC (n->decl))
36188 if (!RS6000_WEAK || !DECL_WEAK (n->decl))
36190 if (dollar_inside) {
36191 if (data->function_descriptor)
36192 fprintf(data->file, "\t.rename .%s,\".%s\"\n", buffer, name);
36193 fprintf(data->file, "\t.rename %s,\"%s\"\n", buffer, name);
36195 if (data->function_descriptor)
36197 fputs ("\t.globl .", data->file);
36198 RS6000_OUTPUT_BASENAME (data->file, buffer);
36199 putc ('\n', data->file);
36201 fputs ("\t.globl ", data->file);
36202 RS6000_OUTPUT_BASENAME (data->file, buffer);
36203 putc ('\n', data->file);
36205 #ifdef ASM_WEAKEN_DECL
36206 else if (DECL_WEAK (n->decl) && !data->function_descriptor)
36207 ASM_WEAKEN_DECL (data->file, n->decl, name, NULL);
36208 #endif
36210 else
36212 if (dollar_inside)
36214 if (data->function_descriptor)
36215 fprintf(data->file, "\t.rename .%s,\".%s\"\n", buffer, name);
36216 fprintf(data->file, "\t.rename %s,\"%s\"\n", buffer, name);
36218 if (data->function_descriptor)
36220 fputs ("\t.lglobl .", data->file);
36221 RS6000_OUTPUT_BASENAME (data->file, buffer);
36222 putc ('\n', data->file);
36224 fputs ("\t.lglobl ", data->file);
36225 RS6000_OUTPUT_BASENAME (data->file, buffer);
36226 putc ('\n', data->file);
36228 if (data->function_descriptor)
36229 fputs (".", data->file);
36230 RS6000_OUTPUT_BASENAME (data->file, buffer);
36231 fputs (":\n", data->file);
36232 return false;
36236 #ifdef HAVE_GAS_HIDDEN
36237 /* Helper function to calculate visibility of a DECL
36238 and return the value as a const string. */
36240 static const char *
36241 rs6000_xcoff_visibility (tree decl)
36243 static const char * const visibility_types[] = {
36244 "", ",protected", ",hidden", ",internal"
36247 enum symbol_visibility vis = DECL_VISIBILITY (decl);
36249 if (TREE_CODE (decl) == FUNCTION_DECL
36250 && cgraph_node::get (decl)
36251 && cgraph_node::get (decl)->instrumentation_clone
36252 && cgraph_node::get (decl)->instrumented_version)
36253 vis = DECL_VISIBILITY (cgraph_node::get (decl)->instrumented_version->decl);
36255 return visibility_types[vis];
36257 #endif
36260 /* This macro produces the initial definition of a function name.
36261 On the RS/6000, we need to place an extra '.' in the function name and
36262 output the function descriptor.
36263 Dollar signs are converted to underscores.
36265 The csect for the function will have already been created when
36266 text_section was selected. We do have to go back to that csect, however.
36268 The third and fourth parameters to the .function pseudo-op (16 and 044)
36269 are placeholders which no longer have any use.
36271 Because AIX assembler's .set command has unexpected semantics, we output
36272 all aliases as alternative labels in front of the definition. */
36274 void
36275 rs6000_xcoff_declare_function_name (FILE *file, const char *name, tree decl)
36277 char *buffer = (char *) alloca (strlen (name) + 1);
36278 char *p;
36279 int dollar_inside = 0;
36280 struct declare_alias_data data = {file, false};
36282 strcpy (buffer, name);
36283 p = strchr (buffer, '$');
36284 while (p) {
36285 *p = '_';
36286 dollar_inside++;
36287 p = strchr (p + 1, '$');
36289 if (TREE_PUBLIC (decl))
36291 if (!RS6000_WEAK || !DECL_WEAK (decl))
36293 if (dollar_inside) {
36294 fprintf(file, "\t.rename .%s,\".%s\"\n", buffer, name);
36295 fprintf(file, "\t.rename %s,\"%s\"\n", buffer, name);
36297 fputs ("\t.globl .", file);
36298 RS6000_OUTPUT_BASENAME (file, buffer);
36299 #ifdef HAVE_GAS_HIDDEN
36300 fputs (rs6000_xcoff_visibility (decl), file);
36301 #endif
36302 putc ('\n', file);
36305 else
36307 if (dollar_inside) {
36308 fprintf(file, "\t.rename .%s,\".%s\"\n", buffer, name);
36309 fprintf(file, "\t.rename %s,\"%s\"\n", buffer, name);
36311 fputs ("\t.lglobl .", file);
36312 RS6000_OUTPUT_BASENAME (file, buffer);
36313 putc ('\n', file);
36315 fputs ("\t.csect ", file);
36316 RS6000_OUTPUT_BASENAME (file, buffer);
36317 fputs (TARGET_32BIT ? "[DS]\n" : "[DS],3\n", file);
36318 RS6000_OUTPUT_BASENAME (file, buffer);
36319 fputs (":\n", file);
36320 symtab_node::get (decl)->call_for_symbol_and_aliases (rs6000_declare_alias,
36321 &data, true);
36322 fputs (TARGET_32BIT ? "\t.long ." : "\t.llong .", file);
36323 RS6000_OUTPUT_BASENAME (file, buffer);
36324 fputs (", TOC[tc0], 0\n", file);
36325 in_section = NULL;
36326 switch_to_section (function_section (decl));
36327 putc ('.', file);
36328 RS6000_OUTPUT_BASENAME (file, buffer);
36329 fputs (":\n", file);
36330 data.function_descriptor = true;
36331 symtab_node::get (decl)->call_for_symbol_and_aliases (rs6000_declare_alias,
36332 &data, true);
36333 if (!DECL_IGNORED_P (decl))
36335 if (write_symbols == DBX_DEBUG || write_symbols == XCOFF_DEBUG)
36336 xcoffout_declare_function (file, decl, buffer);
36337 else if (write_symbols == DWARF2_DEBUG)
36339 name = (*targetm.strip_name_encoding) (name);
36340 fprintf (file, "\t.function .%s,.%s,2,0\n", name, name);
36343 return;
36347 /* Output assembly language to globalize a symbol from a DECL,
36348 possibly with visibility. */
36350 void
36351 rs6000_xcoff_asm_globalize_decl_name (FILE *stream, tree decl)
36353 const char *name = XSTR (XEXP (DECL_RTL (decl), 0), 0);
36354 fputs (GLOBAL_ASM_OP, stream);
36355 RS6000_OUTPUT_BASENAME (stream, name);
36356 #ifdef HAVE_GAS_HIDDEN
36357 fputs (rs6000_xcoff_visibility (decl), stream);
36358 #endif
36359 putc ('\n', stream);
36362 /* Output assembly language to define a symbol as COMMON from a DECL,
36363 possibly with visibility. */
36365 void
36366 rs6000_xcoff_asm_output_aligned_decl_common (FILE *stream,
36367 tree decl ATTRIBUTE_UNUSED,
36368 const char *name,
36369 unsigned HOST_WIDE_INT size,
36370 unsigned HOST_WIDE_INT align)
36372 unsigned HOST_WIDE_INT align2 = 2;
36374 if (align > 32)
36375 align2 = floor_log2 (align / BITS_PER_UNIT);
36376 else if (size > 4)
36377 align2 = 3;
36379 fputs (COMMON_ASM_OP, stream);
36380 RS6000_OUTPUT_BASENAME (stream, name);
36382 fprintf (stream,
36383 "," HOST_WIDE_INT_PRINT_UNSIGNED "," HOST_WIDE_INT_PRINT_UNSIGNED,
36384 size, align2);
36386 #ifdef HAVE_GAS_HIDDEN
36387 fputs (rs6000_xcoff_visibility (decl), stream);
36388 #endif
36389 putc ('\n', stream);
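/* For example (sketch): a global "double d" (size 8, alignment 64 bits)
   gets align2 = 3 from either branch above, so the directive is roughly
   ".comm d,8,3" (an alignment of 2**3 bytes).  */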
36392 /* This macro produces the initial definition of an object (variable) name.
36393 Because AIX assembler's .set command has unexpected semantics, we output
36394 all aliases as alternative labels in front of the definition. */
36396 void
36397 rs6000_xcoff_declare_object_name (FILE *file, const char *name, tree decl)
36399 struct declare_alias_data data = {file, false};
36400 RS6000_OUTPUT_BASENAME (file, name);
36401 fputs (":\n", file);
36402 symtab_node::get_create (decl)->call_for_symbol_and_aliases (rs6000_declare_alias,
36403 &data, true);
36406 /* Override the default 'SYMBOL-.' syntax with AIX-compatible 'SYMBOL-$'. */
36408 void
36409 rs6000_asm_output_dwarf_pcrel (FILE *file, int size, const char *label)
36411 fputs (integer_asm_op (size, FALSE), file);
36412 assemble_name (file, label);
36413 fputs ("-$", file);
36416 /* Output a symbol offset relative to the dbase for the current object.
36417 We use __gcc_unwind_dbase as an arbitrary base for dbase and assume
36418 signed offsets.
36420 __gcc_unwind_dbase is embedded in all executables/libraries through
36421 libgcc/config/rs6000/crtdbase.S. */
36423 void
36424 rs6000_asm_output_dwarf_datarel (FILE *file, int size, const char *label)
36426 fputs (integer_asm_op (size, FALSE), file);
36427 assemble_name (file, label);
36428 fputs("-__gcc_unwind_dbase", file);
36431 #ifdef HAVE_AS_TLS
36432 static void
36433 rs6000_xcoff_encode_section_info (tree decl, rtx rtl, int first)
36435 rtx symbol;
36436 int flags;
36437 const char *symname;
36439 default_encode_section_info (decl, rtl, first);
36441 /* Careful not to prod global register variables. */
36442 if (!MEM_P (rtl))
36443 return;
36444 symbol = XEXP (rtl, 0);
36445 if (GET_CODE (symbol) != SYMBOL_REF)
36446 return;
36448 flags = SYMBOL_REF_FLAGS (symbol);
36450 if (TREE_CODE (decl) == VAR_DECL && DECL_THREAD_LOCAL_P (decl))
36451 flags &= ~SYMBOL_FLAG_HAS_BLOCK_INFO;
36453 SYMBOL_REF_FLAGS (symbol) = flags;
36455 /* Append mapping class to extern decls. */
36456 symname = XSTR (symbol, 0);
36457 if (decl /* sync condition with assemble_external () */
36458 && DECL_P (decl) && DECL_EXTERNAL (decl) && TREE_PUBLIC (decl)
36459 && ((TREE_CODE (decl) == VAR_DECL && !DECL_THREAD_LOCAL_P (decl))
36460 || TREE_CODE (decl) == FUNCTION_DECL)
36461 && symname[strlen (symname) - 1] != ']')
36463 char *newname = (char *) alloca (strlen (symname) + 5);
36464 strcpy (newname, symname);
36465 strcat (newname, (TREE_CODE (decl) == FUNCTION_DECL
36466 ? "[DS]" : "[UA]"));
36467 XSTR (symbol, 0) = ggc_strdup (newname);
36470 #endif /* HAVE_AS_TLS */
36471 #endif /* TARGET_XCOFF */
36473 void
36474 rs6000_asm_weaken_decl (FILE *stream, tree decl,
36475 const char *name, const char *val)
36477 fputs ("\t.weak\t", stream);
36478 RS6000_OUTPUT_BASENAME (stream, name);
36479 if (decl && TREE_CODE (decl) == FUNCTION_DECL
36480 && DEFAULT_ABI == ABI_AIX && DOT_SYMBOLS)
36482 if (TARGET_XCOFF)
36483 fputs ("[DS]", stream);
36484 #if TARGET_XCOFF && HAVE_GAS_HIDDEN
36485 if (TARGET_XCOFF)
36486 fputs (rs6000_xcoff_visibility (decl), stream);
36487 #endif
36488 fputs ("\n\t.weak\t.", stream);
36489 RS6000_OUTPUT_BASENAME (stream, name);
36491 #if TARGET_XCOFF && HAVE_GAS_HIDDEN
36492 if (TARGET_XCOFF)
36493 fputs (rs6000_xcoff_visibility (decl), stream);
36494 #endif
36495 fputc ('\n', stream);
36496 if (val)
36498 #ifdef ASM_OUTPUT_DEF
36499 ASM_OUTPUT_DEF (stream, name, val);
36500 #endif
36501 if (decl && TREE_CODE (decl) == FUNCTION_DECL
36502 && DEFAULT_ABI == ABI_AIX && DOT_SYMBOLS)
36504 fputs ("\t.set\t.", stream);
36505 RS6000_OUTPUT_BASENAME (stream, name);
36506 fputs (",.", stream);
36507 RS6000_OUTPUT_BASENAME (stream, val);
36508 fputc ('\n', stream);
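/* Illustrative sketch (not part of this file): on AIX with DOT_SYMBOLS,
   weakening function "foo" with definition "bar" emits roughly
	.weak foo[DS]
	.weak .foo
	<ASM_OUTPUT_DEF expansion for foo, bar>
	.set .foo,.bar  */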
36514 /* Return true if INSN should not be copied. */
36516 static bool
36517 rs6000_cannot_copy_insn_p (rtx_insn *insn)
36519 return recog_memoized (insn) >= 0
36520 && get_attr_cannot_copy (insn);
36523 /* Compute a (partial) cost for rtx X. Return true if the complete
36524 cost has been computed, and false if subexpressions should be
36525 scanned. In either case, *TOTAL contains the cost result. */
36527 static bool
36528 rs6000_rtx_costs (rtx x, machine_mode mode, int outer_code,
36529 int opno ATTRIBUTE_UNUSED, int *total, bool speed)
36531 int code = GET_CODE (x);
36533 switch (code)
36535 /* On the RS/6000, if it is valid in the insn, it is free. */
36536 case CONST_INT:
36537 if (((outer_code == SET
36538 || outer_code == PLUS
36539 || outer_code == MINUS)
36540 && (satisfies_constraint_I (x)
36541 || satisfies_constraint_L (x)))
36542 || (outer_code == AND
36543 && (satisfies_constraint_K (x)
36544 || (mode == SImode
36545 ? satisfies_constraint_L (x)
36546 : satisfies_constraint_J (x))))
36547 || ((outer_code == IOR || outer_code == XOR)
36548 && (satisfies_constraint_K (x)
36549 || (mode == SImode
36550 ? satisfies_constraint_L (x)
36551 : satisfies_constraint_J (x))))
36552 || outer_code == ASHIFT
36553 || outer_code == ASHIFTRT
36554 || outer_code == LSHIFTRT
36555 || outer_code == ROTATE
36556 || outer_code == ROTATERT
36557 || outer_code == ZERO_EXTRACT
36558 || (outer_code == MULT
36559 && satisfies_constraint_I (x))
36560 || ((outer_code == DIV || outer_code == UDIV
36561 || outer_code == MOD || outer_code == UMOD)
36562 && exact_log2 (INTVAL (x)) >= 0)
36563 || (outer_code == COMPARE
36564 && (satisfies_constraint_I (x)
36565 || satisfies_constraint_K (x)))
36566 || ((outer_code == EQ || outer_code == NE)
36567 && (satisfies_constraint_I (x)
36568 || satisfies_constraint_K (x)
36569 || (mode == SImode
36570 ? satisfies_constraint_L (x)
36571 : satisfies_constraint_J (x))))
36572 || (outer_code == GTU
36573 && satisfies_constraint_I (x))
36574 || (outer_code == LTU
36575 && satisfies_constraint_P (x)))
36577 *total = 0;
36578 return true;
36580 else if ((outer_code == PLUS
36581 && reg_or_add_cint_operand (x, VOIDmode))
36582 || (outer_code == MINUS
36583 && reg_or_sub_cint_operand (x, VOIDmode))
36584 || ((outer_code == SET
36585 || outer_code == IOR
36586 || outer_code == XOR)
36587 && (INTVAL (x)
36588 & ~ (unsigned HOST_WIDE_INT) 0xffffffff) == 0))
36590 *total = COSTS_N_INSNS (1);
36591 return true;
36593 /* FALLTHRU */
36595 case CONST_DOUBLE:
36596 case CONST_WIDE_INT:
36597 case CONST:
36598 case HIGH:
36599 case SYMBOL_REF:
36600 *total = !speed ? COSTS_N_INSNS (1) + 1 : COSTS_N_INSNS (2);
36601 return true;
36603 case MEM:
36604 /* When optimizing for size, MEM should be slightly more expensive
36605 than generating the address, e.g., (plus (reg) (const)).
36606 L1 cache latency is about two instructions. */
36607 *total = !speed ? COSTS_N_INSNS (1) + 1 : COSTS_N_INSNS (2);
36608 if (SLOW_UNALIGNED_ACCESS (mode, MEM_ALIGN (x)))
36609 *total += COSTS_N_INSNS (100);
36610 return true;
36612 case LABEL_REF:
36613 *total = 0;
36614 return true;
36616 case PLUS:
36617 case MINUS:
36618 if (FLOAT_MODE_P (mode))
36619 *total = rs6000_cost->fp;
36620 else
36621 *total = COSTS_N_INSNS (1);
36622 return false;
36624 case MULT:
36625 if (GET_CODE (XEXP (x, 1)) == CONST_INT
36626 && satisfies_constraint_I (XEXP (x, 1)))
36628 if (INTVAL (XEXP (x, 1)) >= -256
36629 && INTVAL (XEXP (x, 1)) <= 255)
36630 *total = rs6000_cost->mulsi_const9;
36631 else
36632 *total = rs6000_cost->mulsi_const;
36634 else if (mode == SFmode)
36635 *total = rs6000_cost->fp;
36636 else if (FLOAT_MODE_P (mode))
36637 *total = rs6000_cost->dmul;
36638 else if (mode == DImode)
36639 *total = rs6000_cost->muldi;
36640 else
36641 *total = rs6000_cost->mulsi;
36642 return false;
36644 case FMA:
36645 if (mode == SFmode)
36646 *total = rs6000_cost->fp;
36647 else
36648 *total = rs6000_cost->dmul;
36649 break;
36651 case DIV:
36652 case MOD:
36653 if (FLOAT_MODE_P (mode))
36655 *total = mode == DFmode ? rs6000_cost->ddiv
36656 : rs6000_cost->sdiv;
36657 return false;
36659 /* FALLTHRU */
36661 case UDIV:
36662 case UMOD:
36663 if (GET_CODE (XEXP (x, 1)) == CONST_INT
36664 && exact_log2 (INTVAL (XEXP (x, 1))) >= 0)
36666 if (code == DIV || code == MOD)
36667 /* Shift, addze */
36668 *total = COSTS_N_INSNS (2);
36669 else
36670 /* Shift */
36671 *total = COSTS_N_INSNS (1);
36673 else
36675 if (GET_MODE (XEXP (x, 1)) == DImode)
36676 *total = rs6000_cost->divdi;
36677 else
36678 *total = rs6000_cost->divsi;
36680 /* Add in shift and subtract for MOD unless we have a mod instruction. */
36681 if (!TARGET_MODULO && (code == MOD || code == UMOD))
36682 *total += COSTS_N_INSNS (2);
36683 return false;
36685 case CTZ:
36686 *total = COSTS_N_INSNS (TARGET_CTZ ? 1 : 4);
36687 return false;
36689 case FFS:
36690 *total = COSTS_N_INSNS (4);
36691 return false;
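/* Four instructions match one way of open-coding ffs, isolating the
   lowest set bit and counting leading zeros, e.g.:
       neg    t,x
       and    t,t,x
       cntlzw t,t
       subfic r,t,32  */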
36693 case POPCOUNT:
36694 *total = COSTS_N_INSNS (TARGET_POPCNTD ? 1 : 6);
36695 return false;
36697 case PARITY:
36698 *total = COSTS_N_INSNS (TARGET_CMPB ? 2 : 6);
36699 return false;
36701 case NOT:
36702 if (outer_code == AND || outer_code == IOR || outer_code == XOR)
36703 *total = 0;
36704 else
36705 *total = COSTS_N_INSNS (1);
36706 return false;
36708 case AND:
36709 if (CONST_INT_P (XEXP (x, 1)))
36711 rtx left = XEXP (x, 0);
36712 rtx_code left_code = GET_CODE (left);
36714 /* rotate-and-mask: 1 insn. */
36715 if ((left_code == ROTATE
36716 || left_code == ASHIFT
36717 || left_code == LSHIFTRT)
36718 && rs6000_is_valid_shift_mask (XEXP (x, 1), left, mode))
36720 *total = rtx_cost (XEXP (left, 0), mode, left_code, 0, speed);
36721 if (!CONST_INT_P (XEXP (left, 1)))
36722 *total += rtx_cost (XEXP (left, 1), SImode, left_code, 1, speed);
36723 *total += COSTS_N_INSNS (1);
36724 return true;
36727 /* rotate-and-mask (no rotate), andi., andis.: 1 insn. */
36728 HOST_WIDE_INT val = INTVAL (XEXP (x, 1));
36729 if (rs6000_is_valid_and_mask (XEXP (x, 1), mode)
36730 || (val & 0xffff) == val
36731 || (val & 0xffff0000) == val
36732 || ((val & 0xffff) == 0 && mode == SImode))
36734 *total = rtx_cost (left, mode, AND, 0, speed);
36735 *total += COSTS_N_INSNS (1);
36736 return true;
36739 /* 2 insns. */
36740 if (rs6000_is_valid_2insn_and (XEXP (x, 1), mode))
36742 *total = rtx_cost (left, mode, AND, 0, speed);
36743 *total += COSTS_N_INSNS (2);
36744 return true;
36748 *total = COSTS_N_INSNS (1);
36749 return false;
36751 case IOR:
36752 /* FIXME */
36753 *total = COSTS_N_INSNS (1);
36754 return true;
36756 case CLZ:
36757 case XOR:
36758 case ZERO_EXTRACT:
36759 *total = COSTS_N_INSNS (1);
36760 return false;
36762 case ASHIFT:
36763 /* The EXTSWSLI instruction is a combined instruction. Don't count
36764 the sign extend and the shift separately within the insn. */
36765 if (TARGET_EXTSWSLI && mode == DImode
36766 && GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
36767 && GET_MODE (XEXP (XEXP (x, 0), 0)) == SImode)
36769 *total = 0;
36770 return false;
36772 /* fall through */
36774 case ASHIFTRT:
36775 case LSHIFTRT:
36776 case ROTATE:
36777 case ROTATERT:
36778 /* Handle mul_highpart. */
36779 if (outer_code == TRUNCATE
36780 && GET_CODE (XEXP (x, 0)) == MULT)
36782 if (mode == DImode)
36783 *total = rs6000_cost->muldi;
36784 else
36785 *total = rs6000_cost->mulsi;
36786 return true;
36788 else if (outer_code == AND)
36789 *total = 0;
36790 else
36791 *total = COSTS_N_INSNS (1);
36792 return false;
36794 case SIGN_EXTEND:
36795 case ZERO_EXTEND:
36796 if (GET_CODE (XEXP (x, 0)) == MEM)
36797 *total = 0;
36798 else
36799 *total = COSTS_N_INSNS (1);
36800 return false;
36802 case COMPARE:
36803 case NEG:
36804 case ABS:
36805 if (!FLOAT_MODE_P (mode))
36807 *total = COSTS_N_INSNS (1);
36808 return false;
36810 /* FALLTHRU */
36812 case FLOAT:
36813 case UNSIGNED_FLOAT:
36814 case FIX:
36815 case UNSIGNED_FIX:
36816 case FLOAT_TRUNCATE:
36817 *total = rs6000_cost->fp;
36818 return false;
36820 case FLOAT_EXTEND:
36821 if (mode == DFmode)
36822 *total = rs6000_cost->sfdf_convert;
36823 else
36824 *total = rs6000_cost->fp;
36825 return false;
36827 case UNSPEC:
36828 switch (XINT (x, 1))
36830 case UNSPEC_FRSP:
36831 *total = rs6000_cost->fp;
36832 return true;
36834 default:
36835 break;
36837 break;
36839 case CALL:
36840 case IF_THEN_ELSE:
36841 if (!speed)
36843 *total = COSTS_N_INSNS (1);
36844 return true;
36846 else if (FLOAT_MODE_P (mode)
36847 && TARGET_PPC_GFXOPT && TARGET_HARD_FLOAT && TARGET_FPRS)
36849 *total = rs6000_cost->fp;
36850 return false;
36852 break;
36854 case NE:
36855 case EQ:
36856 case GTU:
36857 case LTU:
36858 /* Carry bit requires mode == Pmode.
36859 NEG or PLUS already counted so only add one. */
36860 if (mode == Pmode
36861 && (outer_code == NEG || outer_code == PLUS))
36863 *total = COSTS_N_INSNS (1);
36864 return true;
36866 if (outer_code == SET)
36868 if (XEXP (x, 1) == const0_rtx)
36870 if (TARGET_ISEL && !TARGET_MFCRF)
36871 *total = COSTS_N_INSNS (8);
36872 else
36873 *total = COSTS_N_INSNS (2);
36874 return true;
36876 else
36878 *total = COSTS_N_INSNS (3);
36879 return false;
36882 /* FALLTHRU */
36884 case GT:
36885 case LT:
36886 case UNORDERED:
36887 if (outer_code == SET && (XEXP (x, 1) == const0_rtx))
36889 if (TARGET_ISEL && !TARGET_MFCRF)
36890 *total = COSTS_N_INSNS (8);
36891 else
36892 *total = COSTS_N_INSNS (2);
36893 return true;
36895 /* CC COMPARE. */
36896 if (outer_code == COMPARE)
36898 *total = 0;
36899 return true;
36901 break;
36903 default:
36904 break;
36907 return false;
36910 /* Debug form of rs6000_rtx_costs that is selected if -mdebug=cost. */
36912 static bool
36913 rs6000_debug_rtx_costs (rtx x, machine_mode mode, int outer_code,
36914 int opno, int *total, bool speed)
36916 bool ret = rs6000_rtx_costs (x, mode, outer_code, opno, total, speed);
36918 fprintf (stderr,
36919 "\nrs6000_rtx_costs, return = %s, mode = %s, outer_code = %s, "
36920 "opno = %d, total = %d, speed = %s, x:\n",
36921 ret ? "complete" : "scan inner",
36922 GET_MODE_NAME (mode),
36923 GET_RTX_NAME (outer_code),
36924 opno,
36925 *total,
36926 speed ? "true" : "false");
36928 debug_rtx (x);
36930 return ret;
36933 /* Debug form of ADDRESS_COST that is selected if -mdebug=cost. */
36935 static int
36936 rs6000_debug_address_cost (rtx x, machine_mode mode,
36937 addr_space_t as, bool speed)
36939 int ret = TARGET_ADDRESS_COST (x, mode, as, speed);
36941 fprintf (stderr, "\nrs6000_address_cost, return = %d, speed = %s, x:\n",
36942 ret, speed ? "true" : "false");
36943 debug_rtx (x);
36945 return ret;
36949 /* A C expression returning the cost of moving data from a register of class
36950 CLASS1 to one of CLASS2. */
36952 static int
36953 rs6000_register_move_cost (machine_mode mode,
36954 reg_class_t from, reg_class_t to)
36956 int ret;
36958 if (TARGET_DEBUG_COST)
36959 dbg_cost_ctrl++;
36961 /* Moves from/to GENERAL_REGS. */
36962 if (reg_classes_intersect_p (to, GENERAL_REGS)
36963 || reg_classes_intersect_p (from, GENERAL_REGS))
36965 reg_class_t rclass = from;
36967 if (! reg_classes_intersect_p (to, GENERAL_REGS))
36968 rclass = to;
36970 if (rclass == FLOAT_REGS || rclass == ALTIVEC_REGS || rclass == VSX_REGS)
36971 ret = (rs6000_memory_move_cost (mode, rclass, false)
36972 + rs6000_memory_move_cost (mode, GENERAL_REGS, false));
36974 /* It's more expensive to move CR_REGS than CR0_REGS because of the
36975 shift. */
36976 else if (rclass == CR_REGS)
36977 ret = 4;
36979 /* For those processors that have slow LR/CTR moves, make them more
36980 expensive than memory in order to bias spills to memory. */
36981 else if ((rs6000_cpu == PROCESSOR_POWER6
36982 || rs6000_cpu == PROCESSOR_POWER7
36983 || rs6000_cpu == PROCESSOR_POWER8
36984 || rs6000_cpu == PROCESSOR_POWER9)
36985 && reg_classes_intersect_p (rclass, LINK_OR_CTR_REGS))
36986 ret = 6 * hard_regno_nregs[0][mode];
36988 else
36989 /* A move will cost one instruction per GPR moved. */
36990 ret = 2 * hard_regno_nregs[0][mode];
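/* E.g., moving a DImode value on a 32-bit target touches two GPRs,
   giving ret = 2 * 2 = 4. */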
36993 /* If we have VSX, we can easily move between FPR or Altivec registers. */
36994 else if (VECTOR_MEM_VSX_P (mode)
36995 && reg_classes_intersect_p (to, VSX_REGS)
36996 && reg_classes_intersect_p (from, VSX_REGS))
36997 ret = 2 * hard_regno_nregs[FIRST_FPR_REGNO][mode];
36999 /* Moving between two similar registers is just one instruction. */
37000 else if (reg_classes_intersect_p (to, from))
37001 ret = (FLOAT128_2REG_P (mode)) ? 4 : 2;
37003 /* Everything else has to go through GENERAL_REGS. */
37004 else
37005 ret = (rs6000_register_move_cost (mode, GENERAL_REGS, to)
37006 + rs6000_register_move_cost (mode, from, GENERAL_REGS));
37008 if (TARGET_DEBUG_COST)
37010 if (dbg_cost_ctrl == 1)
37011 fprintf (stderr,
37012 "rs6000_register_move_cost:, ret=%d, mode=%s, from=%s, to=%s\n",
37013 ret, GET_MODE_NAME (mode), reg_class_names[from],
37014 reg_class_names[to]);
37015 dbg_cost_ctrl--;
37018 return ret;
37021 /* A C expression returning the cost of moving data of MODE from a register to
37022 or from memory. */
37024 static int
37025 rs6000_memory_move_cost (machine_mode mode, reg_class_t rclass,
37026 bool in ATTRIBUTE_UNUSED)
37028 int ret;
37030 if (TARGET_DEBUG_COST)
37031 dbg_cost_ctrl++;
37033 if (reg_classes_intersect_p (rclass, GENERAL_REGS))
37034 ret = 4 * hard_regno_nregs[0][mode];
37035 else if ((reg_classes_intersect_p (rclass, FLOAT_REGS)
37036 || reg_classes_intersect_p (rclass, VSX_REGS)))
37037 ret = 4 * hard_regno_nregs[32][mode];
37038 else if (reg_classes_intersect_p (rclass, ALTIVEC_REGS))
37039 ret = 4 * hard_regno_nregs[FIRST_ALTIVEC_REGNO][mode];
37040 else
37041 ret = 4 + rs6000_register_move_cost (mode, rclass, GENERAL_REGS);
37043 if (TARGET_DEBUG_COST)
37045 if (dbg_cost_ctrl == 1)
37046 fprintf (stderr,
37047 "rs6000_memory_move_cost: ret=%d, mode=%s, rclass=%s, in=%d\n",
37048 ret, GET_MODE_NAME (mode), reg_class_names[rclass], in);
37049 dbg_cost_ctrl--;
37052 return ret;
37055 /* Returns a code for a target-specific builtin that implements
37056 reciprocal of the function, or NULL_TREE if not available. */
37058 static tree
37059 rs6000_builtin_reciprocal (tree fndecl)
37061 switch (DECL_FUNCTION_CODE (fndecl))
37063 case VSX_BUILTIN_XVSQRTDP:
37064 if (!RS6000_RECIP_AUTO_RSQRTE_P (V2DFmode))
37065 return NULL_TREE;
37067 return rs6000_builtin_decls[VSX_BUILTIN_RSQRT_2DF];
37069 case VSX_BUILTIN_XVSQRTSP:
37070 if (!RS6000_RECIP_AUTO_RSQRTE_P (V4SFmode))
37071 return NULL_TREE;
37073 return rs6000_builtin_decls[VSX_BUILTIN_RSQRT_4SF];
37075 default:
37076 return NULL_TREE;
37080 /* Load up a constant. If the mode is a vector mode, splat the value across
37081 all of the vector elements. */
37083 static rtx
37084 rs6000_load_constant_and_splat (machine_mode mode, REAL_VALUE_TYPE dconst)
37086 rtx reg;
37088 if (mode == SFmode || mode == DFmode)
37090 rtx d = const_double_from_real_value (dconst, mode);
37091 reg = force_reg (mode, d);
37093 else if (mode == V4SFmode)
37095 rtx d = const_double_from_real_value (dconst, SFmode);
37096 rtvec v = gen_rtvec (4, d, d, d, d);
37097 reg = gen_reg_rtx (mode);
37098 rs6000_expand_vector_init (reg, gen_rtx_PARALLEL (mode, v));
37100 else if (mode == V2DFmode)
37102 rtx d = const_double_from_real_value (dconst, DFmode);
37103 rtvec v = gen_rtvec (2, d, d);
37104 reg = gen_reg_rtx (mode);
37105 rs6000_expand_vector_init (reg, gen_rtx_PARALLEL (mode, v));
37107 else
37108 gcc_unreachable ();
37110 return reg;
37113 /* Generate an FMA instruction. */
37115 static void
37116 rs6000_emit_madd (rtx target, rtx m1, rtx m2, rtx a)
37118 machine_mode mode = GET_MODE (target);
37119 rtx dst;
37121 dst = expand_ternary_op (mode, fma_optab, m1, m2, a, target, 0);
37122 gcc_assert (dst != NULL);
37124 if (dst != target)
37125 emit_move_insn (target, dst);
37128 /* Generate a FNMSUB instruction: dst = -fma(m1, m2, -a). */
37130 static void
37131 rs6000_emit_nmsub (rtx dst, rtx m1, rtx m2, rtx a)
37133 machine_mode mode = GET_MODE (dst);
37134 rtx r;
37136 /* This is a tad more complicated, since the fnma_optab is for
37137 a different expression: fma(-m1, m2, a), which is the same
37138 thing except in the case of signed zeros.
37140 Fortunately we know that if FMA is supported that FNMSUB is
37141 also supported in the ISA. Just expand it directly. */
37143 gcc_assert (optab_handler (fma_optab, mode) != CODE_FOR_nothing);
37145 r = gen_rtx_NEG (mode, a);
37146 r = gen_rtx_FMA (mode, m1, m2, r);
37147 r = gen_rtx_NEG (mode, r);
37148 emit_insn (gen_rtx_SET (dst, r));
37151 /* Newton-Raphson approximation of floating point divide DST = N/D. If NOTE_P,
37152 add a reg_note saying that this was a division. Support both scalar and
37153 vector divide. Assumes no trapping math and finite arguments. */
37155 void
37156 rs6000_emit_swdiv (rtx dst, rtx n, rtx d, bool note_p)
37158 machine_mode mode = GET_MODE (dst);
37159 rtx one, x0, e0, x1, xprev, eprev, xnext, enext, u, v;
37160 int i;
37162 /* Low precision estimates guarantee 5 bits of accuracy. High
37163 precision estimates guarantee 14 bits of accuracy. SFmode
37164 requires 23 bits of accuracy. DFmode requires 52 bits of
37165 accuracy. Each pass at least doubles the accuracy, leading
37166 to the following. */
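/* Concretely, with the low precision 5-bit estimate the accuracy grows
   5 -> 10 -> 20 -> 40 bits, so SFmode (23 bits) needs 3 passes and
   DFmode (52 bits) needs 4; with the 14-bit estimate it grows
   14 -> 28 -> 56, needing 1 and 2 passes respectively, matching the
   computation below. */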
37167 int passes = (TARGET_RECIP_PRECISION) ? 1 : 3;
37168 if (mode == DFmode || mode == V2DFmode)
37169 passes++;
37171 enum insn_code code = optab_handler (smul_optab, mode);
37172 insn_gen_fn gen_mul = GEN_FCN (code);
37174 gcc_assert (code != CODE_FOR_nothing);
37176 one = rs6000_load_constant_and_splat (mode, dconst1);
37178 /* x0 = 1./d estimate */
37179 x0 = gen_reg_rtx (mode);
37180 emit_insn (gen_rtx_SET (x0, gen_rtx_UNSPEC (mode, gen_rtvec (1, d),
37181 UNSPEC_FRES)));
37183 /* Each iteration but the last calculates x_(i+1) = x_i * (2 - d * x_i). */
37184 if (passes > 1) {
37186 /* e0 = 1. - d * x0 */
37187 e0 = gen_reg_rtx (mode);
37188 rs6000_emit_nmsub (e0, d, x0, one);
37190 /* x1 = x0 + e0 * x0 */
37191 x1 = gen_reg_rtx (mode);
37192 rs6000_emit_madd (x1, e0, x0, x0);
37194 for (i = 0, xprev = x1, eprev = e0; i < passes - 2;
37195 ++i, xprev = xnext, eprev = enext) {
37197 /* enext = eprev * eprev */
37198 enext = gen_reg_rtx (mode);
37199 emit_insn (gen_mul (enext, eprev, eprev));
37201 /* xnext = xprev + enext * xprev */
37202 xnext = gen_reg_rtx (mode);
37203 rs6000_emit_madd (xnext, enext, xprev, xprev);
37206 } else
37207 xprev = x0;
37209 /* The last iteration calculates x_(i+1) = n * x_i * (2 - d * x_i). */
37211 /* u = n * xprev */
37212 u = gen_reg_rtx (mode);
37213 emit_insn (gen_mul (u, n, xprev));
37215 /* v = n - (d * u) */
37216 v = gen_reg_rtx (mode);
37217 rs6000_emit_nmsub (v, d, u, n);
37219 /* dst = (v * xprev) + u */
37220 rs6000_emit_madd (dst, v, xprev, u);
37222 if (note_p)
37223 add_reg_note (get_last_insn (), REG_EQUAL, gen_rtx_DIV (mode, n, d));
37226 /* Goldschmidt's Algorithm for single/double-precision floating point
37227 sqrt and rsqrt. Assumes no trapping math and finite arguments. */
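/* The passes below maintain the invariants g ~= sqrt(src) and
   h ~= 1/(2*sqrt(src)): each pass forms the residual t = 1/2 - g*h,
   which is zero when both estimates are exact, and refines
   g' = g + g*t, h' = h + h*t, roughly doubling the number of correct
   bits. */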
37229 void
37230 rs6000_emit_swsqrt (rtx dst, rtx src, bool recip)
37232 machine_mode mode = GET_MODE (src);
37233 rtx e = gen_reg_rtx (mode);
37234 rtx g = gen_reg_rtx (mode);
37235 rtx h = gen_reg_rtx (mode);
37237 /* Low precision estimates guarantee 5 bits of accuracy. High
37238 precision estimates guarantee 14 bits of accuracy. SFmode
37239 requires 23 bits of accuracy. DFmode requires 52 bits of
37240 accuracy. Each pass at least doubles the accuracy, leading
37241 to the following. */
37242 int passes = (TARGET_RECIP_PRECISION) ? 1 : 3;
37243 if (mode == DFmode || mode == V2DFmode)
37244 passes++;
37246 int i;
37247 rtx mhalf;
37248 enum insn_code code = optab_handler (smul_optab, mode);
37249 insn_gen_fn gen_mul = GEN_FCN (code);
37251 gcc_assert (code != CODE_FOR_nothing);
37253 mhalf = rs6000_load_constant_and_splat (mode, dconsthalf);
37255 /* e = rsqrt estimate */
37256 emit_insn (gen_rtx_SET (e, gen_rtx_UNSPEC (mode, gen_rtvec (1, src),
37257 UNSPEC_RSQRT)));
37259 /* If (src == 0.0) filter infinity to prevent NaN for sqrt(0.0). */
37260 if (!recip)
37262 rtx zero = force_reg (mode, CONST0_RTX (mode));
37264 if (mode == SFmode)
37266 rtx target = emit_conditional_move (e, GT, src, zero, mode,
37267 e, zero, mode, 0);
37268 if (target != e)
37269 emit_move_insn (e, target);
37271 else
37273 rtx cond = gen_rtx_GT (VOIDmode, e, zero);
37274 rs6000_emit_vector_cond_expr (e, e, zero, cond, src, zero);
37278 /* g = sqrt estimate. */
37279 emit_insn (gen_mul (g, e, src));
37280 /* h = 1/(2*sqrt) estimate. */
37281 emit_insn (gen_mul (h, e, mhalf));
37283 if (recip)
37285 if (passes == 1)
37287 rtx t = gen_reg_rtx (mode);
37288 rs6000_emit_nmsub (t, g, h, mhalf);
37289 /* Apply correction directly to 1/rsqrt estimate. */
37290 rs6000_emit_madd (dst, e, t, e);
37292 else
37294 for (i = 0; i < passes; i++)
37296 rtx t1 = gen_reg_rtx (mode);
37297 rtx g1 = gen_reg_rtx (mode);
37298 rtx h1 = gen_reg_rtx (mode);
37300 rs6000_emit_nmsub (t1, g, h, mhalf);
37301 rs6000_emit_madd (g1, g, t1, g);
37302 rs6000_emit_madd (h1, h, t1, h);
37304 g = g1;
37305 h = h1;
37307 /* Multiply by 2 for 1/rsqrt. */
37308 emit_insn (gen_add3_insn (dst, h, h));
37311 else
37313 rtx t = gen_reg_rtx (mode);
37314 rs6000_emit_nmsub (t, g, h, mhalf);
37315 rs6000_emit_madd (dst, g, t, g);
37318 return;
37321 /* Emit popcount intrinsic on TARGET_POPCNTB (Power5) and TARGET_POPCNTD
37322 (Power7) targets. DST is the target, and SRC is the argument operand. */
37324 void
37325 rs6000_emit_popcount (rtx dst, rtx src)
37327 machine_mode mode = GET_MODE (dst);
37328 rtx tmp1, tmp2;
37330 /* Use the PPC ISA 2.06 popcnt{w,d} instruction if we can. */
37331 if (TARGET_POPCNTD)
37333 if (mode == SImode)
37334 emit_insn (gen_popcntdsi2 (dst, src));
37335 else
37336 emit_insn (gen_popcntddi2 (dst, src));
37337 return;
37340 tmp1 = gen_reg_rtx (mode);
37342 if (mode == SImode)
37344 emit_insn (gen_popcntbsi2 (tmp1, src));
37345 tmp2 = expand_mult (SImode, tmp1, GEN_INT (0x01010101),
37346 NULL_RTX, 0);
37347 tmp2 = force_reg (SImode, tmp2);
37348 emit_insn (gen_lshrsi3 (dst, tmp2, GEN_INT (24)));
37350 else
37352 emit_insn (gen_popcntbdi2 (tmp1, src));
37353 tmp2 = expand_mult (DImode, tmp1,
37354 GEN_INT ((HOST_WIDE_INT)
37355 0x01010101 << 32 | 0x01010101),
37356 NULL_RTX, 0);
37357 tmp2 = force_reg (DImode, tmp2);
37358 emit_insn (gen_lshrdi3 (dst, tmp2, GEN_INT (56)));
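/* The multiply above is the usual horizontal-sum trick: popcntb leaves
   a per-byte population count in each byte, multiplying by the
   0x01...01 pattern accumulates all of the byte counts into the most
   significant byte, and the final right shift (24 for SImode, 56 for
   DImode) extracts that byte. */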
37363 /* Emit parity intrinsic on TARGET_POPCNTB targets. DST is the
37364 target, and SRC is the argument operand. */
37366 void
37367 rs6000_emit_parity (rtx dst, rtx src)
37369 machine_mode mode = GET_MODE (dst);
37370 rtx tmp;
37372 tmp = gen_reg_rtx (mode);
37374 /* Use the PPC ISA 2.05 prtyw/prtyd instruction if we can. */
37375 if (TARGET_CMPB)
37377 if (mode == SImode)
37379 emit_insn (gen_popcntbsi2 (tmp, src));
37380 emit_insn (gen_paritysi2_cmpb (dst, tmp));
37382 else
37384 emit_insn (gen_popcntbdi2 (tmp, src));
37385 emit_insn (gen_paritydi2_cmpb (dst, tmp));
37387 return;
37390 if (mode == SImode)
37392 /* Is mult+shift >= shift+xor+shift+xor? */
37393 if (rs6000_cost->mulsi_const >= COSTS_N_INSNS (3))
37395 rtx tmp1, tmp2, tmp3, tmp4;
37397 tmp1 = gen_reg_rtx (SImode);
37398 emit_insn (gen_popcntbsi2 (tmp1, src));
37400 tmp2 = gen_reg_rtx (SImode);
37401 emit_insn (gen_lshrsi3 (tmp2, tmp1, GEN_INT (16)));
37402 tmp3 = gen_reg_rtx (SImode);
37403 emit_insn (gen_xorsi3 (tmp3, tmp1, tmp2));
37405 tmp4 = gen_reg_rtx (SImode);
37406 emit_insn (gen_lshrsi3 (tmp4, tmp3, GEN_INT (8)));
37407 emit_insn (gen_xorsi3 (tmp, tmp3, tmp4));
37409 else
37410 rs6000_emit_popcount (tmp, src);
37411 emit_insn (gen_andsi3 (dst, tmp, const1_rtx));
37413 else
37415 /* Is mult+shift >= shift+xor+shift+xor+shift+xor? */
37416 if (rs6000_cost->muldi >= COSTS_N_INSNS (5))
37418 rtx tmp1, tmp2, tmp3, tmp4, tmp5, tmp6;
37420 tmp1 = gen_reg_rtx (DImode);
37421 emit_insn (gen_popcntbdi2 (tmp1, src));
37423 tmp2 = gen_reg_rtx (DImode);
37424 emit_insn (gen_lshrdi3 (tmp2, tmp1, GEN_INT (32)));
37425 tmp3 = gen_reg_rtx (DImode);
37426 emit_insn (gen_xordi3 (tmp3, tmp1, tmp2));
37428 tmp4 = gen_reg_rtx (DImode);
37429 emit_insn (gen_lshrdi3 (tmp4, tmp3, GEN_INT (16)));
37430 tmp5 = gen_reg_rtx (DImode);
37431 emit_insn (gen_xordi3 (tmp5, tmp3, tmp4));
37433 tmp6 = gen_reg_rtx (DImode);
37434 emit_insn (gen_lshrdi3 (tmp6, tmp5, GEN_INT (8)));
37435 emit_insn (gen_xordi3 (tmp, tmp5, tmp6));
37437 else
37438 rs6000_emit_popcount (tmp, src);
37439 emit_insn (gen_anddi3 (dst, tmp, const1_rtx));
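/* In the shift/xor sequences above, popcntb first reduces each byte to
   its population count; xoring the value with right-shifted copies of
   itself then folds the byte counts together so that bit 0 of the low
   byte holds the parity of the whole word, which the final AND with 1
   extracts. */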
37443 /* Expand an Altivec constant permutation for little endian mode.
37444 There are two issues: First, the two input operands must be
37445 swapped so that together they form a double-wide array in LE
37446 order. Second, the vperm instruction has surprising behavior
37447 in LE mode: it interprets the elements of the source vectors
37448 in BE mode ("left to right") and interprets the elements of
37449 the destination vector in LE mode ("right to left"). To
37450 correct for this, we must subtract each element of the permute
37451 control vector from 31.
37453 For example, suppose we want to concatenate vr10 = {0, 1, 2, 3}
37454 with vr11 = {4, 5, 6, 7} and extract {0, 2, 4, 6} using a vperm.
37455 We place {0,1,2,3,8,9,10,11,16,17,18,19,24,25,26,27} in vr12 to
37456 serve as the permute control vector. Then, in BE mode,
37458 vperm 9,10,11,12
37460 places the desired result in vr9. However, in LE mode the
37461 vector contents will be
37463 vr10 = 00000003 00000002 00000001 00000000
37464 vr11 = 00000007 00000006 00000005 00000004
37466 The result of the vperm using the same permute control vector is
37468 vr9 = 05000000 07000000 01000000 03000000
37470 That is, the leftmost 4 bytes of vr10 are interpreted as the
37471 source for the rightmost 4 bytes of vr9, and so on.
37473 If we change the permute control vector to
37475 vr12 = {31,30,29,28,23,22,21,20,15,14,13,12,7,6,5,4}
37477 and issue
37479 vperm 9,11,10,12
37481 we get the desired
37483 vr9 = 00000006 00000004 00000002 00000000. */
37485 void
37486 altivec_expand_vec_perm_const_le (rtx operands[4])
37488 unsigned int i;
37489 rtx perm[16];
37490 rtx constv, unspec;
37491 rtx target = operands[0];
37492 rtx op0 = operands[1];
37493 rtx op1 = operands[2];
37494 rtx sel = operands[3];
37496 /* Unpack and adjust the constant selector. */
37497 for (i = 0; i < 16; ++i)
37499 rtx e = XVECEXP (sel, 0, i);
37500 unsigned int elt = 31 - (INTVAL (e) & 31);
37501 perm[i] = GEN_INT (elt);
37504 /* Expand to a permute, swapping the inputs and using the
37505 adjusted selector. */
37506 if (!REG_P (op0))
37507 op0 = force_reg (V16QImode, op0);
37508 if (!REG_P (op1))
37509 op1 = force_reg (V16QImode, op1);
37511 constv = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, perm));
37512 constv = force_reg (V16QImode, constv);
37513 unspec = gen_rtx_UNSPEC (V16QImode, gen_rtvec (3, op1, op0, constv),
37514 UNSPEC_VPERM);
37515 if (!REG_P (target))
37517 rtx tmp = gen_reg_rtx (V16QImode);
37518 emit_move_insn (tmp, unspec);
37519 unspec = tmp;
37522 emit_move_insn (target, unspec);
37525 /* Similarly to altivec_expand_vec_perm_const_le, we must adjust the
37526 permute control vector. But here it's not a constant, so we must
37527 generate a vector NAND or NOR to do the adjustment. */
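/* The complement works because vperm only looks at the low five bits
   of each selector byte, and on those bits ~e is the same as 31 - e,
   i.e. exactly the adjustment applied to the constant selector in
   altivec_expand_vec_perm_const_le above. */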
37529 void
37530 altivec_expand_vec_perm_le (rtx operands[4])
37532 rtx notx, iorx, unspec;
37533 rtx target = operands[0];
37534 rtx op0 = operands[1];
37535 rtx op1 = operands[2];
37536 rtx sel = operands[3];
37537 rtx tmp = target;
37538 rtx norreg = gen_reg_rtx (V16QImode);
37539 machine_mode mode = GET_MODE (target);
37541 /* Get everything in regs so the pattern matches. */
37542 if (!REG_P (op0))
37543 op0 = force_reg (mode, op0);
37544 if (!REG_P (op1))
37545 op1 = force_reg (mode, op1);
37546 if (!REG_P (sel))
37547 sel = force_reg (V16QImode, sel);
37548 if (!REG_P (target))
37549 tmp = gen_reg_rtx (mode);
37551 if (TARGET_P9_VECTOR)
37553 unspec = gen_rtx_UNSPEC (mode, gen_rtvec (3, op0, op1, sel),
37554 UNSPEC_VPERMR);
37556 else
37558 /* Invert the selector with a VNAND if available, else a VNOR.
37559 The VNAND is preferred for future fusion opportunities. */
37560 notx = gen_rtx_NOT (V16QImode, sel);
37561 iorx = (TARGET_P8_VECTOR
37562 ? gen_rtx_IOR (V16QImode, notx, notx)
37563 : gen_rtx_AND (V16QImode, notx, notx));
37564 emit_insn (gen_rtx_SET (norreg, iorx));
37566 /* Permute with operands reversed and adjusted selector. */
37567 unspec = gen_rtx_UNSPEC (mode, gen_rtvec (3, op1, op0, norreg),
37568 UNSPEC_VPERM);
37571 /* Copy into target, possibly by way of a register. */
37572 if (!REG_P (target))
37574 emit_move_insn (tmp, unspec);
37575 unspec = tmp;
37578 emit_move_insn (target, unspec);
37581 /* Expand an Altivec constant permutation. Return true if we match
37582 an efficient implementation; false to fall back to VPERM. */
37584 bool
37585 altivec_expand_vec_perm_const (rtx operands[4])
37587 struct altivec_perm_insn {
37588 HOST_WIDE_INT mask;
37589 enum insn_code impl;
37590 unsigned char perm[16];
37592 static const struct altivec_perm_insn patterns[] = {
37593 { OPTION_MASK_ALTIVEC, CODE_FOR_altivec_vpkuhum_direct,
37594 { 1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31 } },
37595 { OPTION_MASK_ALTIVEC, CODE_FOR_altivec_vpkuwum_direct,
37596 { 2, 3, 6, 7, 10, 11, 14, 15, 18, 19, 22, 23, 26, 27, 30, 31 } },
37597 { OPTION_MASK_ALTIVEC,
37598 (BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrghb_direct
37599 : CODE_FOR_altivec_vmrglb_direct),
37600 { 0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23 } },
37601 { OPTION_MASK_ALTIVEC,
37602 (BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrghh_direct
37603 : CODE_FOR_altivec_vmrglh_direct),
37604 { 0, 1, 16, 17, 2, 3, 18, 19, 4, 5, 20, 21, 6, 7, 22, 23 } },
37605 { OPTION_MASK_ALTIVEC,
37606 (BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrghw_direct
37607 : CODE_FOR_altivec_vmrglw_direct),
37608 { 0, 1, 2, 3, 16, 17, 18, 19, 4, 5, 6, 7, 20, 21, 22, 23 } },
37609 { OPTION_MASK_ALTIVEC,
37610 (BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrglb_direct
37611 : CODE_FOR_altivec_vmrghb_direct),
37612 { 8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31 } },
37613 { OPTION_MASK_ALTIVEC,
37614 (BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrglh_direct
37615 : CODE_FOR_altivec_vmrghh_direct),
37616 { 8, 9, 24, 25, 10, 11, 26, 27, 12, 13, 28, 29, 14, 15, 30, 31 } },
37617 { OPTION_MASK_ALTIVEC,
37618 (BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrglw_direct
37619 : CODE_FOR_altivec_vmrghw_direct),
37620 { 8, 9, 10, 11, 24, 25, 26, 27, 12, 13, 14, 15, 28, 29, 30, 31 } },
37621 { OPTION_MASK_P8_VECTOR, CODE_FOR_p8_vmrgew,
37622 { 0, 1, 2, 3, 16, 17, 18, 19, 8, 9, 10, 11, 24, 25, 26, 27 } },
37623 { OPTION_MASK_P8_VECTOR, CODE_FOR_p8_vmrgow,
37624 { 4, 5, 6, 7, 20, 21, 22, 23, 12, 13, 14, 15, 28, 29, 30, 31 } }
37627 unsigned int i, j, elt, which;
37628 unsigned char perm[16];
37629 rtx target, op0, op1, sel, x;
37630 bool one_vec;
37632 target = operands[0];
37633 op0 = operands[1];
37634 op1 = operands[2];
37635 sel = operands[3];
37637 /* Unpack the constant selector. */
37638 for (i = which = 0; i < 16; ++i)
37640 rtx e = XVECEXP (sel, 0, i);
37641 elt = INTVAL (e) & 31;
37642 which |= (elt < 16 ? 1 : 2);
37643 perm[i] = elt;
37646 /* Simplify the constant selector based on operands. */
37647 switch (which)
37649 default:
37650 gcc_unreachable ();
37652 case 3:
37653 one_vec = false;
37654 if (!rtx_equal_p (op0, op1))
37655 break;
37656 /* FALLTHRU */
37658 case 2:
37659 for (i = 0; i < 16; ++i)
37660 perm[i] &= 15;
37661 op0 = op1;
37662 one_vec = true;
37663 break;
37665 case 1:
37666 op1 = op0;
37667 one_vec = true;
37668 break;
37671 /* Look for splat patterns. */
37672 if (one_vec)
37674 elt = perm[0];
37676 for (i = 0; i < 16; ++i)
37677 if (perm[i] != elt)
37678 break;
37679 if (i == 16)
37681 if (!BYTES_BIG_ENDIAN)
37682 elt = 15 - elt;
37683 emit_insn (gen_altivec_vspltb_direct (target, op0, GEN_INT (elt)));
37684 return true;
37687 if (elt % 2 == 0)
37689 for (i = 0; i < 16; i += 2)
37690 if (perm[i] != elt || perm[i + 1] != elt + 1)
37691 break;
37692 if (i == 16)
37694 int field = BYTES_BIG_ENDIAN ? elt / 2 : 7 - elt / 2;
37695 x = gen_reg_rtx (V8HImode);
37696 emit_insn (gen_altivec_vsplth_direct (x, gen_lowpart (V8HImode, op0),
37697 GEN_INT (field)));
37698 emit_move_insn (target, gen_lowpart (V16QImode, x));
37699 return true;
37703 if (elt % 4 == 0)
37705 for (i = 0; i < 16; i += 4)
37706 if (perm[i] != elt
37707 || perm[i + 1] != elt + 1
37708 || perm[i + 2] != elt + 2
37709 || perm[i + 3] != elt + 3)
37710 break;
37711 if (i == 16)
37713 int field = BYTES_BIG_ENDIAN ? elt / 4 : 3 - elt / 4;
37714 x = gen_reg_rtx (V4SImode);
37715 emit_insn (gen_altivec_vspltw_direct (x, gen_lowpart (V4SImode, op0),
37716 GEN_INT (field)));
37717 emit_move_insn (target, gen_lowpart (V16QImode, x));
37718 return true;
37723 /* Look for merge and pack patterns. */
37724 for (j = 0; j < ARRAY_SIZE (patterns); ++j)
37726 bool swapped;
37728 if ((patterns[j].mask & rs6000_isa_flags) == 0)
37729 continue;
37731 elt = patterns[j].perm[0];
37732 if (perm[0] == elt)
37733 swapped = false;
37734 else if (perm[0] == elt + 16)
37735 swapped = true;
37736 else
37737 continue;
37738 for (i = 1; i < 16; ++i)
37740 elt = patterns[j].perm[i];
37741 if (swapped)
37742 elt = (elt >= 16 ? elt - 16 : elt + 16);
37743 else if (one_vec && elt >= 16)
37744 elt -= 16;
37745 if (perm[i] != elt)
37746 break;
37748 if (i == 16)
37750 enum insn_code icode = patterns[j].impl;
37751 machine_mode omode = insn_data[icode].operand[0].mode;
37752 machine_mode imode = insn_data[icode].operand[1].mode;
37754 /* For little-endian, don't use vpkuwum and vpkuhum if the
37755 underlying vector type is not V4SI and V8HI, respectively.
37756 For example, using vpkuwum with a V8HI picks up the even
37757 halfwords (BE numbering) when the even halfwords (LE
37758 numbering) are what we need. */
37759 if (!BYTES_BIG_ENDIAN
37760 && icode == CODE_FOR_altivec_vpkuwum_direct
37761 && ((GET_CODE (op0) == REG
37762 && GET_MODE (op0) != V4SImode)
37763 || (GET_CODE (op0) == SUBREG
37764 && GET_MODE (XEXP (op0, 0)) != V4SImode)))
37765 continue;
37766 if (!BYTES_BIG_ENDIAN
37767 && icode == CODE_FOR_altivec_vpkuhum_direct
37768 && ((GET_CODE (op0) == REG
37769 && GET_MODE (op0) != V8HImode)
37770 || (GET_CODE (op0) == SUBREG
37771 && GET_MODE (XEXP (op0, 0)) != V8HImode)))
37772 continue;
37774 /* For little-endian, the two input operands must be swapped
37775 (or swapped back) to ensure proper right-to-left numbering
37776 from 0 to 2N-1. */
37777 if (swapped ^ !BYTES_BIG_ENDIAN)
37778 std::swap (op0, op1);
37779 if (imode != V16QImode)
37781 op0 = gen_lowpart (imode, op0);
37782 op1 = gen_lowpart (imode, op1);
37784 if (omode == V16QImode)
37785 x = target;
37786 else
37787 x = gen_reg_rtx (omode);
37788 emit_insn (GEN_FCN (icode) (x, op0, op1));
37789 if (omode != V16QImode)
37790 emit_move_insn (target, gen_lowpart (V16QImode, x));
37791 return true;
37795 if (!BYTES_BIG_ENDIAN)
37797 altivec_expand_vec_perm_const_le (operands);
37798 return true;
37801 return false;
37804 /* Expand a Paired Single, VSX Permute Doubleword, or SPE constant permutation.
37805 Return true if we match an efficient implementation. */
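/* PERM0 and PERM1 are two-bit indices into the concatenation of OP0
   and OP1: bit 1 selects the operand and bit 0 the element within it,
   so e.g. perm0 = 0, perm1 = 3 picks element 0 of OP0 and element 1
   of OP1. */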
37807 static bool
37808 rs6000_expand_vec_perm_const_1 (rtx target, rtx op0, rtx op1,
37809 unsigned char perm0, unsigned char perm1)
37811 rtx x;
37813 /* If both selectors come from the same operand, fold to single op. */
37814 if ((perm0 & 2) == (perm1 & 2))
37816 if (perm0 & 2)
37817 op0 = op1;
37818 else
37819 op1 = op0;
37821 /* If both operands are equal, fold to simpler permutation. */
37822 if (rtx_equal_p (op0, op1))
37824 perm0 = perm0 & 1;
37825 perm1 = (perm1 & 1) + 2;
37827 /* If the first selector comes from the second operand, swap. */
37828 else if (perm0 & 2)
37830 if (perm1 & 2)
37831 return false;
37832 perm0 -= 2;
37833 perm1 += 2;
37834 std::swap (op0, op1);
37836 /* If the second selector does not come from the second operand, fail. */
37837 else if ((perm1 & 2) == 0)
37838 return false;
37840 /* Success! */
37841 if (target != NULL)
37843 machine_mode vmode, dmode;
37844 rtvec v;
37846 vmode = GET_MODE (target);
37847 gcc_assert (GET_MODE_NUNITS (vmode) == 2);
37848 dmode = mode_for_vector (GET_MODE_INNER (vmode), 4);
37849 x = gen_rtx_VEC_CONCAT (dmode, op0, op1);
37850 v = gen_rtvec (2, GEN_INT (perm0), GEN_INT (perm1));
37851 x = gen_rtx_VEC_SELECT (vmode, x, gen_rtx_PARALLEL (VOIDmode, v));
37852 emit_insn (gen_rtx_SET (target, x));
37854 return true;
37857 bool
37858 rs6000_expand_vec_perm_const (rtx operands[4])
37860 rtx target, op0, op1, sel;
37861 unsigned char perm0, perm1;
37863 target = operands[0];
37864 op0 = operands[1];
37865 op1 = operands[2];
37866 sel = operands[3];
37868 /* Unpack the constant selector. */
37869 perm0 = INTVAL (XVECEXP (sel, 0, 0)) & 3;
37870 perm1 = INTVAL (XVECEXP (sel, 0, 1)) & 3;
37872 return rs6000_expand_vec_perm_const_1 (target, op0, op1, perm0, perm1);
37875 /* Test whether a constant permutation is supported. */
37877 static bool
37878 rs6000_vectorize_vec_perm_const_ok (machine_mode vmode,
37879 const unsigned char *sel)
37881 /* AltiVec (and thus VSX) can handle arbitrary permutations. */
37882 if (TARGET_ALTIVEC)
37883 return true;
37885 /* Check for ps_merge* or evmerge* insns. */
37886 if ((TARGET_PAIRED_FLOAT && vmode == V2SFmode)
37887 || (TARGET_SPE && vmode == V2SImode))
37889 rtx op0 = gen_raw_REG (vmode, LAST_VIRTUAL_REGISTER + 1);
37890 rtx op1 = gen_raw_REG (vmode, LAST_VIRTUAL_REGISTER + 2);
37891 return rs6000_expand_vec_perm_const_1 (NULL, op0, op1, sel[0], sel[1]);
37894 return false;
37897 /* A subroutine for rs6000_expand_extract_even & rs6000_expand_interleave. */
37899 static void
37900 rs6000_do_expand_vec_perm (rtx target, rtx op0, rtx op1,
37901 machine_mode vmode, unsigned nelt, rtx perm[])
37903 machine_mode imode;
37904 rtx x;
37906 imode = vmode;
37907 if (GET_MODE_CLASS (vmode) != MODE_VECTOR_INT)
37909 imode = mode_for_size (GET_MODE_UNIT_BITSIZE (vmode), MODE_INT, 0);
37910 imode = mode_for_vector (imode, nelt);
37913 x = gen_rtx_CONST_VECTOR (imode, gen_rtvec_v (nelt, perm));
37914 x = expand_vec_perm (vmode, op0, op1, x, target);
37915 if (x != target)
37916 emit_move_insn (target, x);
37919 /* Expand an extract even operation. */
37921 void
37922 rs6000_expand_extract_even (rtx target, rtx op0, rtx op1)
37924 machine_mode vmode = GET_MODE (target);
37925 unsigned i, nelt = GET_MODE_NUNITS (vmode);
37926 rtx perm[16];
37928 for (i = 0; i < nelt; i++)
37929 perm[i] = GEN_INT (i * 2);
37931 rs6000_do_expand_vec_perm (target, op0, op1, vmode, nelt, perm);
37934 /* Expand a vector interleave operation. */
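/* E.g., for a 4-element vector, HIGHP produces the selector
   {0, 4, 1, 5} (the high halves of the two inputs interleaved) and
   !HIGHP produces {2, 6, 3, 7}. */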
37936 void
37937 rs6000_expand_interleave (rtx target, rtx op0, rtx op1, bool highp)
37939 machine_mode vmode = GET_MODE (target);
37940 unsigned i, high, nelt = GET_MODE_NUNITS (vmode);
37941 rtx perm[16];
37943 high = (highp ? 0 : nelt / 2);
37944 for (i = 0; i < nelt / 2; i++)
37946 perm[i * 2] = GEN_INT (i + high);
37947 perm[i * 2 + 1] = GEN_INT (i + nelt + high);
37950 rs6000_do_expand_vec_perm (target, op0, op1, vmode, nelt, perm);
37953 /* Scale a V2DF vector SRC by 2**SCALE and place the result in TGT. */
37954 void
37955 rs6000_scale_v2df (rtx tgt, rtx src, int scale)
37957 HOST_WIDE_INT hwi_scale (scale);
37958 REAL_VALUE_TYPE r_pow;
37959 rtvec v = rtvec_alloc (2);
37960 rtx elt;
37961 rtx scale_vec = gen_reg_rtx (V2DFmode);
37962 (void)real_powi (&r_pow, DFmode, &dconst2, hwi_scale);
37963 elt = const_double_from_real_value (r_pow, DFmode);
37964 RTVEC_ELT (v, 0) = elt;
37965 RTVEC_ELT (v, 1) = elt;
37966 rs6000_expand_vector_init (scale_vec, gen_rtx_PARALLEL (V2DFmode, v));
37967 emit_insn (gen_mulv2df3 (tgt, src, scale_vec));
37970 /* Return an RTX representing where to find the function value of a
37971 function returning MODE. */
37972 static rtx
37973 rs6000_complex_function_value (machine_mode mode)
37975 unsigned int regno;
37976 rtx r1, r2;
37977 machine_mode inner = GET_MODE_INNER (mode);
37978 unsigned int inner_bytes = GET_MODE_UNIT_SIZE (mode);
37980 if (TARGET_FLOAT128_TYPE
37981 && (mode == KCmode
37982 || (mode == TCmode && TARGET_IEEEQUAD)))
37983 regno = ALTIVEC_ARG_RETURN;
37985 else if (FLOAT_MODE_P (mode) && TARGET_HARD_FLOAT && TARGET_FPRS)
37986 regno = FP_ARG_RETURN;
37988 else
37990 regno = GP_ARG_RETURN;
37992 /* 32-bit is OK since it'll go in r3/r4. */
37993 if (TARGET_32BIT && inner_bytes >= 4)
37994 return gen_rtx_REG (mode, regno);
37997 if (inner_bytes >= 8)
37998 return gen_rtx_REG (mode, regno);
38000 r1 = gen_rtx_EXPR_LIST (inner, gen_rtx_REG (inner, regno),
38001 const0_rtx);
38002 r2 = gen_rtx_EXPR_LIST (inner, gen_rtx_REG (inner, regno + 1),
38003 GEN_INT (inner_bytes));
38004 return gen_rtx_PARALLEL (mode, gen_rtvec (2, r1, r2));
38007 /* Return an rtx describing a return value of MODE as a PARALLEL
38008 in N_ELTS registers, each of mode ELT_MODE, starting at REGNO,
38009 stride REG_STRIDE. */
38011 static rtx
38012 rs6000_parallel_return (machine_mode mode,
38013 int n_elts, machine_mode elt_mode,
38014 unsigned int regno, unsigned int reg_stride)
38016 rtx par = gen_rtx_PARALLEL (mode, rtvec_alloc (n_elts));
38018 int i;
38019 for (i = 0; i < n_elts; i++)
38021 rtx r = gen_rtx_REG (elt_mode, regno);
38022 rtx off = GEN_INT (i * GET_MODE_SIZE (elt_mode));
38023 XVECEXP (par, 0, i) = gen_rtx_EXPR_LIST (VOIDmode, r, off);
38024 regno += reg_stride;
38027 return par;
38030 /* Target hook for TARGET_FUNCTION_VALUE.
38032 On the SPE, both FPs and vectors are returned in r3.
38034 On RS/6000 an integer value is in r3 and a floating-point value is in
38035 fp1, unless -msoft-float. */
38037 static rtx
38038 rs6000_function_value (const_tree valtype,
38039 const_tree fn_decl_or_type ATTRIBUTE_UNUSED,
38040 bool outgoing ATTRIBUTE_UNUSED)
38042 machine_mode mode;
38043 unsigned int regno;
38044 machine_mode elt_mode;
38045 int n_elts;
38047 /* Special handling for structs in darwin64. */
38048 if (TARGET_MACHO
38049 && rs6000_darwin64_struct_check_p (TYPE_MODE (valtype), valtype))
38051 CUMULATIVE_ARGS valcum;
38052 rtx valret;
38054 valcum.words = 0;
38055 valcum.fregno = FP_ARG_MIN_REG;
38056 valcum.vregno = ALTIVEC_ARG_MIN_REG;
38057 /* Do a trial code generation as if this were going to be passed as
38058 an argument; if any part goes in memory, we return NULL. */
38059 valret = rs6000_darwin64_record_arg (&valcum, valtype, true, /* retval= */ true);
38060 if (valret)
38061 return valret;
38062 /* Otherwise fall through to standard ABI rules. */
38065 mode = TYPE_MODE (valtype);
38067 /* The ELFv2 ABI returns homogeneous VFP aggregates in registers. */
38068 if (rs6000_discover_homogeneous_aggregate (mode, valtype, &elt_mode, &n_elts))
38070 int first_reg, n_regs;
38072 if (SCALAR_FLOAT_MODE_NOT_VECTOR_P (elt_mode))
38074 /* _Decimal128 must use even/odd register pairs. */
38075 first_reg = (elt_mode == TDmode) ? FP_ARG_RETURN + 1 : FP_ARG_RETURN;
38076 n_regs = (GET_MODE_SIZE (elt_mode) + 7) >> 3;
38078 else
38080 first_reg = ALTIVEC_ARG_RETURN;
38081 n_regs = 1;
38084 return rs6000_parallel_return (mode, n_elts, elt_mode, first_reg, n_regs);
38087 /* Some return value types need to be split in the 32-bit ABI with -mpowerpc64. */
38088 if (TARGET_32BIT && TARGET_POWERPC64)
38089 switch (mode)
38091 default:
38092 break;
38093 case DImode:
38094 case SCmode:
38095 case DCmode:
38096 case TCmode:
38097 int count = GET_MODE_SIZE (mode) / 4;
38098 return rs6000_parallel_return (mode, count, SImode, GP_ARG_RETURN, 1);
38101 if ((INTEGRAL_TYPE_P (valtype)
38102 && GET_MODE_BITSIZE (mode) < (TARGET_32BIT ? 32 : 64))
38103 || POINTER_TYPE_P (valtype))
38104 mode = TARGET_32BIT ? SImode : DImode;
38106 if (DECIMAL_FLOAT_MODE_P (mode) && TARGET_HARD_FLOAT && TARGET_FPRS)
38107 /* _Decimal128 must use an even/odd register pair. */
38108 regno = (mode == TDmode) ? FP_ARG_RETURN + 1 : FP_ARG_RETURN;
38109 else if (SCALAR_FLOAT_TYPE_P (valtype) && TARGET_HARD_FLOAT && TARGET_FPRS
38110 && !FLOAT128_VECTOR_P (mode)
38111 && ((TARGET_SINGLE_FLOAT && (mode == SFmode)) || TARGET_DOUBLE_FLOAT))
38112 regno = FP_ARG_RETURN;
38113 else if (TREE_CODE (valtype) == COMPLEX_TYPE
38114 && targetm.calls.split_complex_arg)
38115 return rs6000_complex_function_value (mode);
38116 /* VSX is a superset of Altivec and adds V2DImode/V2DFmode. Since the same
38117 return register is used in both cases, and we won't see V2DImode/V2DFmode
38118 for pure altivec, combine the two cases. */
38119 else if ((TREE_CODE (valtype) == VECTOR_TYPE || FLOAT128_VECTOR_P (mode))
38120 && TARGET_ALTIVEC && TARGET_ALTIVEC_ABI
38121 && ALTIVEC_OR_VSX_VECTOR_MODE (mode))
38122 regno = ALTIVEC_ARG_RETURN;
38123 else if (TARGET_E500_DOUBLE && TARGET_HARD_FLOAT
38124 && (mode == DFmode || mode == DCmode
38125 || FLOAT128_IBM_P (mode) || mode == TCmode))
38126 return spe_build_register_parallel (mode, GP_ARG_RETURN);
38127 else
38128 regno = GP_ARG_RETURN;
38130 return gen_rtx_REG (mode, regno);
38133 /* Define how to find the value returned by a library function
38134 assuming the value has mode MODE. */
38136 rs6000_libcall_value (machine_mode mode)
38138 unsigned int regno;
38140 /* A long long return value needs to be split in the 32-bit ABI with -mpowerpc64. */
38141 if (TARGET_32BIT && TARGET_POWERPC64 && mode == DImode)
38142 return rs6000_parallel_return (mode, 2, SImode, GP_ARG_RETURN, 1);
38144 if (DECIMAL_FLOAT_MODE_P (mode) && TARGET_HARD_FLOAT && TARGET_FPRS)
38145 /* _Decimal128 must use an even/odd register pair. */
38146 regno = (mode == TDmode) ? FP_ARG_RETURN + 1 : FP_ARG_RETURN;
38147 else if (SCALAR_FLOAT_MODE_NOT_VECTOR_P (mode)
38148 && TARGET_HARD_FLOAT && TARGET_FPRS
38149 && ((TARGET_SINGLE_FLOAT && mode == SFmode) || TARGET_DOUBLE_FLOAT))
38150 regno = FP_ARG_RETURN;
38151 /* VSX is a superset of Altivec and adds V2DImode/V2DFmode. Since the same
38152 return register is used in both cases, and we won't see V2DImode/V2DFmode
38153 for pure altivec, combine the two cases. */
38154 else if (ALTIVEC_OR_VSX_VECTOR_MODE (mode)
38155 && TARGET_ALTIVEC && TARGET_ALTIVEC_ABI)
38156 regno = ALTIVEC_ARG_RETURN;
38157 else if (COMPLEX_MODE_P (mode) && targetm.calls.split_complex_arg)
38158 return rs6000_complex_function_value (mode);
38159 else if (TARGET_E500_DOUBLE && TARGET_HARD_FLOAT
38160 && (mode == DFmode || mode == DCmode
38161 || FLOAT128_IBM_P (mode) || mode == TCmode))
38162 return spe_build_register_parallel (mode, GP_ARG_RETURN);
38163 else
38164 regno = GP_ARG_RETURN;
38166 return gen_rtx_REG (mode, regno);
38170 /* Return true if we use LRA instead of reload pass. */
38171 static bool
38172 rs6000_lra_p (void)
38174 return TARGET_LRA;
38177 /* Compute register pressure classes. We implement the target hook to avoid
38178 IRA picking something like NON_SPECIAL_REGS as a pressure class, which can
38179 lead to incorrect estimates of the number of available registers and
38180 therefore increased register pressure/spill. */
38181 static int
38182 rs6000_compute_pressure_classes (enum reg_class *pressure_classes)
38184 int n;
38186 n = 0;
38187 pressure_classes[n++] = GENERAL_REGS;
38188 if (TARGET_VSX)
38189 pressure_classes[n++] = VSX_REGS;
38190 else
38192 if (TARGET_ALTIVEC)
38193 pressure_classes[n++] = ALTIVEC_REGS;
38194 if (TARGET_HARD_FLOAT && TARGET_FPRS)
38195 pressure_classes[n++] = FLOAT_REGS;
38197 pressure_classes[n++] = CR_REGS;
38198 pressure_classes[n++] = SPECIAL_REGS;
38200 return n;
38203 /* Given FROM and TO register numbers, say whether this elimination is allowed.
38204 Frame pointer elimination is automatically handled.
38206 For the RS/6000, if frame pointer elimination is being done, we would like
38207 to convert ap into fp, not sp.
38209 We need r30 if -mminimal-toc was specified, and there are constant pool
38210 references. */
38212 static bool
38213 rs6000_can_eliminate (const int from, const int to)
38215 return (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM
38216 ? ! frame_pointer_needed
38217 : from == RS6000_PIC_OFFSET_TABLE_REGNUM
38218 ? ! TARGET_MINIMAL_TOC || TARGET_NO_TOC
38219 || constant_pool_empty_p ()
38220 : true);
38223 /* Define the offset between two registers, FROM to be eliminated and its
38224 replacement TO, at the start of a routine. */
38225 HOST_WIDE_INT
38226 rs6000_initial_elimination_offset (int from, int to)
38228 rs6000_stack_t *info = rs6000_stack_info ();
38229 HOST_WIDE_INT offset;
38231 if (from == HARD_FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
38232 offset = info->push_p ? 0 : -info->total_size;
38233 else if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
38235 offset = info->push_p ? 0 : -info->total_size;
38236 if (FRAME_GROWS_DOWNWARD)
38237 offset += info->fixed_size + info->vars_size + info->parm_size;
38239 else if (from == FRAME_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
38240 offset = FRAME_GROWS_DOWNWARD
38241 ? info->fixed_size + info->vars_size + info->parm_size
38242 : 0;
38243 else if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
38244 offset = info->total_size;
38245 else if (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
38246 offset = info->push_p ? info->total_size : 0;
38247 else if (from == RS6000_PIC_OFFSET_TABLE_REGNUM)
38248 offset = 0;
38249 else
38250 gcc_unreachable ();
38252 return offset;
38255 static rtx
38256 rs6000_dwarf_register_span (rtx reg)
38258 rtx parts[8];
38259 int i, words;
38260 unsigned regno = REGNO (reg);
38261 machine_mode mode = GET_MODE (reg);
38263 if (TARGET_SPE
38264 && regno < 32
38265 && (SPE_VECTOR_MODE (GET_MODE (reg))
38266 || (TARGET_E500_DOUBLE && FLOAT_MODE_P (mode)
38267 && mode != SFmode && mode != SDmode && mode != SCmode)))
38268 ;
38269 else
38270 return NULL_RTX;
38272 regno = REGNO (reg);
38274 /* The duality of the SPE register size wreaks all kinds of havoc.
38275 This is a way of distinguishing r0 in 32-bits from r0 in
38276 64-bits. */
38277 words = (GET_MODE_SIZE (mode) + UNITS_PER_FP_WORD - 1) / UNITS_PER_FP_WORD;
38278 gcc_assert (words <= 4);
38279 for (i = 0; i < words; i++, regno++)
38281 if (BYTES_BIG_ENDIAN)
38283 parts[2 * i] = gen_rtx_REG (SImode, regno + FIRST_SPE_HIGH_REGNO);
38284 parts[2 * i + 1] = gen_rtx_REG (SImode, regno);
38286 else
38288 parts[2 * i] = gen_rtx_REG (SImode, regno);
38289 parts[2 * i + 1] = gen_rtx_REG (SImode, regno + FIRST_SPE_HIGH_REGNO);
38293 return gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (words * 2, parts));
38296 /* Fill in sizes for SPE register high parts in table used by unwinder. */
38298 static void
38299 rs6000_init_dwarf_reg_sizes_extra (tree address)
38301 if (TARGET_SPE)
38303 int i;
38304 machine_mode mode = TYPE_MODE (char_type_node);
38305 rtx addr = expand_expr (address, NULL_RTX, VOIDmode, EXPAND_NORMAL);
38306 rtx mem = gen_rtx_MEM (BLKmode, addr);
38307 rtx value = gen_int_mode (4, mode);
38309 for (i = FIRST_SPE_HIGH_REGNO; i < LAST_SPE_HIGH_REGNO+1; i++)
38311 int column = DWARF_REG_TO_UNWIND_COLUMN
38312 (DWARF2_FRAME_REG_OUT (DWARF_FRAME_REGNUM (i), true));
38313 HOST_WIDE_INT offset = column * GET_MODE_SIZE (mode);
38315 emit_move_insn (adjust_address (mem, mode, offset), value);
38319 if (TARGET_MACHO && ! TARGET_ALTIVEC)
38321 int i;
38322 machine_mode mode = TYPE_MODE (char_type_node);
38323 rtx addr = expand_expr (address, NULL_RTX, VOIDmode, EXPAND_NORMAL);
38324 rtx mem = gen_rtx_MEM (BLKmode, addr);
38325 rtx value = gen_int_mode (16, mode);
38327 /* On Darwin, libgcc may be built to run on both G3 and G4/5.
38328 The unwinder still needs to know the size of Altivec registers. */
38330 for (i = FIRST_ALTIVEC_REGNO; i < LAST_ALTIVEC_REGNO+1; i++)
38332 int column = DWARF_REG_TO_UNWIND_COLUMN
38333 (DWARF2_FRAME_REG_OUT (DWARF_FRAME_REGNUM (i), true));
38334 HOST_WIDE_INT offset = column * GET_MODE_SIZE (mode);
38336 emit_move_insn (adjust_address (mem, mode, offset), value);
38341 /* Map internal gcc register numbers to debug format register numbers.
38342 FORMAT specifies the type of debug register number to use:
38343 0 -- debug information, except for frame-related sections
38344 1 -- DWARF .debug_frame section
38345 2 -- DWARF .eh_frame section */
38347 unsigned int
38348 rs6000_dbx_register_number (unsigned int regno, unsigned int format)
38350 /* We never use the GCC internal number for SPE high registers.
38351 Those are mapped to the 1200..1231 range for all debug formats. */
38352 if (SPE_HIGH_REGNO_P (regno))
38353 return regno - FIRST_SPE_HIGH_REGNO + 1200;
38355 /* Except for the above, we use the internal number for non-DWARF
38356 debug information, and also for .eh_frame. */
38357 if ((format == 0 && write_symbols != DWARF2_DEBUG) || format == 2)
38358 return regno;
38360 /* On some platforms, we use the standard DWARF register
38361 numbering for .debug_info and .debug_frame. */
38362 #ifdef RS6000_USE_DWARF_NUMBERING
38363 if (regno <= 63)
38364 return regno;
38365 if (regno == LR_REGNO)
38366 return 108;
38367 if (regno == CTR_REGNO)
38368 return 109;
38369 /* Special handling for CR for .debug_frame: rs6000_emit_prologue has
38370 translated any combination of CR2, CR3, CR4 saves to a save of CR2.
38371 The actual code emitted saves the whole of CR, so we map CR2_REGNO
38372 to the DWARF reg for CR. */
38373 if (format == 1 && regno == CR2_REGNO)
38374 return 64;
38375 if (CR_REGNO_P (regno))
38376 return regno - CR0_REGNO + 86;
38377 if (regno == CA_REGNO)
38378 return 101; /* XER */
38379 if (ALTIVEC_REGNO_P (regno))
38380 return regno - FIRST_ALTIVEC_REGNO + 1124;
38381 if (regno == VRSAVE_REGNO)
38382 return 356;
38383 if (regno == VSCR_REGNO)
38384 return 67;
38385 if (regno == SPE_ACC_REGNO)
38386 return 99;
38387 if (regno == SPEFSCR_REGNO)
38388 return 612;
38389 #endif
38390 return regno;
38393 /* target hook eh_return_filter_mode */
38394 static machine_mode
38395 rs6000_eh_return_filter_mode (void)
38397 return TARGET_32BIT ? SImode : word_mode;
38400 /* Target hook for scalar_mode_supported_p. */
38401 static bool
38402 rs6000_scalar_mode_supported_p (machine_mode mode)
38404 /* -m32 does not support TImode. This is the default, from
38405 default_scalar_mode_supported_p. For -m32 -mpowerpc64 we want the
38406 same ABI as for -m32. But default_scalar_mode_supported_p allows
38407 integer modes of precision 2 * BITS_PER_WORD, which matches TImode
38408 for -mpowerpc64. */
38409 if (TARGET_32BIT && mode == TImode)
38410 return false;
38412 if (DECIMAL_FLOAT_MODE_P (mode))
38413 return default_decimal_float_supported_p ();
38414 else if (TARGET_FLOAT128_TYPE && (mode == KFmode || mode == IFmode))
38415 return true;
38416 else
38417 return default_scalar_mode_supported_p (mode);
38420 /* Target hook for vector_mode_supported_p. */
38421 static bool
38422 rs6000_vector_mode_supported_p (machine_mode mode)
38425 if (TARGET_PAIRED_FLOAT && PAIRED_VECTOR_MODE (mode))
38426 return true;
38428 if (TARGET_SPE && SPE_VECTOR_MODE (mode))
38429 return true;
38431 /* There is no vector form for IEEE 128-bit. If we return true for IEEE
38432 128-bit, the compiler might try to widen IEEE 128-bit to IBM
38433 double-double. */
38434 else if (VECTOR_MEM_ALTIVEC_OR_VSX_P (mode) && !FLOAT128_IEEE_P (mode))
38435 return true;
38437 else
38438 return false;
38441 /* Target hook for floatn_mode. */
38442 static machine_mode
38443 rs6000_floatn_mode (int n, bool extended)
38445 if (extended)
38447 switch (n)
38449 case 32:
38450 return DFmode;
38452 case 64:
38453 if (TARGET_FLOAT128_KEYWORD)
38454 return (FLOAT128_IEEE_P (TFmode)) ? TFmode : KFmode;
38455 else
38456 return VOIDmode;
38458 case 128:
38459 return VOIDmode;
38461 default:
38462 /* Those are the only valid _FloatNx types. */
38463 gcc_unreachable ();
38466 else
38468 switch (n)
38470 case 32:
38471 return SFmode;
38473 case 64:
38474 return DFmode;
38476 case 128:
38477 if (TARGET_FLOAT128_KEYWORD)
38478 return (FLOAT128_IEEE_P (TFmode)) ? TFmode : KFmode;
38479 else
38480 return VOIDmode;
38482 default:
38483 return VOIDmode;
38489 /* Target hook for c_mode_for_suffix. */
38490 static machine_mode
38491 rs6000_c_mode_for_suffix (char suffix)
38493 if (TARGET_FLOAT128_TYPE)
38495 if (suffix == 'q' || suffix == 'Q')
38496 return (FLOAT128_IEEE_P (TFmode)) ? TFmode : KFmode;
38498 /* At the moment, we are not defining a suffix for IBM extended double.
38499 If/when the default for -mabi=ieeelongdouble is changed, and we want
38500 to support __ibm128 constants in legacy library code, we may need to
38501 re-evaluate this decision. Currently, c-lex.c only supports 'w' and
38502 'q' as machine dependent suffixes. The x86_64 port uses 'w' for
38503 __float80 constants. */
38506 return VOIDmode;
38509 /* Target hook for invalid_arg_for_unprototyped_fn. */
38510 static const char *
38511 invalid_arg_for_unprototyped_fn (const_tree typelist, const_tree funcdecl, const_tree val)
38513 return (!rs6000_darwin64_abi
38514 && typelist == 0
38515 && TREE_CODE (TREE_TYPE (val)) == VECTOR_TYPE
38516 && (funcdecl == NULL_TREE
38517 || (TREE_CODE (funcdecl) == FUNCTION_DECL
38518 && DECL_BUILT_IN_CLASS (funcdecl) != BUILT_IN_MD)))
38519 ? N_("AltiVec argument passed to unprototyped function")
38520 : NULL;
38523 /* For TARGET_SECURE_PLT 32-bit PIC code we can save the PIC register
38524 setup by using the hidden function __stack_chk_fail_local instead of
38525 calling __stack_chk_fail directly. Otherwise it is better to call
38526 __stack_chk_fail directly. */
38528 static tree ATTRIBUTE_UNUSED
38529 rs6000_stack_protect_fail (void)
38531 return (DEFAULT_ABI == ABI_V4 && TARGET_SECURE_PLT && flag_pic)
38532 ? default_hidden_stack_protect_fail ()
38533 : default_external_stack_protect_fail ();
38536 void
38537 rs6000_final_prescan_insn (rtx_insn *insn, rtx *operand ATTRIBUTE_UNUSED,
38538 int num_operands ATTRIBUTE_UNUSED)
38540 if (rs6000_warn_cell_microcode)
38542 const char *temp;
38543 int insn_code_number = recog_memoized (insn);
38544 location_t location = INSN_LOCATION (insn);
38546 /* Punt on insns we cannot recognize. */
38547 if (insn_code_number < 0)
38548 return;
38550 temp = get_insn_template (insn_code_number, insn);
38552 if (get_attr_cell_micro (insn) == CELL_MICRO_ALWAYS)
38553 warning_at (location, OPT_mwarn_cell_microcode,
38554 "emitting microcode insn %s\t[%s] #%d",
38555 temp, insn_data[INSN_CODE (insn)].name, INSN_UID (insn));
38556 else if (get_attr_cell_micro (insn) == CELL_MICRO_CONDITIONAL)
38557 warning_at (location, OPT_mwarn_cell_microcode,
38558 "emitting conditional microcode insn %s\t[%s] #%d",
38559 temp, insn_data[INSN_CODE (insn)].name, INSN_UID (insn));
38563 /* Implement the TARGET_ASAN_SHADOW_OFFSET hook. */
38565 #if TARGET_ELF
38566 static unsigned HOST_WIDE_INT
38567 rs6000_asan_shadow_offset (void)
38569 return (unsigned HOST_WIDE_INT) 1 << (TARGET_64BIT ? 41 : 29);
38571 #endif
38573 /* Mask options that we want to support inside of attribute((target)) and
38574 #pragma GCC target operations. Note, we do not include things like
38575 64/32-bit, endianness, hard/soft floating point, etc. that would have
38576 different calling sequences. */
38578 struct rs6000_opt_mask {
38579 const char *name; /* option name */
38580 HOST_WIDE_INT mask; /* mask to set */
38581 bool invert; /* invert sense of mask */
38582 bool valid_target; /* option is a target option */
38585 static struct rs6000_opt_mask const rs6000_opt_masks[] =
38587 { "altivec", OPTION_MASK_ALTIVEC, false, true },
38588 { "cmpb", OPTION_MASK_CMPB, false, true },
38589 { "crypto", OPTION_MASK_CRYPTO, false, true },
38590 { "direct-move", OPTION_MASK_DIRECT_MOVE, false, true },
38591 { "dlmzb", OPTION_MASK_DLMZB, false, true },
38592 { "efficient-unaligned-vsx", OPTION_MASK_EFFICIENT_UNALIGNED_VSX,
38593 false, true },
38594 { "float128", OPTION_MASK_FLOAT128_KEYWORD, false, false },
38595 { "float128-type", OPTION_MASK_FLOAT128_TYPE, false, false },
38596 { "float128-hardware", OPTION_MASK_FLOAT128_HW, false, false },
38597 { "fprnd", OPTION_MASK_FPRND, false, true },
38598 { "hard-dfp", OPTION_MASK_DFP, false, true },
38599 { "htm", OPTION_MASK_HTM, false, true },
38600 { "isel", OPTION_MASK_ISEL, false, true },
38601 { "mfcrf", OPTION_MASK_MFCRF, false, true },
38602 { "mfpgpr", OPTION_MASK_MFPGPR, false, true },
38603 { "modulo", OPTION_MASK_MODULO, false, true },
38604 { "mulhw", OPTION_MASK_MULHW, false, true },
38605 { "multiple", OPTION_MASK_MULTIPLE, false, true },
38606 { "popcntb", OPTION_MASK_POPCNTB, false, true },
38607 { "popcntd", OPTION_MASK_POPCNTD, false, true },
38608 { "power8-fusion", OPTION_MASK_P8_FUSION, false, true },
38609 { "power8-fusion-sign", OPTION_MASK_P8_FUSION_SIGN, false, true },
38610 { "power8-vector", OPTION_MASK_P8_VECTOR, false, true },
38611 { "power9-dform-scalar", OPTION_MASK_P9_DFORM_SCALAR, false, true },
38612 { "power9-dform-vector", OPTION_MASK_P9_DFORM_VECTOR, false, true },
38613 { "power9-fusion", OPTION_MASK_P9_FUSION, false, true },
38614 { "power9-minmax", OPTION_MASK_P9_MINMAX, false, true },
38615 { "power9-misc", OPTION_MASK_P9_MISC, false, true },
38616 { "power9-vector", OPTION_MASK_P9_VECTOR, false, true },
38617 { "powerpc-gfxopt", OPTION_MASK_PPC_GFXOPT, false, true },
38618 { "powerpc-gpopt", OPTION_MASK_PPC_GPOPT, false, true },
38619 { "quad-memory", OPTION_MASK_QUAD_MEMORY, false, true },
38620 { "quad-memory-atomic", OPTION_MASK_QUAD_MEMORY_ATOMIC, false, true },
38621 { "recip-precision", OPTION_MASK_RECIP_PRECISION, false, true },
38622 { "save-toc-indirect", OPTION_MASK_SAVE_TOC_INDIRECT, false, true },
38623 { "string", OPTION_MASK_STRING, false, true },
38624 { "toc-fusion", OPTION_MASK_TOC_FUSION, false, true },
38625 { "update", OPTION_MASK_NO_UPDATE, true , true },
38626 { "upper-regs-di", OPTION_MASK_UPPER_REGS_DI, false, true },
38627 { "upper-regs-df", OPTION_MASK_UPPER_REGS_DF, false, true },
38628 { "upper-regs-sf", OPTION_MASK_UPPER_REGS_SF, false, true },
38629 { "vsx", OPTION_MASK_VSX, false, true },
38630 { "vsx-small-integer", OPTION_MASK_VSX_SMALL_INTEGER, false, true },
38631 { "vsx-timode", OPTION_MASK_VSX_TIMODE, false, true },
38632 #ifdef OPTION_MASK_64BIT
38633 #if TARGET_AIX_OS
38634 { "aix64", OPTION_MASK_64BIT, false, false },
38635 { "aix32", OPTION_MASK_64BIT, true, false },
38636 #else
38637 { "64", OPTION_MASK_64BIT, false, false },
38638 { "32", OPTION_MASK_64BIT, true, false },
38639 #endif
38640 #endif
38641 #ifdef OPTION_MASK_EABI
38642 { "eabi", OPTION_MASK_EABI, false, false },
38643 #endif
38644 #ifdef OPTION_MASK_LITTLE_ENDIAN
38645 { "little", OPTION_MASK_LITTLE_ENDIAN, false, false },
38646 { "big", OPTION_MASK_LITTLE_ENDIAN, true, false },
38647 #endif
38648 #ifdef OPTION_MASK_RELOCATABLE
38649 { "relocatable", OPTION_MASK_RELOCATABLE, false, false },
38650 #endif
38651 #ifdef OPTION_MASK_STRICT_ALIGN
38652 { "strict-align", OPTION_MASK_STRICT_ALIGN, false, false },
38653 #endif
38654 { "soft-float", OPTION_MASK_SOFT_FLOAT, false, false },
38655 { "string", OPTION_MASK_STRING, false, false },
38658 /* Builtin mask mapping for printing the flags. */
38659 static struct rs6000_opt_mask const rs6000_builtin_mask_names[] =
38661 { "altivec", RS6000_BTM_ALTIVEC, false, false },
38662 { "vsx", RS6000_BTM_VSX, false, false },
38663 { "spe", RS6000_BTM_SPE, false, false },
38664 { "paired", RS6000_BTM_PAIRED, false, false },
38665 { "fre", RS6000_BTM_FRE, false, false },
38666 { "fres", RS6000_BTM_FRES, false, false },
38667 { "frsqrte", RS6000_BTM_FRSQRTE, false, false },
38668 { "frsqrtes", RS6000_BTM_FRSQRTES, false, false },
38669 { "popcntd", RS6000_BTM_POPCNTD, false, false },
38670 { "cell", RS6000_BTM_CELL, false, false },
38671 { "power8-vector", RS6000_BTM_P8_VECTOR, false, false },
38672 { "power9-vector", RS6000_BTM_P9_VECTOR, false, false },
38673 { "power9-misc", RS6000_BTM_P9_MISC, false, false },
38674 { "crypto", RS6000_BTM_CRYPTO, false, false },
38675 { "htm", RS6000_BTM_HTM, false, false },
38676 { "hard-dfp", RS6000_BTM_DFP, false, false },
38677 { "hard-float", RS6000_BTM_HARD_FLOAT, false, false },
38678 { "long-double-128", RS6000_BTM_LDBL128, false, false },
38679 { "float128", RS6000_BTM_FLOAT128, false, false },
38682 /* Option variables that we want to support inside attribute((target)) and
38683 #pragma GCC target operations; a usage sketch follows the table. */
38685 struct rs6000_opt_var {
38686 const char *name; /* option name */
38687 size_t global_offset; /* offset of the option in global_options. */
38688 size_t target_offset; /* offset of the option in target options. */
38691 static struct rs6000_opt_var const rs6000_opt_vars[] =
38693 { "friz",
38694 offsetof (struct gcc_options, x_TARGET_FRIZ),
38695 offsetof (struct cl_target_option, x_TARGET_FRIZ), },
38696 { "avoid-indexed-addresses",
38697 offsetof (struct gcc_options, x_TARGET_AVOID_XFORM),
38698 offsetof (struct cl_target_option, x_TARGET_AVOID_XFORM) },
38699 { "paired",
38700 offsetof (struct gcc_options, x_rs6000_paired_float),
38701 offsetof (struct cl_target_option, x_rs6000_paired_float), },
38702 { "longcall",
38703 offsetof (struct gcc_options, x_rs6000_default_long_calls),
38704 offsetof (struct cl_target_option, x_rs6000_default_long_calls), },
38705 { "optimize-swaps",
38706 offsetof (struct gcc_options, x_rs6000_optimize_swaps),
38707 offsetof (struct cl_target_option, x_rs6000_optimize_swaps), },
38708 { "allow-movmisalign",
38709 offsetof (struct gcc_options, x_TARGET_ALLOW_MOVMISALIGN),
38710 offsetof (struct cl_target_option, x_TARGET_ALLOW_MOVMISALIGN), },
38711 { "allow-df-permute",
38712 offsetof (struct gcc_options, x_TARGET_ALLOW_DF_PERMUTE),
38713 offsetof (struct cl_target_option, x_TARGET_ALLOW_DF_PERMUTE), },
38714 { "sched-groups",
38715 offsetof (struct gcc_options, x_TARGET_SCHED_GROUPS),
38716 offsetof (struct cl_target_option, x_TARGET_SCHED_GROUPS), },
38717 { "always-hint",
38718 offsetof (struct gcc_options, x_TARGET_ALWAYS_HINT),
38719 offsetof (struct cl_target_option, x_TARGET_ALWAYS_HINT), },
38720 { "align-branch-targets",
38721 offsetof (struct gcc_options, x_TARGET_ALIGN_BRANCH_TARGETS),
38722 offsetof (struct cl_target_option, x_TARGET_ALIGN_BRANCH_TARGETS), },
38723 { "vectorize-builtins",
38724 offsetof (struct gcc_options, x_TARGET_VECTORIZE_BUILTINS),
38725 offsetof (struct cl_target_option, x_TARGET_VECTORIZE_BUILTINS), },
38726 { "tls-markers",
38727 offsetof (struct gcc_options, x_tls_markers),
38728 offsetof (struct cl_target_option, x_tls_markers), },
38729 { "sched-prolog",
38730 offsetof (struct gcc_options, x_TARGET_SCHED_PROLOG),
38731 offsetof (struct cl_target_option, x_TARGET_SCHED_PROLOG), },
38732 { "sched-epilog",
38733 offsetof (struct gcc_options, x_TARGET_SCHED_PROLOG),
38734 offsetof (struct cl_target_option, x_TARGET_SCHED_PROLOG), },
38735 { "gen-cell-microcode",
38736 offsetof (struct gcc_options, x_rs6000_gen_cell_microcode),
38737 offsetof (struct cl_target_option, x_rs6000_gen_cell_microcode), },
38738 { "warn-cell-microcode",
38739 offsetof (struct gcc_options, x_rs6000_warn_cell_microcode),
38740 offsetof (struct cl_target_option, x_rs6000_warn_cell_microcode), },
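/* These combine with the mask names above, e.g. (sketch):

     __attribute__((__target__("friz,no-longcall")))
     double rounder (double);

   Each entry simply toggles the int found at global_offset; a "no-"
   prefix stores 0 instead of 1, as handled below.  */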
38743 /* Inner function to handle attribute((target("..."))) and #pragma GCC target
38744 parsing. Return true if there were no errors. */
38746 static bool
38747 rs6000_inner_target_options (tree args, bool attr_p)
38749 bool ret = true;
38751 if (args == NULL_TREE)
38754 else if (TREE_CODE (args) == STRING_CST)
38756 char *p = ASTRDUP (TREE_STRING_POINTER (args));
38757 char *q;
38759 while ((q = strtok (p, ",")) != NULL)
38761 bool error_p = false;
38762 bool not_valid_p = false;
38763 const char *cpu_opt = NULL;
38765 p = NULL;
38766 if (strncmp (q, "cpu=", 4) == 0)
38768 int cpu_index = rs6000_cpu_name_lookup (q+4);
38769 if (cpu_index >= 0)
38770 rs6000_cpu_index = cpu_index;
38771 else
38773 error_p = true;
38774 cpu_opt = q+4;
38777 else if (strncmp (q, "tune=", 5) == 0)
38779 int tune_index = rs6000_cpu_name_lookup (q+5);
38780 if (tune_index >= 0)
38781 rs6000_tune_index = tune_index;
38782 else
38784 error_p = true;
38785 cpu_opt = q+5;
38788 else
38790 size_t i;
38791 bool invert = false;
38792 char *r = q;
38794 error_p = true;
38795 if (strncmp (r, "no-", 3) == 0)
38797 invert = true;
38798 r += 3;
38801 for (i = 0; i < ARRAY_SIZE (rs6000_opt_masks); i++)
38802 if (strcmp (r, rs6000_opt_masks[i].name) == 0)
38804 HOST_WIDE_INT mask = rs6000_opt_masks[i].mask;
38806 if (!rs6000_opt_masks[i].valid_target)
38807 not_valid_p = true;
38808 else
38810 error_p = false;
38811 rs6000_isa_flags_explicit |= mask;
38813 /* VSX needs altivec, so -mvsx automagically sets
38814 altivec and disables -mavoid-indexed-addresses. */
38815 if (!invert)
38817 if (mask == OPTION_MASK_VSX)
38819 mask |= OPTION_MASK_ALTIVEC;
38820 TARGET_AVOID_XFORM = 0;
38824 if (rs6000_opt_masks[i].invert)
38825 invert = !invert;
38827 if (invert)
38828 rs6000_isa_flags &= ~mask;
38829 else
38830 rs6000_isa_flags |= mask;
38832 break;
38835 if (error_p && !not_valid_p)
38837 for (i = 0; i < ARRAY_SIZE (rs6000_opt_vars); i++)
38838 if (strcmp (r, rs6000_opt_vars[i].name) == 0)
38840 size_t j = rs6000_opt_vars[i].global_offset;
38841 *((int *) ((char *)&global_options + j)) = !invert;
38842 error_p = false;
38843 not_valid_p = false;
38844 break;
38849 if (error_p)
38851 const char *eprefix, *esuffix;
38853 ret = false;
38854 if (attr_p)
38856 eprefix = "__attribute__((__target__(";
38857 esuffix = ")))";
38859 else
38861 eprefix = "#pragma GCC target ";
38862 esuffix = "";
38865 if (cpu_opt)
38866 error ("invalid cpu \"%s\" for %s\"%s\"%s", cpu_opt, eprefix,
38867 q, esuffix);
38868 else if (not_valid_p)
38869 error ("%s\"%s\"%s is not allowed", eprefix, q, esuffix);
38870 else
38871 error ("%s\"%s\"%s is invalid", eprefix, q, esuffix);
38876 else if (TREE_CODE (args) == TREE_LIST)
38880 tree value = TREE_VALUE (args);
38881 if (value)
38883 bool ret2 = rs6000_inner_target_options (value, attr_p);
38884 if (!ret2)
38885 ret = false;
38887 args = TREE_CHAIN (args);
38889 while (args != NULL_TREE);
38892 else
38893 gcc_unreachable ();
38895 return ret;
38898 /* Print out the target options as a list for -mdebug=target. */
38900 static void
38901 rs6000_debug_target_options (tree args, const char *prefix)
38903 if (args == NULL_TREE)
38904 fprintf (stderr, "%s<NULL>", prefix);
38906 else if (TREE_CODE (args) == STRING_CST)
38908 char *p = ASTRDUP (TREE_STRING_POINTER (args));
38909 char *q;
38911 while ((q = strtok (p, ",")) != NULL)
38913 p = NULL;
38914 fprintf (stderr, "%s\"%s\"", prefix, q);
38915 prefix = ", ";
38919 else if (TREE_CODE (args) == TREE_LIST)
38923 tree value = TREE_VALUE (args);
38924 if (value)
38926 rs6000_debug_target_options (value, prefix);
38927 prefix = ", ";
38929 args = TREE_CHAIN (args);
38931 while (args != NULL_TREE);
38934 else
38935 gcc_unreachable ();
38937 return;
38941 /* Hook to validate attribute((target("..."))). */
38943 static bool
38944 rs6000_valid_attribute_p (tree fndecl,
38945 tree ARG_UNUSED (name),
38946 tree args,
38947 int flags)
38949 struct cl_target_option cur_target;
38950 bool ret;
38951 tree old_optimize = build_optimization_node (&global_options);
38952 tree new_target, new_optimize;
38953 tree func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl);
38955 gcc_assert ((fndecl != NULL_TREE) && (args != NULL_TREE));
38957 if (TARGET_DEBUG_TARGET)
38959 tree tname = DECL_NAME (fndecl);
38960 fprintf (stderr, "\n==================== rs6000_valid_attribute_p:\n");
38961 if (tname)
38962 fprintf (stderr, "function: %.*s\n",
38963 (int) IDENTIFIER_LENGTH (tname),
38964 IDENTIFIER_POINTER (tname));
38965 else
38966 fprintf (stderr, "function: unknown\n");
38968 fprintf (stderr, "args:");
38969 rs6000_debug_target_options (args, " ");
38970 fprintf (stderr, "\n");
38972 if (flags)
38973 fprintf (stderr, "flags: 0x%x\n", flags);
38975 fprintf (stderr, "--------------------\n");
38978 old_optimize = build_optimization_node (&global_options);
38979 func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl);
38981 /* If the function changed the optimization levels as well as setting target
38982 options, start with the optimizations specified. */
38983 if (func_optimize && func_optimize != old_optimize)
38984 cl_optimization_restore (&global_options,
38985 TREE_OPTIMIZATION (func_optimize));
38987 /* The target attributes may also change some optimization flags, so update
38988 the optimization options if necessary. */
38989 cl_target_option_save (&cur_target, &global_options);
38990 rs6000_cpu_index = rs6000_tune_index = -1;
38991 ret = rs6000_inner_target_options (args, true);
38993 /* Set up any additional state. */
38994 if (ret)
38996 ret = rs6000_option_override_internal (false);
38997 new_target = build_target_option_node (&global_options);
38999 else
39000 new_target = NULL;
39002 new_optimize = build_optimization_node (&global_options);
39004 if (!new_target)
39005 ret = false;
39007 else if (fndecl)
39009 DECL_FUNCTION_SPECIFIC_TARGET (fndecl) = new_target;
39011 if (old_optimize != new_optimize)
39012 DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl) = new_optimize;
39015 cl_target_option_restore (&global_options, &cur_target);
39017 if (old_optimize != new_optimize)
39018 cl_optimization_restore (&global_options,
39019 TREE_OPTIMIZATION (old_optimize));
39021 return ret;
39025 /* Hook to validate the current #pragma GCC target and set the state, and
39026 update the macros based on what was changed. If ARGS is NULL, then
39027 POP_TARGET is used to reset the options. */
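/* Typical use from the C front end (a sketch; push/pop bookkeeping lives
   in c-family code):

     #pragma GCC push_options
     #pragma GCC target ("vsx")
     ... functions compiled with VSX enabled ...
     #pragma GCC pop_options

   The pop arrives here as ARGS == NULL with POP_TARGET set to the saved
   option node.  */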
39029 bool
39030 rs6000_pragma_target_parse (tree args, tree pop_target)
39032 tree prev_tree = build_target_option_node (&global_options);
39033 tree cur_tree;
39034 struct cl_target_option *prev_opt, *cur_opt;
39035 HOST_WIDE_INT prev_flags, cur_flags, diff_flags;
39036 HOST_WIDE_INT prev_bumask, cur_bumask, diff_bumask;
39038 if (TARGET_DEBUG_TARGET)
39040 fprintf (stderr, "\n==================== rs6000_pragma_target_parse\n");
39041 fprintf (stderr, "args:");
39042 rs6000_debug_target_options (args, " ");
39043 fprintf (stderr, "\n");
39045 if (pop_target)
39047 fprintf (stderr, "pop_target:\n");
39048 debug_tree (pop_target);
39050 else
39051 fprintf (stderr, "pop_target: <NULL>\n");
39053 fprintf (stderr, "--------------------\n");
39056 if (! args)
39058 cur_tree = ((pop_target)
39059 ? pop_target
39060 : target_option_default_node);
39061 cl_target_option_restore (&global_options,
39062 TREE_TARGET_OPTION (cur_tree));
39064 else
39066 rs6000_cpu_index = rs6000_tune_index = -1;
39067 if (!rs6000_inner_target_options (args, false)
39068 || !rs6000_option_override_internal (false)
39069 || (cur_tree = build_target_option_node (&global_options))
39070 == NULL_TREE)
39072 if (TARGET_DEBUG_BUILTIN || TARGET_DEBUG_TARGET)
39073 fprintf (stderr, "invalid pragma\n");
39075 return false;
39079 target_option_current_node = cur_tree;
39081 /* If we have the preprocessor linked in (i.e. C or C++ languages), possibly
39082 change the macros that are defined. */
39083 if (rs6000_target_modify_macros_ptr)
39085 prev_opt = TREE_TARGET_OPTION (prev_tree);
39086 prev_bumask = prev_opt->x_rs6000_builtin_mask;
39087 prev_flags = prev_opt->x_rs6000_isa_flags;
39089 cur_opt = TREE_TARGET_OPTION (cur_tree);
39090 cur_flags = cur_opt->x_rs6000_isa_flags;
39091 cur_bumask = cur_opt->x_rs6000_builtin_mask;
39093 diff_bumask = (prev_bumask ^ cur_bumask);
39094 diff_flags = (prev_flags ^ cur_flags);
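/* Worked example: with prev_flags 0b1010 and cur_flags 0b0110, diff_flags
   is 0b1100; "prev_flags & diff_flags" (0b1000) selects the macros to
   delete and "cur_flags & diff_flags" (0b0100) selects the macros to
   define.  Unchanged bits never touch the preprocessor.  */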
39096 if ((diff_flags != 0) || (diff_bumask != 0))
39098 /* Delete old macros. */
39099 rs6000_target_modify_macros_ptr (false,
39100 prev_flags & diff_flags,
39101 prev_bumask & diff_bumask);
39103 /* Define new macros. */
39104 rs6000_target_modify_macros_ptr (true,
39105 cur_flags & diff_flags,
39106 cur_bumask & diff_bumask);
39110 return true;
39114 /* Remember the last target of rs6000_set_current_function. */
39115 static GTY(()) tree rs6000_previous_fndecl;
39117 /* Establish appropriate back-end context for processing the function
39118 FNDECL. The argument might be NULL to indicate processing at top
39119 level, outside of any function scope. */
39120 static void
39121 rs6000_set_current_function (tree fndecl)
39123 tree old_tree = (rs6000_previous_fndecl
39124 ? DECL_FUNCTION_SPECIFIC_TARGET (rs6000_previous_fndecl)
39125 : NULL_TREE);
39127 tree new_tree = (fndecl
39128 ? DECL_FUNCTION_SPECIFIC_TARGET (fndecl)
39129 : NULL_TREE);
39131 if (TARGET_DEBUG_TARGET)
39133 bool print_final = false;
39134 fprintf (stderr, "\n==================== rs6000_set_current_function");
39136 if (fndecl)
39137 fprintf (stderr, ", fndecl %s (%p)",
39138 (DECL_NAME (fndecl)
39139 ? IDENTIFIER_POINTER (DECL_NAME (fndecl))
39140 : "<unknown>"), (void *)fndecl);
39142 if (rs6000_previous_fndecl)
39143 fprintf (stderr, ", prev_fndecl (%p)", (void *)rs6000_previous_fndecl);
39145 fprintf (stderr, "\n");
39146 if (new_tree)
39148 fprintf (stderr, "\nnew fndecl target specific options:\n");
39149 debug_tree (new_tree);
39150 print_final = true;
39153 if (old_tree)
39155 fprintf (stderr, "\nold fndecl target specific options:\n");
39156 debug_tree (old_tree);
39157 print_final = true;
39160 if (print_final)
39161 fprintf (stderr, "--------------------\n");
39164 /* Only change the context if the function changes. This hook is called
39165 several times in the course of compiling a function, and we don't want to
39166 slow things down too much or call target_reinit when it isn't safe. */
39167 if (fndecl && fndecl != rs6000_previous_fndecl)
39169 rs6000_previous_fndecl = fndecl;
39170 if (old_tree == new_tree)
39173 else if (new_tree && new_tree != target_option_default_node)
39175 cl_target_option_restore (&global_options,
39176 TREE_TARGET_OPTION (new_tree));
39177 if (TREE_TARGET_GLOBALS (new_tree))
39178 restore_target_globals (TREE_TARGET_GLOBALS (new_tree));
39179 else
39180 TREE_TARGET_GLOBALS (new_tree)
39181 = save_target_globals_default_opts ();
39184 else if (old_tree && old_tree != target_option_default_node)
39186 new_tree = target_option_current_node;
39187 cl_target_option_restore (&global_options,
39188 TREE_TARGET_OPTION (new_tree));
39189 if (TREE_TARGET_GLOBALS (new_tree))
39190 restore_target_globals (TREE_TARGET_GLOBALS (new_tree));
39191 else if (new_tree == target_option_default_node)
39192 restore_target_globals (&default_target_globals);
39193 else
39194 TREE_TARGET_GLOBALS (new_tree)
39195 = save_target_globals_default_opts ();
39201 /* Save the current options */
39203 static void
39204 rs6000_function_specific_save (struct cl_target_option *ptr,
39205 struct gcc_options *opts)
39207 ptr->x_rs6000_isa_flags = opts->x_rs6000_isa_flags;
39208 ptr->x_rs6000_isa_flags_explicit = opts->x_rs6000_isa_flags_explicit;
39211 /* Restore the current options */
39213 static void
39214 rs6000_function_specific_restore (struct gcc_options *opts,
39215 struct cl_target_option *ptr)
39218 opts->x_rs6000_isa_flags = ptr->x_rs6000_isa_flags;
39219 opts->x_rs6000_isa_flags_explicit = ptr->x_rs6000_isa_flags_explicit;
39220 (void) rs6000_option_override_internal (false);
39223 /* Print the current options */
39225 static void
39226 rs6000_function_specific_print (FILE *file, int indent,
39227 struct cl_target_option *ptr)
39229 rs6000_print_isa_options (file, indent, "Isa options set",
39230 ptr->x_rs6000_isa_flags);
39232 rs6000_print_isa_options (file, indent, "Isa options explicit",
39233 ptr->x_rs6000_isa_flags_explicit);
39236 /* Helper function to print the current isa or misc options on a line. */
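/* The emitted line looks roughly like this (a sketch; the exact header
   format comes from DEBUG_FMT_WX):

     Isa options set = 0x...: -maltivec, -mvsx, -mno-crypto, \
                              -mno-htm, ...

   where ", \" plus a newline and padding is emitted once the running
   column passes max_column.  */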
39238 static void
39239 rs6000_print_options_internal (FILE *file,
39240 int indent,
39241 const char *string,
39242 HOST_WIDE_INT flags,
39243 const char *prefix,
39244 const struct rs6000_opt_mask *opts,
39245 size_t num_elements)
39247 size_t i;
39248 size_t start_column = 0;
39249 size_t cur_column;
39250 size_t max_column = 120;
39251 size_t prefix_len = strlen (prefix);
39252 size_t comma_len = 0;
39253 const char *comma = "";
39255 if (indent)
39256 start_column += fprintf (file, "%*s", indent, "");
39258 if (!flags)
39260 fprintf (file, DEBUG_FMT_S, string, "<none>");
39261 return;
39264 start_column += fprintf (file, DEBUG_FMT_WX, string, flags);
39266 /* Print the various mask options. */
39267 cur_column = start_column;
39268 for (i = 0; i < num_elements; i++)
39270 bool invert = opts[i].invert;
39271 const char *name = opts[i].name;
39272 const char *no_str = "";
39273 HOST_WIDE_INT mask = opts[i].mask;
39274 size_t len = comma_len + prefix_len + strlen (name);
39276 if (!invert)
39278 if ((flags & mask) == 0)
39280 no_str = "no-";
39281 len += sizeof ("no-") - 1;
39284 flags &= ~mask;
39287 else
39289 if ((flags & mask) != 0)
39291 no_str = "no-";
39292 len += sizeof ("no-") - 1;
39295 flags |= mask;
39298 cur_column += len;
39299 if (cur_column > max_column)
39301 fprintf (file, ", \\\n%*s", (int)start_column, "");
39302 cur_column = start_column + len;
39303 comma = "";
39306 fprintf (file, "%s%s%s%s", comma, prefix, no_str, name);
39307 comma = ", ";
39308 comma_len = sizeof (", ") - 1;
39311 fputs ("\n", file);
39314 /* Helper function to print the current isa options on a line. */
39316 static void
39317 rs6000_print_isa_options (FILE *file, int indent, const char *string,
39318 HOST_WIDE_INT flags)
39320 rs6000_print_options_internal (file, indent, string, flags, "-m",
39321 &rs6000_opt_masks[0],
39322 ARRAY_SIZE (rs6000_opt_masks));
39325 static void
39326 rs6000_print_builtin_options (FILE *file, int indent, const char *string,
39327 HOST_WIDE_INT flags)
39329 rs6000_print_options_internal (file, indent, string, flags, "",
39330 &rs6000_builtin_mask_names[0],
39331 ARRAY_SIZE (rs6000_builtin_mask_names));
39335 /* Hook to determine if one function can safely inline another. */
39337 static bool
39338 rs6000_can_inline_p (tree caller, tree callee)
39340 bool ret = false;
39341 tree caller_tree = DECL_FUNCTION_SPECIFIC_TARGET (caller);
39342 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (callee);
39344 /* If callee has no option attributes, then it is ok to inline. */
39345 if (!callee_tree)
39346 ret = true;
39348 /* If caller has no option attributes, but callee does, then it is not ok to
39349 inline. */
39350 else if (!caller_tree)
39351 ret = false;
39353 else
39355 struct cl_target_option *caller_opts = TREE_TARGET_OPTION (caller_tree);
39356 struct cl_target_option *callee_opts = TREE_TARGET_OPTION (callee_tree);
39358 /* Callee's options should be a subset of the caller's, i.e. a vsx function
39359 can inline an altivec function but a non-vsx function can't inline a
39360 vsx function. */
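/* Concretely: a caller built with "vsx,altivec" may inline a callee marked
   only "altivec" (the callee's flags are a subset), while an "altivec"-only
   caller may not inline a "vsx" callee.  */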
39361 if ((caller_opts->x_rs6000_isa_flags & callee_opts->x_rs6000_isa_flags)
39362 == callee_opts->x_rs6000_isa_flags)
39363 ret = true;
39366 if (TARGET_DEBUG_TARGET)
39367 fprintf (stderr, "rs6000_can_inline_p:, caller %s, callee %s, %s inline\n",
39368 (DECL_NAME (caller)
39369 ? IDENTIFIER_POINTER (DECL_NAME (caller))
39370 : "<unknown>"),
39371 (DECL_NAME (callee)
39372 ? IDENTIFIER_POINTER (DECL_NAME (callee))
39373 : "<unknown>"),
39374 (ret ? "can" : "cannot"));
39376 return ret;
39379 /* Allocate a stack temp and fixup the address so it meets the particular
39380 memory requirements (either offsettable or REG+REG addressing). */
39383 rs6000_allocate_stack_temp (machine_mode mode,
39384 bool offsettable_p,
39385 bool reg_reg_p)
39387 rtx stack = assign_stack_temp (mode, GET_MODE_SIZE (mode));
39388 rtx addr = XEXP (stack, 0);
39389 int strict_p = (reload_in_progress || reload_completed);
39391 if (!legitimate_indirect_address_p (addr, strict_p))
39393 if (offsettable_p
39394 && !rs6000_legitimate_offset_address_p (mode, addr, strict_p, true))
39395 stack = replace_equiv_address (stack, copy_addr_to_reg (addr));
39397 else if (reg_reg_p && !legitimate_indexed_address_p (addr, strict_p))
39398 stack = replace_equiv_address (stack, copy_addr_to_reg (addr));
39401 return stack;
39404 /* Given a memory reference, if it does not use reg or reg+reg addressing,
39405 convert it to such a form to deal with instructions like STFIWX that
39406 only take reg+reg addressing. */
39409 rs6000_address_for_fpconvert (rtx x)
39411 int strict_p = (reload_in_progress || reload_completed);
39412 rtx addr;
39414 gcc_assert (MEM_P (x));
39415 addr = XEXP (x, 0);
39416 if (! legitimate_indirect_address_p (addr, strict_p)
39417 && ! legitimate_indexed_address_p (addr, strict_p))
39419 if (GET_CODE (addr) == PRE_INC || GET_CODE (addr) == PRE_DEC)
39421 rtx reg = XEXP (addr, 0);
39422 HOST_WIDE_INT size = GET_MODE_SIZE (GET_MODE (x));
39423 rtx size_rtx = GEN_INT ((GET_CODE (addr) == PRE_DEC) ? -size : size);
39424 gcc_assert (REG_P (reg));
39425 emit_insn (gen_add3_insn (reg, reg, size_rtx));
39426 addr = reg;
39428 else if (GET_CODE (addr) == PRE_MODIFY)
39430 rtx reg = XEXP (addr, 0);
39431 rtx expr = XEXP (addr, 1);
39432 gcc_assert (REG_P (reg));
39433 gcc_assert (GET_CODE (expr) == PLUS);
39434 emit_insn (gen_add3_insn (reg, XEXP (expr, 0), XEXP (expr, 1)));
39435 addr = reg;
39438 x = replace_equiv_address (x, copy_addr_to_reg (addr));
39441 return x;
39444 /* Given a memory reference, if it is not in the form for altivec memory
39445 reference instructions (i.e. reg or reg+reg addressing with AND of -16),
39446 convert it to the altivec format. */
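/* E.g. an address of 0x1003 becomes 0x1000 after the AND with -16.  The
   lvx/stvx instructions ignore the low four bits of the effective address
   anyway; the mask just makes that truncation explicit in the RTL.  */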
39449 rs6000_address_for_altivec (rtx x)
39451 gcc_assert (MEM_P (x));
39452 if (!altivec_indexed_or_indirect_operand (x, GET_MODE (x)))
39454 rtx addr = XEXP (x, 0);
39455 int strict_p = (reload_in_progress || reload_completed);
39457 if (!legitimate_indexed_address_p (addr, strict_p)
39458 && !legitimate_indirect_address_p (addr, strict_p))
39459 addr = copy_to_mode_reg (Pmode, addr);
39461 addr = gen_rtx_AND (Pmode, addr, GEN_INT (-16));
39462 x = change_address (x, GET_MODE (x), addr);
39465 return x;
39468 /* Implement TARGET_LEGITIMATE_CONSTANT_P.
39470 On the RS/6000, all integer constants are acceptable, most won't be valid
39471 for particular insns, though. Only easy FP constants are acceptable. */
39473 static bool
39474 rs6000_legitimate_constant_p (machine_mode mode, rtx x)
39476 if (TARGET_ELF && tls_referenced_p (x))
39477 return false;
39479 return ((GET_CODE (x) != CONST_DOUBLE && GET_CODE (x) != CONST_VECTOR)
39480 || GET_MODE (x) == VOIDmode
39481 || (TARGET_POWERPC64 && mode == DImode)
39482 || easy_fp_constant (x, mode)
39483 || easy_vector_constant (x, mode));
39487 /* Return TRUE iff the sequence ending in LAST sets the static chain. */
39489 static bool
39490 chain_already_loaded (rtx_insn *last)
39492 for (; last != NULL; last = PREV_INSN (last))
39494 if (NONJUMP_INSN_P (last))
39496 rtx patt = PATTERN (last);
39498 if (GET_CODE (patt) == SET)
39500 rtx lhs = XEXP (patt, 0);
39502 if (REG_P (lhs) && REGNO (lhs) == STATIC_CHAIN_REGNUM)
39503 return true;
39507 return false;
39510 /* Expand code to perform a call under the AIX or ELFv2 ABI. */
39512 void
39513 rs6000_call_aix (rtx value, rtx func_desc, rtx flag, rtx cookie)
39515 const bool direct_call_p
39516 = GET_CODE (func_desc) == SYMBOL_REF && SYMBOL_REF_FUNCTION_P (func_desc);
39517 rtx toc_reg = gen_rtx_REG (Pmode, TOC_REGNUM);
39518 rtx toc_load = NULL_RTX;
39519 rtx toc_restore = NULL_RTX;
39520 rtx func_addr;
39521 rtx abi_reg = NULL_RTX;
39522 rtx call[4];
39523 int n_call;
39524 rtx insn;
39526 /* Handle longcall attributes. */
39527 if (INTVAL (cookie) & CALL_LONG)
39528 func_desc = rs6000_longcall_ref (func_desc);
39530 /* Handle indirect calls. */
39531 if (GET_CODE (func_desc) != SYMBOL_REF
39532 || (DEFAULT_ABI == ABI_AIX && !SYMBOL_REF_FUNCTION_P (func_desc)))
39534 /* Save the TOC into its reserved slot before the call,
39535 and prepare to restore it after the call. */
39536 rtx stack_ptr = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
39537 rtx stack_toc_offset = GEN_INT (RS6000_TOC_SAVE_SLOT);
39538 rtx stack_toc_mem = gen_frame_mem (Pmode,
39539 gen_rtx_PLUS (Pmode, stack_ptr,
39540 stack_toc_offset));
39541 rtx stack_toc_unspec = gen_rtx_UNSPEC (Pmode,
39542 gen_rtvec (1, stack_toc_offset),
39543 UNSPEC_TOCSLOT);
39544 toc_restore = gen_rtx_SET (toc_reg, stack_toc_unspec);
39546 /* Can we optimize saving the TOC in the prologue or
39547 do we need to do it at every call? */
39548 if (TARGET_SAVE_TOC_INDIRECT && !cfun->calls_alloca)
39549 cfun->machine->save_toc_in_prologue = true;
39550 else
39552 MEM_VOLATILE_P (stack_toc_mem) = 1;
39553 emit_move_insn (stack_toc_mem, toc_reg);
39556 if (DEFAULT_ABI == ABI_ELFv2)
39558 /* A function pointer in the ELFv2 ABI is just a plain address, but
39559 the ABI requires it to be loaded into r12 before the call. */
39560 func_addr = gen_rtx_REG (Pmode, 12);
39561 emit_move_insn (func_addr, func_desc);
39562 abi_reg = func_addr;
39564 else
39566 /* A function pointer under AIX is a pointer to a data area whose
39567 first word contains the actual address of the function, whose
39568 second word contains a pointer to its TOC, and whose third word
39569 contains a value to place in the static chain register (r11).
39570 Note that if we load the static chain, our "trampoline" need
39571 not have any executable code. */
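/* As a sketch, the descriptor being dereferenced below has this shape
   (illustrative declaration only; each word is Pmode-sized):

     struct aix_func_desc {
       void *entry;          -- word 0: address of the function's code
       void *toc;            -- word 1: the function's TOC pointer
       void *static_chain;   -- word 2: value to load into r11
     };  */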
39573 /* Load up address of the actual function. */
39574 func_desc = force_reg (Pmode, func_desc);
39575 func_addr = gen_reg_rtx (Pmode);
39576 emit_move_insn (func_addr, gen_rtx_MEM (Pmode, func_desc));
39578 /* Prepare to load the TOC of the called function. Note that the
39579 TOC load must happen immediately before the actual call so
39580 that unwinding the TOC registers works correctly. See the
39581 comment in frob_update_context. */
39582 rtx func_toc_offset = GEN_INT (GET_MODE_SIZE (Pmode));
39583 rtx func_toc_mem = gen_rtx_MEM (Pmode,
39584 gen_rtx_PLUS (Pmode, func_desc,
39585 func_toc_offset));
39586 toc_load = gen_rtx_USE (VOIDmode, func_toc_mem);
39588 /* If we have a static chain, load it up. But, if the call was
39589 originally direct, the 3rd word has not been written since no
39590 trampoline has been built, so we ought not to load it, lest we
39591 override a static chain value. */
39592 if (!direct_call_p
39593 && TARGET_POINTERS_TO_NESTED_FUNCTIONS
39594 && !chain_already_loaded (get_current_sequence ()->next->last))
39596 rtx sc_reg = gen_rtx_REG (Pmode, STATIC_CHAIN_REGNUM);
39597 rtx func_sc_offset = GEN_INT (2 * GET_MODE_SIZE (Pmode));
39598 rtx func_sc_mem = gen_rtx_MEM (Pmode,
39599 gen_rtx_PLUS (Pmode, func_desc,
39600 func_sc_offset));
39601 emit_move_insn (sc_reg, func_sc_mem);
39602 abi_reg = sc_reg;
39606 else
39608 /* Direct calls use the TOC: for local calls, the callee will
39609 assume the TOC register is set; for non-local calls, the
39610 PLT stub needs the TOC register. */
39611 abi_reg = toc_reg;
39612 func_addr = func_desc;
39615 /* Create the call. */
39616 call[0] = gen_rtx_CALL (VOIDmode, gen_rtx_MEM (SImode, func_addr), flag);
39617 if (value != NULL_RTX)
39618 call[0] = gen_rtx_SET (value, call[0]);
39619 n_call = 1;
39621 if (toc_load)
39622 call[n_call++] = toc_load;
39623 if (toc_restore)
39624 call[n_call++] = toc_restore;
39626 call[n_call++] = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (Pmode, LR_REGNO));
39628 insn = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (n_call, call));
39629 insn = emit_call_insn (insn);
39631 /* Mention all registers defined by the ABI to hold information
39632 as uses in CALL_INSN_FUNCTION_USAGE. */
39633 if (abi_reg)
39634 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), abi_reg);
39637 /* Expand code to perform a sibling call under the AIX or ELFv2 ABI. */
39639 void
39640 rs6000_sibcall_aix (rtx value, rtx func_desc, rtx flag, rtx cookie)
39642 rtx call[2];
39643 rtx insn;
39645 gcc_assert (INTVAL (cookie) == 0);
39647 /* Create the call. */
39648 call[0] = gen_rtx_CALL (VOIDmode, gen_rtx_MEM (SImode, func_desc), flag);
39649 if (value != NULL_RTX)
39650 call[0] = gen_rtx_SET (value, call[0]);
39652 call[1] = simple_return_rtx;
39654 insn = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (2, call));
39655 insn = emit_call_insn (insn);
39657 /* Note use of the TOC register. */
39658 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), gen_rtx_REG (Pmode, TOC_REGNUM));
39661 /* Return whether we need to always update the saved TOC pointer when we update
39662 the stack pointer. */
39664 static bool
39665 rs6000_save_toc_in_prologue_p (void)
39667 return (cfun && cfun->machine && cfun->machine->save_toc_in_prologue);
39670 #ifdef HAVE_GAS_HIDDEN
39671 # define USE_HIDDEN_LINKONCE 1
39672 #else
39673 # define USE_HIDDEN_LINKONCE 0
39674 #endif
39676 /* Fills in the label name that should be used for a 476 link stack thunk. */
39678 void
39679 get_ppc476_thunk_name (char name[32])
39681 gcc_assert (TARGET_LINK_STACK);
39683 if (USE_HIDDEN_LINKONCE)
39684 sprintf (name, "__ppc476.get_thunk");
39685 else
39686 ASM_GENERATE_INTERNAL_LABEL (name, "LPPC476_", 0);
39689 /* This function emits the simple thunk routine that is used to preserve
39690 the link stack on the 476 cpu. */
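/* A sketch of how callers use the thunk: instead of the classic
   "bcl 20,31,$+4" trick to read the PC (which pushes a link-stack entry
   that is never popped), code does

     bl __ppc476.get_thunk     -- thunk is just "blr", so push and pop match
     mflr rN                   -- rN now holds the address after the bl

   keeping the 476's link-stack predictor balanced.  */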
39692 static void rs6000_code_end (void) ATTRIBUTE_UNUSED;
39693 static void
39694 rs6000_code_end (void)
39696 char name[32];
39697 tree decl;
39699 if (!TARGET_LINK_STACK)
39700 return;
39702 get_ppc476_thunk_name (name);
39704 decl = build_decl (BUILTINS_LOCATION, FUNCTION_DECL, get_identifier (name),
39705 build_function_type_list (void_type_node, NULL_TREE));
39706 DECL_RESULT (decl) = build_decl (BUILTINS_LOCATION, RESULT_DECL,
39707 NULL_TREE, void_type_node);
39708 TREE_PUBLIC (decl) = 1;
39709 TREE_STATIC (decl) = 1;
39711 #if RS6000_WEAK
39712 if (USE_HIDDEN_LINKONCE && !TARGET_XCOFF)
39714 cgraph_node::create (decl)->set_comdat_group (DECL_ASSEMBLER_NAME (decl));
39715 targetm.asm_out.unique_section (decl, 0);
39716 switch_to_section (get_named_section (decl, NULL, 0));
39717 DECL_WEAK (decl) = 1;
39718 ASM_WEAKEN_DECL (asm_out_file, decl, name, 0);
39719 targetm.asm_out.globalize_label (asm_out_file, name);
39720 targetm.asm_out.assemble_visibility (decl, VISIBILITY_HIDDEN);
39721 ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
39723 else
39724 #endif
39726 switch_to_section (text_section);
39727 ASM_OUTPUT_LABEL (asm_out_file, name);
39730 DECL_INITIAL (decl) = make_node (BLOCK);
39731 current_function_decl = decl;
39732 allocate_struct_function (decl, false);
39733 init_function_start (decl);
39734 first_function_block_is_cold = false;
39735 /* Make sure unwind info is emitted for the thunk if needed. */
39736 final_start_function (emit_barrier (), asm_out_file, 1);
39738 fputs ("\tblr\n", asm_out_file);
39740 final_end_function ();
39741 init_insn_lengths ();
39742 free_after_compilation (cfun);
39743 set_cfun (NULL);
39744 current_function_decl = NULL;
39747 /* Add r30 to hard reg set if the prologue sets it up and it is not
39748 pic_offset_table_rtx. */
39750 static void
39751 rs6000_set_up_by_prologue (struct hard_reg_set_container *set)
39753 if (!TARGET_SINGLE_PIC_BASE
39754 && TARGET_TOC
39755 && TARGET_MINIMAL_TOC
39756 && !constant_pool_empty_p ())
39757 add_to_hard_reg_set (&set->set, Pmode, RS6000_PIC_OFFSET_TABLE_REGNUM);
39758 if (cfun->machine->split_stack_argp_used)
39759 add_to_hard_reg_set (&set->set, Pmode, 12);
39763 /* Helper function for rs6000_split_logical to emit a logical instruction after
39764 splitting the operation into single GPR registers.
39766 DEST is the destination register.
39767 OP1 and OP2 are the input source registers.
39768 CODE is the base operation (AND, IOR, XOR, NOT).
39769 MODE is the machine mode.
39770 If COMPLEMENT_FINAL_P is true, wrap the whole operation with NOT.
39771 If COMPLEMENT_OP1_P is true, wrap operand1 with NOT.
39772 If COMPLEMENT_OP2_P is true, wrap operand2 with NOT. */
39774 static void
39775 rs6000_split_logical_inner (rtx dest,
39776 rtx op1,
39777 rtx op2,
39778 enum rtx_code code,
39779 machine_mode mode,
39780 bool complement_final_p,
39781 bool complement_op1_p,
39782 bool complement_op2_p)
39784 rtx bool_rtx;
39786 /* Optimize AND of 0/0xffffffff and IOR/XOR of 0. */
39787 if (op2 && GET_CODE (op2) == CONST_INT
39788 && (mode == SImode || (mode == DImode && TARGET_POWERPC64))
39789 && !complement_final_p && !complement_op1_p && !complement_op2_p)
39791 HOST_WIDE_INT mask = GET_MODE_MASK (mode);
39792 HOST_WIDE_INT value = INTVAL (op2) & mask;
39794 /* Optimize AND of 0 to just set 0. Optimize AND of -1 to be a move. */
39795 if (code == AND)
39797 if (value == 0)
39799 emit_insn (gen_rtx_SET (dest, const0_rtx));
39800 return;
39803 else if (value == mask)
39805 if (!rtx_equal_p (dest, op1))
39806 emit_insn (gen_rtx_SET (dest, op1));
39807 return;
39811 /* Optimize IOR/XOR of 0 to be a simple move. Split large operations
39812 into separate ORI/ORIS or XORI/XORIS instructions. */
39813 else if (code == IOR || code == XOR)
39815 if (value == 0)
39817 if (!rtx_equal_p (dest, op1))
39818 emit_insn (gen_rtx_SET (dest, op1));
39819 return;
39824 if (code == AND && mode == SImode
39825 && !complement_final_p && !complement_op1_p && !complement_op2_p)
39827 emit_insn (gen_andsi3 (dest, op1, op2));
39828 return;
39831 if (complement_op1_p)
39832 op1 = gen_rtx_NOT (mode, op1);
39834 if (complement_op2_p)
39835 op2 = gen_rtx_NOT (mode, op2);
39837 /* For canonical RTL, if only one arm is inverted it is the first. */
39838 if (!complement_op1_p && complement_op2_p)
39839 std::swap (op1, op2);
39841 bool_rtx = ((code == NOT)
39842 ? gen_rtx_NOT (mode, op1)
39843 : gen_rtx_fmt_ee (code, mode, op1, op2));
39845 if (complement_final_p)
39846 bool_rtx = gen_rtx_NOT (mode, bool_rtx);
39848 emit_insn (gen_rtx_SET (dest, bool_rtx));
39851 /* Split a DImode AND/IOR/XOR with a constant on a 32-bit system. These
39852 operations are split immediately during RTL generation to allow for more
39853 optimizations of the AND/IOR/XOR.
39855 OPERANDS is an array containing the destination and two input operands.
39856 CODE is the base operation (AND, IOR, XOR, NOT).
39857 MODE is the machine mode.
39858 If COMPLEMENT_FINAL_P is true, wrap the whole operation with NOT.
39859 If COMPLEMENT_OP1_P is true, wrap operand1 with NOT.
39860 If COMPLEMENT_OP2_P is true, wrap operand2 with NOT. */
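/* Worked example (sketch): splitting

     (set (reg:DI d) (xor:DI (reg:DI a) (const_int 0x12345678)))

   on a 32-bit target XORs the high halves with 0 (which collapses to a
   plain move) and splits the low half into XORIS with 0x1234 followed by
   XORI with 0x5678, via the hi_16bits/lo_16bits code below.  */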
39864 static void
39865 rs6000_split_logical_di (rtx operands[3],
39866 enum rtx_code code,
39867 bool complement_final_p,
39868 bool complement_op1_p,
39869 bool complement_op2_p)
39871 const HOST_WIDE_INT lower_32bits = HOST_WIDE_INT_C(0xffffffff);
39872 const HOST_WIDE_INT upper_32bits = ~ lower_32bits;
39873 const HOST_WIDE_INT sign_bit = HOST_WIDE_INT_C(0x80000000);
39874 enum hi_lo { hi = 0, lo = 1 };
39875 rtx op0_hi_lo[2], op1_hi_lo[2], op2_hi_lo[2];
39876 size_t i;
39878 op0_hi_lo[hi] = gen_highpart (SImode, operands[0]);
39879 op1_hi_lo[hi] = gen_highpart (SImode, operands[1]);
39880 op0_hi_lo[lo] = gen_lowpart (SImode, operands[0]);
39881 op1_hi_lo[lo] = gen_lowpart (SImode, operands[1]);
39883 if (code == NOT)
39884 op2_hi_lo[hi] = op2_hi_lo[lo] = NULL_RTX;
39885 else
39887 if (GET_CODE (operands[2]) != CONST_INT)
39889 op2_hi_lo[hi] = gen_highpart_mode (SImode, DImode, operands[2]);
39890 op2_hi_lo[lo] = gen_lowpart (SImode, operands[2]);
39892 else
39894 HOST_WIDE_INT value = INTVAL (operands[2]);
39895 HOST_WIDE_INT value_hi_lo[2];
39897 gcc_assert (!complement_final_p);
39898 gcc_assert (!complement_op1_p);
39899 gcc_assert (!complement_op2_p);
39901 value_hi_lo[hi] = value >> 32;
39902 value_hi_lo[lo] = value & lower_32bits;
39904 for (i = 0; i < 2; i++)
39906 HOST_WIDE_INT sub_value = value_hi_lo[i];
39908 if (sub_value & sign_bit)
39909 sub_value |= upper_32bits;
39911 op2_hi_lo[i] = GEN_INT (sub_value);
39913 /* If this is an AND instruction, check to see if we need to load
39914 the value in a register. */
39915 if (code == AND && sub_value != -1 && sub_value != 0
39916 && !and_operand (op2_hi_lo[i], SImode))
39917 op2_hi_lo[i] = force_reg (SImode, op2_hi_lo[i]);
39922 for (i = 0; i < 2; i++)
39924 /* Split large IOR/XOR operations. */
39925 if ((code == IOR || code == XOR)
39926 && GET_CODE (op2_hi_lo[i]) == CONST_INT
39927 && !complement_final_p
39928 && !complement_op1_p
39929 && !complement_op2_p
39930 && !logical_const_operand (op2_hi_lo[i], SImode))
39932 HOST_WIDE_INT value = INTVAL (op2_hi_lo[i]);
39933 HOST_WIDE_INT hi_16bits = value & HOST_WIDE_INT_C(0xffff0000);
39934 HOST_WIDE_INT lo_16bits = value & HOST_WIDE_INT_C(0x0000ffff);
39935 rtx tmp = gen_reg_rtx (SImode);
39937 /* Make sure the constant is sign extended. */
39938 if ((hi_16bits & sign_bit) != 0)
39939 hi_16bits |= upper_32bits;
39941 rs6000_split_logical_inner (tmp, op1_hi_lo[i], GEN_INT (hi_16bits),
39942 code, SImode, false, false, false);
39944 rs6000_split_logical_inner (op0_hi_lo[i], tmp, GEN_INT (lo_16bits),
39945 code, SImode, false, false, false);
39947 else
39948 rs6000_split_logical_inner (op0_hi_lo[i], op1_hi_lo[i], op2_hi_lo[i],
39949 code, SImode, complement_final_p,
39950 complement_op1_p, complement_op2_p);
39953 return;
39956 /* Split the insns that make up boolean operations operating on multiple GPR
39957 registers. The boolean MD patterns ensure that the inputs either are
39958 exactly the same as the output registers, or there is no overlap.
39960 OPERANDS is an array containing the destination and two input operands.
39961 CODE is the base operation (AND, IOR, XOR, NOT).
39962 If COMPLEMENT_FINAL_P is true, wrap the whole operation with NOT.
39963 If COMPLEMENT_OP1_P is true, wrap operand1 with NOT.
39964 If COMPLEMENT_OP2_P is true, wrap operand2 with NOT. */
39966 void
39967 rs6000_split_logical (rtx operands[3],
39968 enum rtx_code code,
39969 bool complement_final_p,
39970 bool complement_op1_p,
39971 bool complement_op2_p)
39973 machine_mode mode = GET_MODE (operands[0]);
39974 machine_mode sub_mode;
39975 rtx op0, op1, op2;
39976 int sub_size, regno0, regno1, nregs, i;
39978 /* If this is DImode, use the specialized version that can run before
39979 register allocation. */
39980 if (mode == DImode && !TARGET_POWERPC64)
39982 rs6000_split_logical_di (operands, code, complement_final_p,
39983 complement_op1_p, complement_op2_p);
39984 return;
39987 op0 = operands[0];
39988 op1 = operands[1];
39989 op2 = (code == NOT) ? NULL_RTX : operands[2];
39990 sub_mode = (TARGET_POWERPC64) ? DImode : SImode;
39991 sub_size = GET_MODE_SIZE (sub_mode);
39992 regno0 = REGNO (op0);
39993 regno1 = REGNO (op1);
39995 gcc_assert (reload_completed);
39996 gcc_assert (IN_RANGE (regno0, FIRST_GPR_REGNO, LAST_GPR_REGNO));
39997 gcc_assert (IN_RANGE (regno1, FIRST_GPR_REGNO, LAST_GPR_REGNO));
39999 nregs = rs6000_hard_regno_nregs[(int)mode][regno0];
40000 gcc_assert (nregs > 1);
40002 if (op2 && REG_P (op2))
40003 gcc_assert (IN_RANGE (REGNO (op2), FIRST_GPR_REGNO, LAST_GPR_REGNO));
40005 for (i = 0; i < nregs; i++)
40007 int offset = i * sub_size;
40008 rtx sub_op0 = simplify_subreg (sub_mode, op0, mode, offset);
40009 rtx sub_op1 = simplify_subreg (sub_mode, op1, mode, offset);
40010 rtx sub_op2 = ((code == NOT)
40011 ? NULL_RTX
40012 : simplify_subreg (sub_mode, op2, mode, offset));
40014 rs6000_split_logical_inner (sub_op0, sub_op1, sub_op2, code, sub_mode,
40015 complement_final_p, complement_op1_p,
40016 complement_op2_p);
40019 return;
40023 /* Return true if the peephole2 can combine an addis instruction with a load
40024 whose offset uses the addis result, so that the pair can be fused together
40025 on a power8. */
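/* Schematically, the pair being matched is:

     addis rT,rB,hi16(addr)      -- addis_reg / addis_value
     lwz   rT,lo16(addr)(rT)     -- target / mem

   Fusion requires the addis destination to feed the load's base, and
   either the same register is also the load target or the addis register
   dies within the peephole window (checked below).  */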
40027 bool
40028 fusion_gpr_load_p (rtx addis_reg, /* register set via addis. */
40029 rtx addis_value, /* addis value. */
40030 rtx target, /* target register that is loaded. */
40031 rtx mem) /* bottom part of the memory addr. */
40033 rtx addr;
40034 rtx base_reg;
40036 /* Validate arguments. */
40037 if (!base_reg_operand (addis_reg, GET_MODE (addis_reg)))
40038 return false;
40040 if (!base_reg_operand (target, GET_MODE (target)))
40041 return false;
40043 if (!fusion_gpr_addis (addis_value, GET_MODE (addis_value)))
40044 return false;
40046 /* Allow sign/zero extension. */
40047 if (GET_CODE (mem) == ZERO_EXTEND
40048 || (GET_CODE (mem) == SIGN_EXTEND && TARGET_P8_FUSION_SIGN))
40049 mem = XEXP (mem, 0);
40051 if (!MEM_P (mem))
40052 return false;
40054 if (!fusion_gpr_mem_load (mem, GET_MODE (mem)))
40055 return false;
40057 addr = XEXP (mem, 0); /* either PLUS or LO_SUM. */
40058 if (GET_CODE (addr) != PLUS && GET_CODE (addr) != LO_SUM)
40059 return false;
40061 /* Validate that the register used to load the high value is either the
40062 register being loaded, or we can safely replace its use.
40064 This function is only called from the peephole2 pass and we assume that
40065 there are 2 instructions in the peephole (addis and load), so we want to
40066 check if the target register was not used in the memory address and the
40067 register to hold the addis result is dead after the peephole. */
40068 if (REGNO (addis_reg) != REGNO (target))
40070 if (reg_mentioned_p (target, mem))
40071 return false;
40073 if (!peep2_reg_dead_p (2, addis_reg))
40074 return false;
40076 /* If the target register being loaded is the stack pointer, we must
40077 avoid loading any other value into it, even temporarily. */
40078 if (REG_P (target) && REGNO (target) == STACK_POINTER_REGNUM)
40079 return false;
40082 base_reg = XEXP (addr, 0);
40083 return REGNO (addis_reg) == REGNO (base_reg);
40086 /* During the peephole2 pass, adjust and expand the insns for a load fusion
40087 sequence. We adjust the addis register to use the target register. If the
40088 load sign extends, we adjust the code to do the zero extending load, and an
40089 explicit sign extension later since the fusion only covers zero extending
40090 loads.
40092 The operands are:
40093 operands[0] register set with addis (to be replaced with target)
40094 operands[1] value set via addis
40095 operands[2] target register being loaded
40096 operands[3] D-form memory reference using operands[0]. */
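/* Sketch of the sign-extending case: the fused half always performs the
   zero-extending load, and the sign extension is layered on afterwards:

     (set target (unspec [(zero_extend (mem ...))] UNSPEC_FUSION_GPR))
     (set target (sign_extend (subreg target)))

   matching the SIGN_EXTEND handling at the end of the function.  */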
40098 void
40099 expand_fusion_gpr_load (rtx *operands)
40101 rtx addis_value = operands[1];
40102 rtx target = operands[2];
40103 rtx orig_mem = operands[3];
40104 rtx new_addr, new_mem, orig_addr, offset;
40105 enum rtx_code plus_or_lo_sum;
40106 machine_mode target_mode = GET_MODE (target);
40107 machine_mode extend_mode = target_mode;
40108 machine_mode ptr_mode = Pmode;
40109 enum rtx_code extend = UNKNOWN;
40111 if (GET_CODE (orig_mem) == ZERO_EXTEND
40112 || (TARGET_P8_FUSION_SIGN && GET_CODE (orig_mem) == SIGN_EXTEND))
40114 extend = GET_CODE (orig_mem);
40115 orig_mem = XEXP (orig_mem, 0);
40116 target_mode = GET_MODE (orig_mem);
40119 gcc_assert (MEM_P (orig_mem));
40121 orig_addr = XEXP (orig_mem, 0);
40122 plus_or_lo_sum = GET_CODE (orig_addr);
40123 gcc_assert (plus_or_lo_sum == PLUS || plus_or_lo_sum == LO_SUM);
40125 offset = XEXP (orig_addr, 1);
40126 new_addr = gen_rtx_fmt_ee (plus_or_lo_sum, ptr_mode, addis_value, offset);
40127 new_mem = replace_equiv_address_nv (orig_mem, new_addr, false);
40129 if (extend != UNKNOWN)
40130 new_mem = gen_rtx_fmt_e (ZERO_EXTEND, extend_mode, new_mem);
40132 new_mem = gen_rtx_UNSPEC (extend_mode, gen_rtvec (1, new_mem),
40133 UNSPEC_FUSION_GPR);
40134 emit_insn (gen_rtx_SET (target, new_mem));
40136 if (extend == SIGN_EXTEND)
40138 int sub_off = ((BYTES_BIG_ENDIAN)
40139 ? GET_MODE_SIZE (extend_mode) - GET_MODE_SIZE (target_mode)
40140 : 0);
40141 rtx sign_reg
40142 = simplify_subreg (target_mode, target, extend_mode, sub_off);
40144 emit_insn (gen_rtx_SET (target,
40145 gen_rtx_SIGN_EXTEND (extend_mode, sign_reg)));
40148 return;
40151 /* Emit the addis instruction that will be part of a fused instruction
40152 sequence. */
40154 void
40155 emit_fusion_addis (rtx target, rtx addis_value, const char *comment,
40156 const char *mode_name)
40158 rtx fuse_ops[10];
40159 char insn_template[80];
40160 const char *addis_str = NULL;
40161 const char *comment_str = ASM_COMMENT_START;
40163 if (*comment_str == ' ')
40164 comment_str++;
40166 /* Emit the addis instruction. */
40167 fuse_ops[0] = target;
40168 if (satisfies_constraint_L (addis_value))
40170 fuse_ops[1] = addis_value;
40171 addis_str = "lis %0,%v1";
40174 else if (GET_CODE (addis_value) == PLUS)
40176 rtx op0 = XEXP (addis_value, 0);
40177 rtx op1 = XEXP (addis_value, 1);
40179 if (REG_P (op0) && CONST_INT_P (op1)
40180 && satisfies_constraint_L (op1))
40182 fuse_ops[1] = op0;
40183 fuse_ops[2] = op1;
40184 addis_str = "addis %0,%1,%v2";
40188 else if (GET_CODE (addis_value) == HIGH)
40190 rtx value = XEXP (addis_value, 0);
40191 if (GET_CODE (value) == UNSPEC && XINT (value, 1) == UNSPEC_TOCREL)
40193 fuse_ops[1] = XVECEXP (value, 0, 0); /* symbol ref. */
40194 fuse_ops[2] = XVECEXP (value, 0, 1); /* TOC register. */
40195 if (TARGET_ELF)
40196 addis_str = "addis %0,%2,%1@toc@ha";
40198 else if (TARGET_XCOFF)
40199 addis_str = "addis %0,%1@u(%2)";
40201 else
40202 gcc_unreachable ();
40205 else if (GET_CODE (value) == PLUS)
40207 rtx op0 = XEXP (value, 0);
40208 rtx op1 = XEXP (value, 1);
40210 if (GET_CODE (op0) == UNSPEC
40211 && XINT (op0, 1) == UNSPEC_TOCREL
40212 && CONST_INT_P (op1))
40214 fuse_ops[1] = XVECEXP (op0, 0, 0); /* symbol ref. */
40215 fuse_ops[2] = XVECEXP (op0, 0, 1); /* TOC register. */
40216 fuse_ops[3] = op1;
40217 if (TARGET_ELF)
40218 addis_str = "addis %0,%2,%1+%3@toc@ha";
40220 else if (TARGET_XCOFF)
40221 addis_str = "addis %0,%1+%3@u(%2)";
40223 else
40224 gcc_unreachable ();
40228 else if (satisfies_constraint_L (value))
40230 fuse_ops[1] = value;
40231 addis_str = "lis %0,%v1";
40234 else if (TARGET_ELF && !TARGET_POWERPC64 && CONSTANT_P (value))
40236 fuse_ops[1] = value;
40237 addis_str = "lis %0,%1@ha";
40241 if (!addis_str)
40242 fatal_insn ("Could not generate addis value for fusion", addis_value);
40244 sprintf (insn_template, "%s\t\t%s %s, type %s", addis_str, comment_str,
40245 comment, mode_name);
40246 output_asm_insn (insn_template, fuse_ops);
40249 /* Emit a D-form load or store instruction that is the second instruction
40250 of a fusion sequence. */
40252 void
40253 emit_fusion_load_store (rtx load_store_reg, rtx addis_reg, rtx offset,
40254 const char *insn_str)
40256 rtx fuse_ops[10];
40257 char insn_template[80];
40259 fuse_ops[0] = load_store_reg;
40260 fuse_ops[1] = addis_reg;
40262 if (CONST_INT_P (offset) && satisfies_constraint_I (offset))
40264 sprintf (insn_template, "%s %%0,%%2(%%1)", insn_str);
40265 fuse_ops[2] = offset;
40266 output_asm_insn (insn_template, fuse_ops);
40269 else if (GET_CODE (offset) == UNSPEC
40270 && XINT (offset, 1) == UNSPEC_TOCREL)
40272 if (TARGET_ELF)
40273 sprintf (insn_template, "%s %%0,%%2@toc@l(%%1)", insn_str);
40275 else if (TARGET_XCOFF)
40276 sprintf (insn_template, "%s %%0,%%2@l(%%1)", insn_str);
40278 else
40279 gcc_unreachable ();
40281 fuse_ops[2] = XVECEXP (offset, 0, 0);
40282 output_asm_insn (insn_template, fuse_ops);
40285 else if (GET_CODE (offset) == PLUS
40286 && GET_CODE (XEXP (offset, 0)) == UNSPEC
40287 && XINT (XEXP (offset, 0), 1) == UNSPEC_TOCREL
40288 && CONST_INT_P (XEXP (offset, 1)))
40290 rtx tocrel_unspec = XEXP (offset, 0);
40291 if (TARGET_ELF)
40292 sprintf (insn_template, "%s %%0,%%2+%%3@toc@l(%%1)", insn_str);
40294 else if (TARGET_XCOFF)
40295 sprintf (insn_template, "%s %%0,%%2+%%3@l(%%1)", insn_str);
40297 else
40298 gcc_unreachable ();
40300 fuse_ops[2] = XVECEXP (tocrel_unspec, 0, 0);
40301 fuse_ops[3] = XEXP (offset, 1);
40302 output_asm_insn (insn_template, fuse_ops);
40305 else if (TARGET_ELF && !TARGET_POWERPC64 && CONSTANT_P (offset))
40307 sprintf (insn_template, "%s %%0,%%2@l(%%1)", insn_str);
40309 fuse_ops[2] = offset;
40310 output_asm_insn (insn_template, fuse_ops);
40313 else
40314 fatal_insn ("Unable to generate load/store offset for fusion", offset);
40316 return;
40319 /* Wrap a TOC address that can be fused to indicate that special fusion
40320 processing is needed. */
40323 fusion_wrap_memory_address (rtx old_mem)
40325 rtx old_addr = XEXP (old_mem, 0);
40326 rtvec v = gen_rtvec (1, old_addr);
40327 rtx new_addr = gen_rtx_UNSPEC (Pmode, v, UNSPEC_FUSION_ADDIS);
40328 return replace_equiv_address_nv (old_mem, new_addr, false);
40331 /* Given an address, convert it into the addis and load offset parts. Addresses
40332 created during the peephole2 process look like:
40333 (lo_sum (high (unspec [(sym)] UNSPEC_TOCREL))
40334 (unspec [(...)] UNSPEC_TOCREL))
40336 Addresses created via toc fusion look like:
40337 (unspec [(unspec [(...)] UNSPEC_TOCREL)] UNSPEC_FUSION_ADDIS)) */
40339 static void
40340 fusion_split_address (rtx addr, rtx *p_hi, rtx *p_lo)
40342 rtx hi, lo;
40344 if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_FUSION_ADDIS)
40346 lo = XVECEXP (addr, 0, 0);
40347 hi = gen_rtx_HIGH (Pmode, lo);
40349 else if (GET_CODE (addr) == PLUS || GET_CODE (addr) == LO_SUM)
40351 hi = XEXP (addr, 0);
40352 lo = XEXP (addr, 1);
40354 else
40355 gcc_unreachable ();
40357 *p_hi = hi;
40358 *p_lo = lo;
40361 /* Return a string to fuse an addis instruction with a gpr load into the same
40362 register that the addis instruction set up. The address that is used
40363 is the logical address that was formed during peephole2:
40364 (lo_sum (high) (low-part))
40366 Or the address is the TOC address that is wrapped before register allocation:
40367 (unspec [(addr) (toc-reg)] UNSPEC_FUSION_ADDIS)
40369 The code is complicated, so we call output_asm_insn directly, and just
40370 return "". */
40372 const char *
40373 emit_fusion_gpr_load (rtx target, rtx mem)
40375 rtx addis_value;
40376 rtx addr;
40377 rtx load_offset;
40378 const char *load_str = NULL;
40379 const char *mode_name = NULL;
40380 machine_mode mode;
40382 if (GET_CODE (mem) == ZERO_EXTEND)
40383 mem = XEXP (mem, 0);
40385 gcc_assert (REG_P (target) && MEM_P (mem));
40387 addr = XEXP (mem, 0);
40388 fusion_split_address (addr, &addis_value, &load_offset);
40390 /* Now emit the load instruction to the same register. */
40391 mode = GET_MODE (mem);
40392 switch (mode)
40394 case QImode:
40395 mode_name = "char";
40396 load_str = "lbz";
40397 break;
40399 case HImode:
40400 mode_name = "short";
40401 load_str = "lhz";
40402 break;
40404 case SImode:
40405 case SFmode:
40406 mode_name = (mode == SFmode) ? "float" : "int";
40407 load_str = "lwz";
40408 break;
40410 case DImode:
40411 case DFmode:
40412 gcc_assert (TARGET_POWERPC64);
40413 mode_name = (mode == DFmode) ? "double" : "long";
40414 load_str = "ld";
40415 break;
40417 default:
40418 fatal_insn ("Bad GPR fusion", gen_rtx_SET (target, mem));
40421 /* Emit the addis instruction. */
40422 emit_fusion_addis (target, addis_value, "gpr load fusion", mode_name);
40424 /* Emit the D-form load instruction. */
40425 emit_fusion_load_store (target, target, load_offset, load_str);
40427 return "";
40431 /* Return true if the peephole2 can combine a load/store involving a
40432 combination of an addis instruction and the memory operation. This was
40433 added to the ISA 3.0 (power9) hardware. */
40435 bool
40436 fusion_p9_p (rtx addis_reg, /* register set via addis. */
40437 rtx addis_value, /* addis value. */
40438 rtx dest, /* destination (memory or register). */
40439 rtx src) /* source (register or memory). */
40441 rtx addr, mem, offset;
40442 enum machine_mode mode = GET_MODE (src);
40444 /* Validate arguments. */
40445 if (!base_reg_operand (addis_reg, GET_MODE (addis_reg)))
40446 return false;
40448 if (!fusion_gpr_addis (addis_value, GET_MODE (addis_value)))
40449 return false;
40451 /* Ignore extend operations that are part of the load. */
40452 if (GET_CODE (src) == FLOAT_EXTEND || GET_CODE (src) == ZERO_EXTEND)
40453 src = XEXP (src, 0);
40455 /* Test for memory<-register or register<-memory. */
40456 if (fpr_reg_operand (src, mode) || int_reg_operand (src, mode))
40458 if (!MEM_P (dest))
40459 return false;
40461 mem = dest;
40464 else if (MEM_P (src))
40466 if (!fpr_reg_operand (dest, mode) && !int_reg_operand (dest, mode))
40467 return false;
40469 mem = src;
40472 else
40473 return false;
40475 addr = XEXP (mem, 0); /* either PLUS or LO_SUM. */
40476 if (GET_CODE (addr) == PLUS)
40478 if (!rtx_equal_p (addis_reg, XEXP (addr, 0)))
40479 return false;
40481 return satisfies_constraint_I (XEXP (addr, 1));
40484 else if (GET_CODE (addr) == LO_SUM)
40486 if (!rtx_equal_p (addis_reg, XEXP (addr, 0)))
40487 return false;
40489 offset = XEXP (addr, 1);
40490 if (TARGET_XCOFF || (TARGET_ELF && TARGET_POWERPC64))
40491 return small_toc_ref (offset, GET_MODE (offset));
40493 else if (TARGET_ELF && !TARGET_POWERPC64)
40494 return CONSTANT_P (offset);
40497 return false;
40500 /* During the peephole2 pass, adjust and expand the insns for an extended fusion
40501 load sequence.
40503 The operands are:
40504 operands[0] register set with addis
40505 operands[1] value set via addis
40506 operands[2] target register being loaded
40507 operands[3] D-form memory reference using operands[0].
40509 This is similar to the fusion introduced with power8, except it scales to
40510 both loads/stores and does not require the result register to be the same as
40511 the base register. At the moment, we only do this if the register set by the
40512 addis is dead. */
40514 void
40515 expand_fusion_p9_load (rtx *operands)
40517 rtx tmp_reg = operands[0];
40518 rtx addis_value = operands[1];
40519 rtx target = operands[2];
40520 rtx orig_mem = operands[3];
40521 rtx new_addr, new_mem, orig_addr, offset, set, clobber, insn;
40522 enum rtx_code plus_or_lo_sum;
40523 machine_mode target_mode = GET_MODE (target);
40524 machine_mode extend_mode = target_mode;
40525 machine_mode ptr_mode = Pmode;
40526 enum rtx_code extend = UNKNOWN;
40528 if (GET_CODE (orig_mem) == FLOAT_EXTEND || GET_CODE (orig_mem) == ZERO_EXTEND)
40530 extend = GET_CODE (orig_mem);
40531 orig_mem = XEXP (orig_mem, 0);
40532 target_mode = GET_MODE (orig_mem);
40535 gcc_assert (MEM_P (orig_mem));
40537 orig_addr = XEXP (orig_mem, 0);
40538 plus_or_lo_sum = GET_CODE (orig_addr);
40539 gcc_assert (plus_or_lo_sum == PLUS || plus_or_lo_sum == LO_SUM);
40541 offset = XEXP (orig_addr, 1);
40542 new_addr = gen_rtx_fmt_ee (plus_or_lo_sum, ptr_mode, addis_value, offset);
40543 new_mem = replace_equiv_address_nv (orig_mem, new_addr, false);
40545 if (extend != UNKNOWN)
40546 new_mem = gen_rtx_fmt_e (extend, extend_mode, new_mem);
40548 new_mem = gen_rtx_UNSPEC (extend_mode, gen_rtvec (1, new_mem),
40549 UNSPEC_FUSION_P9);
40551 set = gen_rtx_SET (target, new_mem);
40552 clobber = gen_rtx_CLOBBER (VOIDmode, tmp_reg);
40553 insn = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, set, clobber));
40554 emit_insn (insn);
40556 return;
40559 /* During the peephole2 pass, adjust and expand the insns for an extended fusion
40560 store sequence.
40562 The operands are:
40563 operands[0] register set with addis
40564 operands[1] value set via addis
40565 operands[2] target D-form memory being stored to
40566 operands[3] register being stored
40568 This is similar to the fusion introduced with power8, except it scales to
40569 both loads/stores and does not require the result register to be the same as
40570 the base register. At the moment, we only do this if the register set with addis
40571 is dead. */
40573 void
40574 expand_fusion_p9_store (rtx *operands)
40576 rtx tmp_reg = operands[0];
40577 rtx addis_value = operands[1];
40578 rtx orig_mem = operands[2];
40579 rtx src = operands[3];
40580 rtx new_addr, new_mem, orig_addr, offset, set, clobber, insn, new_src;
40581 enum rtx_code plus_or_lo_sum;
40582 machine_mode target_mode = GET_MODE (orig_mem);
40583 machine_mode ptr_mode = Pmode;
40585 gcc_assert (MEM_P (orig_mem));
40587 orig_addr = XEXP (orig_mem, 0);
40588 plus_or_lo_sum = GET_CODE (orig_addr);
40589 gcc_assert (plus_or_lo_sum == PLUS || plus_or_lo_sum == LO_SUM);
40591 offset = XEXP (orig_addr, 1);
40592 new_addr = gen_rtx_fmt_ee (plus_or_lo_sum, ptr_mode, addis_value, offset);
40593 new_mem = replace_equiv_address_nv (orig_mem, new_addr, false);
40595 new_src = gen_rtx_UNSPEC (target_mode, gen_rtvec (1, src),
40596 UNSPEC_FUSION_P9);
40598 set = gen_rtx_SET (new_mem, new_src);
40599 clobber = gen_rtx_CLOBBER (VOIDmode, tmp_reg);
40600 insn = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, set, clobber));
40601 emit_insn (insn);
40603 return;
40606 /* Return a string to fuse an addis instruction with a load using extended
40607 fusion. The address that is used is the logical address that was formed
40608 during peephole2: (lo_sum (high) (low-part))
40610 The code is complicated, so we call output_asm_insn directly, and just
40611 return "". */
40613 const char *
40614 emit_fusion_p9_load (rtx reg, rtx mem, rtx tmp_reg)
40616 enum machine_mode mode = GET_MODE (reg);
40617 rtx hi;
40618 rtx lo;
40619 rtx addr;
40620 const char *load_string;
40621 int r;
40623 if (GET_CODE (mem) == FLOAT_EXTEND || GET_CODE (mem) == ZERO_EXTEND)
40625 mem = XEXP (mem, 0);
40626 mode = GET_MODE (mem);
40629 if (GET_CODE (reg) == SUBREG)
40631 gcc_assert (SUBREG_BYTE (reg) == 0);
40632 reg = SUBREG_REG (reg);
40635 if (!REG_P (reg))
40636 fatal_insn ("emit_fusion_p9_load, bad reg #1", reg);
40638 r = REGNO (reg);
40639 if (FP_REGNO_P (r))
40641 if (mode == SFmode)
40642 load_string = "lfs";
40643 else if (mode == DFmode || mode == DImode)
40644 load_string = "lfd";
40645 else
40646 gcc_unreachable ();
40648 else if (ALTIVEC_REGNO_P (r) && TARGET_P9_DFORM_SCALAR)
40650 if (mode == SFmode)
40651 load_string = "lxssp";
40652 else if (mode == DFmode || mode == DImode)
40653 load_string = "lxsd";
40654 else
40655 gcc_unreachable ();
40657 else if (INT_REGNO_P (r))
40659 switch (mode)
40661 case QImode:
40662 load_string = "lbz";
40663 break;
40664 case HImode:
40665 load_string = "lhz";
40666 break;
40667 case SImode:
40668 case SFmode:
40669 load_string = "lwz";
40670 break;
40671 case DImode:
40672 case DFmode:
40673 if (!TARGET_POWERPC64)
40674 gcc_unreachable ();
40675 load_string = "ld";
40676 break;
40677 default:
40678 gcc_unreachable ();
40681 else
40682 fatal_insn ("emit_fusion_p9_load, bad reg #2", reg);
40684 if (!MEM_P (mem))
40685 fatal_insn ("emit_fusion_p9_load not MEM", mem);
40687 addr = XEXP (mem, 0);
40688 fusion_split_address (addr, &hi, &lo);
40690 /* Emit the addis instruction. */
40691 emit_fusion_addis (tmp_reg, hi, "power9 load fusion", GET_MODE_NAME (mode));
40693 /* Emit the D-form load instruction. */
40694 emit_fusion_load_store (reg, tmp_reg, lo, load_string);
40696 return "";
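/* An illustrative sketch (operand spelling assumed): the power9 fused
   sequence is roughly

	addis  tmp,base,hi16(addr)
	lwz    reg,lo16(addr)(tmp)

   and, unlike the power8 GPR form above, REG need not equal TMP.  */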
40699 /* Return a string to fuse an addis instruction with a store using extended
40700 fusion. The address that is used is the logical address that was formed
40701 during peephole2: (lo_sum (high) (low-part))
40703 The code is complicated, so we call output_asm_insn directly, and just
40704 return "". */
40706 const char *
40707 emit_fusion_p9_store (rtx mem, rtx reg, rtx tmp_reg)
40709 enum machine_mode mode = GET_MODE (reg);
40710 rtx hi;
40711 rtx lo;
40712 rtx addr;
40713 const char *store_string;
40714 int r;
40716 if (GET_CODE (reg) == SUBREG)
40718 gcc_assert (SUBREG_BYTE (reg) == 0);
40719 reg = SUBREG_REG (reg);
40722 if (!REG_P (reg))
40723 fatal_insn ("emit_fusion_p9_store, bad reg #1", reg);
40725 r = REGNO (reg);
40726 if (FP_REGNO_P (r))
40728 if (mode == SFmode)
40729 store_string = "stfs";
40730 else if (mode == DFmode)
40731 store_string = "stfd";
40732 else
40733 gcc_unreachable ();
40735 else if (ALTIVEC_REGNO_P (r) && TARGET_P9_DFORM_SCALAR)
40737 if (mode == SFmode)
40738 store_string = "stxssp";
40739 else if (mode == DFmode || mode == DImode)
40740 store_string = "stxsd";
40741 else
40742 gcc_unreachable ();
40744 else if (INT_REGNO_P (r))
40746 switch (mode)
40748 case QImode:
40749 store_string = "stb";
40750 break;
40751 case HImode:
40752 store_string = "sth";
40753 break;
40754 case SImode:
40755 case SFmode:
40756 store_string = "stw";
40757 break;
40758 case DImode:
40759 case DFmode:
40760 if (!TARGET_POWERPC64)
40761 gcc_unreachable ();
40762 store_string = "std";
40763 break;
40764 default:
40765 gcc_unreachable ();
40768 else
40769 fatal_insn ("emit_fusion_p9_store, bad reg #2", reg);
40771 if (!MEM_P (mem))
40772 fatal_insn ("emit_fusion_p9_store not MEM", mem);
40774 addr = XEXP (mem, 0);
40775 fusion_split_address (addr, &hi, &lo);
40777 /* Emit the addis instruction. */
40778 emit_fusion_addis (tmp_reg, hi, "power9 store fusion", GET_MODE_NAME (mode));
40780 /* Emit the D-form store instruction. */
40781 emit_fusion_load_store (reg, tmp_reg, lo, store_string);
40783 return "";
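/* The store case mirrors the load sketch above (illustrative only):
   an addis into TMP followed by a D-form store such as stw, std,
   stfd or stxsd, with the source register unrelated to TMP.  */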
40787 /* Analyze vector computations and remove unnecessary doubleword
40788 swaps (xxswapdi instructions). This pass is performed only
40789 for little-endian VSX code generation.
40791 For this specific case, loads and stores of 4x32 and 2x64 vectors
40792 are inefficient. These are implemented using the lxvd2x and
40793 stxvd2x instructions, which invert the order of doublewords in
40794 a vector register. Thus the code generation inserts an xxswapdi
40795 after each such load, and prior to each such store. (For spill
40796 code after register assignment, an additional xxswapdi is inserted
40797 following each store in order to return a hard register to its
40798 unpermuted value.)
40800 The extra xxswapdi instructions reduce performance. This can be
40801 particularly bad for vectorized code. The purpose of this pass
40802 is to reduce the number of xxswapdi instructions required for
40803 correctness.
40805 The primary insight is that much code that operates on vectors
40806 does not care about the relative order of elements in a register,
40807 so long as the correct memory order is preserved. If we have
40808 a computation where all input values are provided by lxvd2x/xxswapdi
40809 sequences, all outputs are stored using xxswapdi/stxvd2x sequences,
40810 and all intermediate computations are pure SIMD (independent of
40811 element order), then all the xxswapdi's associated with the loads
40812 and stores may be removed.
40814 This pass uses some of the infrastructure and logical ideas from
40815 the "web" pass in web.c. We create maximal webs of computations
40816 fitting the description above using union-find. Each such web is
40817 then optimized by removing its unnecessary xxswapdi instructions.
40819 The pass is placed prior to global optimization so that we can
40820 perform the optimization in the safest and simplest way possible;
40821 that is, by replacing each xxswapdi insn with a register copy insn.
40822 Subsequent forward propagation will remove copies where possible.
40824 There are some operations sensitive to element order for which we
40825 can still allow the operation, provided we modify those operations.
40826 These include CONST_VECTORs, for which we must swap the first and
40827 second halves of the constant vector; and SUBREGs, for which we
40828 must adjust the byte offset to account for the swapped doublewords.
40829 A remaining opportunity would be non-immediate-form splats, for
40830 which we should adjust the selected lane of the input. We should
40831 also make code generation adjustments for sum-across operations,
40832 since this is a common vectorizer reduction.
40834 Because we run prior to the first split, we can see loads and stores
40835 here that match *vsx_le_perm_{load,store}_<mode>. These are vanilla
40836 vector loads and stores that have not yet been split into a permuting
40837 load/store and a swap. (One way this can happen is with a builtin
40838 call to vec_vsx_{ld,st}.) We can handle these as well, but rather
40839 than deleting a swap, we convert the load/store into a permuting
40840 load/store (which effectively removes the swap). */
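/* A minimal before/after sketch of the net effect (illustrative
   register numbers; as described above, the pass itself replaces the
   swaps with copies that later forward propagation removes):

	before:  lxvd2x 0,0,8 ; xxswapdi 0,0 ; <SIMD ops> ;
		 xxswapdi 0,0 ; stxvd2x 0,0,9
	after:   lxvd2x 0,0,8 ; <same SIMD ops> ; stxvd2x 0,0,9

   The data simply stays in doubleword-swapped order throughout the
   element-order-independent computation.  */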
40842 /* Notes on Permutes
40844 We do not currently handle computations that contain permutes. There
40845 is a general transformation that can be performed correctly, but it
40846 may introduce more expensive code than it replaces. To handle these
40847 would require a cost model to determine when to perform the optimization.
40848 This commentary records how this could be done if desired.
40850 The most general permute is something like this (example for V16QI):
40852 (vec_select:V16QI (vec_concat:V32QI (op1:V16QI) (op2:V16QI))
40853 (parallel [(const_int a0) (const_int a1)
40854 ...
40855 (const_int a14) (const_int a15)]))
40857 where a0,...,a15 are in [0,31] and select elements from op1 and op2
40858 to produce the result.
40860 Regardless of mode, we can convert the PARALLEL to a mask of 16
40861 byte-element selectors. Let's call this M, with M[i] representing
40862 the ith byte-element selector value. Then if we swap doublewords
40863 throughout the computation, we can get correct behavior by replacing
40864 M with M' as follows:
40866 M'[i] = { (M[i]+8)%16 : M[i] in [0,15]
40867 { ((M[i]+8)%16)+16 : M[i] in [16,31]
40869 This seems promising at first, since we are just replacing one mask
40870 with another. But certain masks are preferable to others. If M
40871 is a mask that matches a vmrghh pattern, for example, M' certainly
40872 will not. Instead of a single vmrghh, we would generate a load of
40873 M' and a vperm. So we would need to know how many xxswapd's we can
40874 remove as a result of this transformation to determine if it's
40875 profitable; and preferably the logic would need to be aware of all
40876 the special preferable masks.
40878 Another form of permute is an UNSPEC_VPERM, in which the mask is
40879 already in a register. In some cases, this mask may be a constant
40880 that we can discover with ud-chains, in which case the above
40881 transformation is ok. However, the common usage here is for the
40882 mask to be produced by an UNSPEC_LVSL, in which case the mask
40883 cannot be known at compile time. In such a case we would have to
40884 generate several instructions to compute M' as above at run time,
40885 and a cost model is needed again.
40887 However, when the mask M for an UNSPEC_VPERM is loaded from the
40888 constant pool, we can replace M with M' as above at no cost
40889 beyond adding a constant pool entry. */
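/* A minimal sketch of the M -> M' remapping described above.  This
   helper is illustrative only and is not used by the pass;
   adjust_vperm below performs the equivalent adjustment directly on
   the constant pool mask.  */
static inline void
swap_permute_mask_sketch (const unsigned char m[16],
			  unsigned char m_prime[16])
{
  for (int i = 0; i < 16; ++i)
    /* Selectors 0-15 pick bytes from op1, 16-31 from op2; rotating by
       8 within each 16-entry half swaps the doublewords.  */
    m_prime[i] = (m[i] < 16
		  ? (m[i] + 8) % 16
		  : ((m[i] + 8) % 16) + 16);
}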
40891 /* This is based on the union-find logic in web.c. web_entry_base is
40892 defined in df.h. */
40893 class swap_web_entry : public web_entry_base
40895 public:
40896 /* Pointer to the insn. */
40897 rtx_insn *insn;
40898 /* Set if insn contains a mention of a vector register. All other
40899 fields are undefined if this field is unset. */
40900 unsigned int is_relevant : 1;
40901 /* Set if insn is a load. */
40902 unsigned int is_load : 1;
40903 /* Set if insn is a store. */
40904 unsigned int is_store : 1;
40905 /* Set if insn is a doubleword swap. This can either be a register swap
40906 or a permuting load or store (test is_load and is_store for this). */
40907 unsigned int is_swap : 1;
40908 /* Set if the insn has a live-in use of a parameter register. */
40909 unsigned int is_live_in : 1;
40910 /* Set if the insn has a live-out def of a return register. */
40911 unsigned int is_live_out : 1;
40912 /* Set if the insn contains a subreg reference of a vector register. */
40913 unsigned int contains_subreg : 1;
40914 /* Set if the insn contains a 128-bit integer operand. */
40915 unsigned int is_128_int : 1;
40916 /* Set if this is a call-insn. */
40917 unsigned int is_call : 1;
40918 /* Set if this insn does not perform a vector operation for which
40919 element order matters, or if we know how to fix it up if it does.
40920 Undefined if is_swap is set. */
40921 unsigned int is_swappable : 1;
40922 /* A nonzero value indicates what kind of special handling for this
40923 insn is required if doublewords are swapped. Undefined if
40924 is_swappable is not set. */
40925 unsigned int special_handling : 4;
40926 /* Set if the web represented by this entry cannot be optimized. */
40927 unsigned int web_not_optimizable : 1;
40928 /* Set if this insn should be deleted. */
40929 unsigned int will_delete : 1;
40932 enum special_handling_values {
40933 SH_NONE = 0,
40934 SH_CONST_VECTOR,
40935 SH_SUBREG,
40936 SH_NOSWAP_LD,
40937 SH_NOSWAP_ST,
40938 SH_EXTRACT,
40939 SH_SPLAT,
40940 SH_XXPERMDI,
40941 SH_CONCAT,
40942 SH_VPERM
40945 /* Union INSN with all insns containing definitions that reach USE.
40946 Detect whether USE is live-in to the current function. */
40947 static void
40948 union_defs (swap_web_entry *insn_entry, rtx insn, df_ref use)
40950 struct df_link *link = DF_REF_CHAIN (use);
40952 if (!link)
40953 insn_entry[INSN_UID (insn)].is_live_in = 1;
40955 while (link)
40957 if (DF_REF_IS_ARTIFICIAL (link->ref))
40958 insn_entry[INSN_UID (insn)].is_live_in = 1;
40960 if (DF_REF_INSN_INFO (link->ref))
40962 rtx def_insn = DF_REF_INSN (link->ref);
40963 (void)unionfind_union (insn_entry + INSN_UID (insn),
40964 insn_entry + INSN_UID (def_insn));
40967 link = link->next;
40971 /* Union INSN with all insns containing uses reached from DEF.
40972 Detect whether DEF is live-out from the current function. */
40973 static void
40974 union_uses (swap_web_entry *insn_entry, rtx insn, df_ref def)
40976 struct df_link *link = DF_REF_CHAIN (def);
40978 if (!link)
40979 insn_entry[INSN_UID (insn)].is_live_out = 1;
40981 while (link)
40983 /* This could be an eh use or some other artificial use;
40984 we treat these all the same (killing the optimization). */
40985 if (DF_REF_IS_ARTIFICIAL (link->ref))
40986 insn_entry[INSN_UID (insn)].is_live_out = 1;
40988 if (DF_REF_INSN_INFO (link->ref))
40990 rtx use_insn = DF_REF_INSN (link->ref);
40991 (void)unionfind_union (insn_entry + INSN_UID (insn),
40992 insn_entry + INSN_UID (use_insn));
40995 link = link->next;
40999 /* Return 1 iff INSN is a load insn, including permuting loads that
41000 represent an lxvd2x instruction; else return 0. */
41001 static unsigned int
41002 insn_is_load_p (rtx insn)
41004 rtx body = PATTERN (insn);
41006 if (GET_CODE (body) == SET)
41008 if (GET_CODE (SET_SRC (body)) == MEM)
41009 return 1;
41011 if (GET_CODE (SET_SRC (body)) == VEC_SELECT
41012 && GET_CODE (XEXP (SET_SRC (body), 0)) == MEM)
41013 return 1;
41015 return 0;
41018 if (GET_CODE (body) != PARALLEL)
41019 return 0;
41021 rtx set = XVECEXP (body, 0, 0);
41023 if (GET_CODE (set) == SET && GET_CODE (SET_SRC (set)) == MEM)
41024 return 1;
41026 return 0;
41029 /* Return 1 iff INSN is a store insn, including permuting stores that
41030 represent an stxvd2x instruction; else return 0. */
41031 static unsigned int
41032 insn_is_store_p (rtx insn)
41034 rtx body = PATTERN (insn);
41035 if (GET_CODE (body) == SET && GET_CODE (SET_DEST (body)) == MEM)
41036 return 1;
41037 if (GET_CODE (body) != PARALLEL)
41038 return 0;
41039 rtx set = XVECEXP (body, 0, 0);
41040 if (GET_CODE (set) == SET && GET_CODE (SET_DEST (set)) == MEM)
41041 return 1;
41042 return 0;
41045 /* Return 1 iff INSN swaps doublewords. This may be a reg-reg swap,
41046 a permuting load, or a permuting store. */
41047 static unsigned int
41048 insn_is_swap_p (rtx insn)
41050 rtx body = PATTERN (insn);
41051 if (GET_CODE (body) != SET)
41052 return 0;
41053 rtx rhs = SET_SRC (body);
41054 if (GET_CODE (rhs) != VEC_SELECT)
41055 return 0;
41056 rtx parallel = XEXP (rhs, 1);
41057 if (GET_CODE (parallel) != PARALLEL)
41058 return 0;
41059 unsigned int len = XVECLEN (parallel, 0);
41060 if (len != 2 && len != 4 && len != 8 && len != 16)
41061 return 0;
41062 for (unsigned int i = 0; i < len / 2; ++i)
41064 rtx op = XVECEXP (parallel, 0, i);
41065 if (GET_CODE (op) != CONST_INT || INTVAL (op) != len / 2 + i)
41066 return 0;
41068 for (unsigned int i = len / 2; i < len; ++i)
41070 rtx op = XVECEXP (parallel, 0, i);
41071 if (GET_CODE (op) != CONST_INT || INTVAL (op) != i - len / 2)
41072 return 0;
41074 return 1;
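/* For example, the V2DI doubleword swap recognized above has the form
   (set (reg:V2DI d)
	(vec_select:V2DI (reg:V2DI s)
			 (parallel [(const_int 1) (const_int 0)])))
   while a V16QI swap uses the byte selector 8...15, 0...7.  */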
41077 /* Return TRUE if insn is a swap fed by a load from the constant pool. */
41078 static bool
41079 const_load_sequence_p (swap_web_entry *insn_entry, rtx insn)
41081 unsigned uid = INSN_UID (insn);
41082 if (!insn_entry[uid].is_swap || insn_entry[uid].is_load)
41083 return false;
41085 /* Find the unique use in the swap and locate its def. If the def
41086 isn't unique, punt. */
41087 struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
41088 df_ref use;
41089 FOR_EACH_INSN_INFO_USE (use, insn_info)
41091 struct df_link *def_link = DF_REF_CHAIN (use);
41092 if (!def_link || def_link->next)
41093 return false;
41095 rtx def_insn = DF_REF_INSN (def_link->ref);
41096 unsigned uid2 = INSN_UID (def_insn);
41097 if (!insn_entry[uid2].is_load || !insn_entry[uid2].is_swap)
41098 return false;
41100 rtx body = PATTERN (def_insn);
41101 if (GET_CODE (body) != SET
41102 || GET_CODE (SET_SRC (body)) != VEC_SELECT
41103 || GET_CODE (XEXP (SET_SRC (body), 0)) != MEM)
41104 return false;
41106 rtx mem = XEXP (SET_SRC (body), 0);
41107 rtx base_reg = XEXP (mem, 0);
41109 df_ref base_use;
41110 insn_info = DF_INSN_INFO_GET (def_insn);
41111 FOR_EACH_INSN_INFO_USE (base_use, insn_info)
41113 if (!rtx_equal_p (DF_REF_REG (base_use), base_reg))
41114 continue;
41116 struct df_link *base_def_link = DF_REF_CHAIN (base_use);
41117 if (!base_def_link || base_def_link->next)
41118 return false;
41120 rtx tocrel_insn = DF_REF_INSN (base_def_link->ref);
41121 rtx tocrel_body = PATTERN (tocrel_insn);
41122 rtx base, offset;
41123 if (GET_CODE (tocrel_body) != SET)
41124 return false;
41125 /* There is an extra level of indirection for small/large
41126 code models. */
41127 rtx tocrel_expr = SET_SRC (tocrel_body);
41128 if (GET_CODE (tocrel_expr) == MEM)
41129 tocrel_expr = XEXP (tocrel_expr, 0);
41130 if (!toc_relative_expr_p (tocrel_expr, false))
41131 return false;
41132 split_const (XVECEXP (tocrel_base, 0, 0), &base, &offset);
41133 if (GET_CODE (base) != SYMBOL_REF || !CONSTANT_POOL_ADDRESS_P (base))
41134 return false;
41137 return true;
41140 /* Return TRUE iff OP matches a V2DF reduction pattern. See the
41141 definition of vsx_reduc_<VEC_reduc_name>_v2df in vsx.md. */
41142 static bool
41143 v2df_reduction_p (rtx op)
41145 if (GET_MODE (op) != V2DFmode)
41146 return false;
41148 enum rtx_code code = GET_CODE (op);
41149 if (code != PLUS && code != SMIN && code != SMAX)
41150 return false;
41152 rtx concat = XEXP (op, 0);
41153 if (GET_CODE (concat) != VEC_CONCAT)
41154 return false;
41156 rtx select0 = XEXP (concat, 0);
41157 rtx select1 = XEXP (concat, 1);
41158 if (GET_CODE (select0) != VEC_SELECT || GET_CODE (select1) != VEC_SELECT)
41159 return false;
41161 rtx reg0 = XEXP (select0, 0);
41162 rtx reg1 = XEXP (select1, 0);
41163 if (!rtx_equal_p (reg0, reg1) || !REG_P (reg0))
41164 return false;
41166 rtx parallel0 = XEXP (select0, 1);
41167 rtx parallel1 = XEXP (select1, 1);
41168 if (GET_CODE (parallel0) != PARALLEL || GET_CODE (parallel1) != PARALLEL)
41169 return false;
41171 if (!rtx_equal_p (XVECEXP (parallel0, 0, 0), const1_rtx)
41172 || !rtx_equal_p (XVECEXP (parallel1, 0, 0), const0_rtx))
41173 return false;
41175 return true;
41178 /* Return 1 iff OP is an operand that will not be affected by having
41179 vector doublewords swapped in memory. */
41180 static unsigned int
41181 rtx_is_swappable_p (rtx op, unsigned int *special)
41183 enum rtx_code code = GET_CODE (op);
41184 int i, j;
41185 rtx parallel;
41187 switch (code)
41189 case LABEL_REF:
41190 case SYMBOL_REF:
41191 case CLOBBER:
41192 case REG:
41193 return 1;
41195 case VEC_CONCAT:
41196 case ASM_INPUT:
41197 case ASM_OPERANDS:
41198 return 0;
41200 case CONST_VECTOR:
41202 *special = SH_CONST_VECTOR;
41203 return 1;
41206 case VEC_DUPLICATE:
41207 /* Opportunity: If XEXP (op, 0) has the same mode as the result,
41208 and XEXP (op, 1) is a PARALLEL with a single QImode const int,
41209 it represents a vector splat for which we can do special
41210 handling. */
41211 if (GET_CODE (XEXP (op, 0)) == CONST_INT)
41212 return 1;
41213 else if (REG_P (XEXP (op, 0))
41214 && GET_MODE_INNER (GET_MODE (op)) == GET_MODE (XEXP (op, 0)))
41215 /* This catches V2DF and V2DI splat, at a minimum. */
41216 return 1;
41217 else if (GET_CODE (XEXP (op, 0)) == TRUNCATE
41218 && REG_P (XEXP (XEXP (op, 0), 0))
41219 && GET_MODE_INNER (GET_MODE (op)) == GET_MODE (XEXP (op, 0)))
41220 /* This catches splat of a truncated value. */
41221 return 1;
41222 else if (GET_CODE (XEXP (op, 0)) == VEC_SELECT)
41223 /* If the duplicated item is from a select, defer to the select
41224 processing to see if we can change the lane for the splat. */
41225 return rtx_is_swappable_p (XEXP (op, 0), special);
41226 else
41227 return 0;
41229 case VEC_SELECT:
41230 /* A vec_extract operation is ok if we change the lane. */
41231 if (GET_CODE (XEXP (op, 0)) == REG
41232 && GET_MODE_INNER (GET_MODE (XEXP (op, 0))) == GET_MODE (op)
41233 && GET_CODE ((parallel = XEXP (op, 1))) == PARALLEL
41234 && XVECLEN (parallel, 0) == 1
41235 && GET_CODE (XVECEXP (parallel, 0, 0)) == CONST_INT)
41237 *special = SH_EXTRACT;
41238 return 1;
41240 /* An XXPERMDI is ok if we adjust the lanes. Note that if the
41241 XXPERMDI is a swap operation, it will be identified by
41242 insn_is_swap_p and therefore we won't get here. */
41243 else if (GET_CODE (XEXP (op, 0)) == VEC_CONCAT
41244 && (GET_MODE (XEXP (op, 0)) == V4DFmode
41245 || GET_MODE (XEXP (op, 0)) == V4DImode)
41246 && GET_CODE ((parallel = XEXP (op, 1))) == PARALLEL
41247 && XVECLEN (parallel, 0) == 2
41248 && GET_CODE (XVECEXP (parallel, 0, 0)) == CONST_INT
41249 && GET_CODE (XVECEXP (parallel, 0, 1)) == CONST_INT)
41251 *special = SH_XXPERMDI;
41252 return 1;
41254 else if (v2df_reduction_p (op))
41255 return 1;
41256 else
41257 return 0;
41259 case UNSPEC:
41261 /* Various operations are unsafe for this optimization, at least
41262 without significant additional work. Permutes are obviously
41263 problematic, as both the permute control vector and the ordering
41264 of the target values are invalidated by doubleword swapping.
41265 Vector pack and unpack modify the number of vector lanes.
41266 Merge-high/low will not operate correctly on swapped operands.
41267 Vector shifts across element boundaries are clearly uncool,
41268 as are vector select and concatenate operations. Vector
41269 sum-across instructions define one operand with a specific
41270 order-dependent element, so additional fixup code would be
41271 needed to make those work. Vector set and non-immediate-form
41272 vector splat are element-order sensitive. A few of these
41273 cases might be workable with special handling if required.
41274 Adding cost modeling would be appropriate in some cases. */
41275 int val = XINT (op, 1);
41276 switch (val)
41278 default:
41279 break;
41280 case UNSPEC_VMRGH_DIRECT:
41281 case UNSPEC_VMRGL_DIRECT:
41282 case UNSPEC_VPACK_SIGN_SIGN_SAT:
41283 case UNSPEC_VPACK_SIGN_UNS_SAT:
41284 case UNSPEC_VPACK_UNS_UNS_MOD:
41285 case UNSPEC_VPACK_UNS_UNS_MOD_DIRECT:
41286 case UNSPEC_VPACK_UNS_UNS_SAT:
41287 case UNSPEC_VPERM:
41288 case UNSPEC_VPERM_UNS:
41289 case UNSPEC_VPERMHI:
41290 case UNSPEC_VPERMSI:
41291 case UNSPEC_VPKPX:
41292 case UNSPEC_VSLDOI:
41293 case UNSPEC_VSLO:
41294 case UNSPEC_VSRO:
41295 case UNSPEC_VSUM2SWS:
41296 case UNSPEC_VSUM4S:
41297 case UNSPEC_VSUM4UBS:
41298 case UNSPEC_VSUMSWS:
41299 case UNSPEC_VSUMSWS_DIRECT:
41300 case UNSPEC_VSX_CONCAT:
41301 case UNSPEC_VSX_SET:
41302 case UNSPEC_VSX_SLDWI:
41303 case UNSPEC_VUNPACK_HI_SIGN:
41304 case UNSPEC_VUNPACK_HI_SIGN_DIRECT:
41305 case UNSPEC_VUNPACK_LO_SIGN:
41306 case UNSPEC_VUNPACK_LO_SIGN_DIRECT:
41307 case UNSPEC_VUPKHPX:
41308 case UNSPEC_VUPKHS_V4SF:
41309 case UNSPEC_VUPKHU_V4SF:
41310 case UNSPEC_VUPKLPX:
41311 case UNSPEC_VUPKLS_V4SF:
41312 case UNSPEC_VUPKLU_V4SF:
41313 case UNSPEC_VSX_CVDPSPN:
41314 case UNSPEC_VSX_CVSPDP:
41315 case UNSPEC_VSX_CVSPDPN:
41316 case UNSPEC_VSX_EXTRACT:
41317 case UNSPEC_VSX_VSLO:
41318 case UNSPEC_VSX_VEC_INIT:
41319 return 0;
41320 case UNSPEC_VSPLT_DIRECT:
41321 *special = SH_SPLAT;
41322 return 1;
41323 case UNSPEC_REDUC_PLUS:
41324 case UNSPEC_REDUC:
41325 return 1;
41329 default:
41330 break;
41333 const char *fmt = GET_RTX_FORMAT (code);
41334 int ok = 1;
41336 for (i = 0; i < GET_RTX_LENGTH (code); ++i)
41337 if (fmt[i] == 'e' || fmt[i] == 'u')
41339 unsigned int special_op = SH_NONE;
41340 ok &= rtx_is_swappable_p (XEXP (op, i), &special_op);
41341 if (special_op == SH_NONE)
41342 continue;
41343 /* Ensure we never have two kinds of special handling
41344 for the same insn. */
41345 if (*special != SH_NONE && *special != special_op)
41346 return 0;
41347 *special = special_op;
41349 else if (fmt[i] == 'E')
41350 for (j = 0; j < XVECLEN (op, i); ++j)
41352 unsigned int special_op = SH_NONE;
41353 ok &= rtx_is_swappable_p (XVECEXP (op, i, j), &special_op);
41354 if (special_op == SH_NONE)
41355 continue;
41356 /* Ensure we never have two kinds of special handling
41357 for the same insn. */
41358 if (*special != SH_NONE && *special != special_op)
41359 return 0;
41360 *special = special_op;
41363 return ok;
41366 /* Return 1 iff INSN is an insn that will not be affected by
41367 having vector doublewords swapped in memory (in which case
41368 *SPECIAL is unchanged), or that can be modified to be correct
41369 if vector doublewords are swapped in memory (in which case
41370 *SPECIAL is changed to a value indicating how). */
41371 static unsigned int
41372 insn_is_swappable_p (swap_web_entry *insn_entry, rtx insn,
41373 unsigned int *special)
41375 /* Calls are always bad. */
41376 if (GET_CODE (insn) == CALL_INSN)
41377 return 0;
41379 /* Loads and stores seen here are not permuting, but we can still
41380 fix them up by converting them to permuting ones. Exceptions:
41381 UNSPEC_LVE, UNSPEC_LVX, and UNSPEC_STVX, which have a PARALLEL
41382 body instead of a SET; and UNSPEC_STVE, which has an UNSPEC
41383 for the SET source. Also we must now make an exception for lvx
41384 and stvx when they are not in the UNSPEC_LVX/STVX form (with the
41385 explicit "& -16") since this leads to unrecognizable insns. */
41386 rtx body = PATTERN (insn);
41387 int i = INSN_UID (insn);
41389 if (insn_entry[i].is_load)
41391 if (GET_CODE (body) == SET)
41393 rtx rhs = SET_SRC (body);
41394 /* Even without a swap, the RHS might be a vec_select for, say,
41395 a byte-reversing load. */
41396 if (GET_CODE (rhs) != MEM)
41397 return 0;
41398 if (GET_CODE (XEXP (rhs, 0)) == AND)
41399 return 0;
41401 *special = SH_NOSWAP_LD;
41402 return 1;
41404 else
41405 return 0;
41408 if (insn_entry[i].is_store)
41410 if (GET_CODE (body) == SET
41411 && GET_CODE (SET_SRC (body)) != UNSPEC)
41413 rtx lhs = SET_DEST (body);
41414 /* Even without a swap, the LHS might be a vec_select for, say,
41415 a byte-reversing store. */
41416 if (GET_CODE (lhs) != MEM)
41417 return 0;
41418 if (GET_CODE (XEXP (lhs, 0)) == AND)
41419 return 0;
41421 *special = SH_NOSWAP_ST;
41422 return 1;
41424 else
41425 return 0;
41428 /* A convert to single precision can be left as is provided that
41429 all of its uses are in xxspltw instructions that splat BE element
41430 zero. */
41431 if (GET_CODE (body) == SET
41432 && GET_CODE (SET_SRC (body)) == UNSPEC
41433 && XINT (SET_SRC (body), 1) == UNSPEC_VSX_CVDPSPN)
41435 df_ref def;
41436 struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
41438 FOR_EACH_INSN_INFO_DEF (def, insn_info)
41440 struct df_link *link = DF_REF_CHAIN (def);
41441 if (!link)
41442 return 0;
41444 for (; link; link = link->next) {
41445 rtx use_insn = DF_REF_INSN (link->ref);
41446 rtx use_body = PATTERN (use_insn);
41447 if (GET_CODE (use_body) != SET
41448 || GET_CODE (SET_SRC (use_body)) != UNSPEC
41449 || XINT (SET_SRC (use_body), 1) != UNSPEC_VSX_XXSPLTW
41450 || XVECEXP (SET_SRC (use_body), 0, 1) != const0_rtx)
41451 return 0;
41455 return 1;
41458 /* A concatenation of two doublewords is ok if we reverse the
41459 order of the inputs. */
41460 if (GET_CODE (body) == SET
41461 && GET_CODE (SET_SRC (body)) == VEC_CONCAT
41462 && (GET_MODE (SET_SRC (body)) == V2DFmode
41463 || GET_MODE (SET_SRC (body)) == V2DImode))
41465 *special = SH_CONCAT;
41466 return 1;
41469 /* V2DF reductions are always swappable. */
41470 if (GET_CODE (body) == PARALLEL)
41472 rtx expr = XVECEXP (body, 0, 0);
41473 if (GET_CODE (expr) == SET
41474 && v2df_reduction_p (SET_SRC (expr)))
41475 return 1;
41478 /* An UNSPEC_VPERM is ok if the mask operand is loaded from the
41479 constant pool. */
41480 if (GET_CODE (body) == SET
41481 && GET_CODE (SET_SRC (body)) == UNSPEC
41482 && XINT (SET_SRC (body), 1) == UNSPEC_VPERM
41483 && XVECLEN (SET_SRC (body), 0) == 3
41484 && GET_CODE (XVECEXP (SET_SRC (body), 0, 2)) == REG)
41486 rtx mask_reg = XVECEXP (SET_SRC (body), 0, 2);
41487 struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
41488 df_ref use;
41489 FOR_EACH_INSN_INFO_USE (use, insn_info)
41490 if (rtx_equal_p (DF_REF_REG (use), mask_reg))
41492 struct df_link *def_link = DF_REF_CHAIN (use);
41493 /* Punt if multiple definitions for this reg. */
41494 if (def_link && !def_link->next
41495 && const_load_sequence_p (insn_entry,
41496 DF_REF_INSN (def_link->ref)))
41498 *special = SH_VPERM;
41499 return 1;
41504 /* Otherwise check the operands for vector lane violations. */
41505 return rtx_is_swappable_p (body, special);
41508 enum chain_purpose { FOR_LOADS, FOR_STORES };
41510 /* Return true if the UD or DU chain headed by LINK is non-empty,
41511 and every entry on the chain references an insn that is a
41512 register swap. Furthermore, if PURPOSE is FOR_LOADS, each such
41513 register swap must have only permuting loads as reaching defs.
41514 If PURPOSE is FOR_STORES, each such register swap must have only
41515 register swaps or permuting stores as reached uses. */
41516 static bool
41517 chain_contains_only_swaps (swap_web_entry *insn_entry, struct df_link *link,
41518 enum chain_purpose purpose)
41520 if (!link)
41521 return false;
41523 for (; link; link = link->next)
41525 if (!ALTIVEC_OR_VSX_VECTOR_MODE (GET_MODE (DF_REF_REG (link->ref))))
41526 continue;
41528 if (DF_REF_IS_ARTIFICIAL (link->ref))
41529 return false;
41531 rtx reached_insn = DF_REF_INSN (link->ref);
41532 unsigned uid = INSN_UID (reached_insn);
41533 struct df_insn_info *insn_info = DF_INSN_INFO_GET (reached_insn);
41535 if (!insn_entry[uid].is_swap || insn_entry[uid].is_load
41536 || insn_entry[uid].is_store)
41537 return false;
41539 if (purpose == FOR_LOADS)
41541 df_ref use;
41542 FOR_EACH_INSN_INFO_USE (use, insn_info)
41544 struct df_link *swap_link = DF_REF_CHAIN (use);
41546 while (swap_link)
41548 if (DF_REF_IS_ARTIFICIAL (swap_link->ref))
41549 return false;
41551 rtx swap_def_insn = DF_REF_INSN (swap_link->ref);
41552 unsigned uid2 = INSN_UID (swap_def_insn);
41554 /* Only permuting loads are allowed. */
41555 if (!insn_entry[uid2].is_swap || !insn_entry[uid2].is_load)
41556 return false;
41558 swap_link = swap_link->next;
41562 else if (purpose == FOR_STORES)
41564 df_ref def;
41565 FOR_EACH_INSN_INFO_DEF (def, insn_info)
41567 struct df_link *swap_link = DF_REF_CHAIN (def);
41569 while (swap_link)
41571 if (DF_REF_IS_ARTIFICIAL (swap_link->ref))
41572 return false;
41574 rtx swap_use_insn = DF_REF_INSN (swap_link->ref);
41575 unsigned uid2 = INSN_UID (swap_use_insn);
41577 /* Permuting stores or register swaps are allowed. */
41578 if (!insn_entry[uid2].is_swap || insn_entry[uid2].is_load)
41579 return false;
41581 swap_link = swap_link->next;
41587 return true;
41590 /* Mark the xxswapdi instructions associated with permuting loads and
41591 stores for removal. Note that we only flag them for deletion here,
41592 as there is a possibility of a swap being reached from multiple
41593 loads, etc. */
41594 static void
41595 mark_swaps_for_removal (swap_web_entry *insn_entry, unsigned int i)
41597 rtx insn = insn_entry[i].insn;
41598 struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
41600 if (insn_entry[i].is_load)
41602 df_ref def;
41603 FOR_EACH_INSN_INFO_DEF (def, insn_info)
41605 struct df_link *link = DF_REF_CHAIN (def);
41607 /* We know by now that these are swaps, so we can delete
41608 them confidently. */
41609 while (link)
41611 rtx use_insn = DF_REF_INSN (link->ref);
41612 insn_entry[INSN_UID (use_insn)].will_delete = 1;
41613 link = link->next;
41617 else if (insn_entry[i].is_store)
41619 df_ref use;
41620 FOR_EACH_INSN_INFO_USE (use, insn_info)
41622 /* Ignore uses for addressability. */
41623 machine_mode mode = GET_MODE (DF_REF_REG (use));
41624 if (!ALTIVEC_OR_VSX_VECTOR_MODE (mode))
41625 continue;
41627 struct df_link *link = DF_REF_CHAIN (use);
41629 /* We know by now that these are swaps, so we can delete
41630 them confidently. */
41631 while (link)
41633 rtx def_insn = DF_REF_INSN (link->ref);
41634 insn_entry[INSN_UID (def_insn)].will_delete = 1;
41635 link = link->next;
41641 /* OP is either a CONST_VECTOR or an expression containing one.
41642 Swap the first half of the vector with the second in the first
41643 case. Recurse to find it in the second. */
41644 static void
41645 swap_const_vector_halves (rtx op)
41647 int i;
41648 enum rtx_code code = GET_CODE (op);
41649 if (GET_CODE (op) == CONST_VECTOR)
41651 int half_units = GET_MODE_NUNITS (GET_MODE (op)) / 2;
41652 for (i = 0; i < half_units; ++i)
41654 rtx temp = CONST_VECTOR_ELT (op, i);
41655 CONST_VECTOR_ELT (op, i) = CONST_VECTOR_ELT (op, i + half_units);
41656 CONST_VECTOR_ELT (op, i + half_units) = temp;
41659 else
41661 int j;
41662 const char *fmt = GET_RTX_FORMAT (code);
41663 for (i = 0; i < GET_RTX_LENGTH (code); ++i)
41664 if (fmt[i] == 'e' || fmt[i] == 'u')
41665 swap_const_vector_halves (XEXP (op, i));
41666 else if (fmt[i] == 'E')
41667 for (j = 0; j < XVECLEN (op, i); ++j)
41668 swap_const_vector_halves (XVECEXP (op, i, j));
41672 /* Find all subregs of a vector expression that perform a narrowing,
41673 and adjust the subreg index to account for doubleword swapping. */
41674 static void
41675 adjust_subreg_index (rtx op)
41677 enum rtx_code code = GET_CODE (op);
41678 if (code == SUBREG
41679 && (GET_MODE_SIZE (GET_MODE (op))
41680 < GET_MODE_SIZE (GET_MODE (XEXP (op, 0)))))
41682 unsigned int index = SUBREG_BYTE (op);
41683 if (index < 8)
41684 index += 8;
41685 else
41686 index -= 8;
41687 SUBREG_BYTE (op) = index;
41690 const char *fmt = GET_RTX_FORMAT (code);
41691 int i,j;
41692 for (i = 0; i < GET_RTX_LENGTH (code); ++i)
41693 if (fmt[i] == 'e' || fmt[i] == 'u')
41694 adjust_subreg_index (XEXP (op, i));
41695 else if (fmt[i] == 'E')
41696 for (j = 0; j < XVECLEN (op, i); ++j)
41697 adjust_subreg_index (XVECEXP (op, i, j));
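/* For example, (subreg:DI (reg:V2DI v) 0) names the doubleword at
   byte offset 0; once V's doublewords are swapped, the same data
   lives at offset 8, so SUBREG_BYTE is changed from 0 to 8 (and
   vice versa).  */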
41700 /* Convert the non-permuting load INSN to a permuting one. */
41701 static void
41702 permute_load (rtx_insn *insn)
41704 rtx body = PATTERN (insn);
41705 rtx mem_op = SET_SRC (body);
41706 rtx tgt_reg = SET_DEST (body);
41707 machine_mode mode = GET_MODE (tgt_reg);
41708 int n_elts = GET_MODE_NUNITS (mode);
41709 int half_elts = n_elts / 2;
41710 rtx par = gen_rtx_PARALLEL (mode, rtvec_alloc (n_elts));
41711 int i, j;
41712 for (i = 0, j = half_elts; i < half_elts; ++i, ++j)
41713 XVECEXP (par, 0, i) = GEN_INT (j);
41714 for (i = half_elts, j = 0; j < half_elts; ++i, ++j)
41715 XVECEXP (par, 0, i) = GEN_INT (j);
41716 rtx sel = gen_rtx_VEC_SELECT (mode, mem_op, par);
41717 SET_SRC (body) = sel;
41718 INSN_CODE (insn) = -1; /* Force re-recognition. */
41719 df_insn_rescan (insn);
41721 if (dump_file)
41722 fprintf (dump_file, "Replacing load %d with permuted load\n",
41723 INSN_UID (insn));
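/* For a V4SI load, the selector built above is (parallel [2 3 0 1]),
   i.e. the two doubleword halves are exchanged, turning the plain
   load into the permuting form.  */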
41726 /* Convert the non-permuting store INSN to a permuting one. */
41727 static void
41728 permute_store (rtx_insn *insn)
41730 rtx body = PATTERN (insn);
41731 rtx src_reg = SET_SRC (body);
41732 machine_mode mode = GET_MODE (src_reg);
41733 int n_elts = GET_MODE_NUNITS (mode);
41734 int half_elts = n_elts / 2;
41735 rtx par = gen_rtx_PARALLEL (mode, rtvec_alloc (n_elts));
41736 int i, j;
41737 for (i = 0, j = half_elts; i < half_elts; ++i, ++j)
41738 XVECEXP (par, 0, i) = GEN_INT (j);
41739 for (i = half_elts, j = 0; j < half_elts; ++i, ++j)
41740 XVECEXP (par, 0, i) = GEN_INT (j);
41741 rtx sel = gen_rtx_VEC_SELECT (mode, src_reg, par);
41742 SET_SRC (body) = sel;
41743 INSN_CODE (insn) = -1; /* Force re-recognition. */
41744 df_insn_rescan (insn);
41746 if (dump_file)
41747 fprintf (dump_file, "Replacing store %d with permuted store\n",
41748 INSN_UID (insn));
41751 /* Given INSN that contains a vector extract operation, adjust the index
41752 of the extracted lane to account for the doubleword swap. */
41753 static void
41754 adjust_extract (rtx_insn *insn)
41756 rtx pattern = PATTERN (insn);
41757 if (GET_CODE (pattern) == PARALLEL)
41758 pattern = XVECEXP (pattern, 0, 0);
41759 rtx src = SET_SRC (pattern);
41760 /* The vec_select may be wrapped in a vec_duplicate for a splat, so
41761 account for that. */
41762 rtx sel = GET_CODE (src) == VEC_DUPLICATE ? XEXP (src, 0) : src;
41763 rtx par = XEXP (sel, 1);
41764 int half_elts = GET_MODE_NUNITS (GET_MODE (XEXP (sel, 0))) >> 1;
41765 int lane = INTVAL (XVECEXP (par, 0, 0));
41766 lane = lane >= half_elts ? lane - half_elts : lane + half_elts;
41767 XVECEXP (par, 0, 0) = GEN_INT (lane);
41768 INSN_CODE (insn) = -1; /* Force re-recognition. */
41769 df_insn_rescan (insn);
41771 if (dump_file)
41772 fprintf (dump_file, "Changing lane for extract %d\n", INSN_UID (insn));
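/* For example, with a V4SI source (half_elts == 2), extracting lane 0
   becomes extracting lane 2 once the doublewords are swapped, and
   lane 3 becomes lane 1.  */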
41775 /* Given INSN that contains a vector direct-splat operation, adjust the index
41776 of the source lane to account for the doubleword swap. */
41777 static void
41778 adjust_splat (rtx_insn *insn)
41780 rtx body = PATTERN (insn);
41781 rtx unspec = XEXP (body, 1);
41782 int half_elts = GET_MODE_NUNITS (GET_MODE (unspec)) >> 1;
41783 int lane = INTVAL (XVECEXP (unspec, 0, 1));
41784 lane = lane >= half_elts ? lane - half_elts : lane + half_elts;
41785 XVECEXP (unspec, 0, 1) = GEN_INT (lane);
41786 INSN_CODE (insn) = -1; /* Force re-recognition. */
41787 df_insn_rescan (insn);
41789 if (dump_file)
41790 fprintf (dump_file, "Changing lane for splat %d\n", INSN_UID (insn));
41793 /* Given INSN that contains an XXPERMDI operation (that is not a doubleword
41794 swap), reverse the order of the source operands and adjust the indices
41795 of the source lanes to account for doubleword reversal. */
41796 static void
41797 adjust_xxpermdi (rtx_insn *insn)
41799 rtx set = PATTERN (insn);
41800 rtx select = XEXP (set, 1);
41801 rtx concat = XEXP (select, 0);
41802 rtx src0 = XEXP (concat, 0);
41803 XEXP (concat, 0) = XEXP (concat, 1);
41804 XEXP (concat, 1) = src0;
41805 rtx parallel = XEXP (select, 1);
41806 int lane0 = INTVAL (XVECEXP (parallel, 0, 0));
41807 int lane1 = INTVAL (XVECEXP (parallel, 0, 1));
41808 int new_lane0 = 3 - lane1;
41809 int new_lane1 = 3 - lane0;
41810 XVECEXP (parallel, 0, 0) = GEN_INT (new_lane0);
41811 XVECEXP (parallel, 0, 1) = GEN_INT (new_lane1);
41812 INSN_CODE (insn) = -1; /* Force re-recognition. */
41813 df_insn_rescan (insn);
41815 if (dump_file)
41816 fprintf (dump_file, "Changing lanes for xxpermdi %d\n", INSN_UID (insn));
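/* For example, original lanes (0, 2) become (3 - 2, 3 - 0) == (1, 3),
   selecting the same doublewords from the swapped and reordered
   sources.  */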
41819 /* Given INSN that contains a VEC_CONCAT operation of two doublewords,
41820 reverse the order of those inputs. */
41821 static void
41822 adjust_concat (rtx_insn *insn)
41824 rtx set = PATTERN (insn);
41825 rtx concat = XEXP (set, 1);
41826 rtx src0 = XEXP (concat, 0);
41827 XEXP (concat, 0) = XEXP (concat, 1);
41828 XEXP (concat, 1) = src0;
41829 INSN_CODE (insn) = -1; /* Force re-recognition. */
41830 df_insn_rescan (insn);
41832 if (dump_file)
41833 fprintf (dump_file, "Reversing inputs for concat %d\n", INSN_UID (insn));
41836 /* Given an UNSPEC_VPERM insn, modify the mask loaded from the
41837 constant pool to reflect swapped doublewords. */
41838 static void
41839 adjust_vperm (rtx_insn *insn)
41841 /* We previously determined that the UNSPEC_VPERM was fed by a
41842 swap of a swapping load of a TOC-relative constant pool symbol.
41843 Find the MEM in the swapping load and replace it with a MEM for
41844 the adjusted mask constant. */
41845 rtx set = PATTERN (insn);
41846 rtx mask_reg = XVECEXP (SET_SRC (set), 0, 2);
41848 /* Find the swap. */
41849 struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
41850 df_ref use;
41851 rtx_insn *swap_insn = 0;
41852 FOR_EACH_INSN_INFO_USE (use, insn_info)
41853 if (rtx_equal_p (DF_REF_REG (use), mask_reg))
41855 struct df_link *def_link = DF_REF_CHAIN (use);
41856 gcc_assert (def_link && !def_link->next);
41857 swap_insn = DF_REF_INSN (def_link->ref);
41858 break;
41860 gcc_assert (swap_insn);
41862 /* Find the load. */
41863 insn_info = DF_INSN_INFO_GET (swap_insn);
41864 rtx_insn *load_insn = 0;
41865 FOR_EACH_INSN_INFO_USE (use, insn_info)
41867 struct df_link *def_link = DF_REF_CHAIN (use);
41868 gcc_assert (def_link && !def_link->next);
41869 load_insn = DF_REF_INSN (def_link->ref);
41870 break;
41872 gcc_assert (load_insn);
41874 /* Find the TOC-relative symbol access. */
41875 insn_info = DF_INSN_INFO_GET (load_insn);
41876 rtx_insn *tocrel_insn = 0;
41877 FOR_EACH_INSN_INFO_USE (use, insn_info)
41879 struct df_link *def_link = DF_REF_CHAIN (use);
41880 gcc_assert (def_link && !def_link->next);
41881 tocrel_insn = DF_REF_INSN (def_link->ref);
41882 break;
41884 gcc_assert (tocrel_insn);
41886 /* Find the embedded CONST_VECTOR. We have to call toc_relative_expr_p
41887 to set tocrel_base; otherwise it would be unnecessary as we've
41888 already established it will return true. */
41889 rtx base, offset;
41890 rtx tocrel_expr = SET_SRC (PATTERN (tocrel_insn));
41891 /* There is an extra level of indirection for small/large code models. */
41892 if (GET_CODE (tocrel_expr) == MEM)
41893 tocrel_expr = XEXP (tocrel_expr, 0);
41894 if (!toc_relative_expr_p (tocrel_expr, false))
41895 gcc_unreachable ();
41896 split_const (XVECEXP (tocrel_base, 0, 0), &base, &offset);
41897 rtx const_vector = get_pool_constant (base);
41898 /* With the extra indirection, get_pool_constant will produce the
41899 real constant from the reg_equal expression, so get the real
41900 constant. */
41901 if (GET_CODE (const_vector) == SYMBOL_REF)
41902 const_vector = get_pool_constant (const_vector);
41903 gcc_assert (GET_CODE (const_vector) == CONST_VECTOR);
41905 /* Create an adjusted mask from the initial mask. */
41906 unsigned int new_mask[16], i, val;
41907 for (i = 0; i < 16; ++i) {
41908 val = INTVAL (XVECEXP (const_vector, 0, i));
41909 if (val < 16)
41910 new_mask[i] = (val + 8) % 16;
41911 else
41912 new_mask[i] = ((val + 8) % 16) + 16;
41915 /* Create a new CONST_VECTOR and a MEM that references it. */
41916 rtx vals = gen_rtx_PARALLEL (V16QImode, rtvec_alloc (16));
41917 for (i = 0; i < 16; ++i)
41918 XVECEXP (vals, 0, i) = GEN_INT (new_mask[i]);
41919 rtx new_const_vector = gen_rtx_CONST_VECTOR (V16QImode, XVEC (vals, 0));
41920 rtx new_mem = force_const_mem (V16QImode, new_const_vector);
41921 /* This gives us a MEM whose base operand is a SYMBOL_REF, which we
41922 can't recognize. Force the SYMBOL_REF into a register. */
41923 if (!REG_P (XEXP (new_mem, 0))) {
41924 rtx base_reg = force_reg (Pmode, XEXP (new_mem, 0));
41925 XEXP (new_mem, 0) = base_reg;
41926 /* Move the newly created insn ahead of the load insn. */
41927 rtx_insn *force_insn = get_last_insn ();
41928 remove_insn (force_insn);
41929 rtx_insn *before_load_insn = PREV_INSN (load_insn);
41930 add_insn_after (force_insn, before_load_insn, BLOCK_FOR_INSN (load_insn));
41931 df_insn_rescan (before_load_insn);
41932 df_insn_rescan (force_insn);
41935 /* Replace the MEM in the load instruction and rescan it. */
41936 XEXP (SET_SRC (PATTERN (load_insn)), 0) = new_mem;
41937 INSN_CODE (load_insn) = -1; /* Force re-recognition. */
41938 df_insn_rescan (load_insn);
41940 if (dump_file)
41941 fprintf (dump_file, "Adjusting mask for vperm %d\n", INSN_UID (insn));
41944 /* The insn described by INSN_ENTRY[I] can be swapped, but only
41945 with special handling. Take care of that here. */
41946 static void
41947 handle_special_swappables (swap_web_entry *insn_entry, unsigned i)
41949 rtx_insn *insn = insn_entry[i].insn;
41950 rtx body = PATTERN (insn);
41952 switch (insn_entry[i].special_handling)
41954 default:
41955 gcc_unreachable ();
41956 case SH_CONST_VECTOR:
41958 /* A CONST_VECTOR will only show up somewhere in the RHS of a SET. */
41959 gcc_assert (GET_CODE (body) == SET);
41960 rtx rhs = SET_SRC (body);
41961 swap_const_vector_halves (rhs);
41962 if (dump_file)
41963 fprintf (dump_file, "Swapping constant halves in insn %d\n", i);
41964 break;
41966 case SH_SUBREG:
41967 /* A subreg of the same size is already safe. For subregs that
41968 select a smaller portion of a reg, adjust the index for
41969 swapped doublewords. */
41970 adjust_subreg_index (body);
41971 if (dump_file)
41972 fprintf (dump_file, "Adjusting subreg in insn %d\n", i);
41973 break;
41974 case SH_NOSWAP_LD:
41975 /* Convert a non-permuting load to a permuting one. */
41976 permute_load (insn);
41977 break;
41978 case SH_NOSWAP_ST:
41979 /* Convert a non-permuting store to a permuting one. */
41980 permute_store (insn);
41981 break;
41982 case SH_EXTRACT:
41983 /* Change the lane on an extract operation. */
41984 adjust_extract (insn);
41985 break;
41986 case SH_SPLAT:
41987 /* Change the lane on a direct-splat operation. */
41988 adjust_splat (insn);
41989 break;
41990 case SH_XXPERMDI:
41991 /* Change the lanes on an XXPERMDI operation. */
41992 adjust_xxpermdi (insn);
41993 break;
41994 case SH_CONCAT:
41995 /* Reverse the order of a concatenation operation. */
41996 adjust_concat (insn);
41997 break;
41998 case SH_VPERM:
41999 /* Change the mask loaded from the constant pool for a VPERM. */
42000 adjust_vperm (insn);
42001 break;
42005 /* Find the insn from the Ith table entry, which is known to be a
42006 register swap Y = SWAP(X). Replace it with a copy Y = X. */
42007 static void
42008 replace_swap_with_copy (swap_web_entry *insn_entry, unsigned i)
42010 rtx_insn *insn = insn_entry[i].insn;
42011 rtx body = PATTERN (insn);
42012 rtx src_reg = XEXP (SET_SRC (body), 0);
42013 rtx copy = gen_rtx_SET (SET_DEST (body), src_reg);
42014 rtx_insn *new_insn = emit_insn_before (copy, insn);
42015 set_block_for_insn (new_insn, BLOCK_FOR_INSN (insn));
42016 df_insn_rescan (new_insn);
42018 if (dump_file)
42020 unsigned int new_uid = INSN_UID (new_insn);
42021 fprintf (dump_file, "Replacing swap %d with copy %d\n", i, new_uid);
42024 df_insn_delete (insn);
42025 remove_insn (insn);
42026 insn->set_deleted ();
42029 /* Dump the swap table to DUMP_FILE. */
42030 static void
42031 dump_swap_insn_table (swap_web_entry *insn_entry)
42033 int e = get_max_uid ();
42034 fprintf (dump_file, "\nRelevant insns with their flag settings\n\n");
42036 for (int i = 0; i < e; ++i)
42037 if (insn_entry[i].is_relevant)
42039 swap_web_entry *pred_entry = (swap_web_entry *)insn_entry[i].pred ();
42040 fprintf (dump_file, "%6d %6d ", i,
42041 pred_entry && pred_entry->insn
42042 ? INSN_UID (pred_entry->insn) : 0);
42043 if (insn_entry[i].is_load)
42044 fputs ("load ", dump_file);
42045 if (insn_entry[i].is_store)
42046 fputs ("store ", dump_file);
42047 if (insn_entry[i].is_swap)
42048 fputs ("swap ", dump_file);
42049 if (insn_entry[i].is_live_in)
42050 fputs ("live-in ", dump_file);
42051 if (insn_entry[i].is_live_out)
42052 fputs ("live-out ", dump_file);
42053 if (insn_entry[i].contains_subreg)
42054 fputs ("subreg ", dump_file);
42055 if (insn_entry[i].is_128_int)
42056 fputs ("int128 ", dump_file);
42057 if (insn_entry[i].is_call)
42058 fputs ("call ", dump_file);
42059 if (insn_entry[i].is_swappable)
42061 fputs ("swappable ", dump_file);
42062 if (insn_entry[i].special_handling == SH_CONST_VECTOR)
42063 fputs ("special:constvec ", dump_file);
42064 else if (insn_entry[i].special_handling == SH_SUBREG)
42065 fputs ("special:subreg ", dump_file);
42066 else if (insn_entry[i].special_handling == SH_NOSWAP_LD)
42067 fputs ("special:load ", dump_file);
42068 else if (insn_entry[i].special_handling == SH_NOSWAP_ST)
42069 fputs ("special:store ", dump_file);
42070 else if (insn_entry[i].special_handling == SH_EXTRACT)
42071 fputs ("special:extract ", dump_file);
42072 else if (insn_entry[i].special_handling == SH_SPLAT)
42073 fputs ("special:splat ", dump_file);
42074 else if (insn_entry[i].special_handling == SH_XXPERMDI)
42075 fputs ("special:xxpermdi ", dump_file);
42076 else if (insn_entry[i].special_handling == SH_CONCAT)
42077 fputs ("special:concat ", dump_file);
42078 else if (insn_entry[i].special_handling == SH_VPERM)
42079 fputs ("special:vperm ", dump_file);
42081 if (insn_entry[i].web_not_optimizable)
42082 fputs ("unoptimizable ", dump_file);
42083 if (insn_entry[i].will_delete)
42084 fputs ("delete ", dump_file);
42085 fputs ("\n", dump_file);
42087 fputs ("\n", dump_file);
42090 /* Given ALIGN, an rtx of the form (and addr (const_int -16)), return
42091 a copy with the address canonicalized to (reg) or (plus reg reg).
42092 Always return a new copy to avoid problems with combine. */
42093 static rtx
42094 alignment_with_canonical_addr (rtx align)
42096 rtx canon;
42097 rtx addr = XEXP (align, 0);
42099 if (REG_P (addr))
42100 canon = addr;
42102 else if (GET_CODE (addr) == PLUS)
42104 rtx addrop0 = XEXP (addr, 0);
42105 rtx addrop1 = XEXP (addr, 1);
42107 if (!REG_P (addrop0))
42108 addrop0 = force_reg (GET_MODE (addrop0), addrop0);
42110 if (!REG_P (addrop1))
42111 addrop1 = force_reg (GET_MODE (addrop1), addrop1);
42113 canon = gen_rtx_PLUS (GET_MODE (addr), addrop0, addrop1);
42116 else
42117 canon = force_reg (GET_MODE (addr), addr);
42119 return gen_rtx_AND (GET_MODE (align), canon, GEN_INT (-16));
42122 /* Check whether INSN computes an alignment mask, and if so, return
42123 a fully-expanded rtx for the masking operation. */
42124 static rtx
42125 alignment_mask (rtx_insn *insn)
42127 rtx body = PATTERN (insn);
42129 if (GET_CODE (body) != SET
42130 || GET_CODE (SET_SRC (body)) != AND
42131 || !REG_P (XEXP (SET_SRC (body), 0)))
42132 return 0;
42134 rtx mask = XEXP (SET_SRC (body), 1);
42136 if (GET_CODE (mask) == CONST_INT)
42138 if (INTVAL (mask) == -16)
42139 return alignment_with_canonical_addr (SET_SRC (body));
42140 else
42141 return 0;
42144 if (!REG_P (mask))
42145 return 0;
42147 struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
42148 df_ref use;
42149 rtx real_mask = 0;
42151 FOR_EACH_INSN_INFO_USE (use, insn_info)
42153 if (!rtx_equal_p (DF_REF_REG (use), mask))
42154 continue;
42156 struct df_link *def_link = DF_REF_CHAIN (use);
42157 if (!def_link || def_link->next)
42158 return 0;
42160 rtx_insn *const_insn = DF_REF_INSN (def_link->ref);
42161 rtx const_body = PATTERN (const_insn);
42162 if (GET_CODE (const_body) != SET)
42163 return 0;
42165 real_mask = SET_SRC (const_body);
42167 if (GET_CODE (real_mask) != CONST_INT
42168 || INTVAL (real_mask) != -16)
42169 return 0;
42172 if (real_mask == 0)
42173 return 0;
42175 return alignment_with_canonical_addr (SET_SRC (body));
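/* Schematically, the two shapes recognized above are
   (set (reg A) (and (reg B) (const_int -16)))   and
   (set (reg A) (and (reg B) (reg M)))
   where M's unique reaching definition sets it to -16.  */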
42178 /* Given INSN that's a load or store based at BASE_REG, look for a
42179 feeding computation that aligns its address on a 16-byte boundary. */
42180 static rtx
42181 find_alignment_op (rtx_insn *insn, rtx base_reg)
42183 df_ref base_use;
42184 struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
42185 rtx and_operation = 0;
42187 FOR_EACH_INSN_INFO_USE (base_use, insn_info)
42189 if (!rtx_equal_p (DF_REF_REG (base_use), base_reg))
42190 continue;
42192 struct df_link *base_def_link = DF_REF_CHAIN (base_use);
42193 if (!base_def_link || base_def_link->next)
42194 break;
42196 /* With stack-protector code enabled, and possibly in other
42197 circumstances, there may not be an associated insn for
42198 the def. */
42199 if (DF_REF_IS_ARTIFICIAL (base_def_link->ref))
42200 break;
42202 rtx_insn *and_insn = DF_REF_INSN (base_def_link->ref);
42203 and_operation = alignment_mask (and_insn);
42204 if (and_operation != 0)
42205 break;
42208 return and_operation;
42211 struct del_info { bool replace; rtx_insn *replace_insn; };
42213 /* If INSN is the load for an lvx pattern, put it in canonical form. */
42214 static void
42215 recombine_lvx_pattern (rtx_insn *insn, del_info *to_delete)
42217 rtx body = PATTERN (insn);
42218 gcc_assert (GET_CODE (body) == SET
42219 && GET_CODE (SET_SRC (body)) == VEC_SELECT
42220 && GET_CODE (XEXP (SET_SRC (body), 0)) == MEM);
42222 rtx mem = XEXP (SET_SRC (body), 0);
42223 rtx base_reg = XEXP (mem, 0);
42225 rtx and_operation = find_alignment_op (insn, base_reg);
42227 if (and_operation != 0)
42229 df_ref def;
42230 struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
42231 FOR_EACH_INSN_INFO_DEF (def, insn_info)
42233 struct df_link *link = DF_REF_CHAIN (def);
42234 if (!link || link->next)
42235 break;
42237 rtx_insn *swap_insn = DF_REF_INSN (link->ref);
42238 if (!insn_is_swap_p (swap_insn)
42239 || insn_is_load_p (swap_insn)
42240 || insn_is_store_p (swap_insn))
42241 break;
42243 /* Expected lvx pattern found. Change the swap to
42244 a copy, and propagate the AND operation into the
42245 load. */
42246 to_delete[INSN_UID (swap_insn)].replace = true;
42247 to_delete[INSN_UID (swap_insn)].replace_insn = swap_insn;
42249 XEXP (mem, 0) = and_operation;
42250 SET_SRC (body) = mem;
42251 INSN_CODE (insn) = -1; /* Force re-recognition. */
42252 df_insn_rescan (insn);
42254 if (dump_file)
42255 fprintf (dump_file, "lvx opportunity found at %d\n",
42256 INSN_UID (insn));
42261 /* If INSN is the store for an stvx pattern, put it in canonical form. */
42262 static void
42263 recombine_stvx_pattern (rtx_insn *insn, del_info *to_delete)
42265 rtx body = PATTERN (insn);
42266 gcc_assert (GET_CODE (body) == SET
42267 && GET_CODE (SET_DEST (body)) == MEM
42268 && GET_CODE (SET_SRC (body)) == VEC_SELECT);
42269 rtx mem = SET_DEST (body);
42270 rtx base_reg = XEXP (mem, 0);
42272 rtx and_operation = find_alignment_op (insn, base_reg);
42274 if (and_operation != 0)
42276 rtx src_reg = XEXP (SET_SRC (body), 0);
42277 df_ref src_use;
42278 struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
42279 FOR_EACH_INSN_INFO_USE (src_use, insn_info)
42281 if (!rtx_equal_p (DF_REF_REG (src_use), src_reg))
42282 continue;
42284 struct df_link *link = DF_REF_CHAIN (src_use);
42285 if (!link || link->next)
42286 break;
42288 rtx_insn *swap_insn = DF_REF_INSN (link->ref);
42289 if (!insn_is_swap_p (swap_insn)
42290 || insn_is_load_p (swap_insn)
42291 || insn_is_store_p (swap_insn))
42292 break;
42294 /* Expected stvx pattern found. Change the swap to
42295 a copy, and propagate the AND operation into the
42296 store. */
42297 to_delete[INSN_UID (swap_insn)].replace = true;
42298 to_delete[INSN_UID (swap_insn)].replace_insn = swap_insn;
42300 XEXP (mem, 0) = and_operation;
42301 SET_SRC (body) = src_reg;
42302 INSN_CODE (insn) = -1; /* Force re-recognition. */
42303 df_insn_rescan (insn);
42305 if (dump_file)
42306 fprintf (dump_file, "stvx opportunity found at %d\n",
42307 INSN_UID (insn));
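/* The store case mirrors the load case (illustrative RTL):

     (set (mem:V16QI (reg:DI b)) (vec_select:V16QI (reg:V16QI v) ...))

   preceded by a swap that produces V becomes

     (set (mem:V16QI (and:DI (reg:DI a) (const_int -16))) (reg:V16QI v))

   with the feeding swap queued for replacement by a copy.  */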
42312 /* Look for patterns created from builtin lvx and stvx calls, and
42313 canonicalize them to be properly recognized as such. */
42314 static void
42315 recombine_lvx_stvx_patterns (function *fun)
42317 int i;
42318 basic_block bb;
42319 rtx_insn *insn;
42321 int num_insns = get_max_uid ();
42322 del_info *to_delete = XCNEWVEC (del_info, num_insns);
42324 FOR_ALL_BB_FN (bb, fun)
42325 FOR_BB_INSNS (bb, insn)
42327 if (!NONDEBUG_INSN_P (insn))
42328 continue;
42330 if (insn_is_load_p (insn) && insn_is_swap_p (insn))
42331 recombine_lvx_pattern (insn, to_delete);
42332 else if (insn_is_store_p (insn) && insn_is_swap_p (insn))
42333 recombine_stvx_pattern (insn, to_delete);
42336 /* Turning swaps into copies is delayed until now, to avoid problems
42337 with deleting instructions during the insn walk. */
42338 for (i = 0; i < num_insns; i++)
42339 if (to_delete[i].replace)
42341 rtx swap_body = PATTERN (to_delete[i].replace_insn);
42342 rtx src_reg = XEXP (SET_SRC (swap_body), 0);
42343 rtx copy = gen_rtx_SET (SET_DEST (swap_body), src_reg);
42344 rtx_insn *new_insn = emit_insn_before (copy,
42345 to_delete[i].replace_insn);
42346 set_block_for_insn (new_insn,
42347 BLOCK_FOR_INSN (to_delete[i].replace_insn));
42348 df_insn_rescan (new_insn);
42349 df_insn_delete (to_delete[i].replace_insn);
42350 remove_insn (to_delete[i].replace_insn);
42351 to_delete[i].replace_insn->set_deleted ();
42354 free (to_delete);
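/* Minimal sketch of the swap-to-copy replacement performed above
   (illustrative RTL): a queued doubleword swap

     (set (reg:V2DI x) (vec_select:V2DI (reg:V2DI y)
                                        (parallel [(const_int 1)
                                                   (const_int 0)])))

   is re-emitted in place as (set (reg:V2DI x) (reg:V2DI y)) before the
   original insn is deleted.  */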
42357 /* Main entry point for this pass. */
42358 unsigned int
42359 rs6000_analyze_swaps (function *fun)
42361 swap_web_entry *insn_entry;
42362 basic_block bb;
42363 rtx_insn *insn, *curr_insn = 0;
42365 /* Dataflow analysis for use-def chains. */
42366 df_set_flags (DF_RD_PRUNE_DEAD_DEFS);
42367 df_chain_add_problem (DF_DU_CHAIN | DF_UD_CHAIN);
42368 df_analyze ();
42369 df_set_flags (DF_DEFER_INSN_RESCAN);
42371 /* Pre-pass to recombine lvx and stvx patterns so we don't lose info. */
42372 recombine_lvx_stvx_patterns (fun);
42374 /* Allocate structure to represent webs of insns. */
42375 insn_entry = XCNEWVEC (swap_web_entry, get_max_uid ());
42377 /* Walk the insns to gather basic data. */
42378 FOR_ALL_BB_FN (bb, fun)
42379 FOR_BB_INSNS_SAFE (bb, insn, curr_insn)
42381 unsigned int uid = INSN_UID (insn);
42382 if (NONDEBUG_INSN_P (insn))
42384 insn_entry[uid].insn = insn;
42386 if (GET_CODE (insn) == CALL_INSN)
42387 insn_entry[uid].is_call = 1;
42389 /* Walk the uses and defs to see if we mention vector regs.
42390 Record any constraints on optimization of such mentions. */
42391 struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
42392 df_ref mention;
42393 FOR_EACH_INSN_INFO_USE (mention, insn_info)
42395 /* We use DF_REF_REAL_REG here to get inside any subregs. */
42396 machine_mode mode = GET_MODE (DF_REF_REAL_REG (mention));
42398 /* If a use gets its value from a call insn, it will be
42399 a hard register and will look like (reg:V4SI 3 3).
42400 The df analysis creates two mentions for GPR3 and GPR4,
42401 both DImode. We must recognize this and treat it as a
42402 vector mention to ensure the call is unioned with this
42403 use. */
42404 if (mode == DImode && DF_REF_INSN_INFO (mention))
42406 rtx feeder = DF_REF_INSN (mention);
42407 /* FIXME: It is pretty hard to get from the df mention
42408 to the mode of the use in the insn. We arbitrarily
42409 pick a vector mode here, even though the use might
42410 be a real DImode. We can be too conservative
42411 (create a web larger than necessary) because of
42412 this, so consider eventually fixing this. */
42413 if (GET_CODE (feeder) == CALL_INSN)
42414 mode = V4SImode;
42417 if (ALTIVEC_OR_VSX_VECTOR_MODE (mode) || mode == TImode)
42419 insn_entry[uid].is_relevant = 1;
42420 if (mode == TImode || mode == V1TImode
42421 || FLOAT128_VECTOR_P (mode))
42422 insn_entry[uid].is_128_int = 1;
42423 if (DF_REF_INSN_INFO (mention))
42424 insn_entry[uid].contains_subreg
42425 = !rtx_equal_p (DF_REF_REG (mention),
42426 DF_REF_REAL_REG (mention));
42427 union_defs (insn_entry, insn, mention);
42430 FOR_EACH_INSN_INFO_DEF (mention, insn_info)
42432 /* We use DF_REF_REAL_REG here to get inside any subregs. */
42433 machine_mode mode = GET_MODE (DF_REF_REAL_REG (mention));
42435 /* If we're loading up a hard vector register for a call,
42436 it looks like (set (reg:V4SI 9 9) (...)). The df
42437 analysis creates two mentions for GPR9 and GPR10, both
42438 DImode. So relying on the mode from the mentions
42439 isn't sufficient to ensure we union the call into the
42440 web with the parameter setup code. */
42441 if (mode == DImode && GET_CODE (PATTERN (insn)) == SET
42442 && ALTIVEC_OR_VSX_VECTOR_MODE (GET_MODE (SET_DEST (PATTERN (insn)))))
42443 mode = GET_MODE (SET_DEST (PATTERN (insn)));
42445 if (ALTIVEC_OR_VSX_VECTOR_MODE (mode) || mode == TImode)
42447 insn_entry[uid].is_relevant = 1;
42448 if (mode == TImode || mode == V1TImode
42449 || FLOAT128_VECTOR_P (mode))
42450 insn_entry[uid].is_128_int = 1;
42451 if (DF_REF_INSN_INFO (mention))
42452 insn_entry[uid].contains_subreg
42453 = !rtx_equal_p (DF_REF_REG (mention),
42454 DF_REF_REAL_REG (mention));
42455 /* REG_FUNCTION_VALUE_P is not valid for subregs. */
42456 else if (REG_FUNCTION_VALUE_P (DF_REF_REG (mention)))
42457 insn_entry[uid].is_live_out = 1;
42458 union_uses (insn_entry, insn, mention);
42462 if (insn_entry[uid].is_relevant)
42464 /* Determine if this is a load or store. */
42465 insn_entry[uid].is_load = insn_is_load_p (insn);
42466 insn_entry[uid].is_store = insn_is_store_p (insn);
42468 /* Determine if this is a doubleword swap. If not,
42469 determine whether it can legally be swapped. */
42470 if (insn_is_swap_p (insn))
42471 insn_entry[uid].is_swap = 1;
42472 else
42474 unsigned int special = SH_NONE;
42475 insn_entry[uid].is_swappable
42476 = insn_is_swappable_p (insn_entry, insn, &special);
42477 if (special != SH_NONE && insn_entry[uid].contains_subreg)
42478 insn_entry[uid].is_swappable = 0;
42479 else if (special != SH_NONE)
42480 insn_entry[uid].special_handling = special;
42481 else if (insn_entry[uid].contains_subreg)
42482 insn_entry[uid].special_handling = SH_SUBREG;
42488 if (dump_file)
42490 fprintf (dump_file, "\nSwap insn entry table when first built\n");
42491 dump_swap_insn_table (insn_entry);
42494 /* Record unoptimizable webs. */
42495 unsigned e = get_max_uid (), i;
42496 for (i = 0; i < e; ++i)
42498 if (!insn_entry[i].is_relevant)
42499 continue;
42501 swap_web_entry *root
42502 = (swap_web_entry*)(&insn_entry[i])->unionfind_root ();
42504 if (insn_entry[i].is_live_in || insn_entry[i].is_live_out
42505 || (insn_entry[i].contains_subreg
42506 && insn_entry[i].special_handling != SH_SUBREG)
42507 || insn_entry[i].is_128_int || insn_entry[i].is_call
42508 || !(insn_entry[i].is_swappable || insn_entry[i].is_swap))
42509 root->web_not_optimizable = 1;
42511 /* If we have loads or stores that aren't permuting then the
42512 optimization isn't appropriate. */
42513 else if ((insn_entry[i].is_load || insn_entry[i].is_store)
42514 && !insn_entry[i].is_swap && !insn_entry[i].is_swappable)
42515 root->web_not_optimizable = 1;
42517 /* If we have permuting loads or stores that are not accompanied
42518 by a register swap, the optimization isn't appropriate. */
42519 else if (insn_entry[i].is_load && insn_entry[i].is_swap)
42521 rtx insn = insn_entry[i].insn;
42522 struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
42523 df_ref def;
42525 FOR_EACH_INSN_INFO_DEF (def, insn_info)
42527 struct df_link *link = DF_REF_CHAIN (def);
42529 if (!chain_contains_only_swaps (insn_entry, link, FOR_LOADS))
42531 root->web_not_optimizable = 1;
42532 break;
42536 else if (insn_entry[i].is_store && insn_entry[i].is_swap)
42538 rtx insn = insn_entry[i].insn;
42539 struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
42540 df_ref use;
42542 FOR_EACH_INSN_INFO_USE (use, insn_info)
42544 struct df_link *link = DF_REF_CHAIN (use);
42546 if (!chain_contains_only_swaps (insn_entry, link, FOR_STORES))
42548 root->web_not_optimizable = 1;
42549 break;
42555 if (dump_file)
42557 fprintf (dump_file, "\nSwap insn entry table after web analysis\n");
42558 dump_swap_insn_table (insn_entry);
42561 /* For each load and store in an optimizable web (which implies
42562 the loads and stores are permuting), find the associated
42563 register swaps and mark them for removal. Due to various
42564 optimizations we may mark the same swap more than once. Also
42565 perform special handling for swappable insns that require it. */
42566 for (i = 0; i < e; ++i)
42567 if ((insn_entry[i].is_load || insn_entry[i].is_store)
42568 && insn_entry[i].is_swap)
42570 swap_web_entry* root_entry
42571 = (swap_web_entry*)((&insn_entry[i])->unionfind_root ());
42572 if (!root_entry->web_not_optimizable)
42573 mark_swaps_for_removal (insn_entry, i);
42575 else if (insn_entry[i].is_swappable && insn_entry[i].special_handling)
42577 swap_web_entry* root_entry
42578 = (swap_web_entry*)((&insn_entry[i])->unionfind_root ());
42579 if (!root_entry->web_not_optimizable)
42580 handle_special_swappables (insn_entry, i);
42583 /* Now delete the swaps marked for removal. */
42584 for (i = 0; i < e; ++i)
42585 if (insn_entry[i].will_delete)
42586 replace_swap_with_copy (insn_entry, i);
42588 /* Clean up. */
42589 free (insn_entry);
42590 return 0;
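/* End-to-end example of the whole pass (hypothetical little-endian
   assembly, for illustration only):

     lxvd2x   0,0,9    # permuting load
     xxpermdi 0,0,0,2  # doubleword swap
     ...               # lane-insensitive vector arithmetic
     xxpermdi 0,0,0,2  # doubleword swap
     stxvd2x  0,0,10   # permuting store

   These insns form a single web.  Since every load and store in the
   web permutes and everything else is swappable, the two xxpermdi
   insns are marked and removed; values simply live doubleword-swapped
   in registers within the web.  */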
42593 const pass_data pass_data_analyze_swaps =
42595 RTL_PASS, /* type */
42596 "swaps", /* name */
42597 OPTGROUP_NONE, /* optinfo_flags */
42598 TV_NONE, /* tv_id */
42599 0, /* properties_required */
42600 0, /* properties_provided */
42601 0, /* properties_destroyed */
42602 0, /* todo_flags_start */
42603 TODO_df_finish, /* todo_flags_finish */
42606 class pass_analyze_swaps : public rtl_opt_pass
42608 public:
42609 pass_analyze_swaps(gcc::context *ctxt)
42610 : rtl_opt_pass(pass_data_analyze_swaps, ctxt)
42613 /* opt_pass methods: */
42614 virtual bool gate (function *)
42616 return (optimize > 0 && !BYTES_BIG_ENDIAN && TARGET_VSX
42617 && !TARGET_P9_VECTOR && rs6000_optimize_swaps);
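/* TARGET_P9_VECTOR is excluded because ISA 3.0 provides endian-correct
   vector loads and stores (e.g. lxvx/stxvx), so the swap sequences
   this pass cleans up are not generated for Power9.  */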
42620 virtual unsigned int execute (function *fun)
42622 return rs6000_analyze_swaps (fun);
42625 opt_pass *clone ()
42627 return new pass_analyze_swaps (m_ctxt);
42630 }; // class pass_analyze_swaps
42632 rtl_opt_pass *
42633 make_pass_analyze_swaps (gcc::context *ctxt)
42635 return new pass_analyze_swaps (ctxt);
42638 #ifdef RS6000_GLIBC_ATOMIC_FENV
42639 /* Function declarations for rs6000_atomic_assign_expand_fenv. */
42640 static tree atomic_hold_decl, atomic_clear_decl, atomic_update_decl;
42641 #endif
42643 /* Implement TARGET_ATOMIC_ASSIGN_EXPAND_FENV hook. */
42645 static void
42646 rs6000_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update)
42648 if (!TARGET_HARD_FLOAT || !TARGET_FPRS)
42650 #ifdef RS6000_GLIBC_ATOMIC_FENV
42651 if (atomic_hold_decl == NULL_TREE)
42653 atomic_hold_decl
42654 = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
42655 get_identifier ("__atomic_feholdexcept"),
42656 build_function_type_list (void_type_node,
42657 double_ptr_type_node,
42658 NULL_TREE));
42659 TREE_PUBLIC (atomic_hold_decl) = 1;
42660 DECL_EXTERNAL (atomic_hold_decl) = 1;
42663 if (atomic_clear_decl == NULL_TREE)
42665 atomic_clear_decl
42666 = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
42667 get_identifier ("__atomic_feclearexcept"),
42668 build_function_type_list (void_type_node,
42669 NULL_TREE));
42670 TREE_PUBLIC (atomic_clear_decl) = 1;
42671 DECL_EXTERNAL (atomic_clear_decl) = 1;
42674 tree const_double = build_qualified_type (double_type_node,
42675 TYPE_QUAL_CONST);
42676 tree const_double_ptr = build_pointer_type (const_double);
42677 if (atomic_update_decl == NULL_TREE)
42679 atomic_update_decl
42680 = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
42681 get_identifier ("__atomic_feupdateenv"),
42682 build_function_type_list (void_type_node,
42683 const_double_ptr,
42684 NULL_TREE));
42685 TREE_PUBLIC (atomic_update_decl) = 1;
42686 DECL_EXTERNAL (atomic_update_decl) = 1;
42689 tree fenv_var = create_tmp_var_raw (double_type_node);
42690 TREE_ADDRESSABLE (fenv_var) = 1;
42691 tree fenv_addr = build1 (ADDR_EXPR, double_ptr_type_node, fenv_var);
42693 *hold = build_call_expr (atomic_hold_decl, 1, fenv_addr);
42694 *clear = build_call_expr (atomic_clear_decl, 0);
42695 *update = build_call_expr (atomic_update_decl, 1,
42696 fold_convert (const_double_ptr, fenv_addr));
42697 #endif
42698 return;
42701 tree mffs = rs6000_builtin_decls[RS6000_BUILTIN_MFFS];
42702 tree mtfsf = rs6000_builtin_decls[RS6000_BUILTIN_MTFSF];
42703 tree call_mffs = build_call_expr (mffs, 0);
42705 /* Generates the equivalent of feholdexcept (&fenv_var)
42707 *fenv_var = __builtin_mffs ();
42708 double fenv_hold;
42709 *(uint64_t*)&fenv_hold = *(uint64_t*)fenv_var & 0xffffffff00000007LL;
42710 __builtin_mtfsf (0xff, fenv_hold); */
42712 /* Mask to clear everything except for the rounding modes and non-IEEE
42713 arithmetic flag. */
42714 const unsigned HOST_WIDE_INT hold_exception_mask =
42715 HOST_WIDE_INT_C (0xffffffff00000007);
42717 tree fenv_var = create_tmp_var_raw (double_type_node);
42719 tree hold_mffs = build2 (MODIFY_EXPR, void_type_node, fenv_var, call_mffs);
42721 tree fenv_llu = build1 (VIEW_CONVERT_EXPR, uint64_type_node, fenv_var);
42722 tree fenv_llu_and = build2 (BIT_AND_EXPR, uint64_type_node, fenv_llu,
42723 build_int_cst (uint64_type_node,
42724 hold_exception_mask));
42726 tree fenv_hold_mtfsf = build1 (VIEW_CONVERT_EXPR, double_type_node,
42727 fenv_llu_and);
42729 tree hold_mtfsf = build_call_expr (mtfsf, 2,
42730 build_int_cst (unsigned_type_node, 0xff),
42731 fenv_hold_mtfsf);
42733 *hold = build2 (COMPOUND_EXPR, void_type_node, hold_mffs, hold_mtfsf);
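/* Worked example of hold_exception_mask (for illustration): mffs
   returns the 32-bit FPSCR image in the low word of the double, so
   0xffffffff00000007 leaves the high word alone and keeps only the
   low three bits of the FPSCR image -- the two RN rounding-mode bits
   and the NI non-IEEE mode bit -- while zeroing all exception status
   and enable bits, which is what feholdexcept requires.  */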
42735 /* Generates the equivalent of feclearexcept (FE_ALL_EXCEPT):
42737 double fenv_clear = __builtin_mffs ();
42738 *(uint64_t*)&fenv_clear &= 0xffffffff00000000LL;
42739 __builtin_mtfsf (0xff, fenv_clear); */
42741 /* Mask to clear the entire FPSCR image, including the exception
42742 bits, the rounding modes, and the non-IEEE arithmetic flag. */
42743 const unsigned HOST_WIDE_INT clear_exception_mask =
42744 HOST_WIDE_INT_C (0xffffffff00000000);
42746 tree fenv_clear = create_tmp_var_raw (double_type_node);
42748 tree clear_mffs = build2 (MODIFY_EXPR, void_type_node, fenv_clear, call_mffs);
42750 tree fenv_clean_llu = build1 (VIEW_CONVERT_EXPR, uint64_type_node, fenv_clear);
42751 tree fenv_clear_llu_and = build2 (BIT_AND_EXPR, uint64_type_node,
42752 fenv_clean_llu,
42753 build_int_cst (uint64_type_node,
42754 clear_exception_mask));
42756 tree fenv_clear_mtfsf = build1 (VIEW_CONVERT_EXPR, double_type_node,
42757 fenv_clear_llu_and);
42759 tree clear_mtfsf = build_call_expr (mtfsf, 2,
42760 build_int_cst (unsigned_type_node, 0xff),
42761 fenv_clear_mtfsf);
42763 *clear = build2 (COMPOUND_EXPR, void_type_node, clear_mffs, clear_mtfsf);
42765 /* Generates the equivalent of feupdateenv (&fenv_var)
42767 double old_fenv = __builtin_mffs ();
42768 double fenv_update;
42769 *(uint64_t*)&fenv_update = (*(uint64_t*)&old_fenv & 0xffffffff1fffff00LL) |
42770 (*(uint64_t*)fenv_var & 0x1ff80fff);
42771 __builtin_mtfsf (0xff, fenv_update); */
42773 const unsigned HOST_WIDE_INT update_exception_mask =
42774 HOST_WIDE_INT_C (0xffffffff1fffff00);
42775 const unsigned HOST_WIDE_INT new_exception_mask =
42776 HOST_WIDE_INT_C (0x1ff80fff);
42778 tree old_fenv = create_tmp_var_raw (double_type_node);
42779 tree update_mffs = build2 (MODIFY_EXPR, void_type_node, old_fenv, call_mffs);
42781 tree old_llu = build1 (VIEW_CONVERT_EXPR, uint64_type_node, old_fenv);
42782 tree old_llu_and = build2 (BIT_AND_EXPR, uint64_type_node, old_llu,
42783 build_int_cst (uint64_type_node,
42784 update_exception_mask));
42786 tree new_llu_and = build2 (BIT_AND_EXPR, uint64_type_node, fenv_llu,
42787 build_int_cst (uint64_type_node,
42788 new_exception_mask));
42790 tree new_llu_mask = build2 (BIT_IOR_EXPR, uint64_type_node,
42791 old_llu_and, new_llu_and);
42793 tree fenv_update_mtfsf = build1 (VIEW_CONVERT_EXPR, double_type_node,
42794 new_llu_mask);
42796 tree update_mtfsf = build_call_expr (mtfsf, 2,
42797 build_int_cst (unsigned_type_node, 0xff),
42798 fenv_update_mtfsf);
42800 *update = build2 (COMPOUND_EXPR, void_type_node, update_mffs, update_mtfsf);
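/* Usage sketch (illustrative, not from this file): the middle end
   calls this hook when expanding C11 atomic compound assignment on
   floating-point operands, e.g.

     _Atomic double d;
     d += x;

   emitting HOLD before the compare-and-swap loop, CLEAR when a CAS
   iteration fails and must retry, and UPDATE once a result is
   successfully stored, so that only the surviving iteration's FP
   exceptions are raised.  */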
42803 /* Implement the TARGET_OPTAB_SUPPORTED_P hook. */
42805 static bool
42806 rs6000_optab_supported_p (int op, machine_mode mode1, machine_mode,
42807 optimization_type opt_type)
42809 switch (op)
42811 case rsqrt_optab:
42812 return (opt_type == OPTIMIZE_FOR_SPEED
42813 && RS6000_RECIP_AUTO_RSQRTE_P (mode1));
42815 default:
42816 return true;
42820 struct gcc_target targetm = TARGET_INITIALIZER;
42822 #include "gt-rs6000.h"