/* Subroutines used for code generation on IBM RS/6000.
   Copyright (C) 1991-2017 Free Software Foundation, Inc.
   Contributed by Richard Kenner (kenner@vlsi1.ultra.nyu.edu)

   This file is part of GCC.

   GCC is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published
   by the Free Software Foundation; either version 3, or (at your
   option) any later version.

   GCC is distributed in the hope that it will be useful, but WITHOUT
   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
   or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
   License for more details.

   You should have received a copy of the GNU General Public License
   along with GCC; see the file COPYING3.  If not see
   <http://www.gnu.org/licenses/>.  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "rtl.h"
#include "tree.h"
#include "memmodel.h"
#include "gimple.h"
#include "cfghooks.h"
#include "cfgloop.h"
#include "df.h"
#include "tm_p.h"
#include "stringpool.h"
#include "expmed.h"
#include "optabs.h"
#include "regs.h"
#include "ira.h"
#include "recog.h"
#include "cgraph.h"
#include "diagnostic-core.h"
#include "insn-attr.h"
#include "flags.h"
#include "alias.h"
#include "fold-const.h"
#include "attribs.h"
#include "stor-layout.h"
#include "calls.h"
#include "print-tree.h"
#include "varasm.h"
#include "explow.h"
#include "expr.h"
#include "output.h"
#include "dbxout.h"
#include "common/common-target.h"
#include "langhooks.h"
#include "reload.h"
#include "sched-int.h"
#include "gimplify.h"
#include "gimple-fold.h"
#include "gimple-iterator.h"
#include "gimple-ssa.h"
#include "gimple-walk.h"
#include "intl.h"
#include "params.h"
#include "tm-constrs.h"
#include "tree-vectorizer.h"
#include "target-globals.h"
#include "builtins.h"
#include "context.h"
#include "tree-pass.h"
#include "except.h"
#if TARGET_XCOFF
#include "xcoffout.h"  /* get declarations of xcoff_*_section_name */
#endif
#if TARGET_MACHO
#include "gstab.h"  /* for N_SLINE */
#endif
#include "case-cfn-macros.h"
#include "ppc-auxv.h"
#include "tree-ssa-propagate.h"

/* This file should be included last.  */
#include "target-def.h"

#ifndef TARGET_NO_PROTOTYPE
#define TARGET_NO_PROTOTYPE 0
#endif

#define min(A,B)  ((A) < (B) ? (A) : (B))
#define max(A,B)  ((A) > (B) ? (A) : (B))
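
/* NB: as function-like macros, min and max evaluate each argument
   twice, so arguments with side effects (e.g. min (x++, y)) would be
   unsafe; callers in this file are expected to pass plain values.  */
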
/* Structure used to define the rs6000 stack */
typedef struct rs6000_stack {
  int reload_completed;		/* stack info won't change from here on */
  int first_gp_reg_save;	/* first callee saved GP register used */
  int first_fp_reg_save;	/* first callee saved FP register used */
  int first_altivec_reg_save;	/* first callee saved AltiVec register used */
  int lr_save_p;		/* true if the link reg needs to be saved */
  int cr_save_p;		/* true if the CR reg needs to be saved */
  unsigned int vrsave_mask;	/* mask of vec registers to save */
  int push_p;			/* true if we need to allocate stack space */
  int calls_p;			/* true if the function makes any calls */
  int world_save_p;		/* true if we're saving *everything*:
				   r13-r31, cr, f14-f31, vrsave, v20-v31  */
  enum rs6000_abi abi;		/* which ABI to use */
  int gp_save_offset;		/* offset to save GP regs from initial SP */
  int fp_save_offset;		/* offset to save FP regs from initial SP */
  int altivec_save_offset;	/* offset to save AltiVec regs from initial SP */
  int lr_save_offset;		/* offset to save LR from initial SP */
  int cr_save_offset;		/* offset to save CR from initial SP */
  int vrsave_save_offset;	/* offset to save VRSAVE from initial SP */
  int varargs_save_offset;	/* offset to save the varargs registers */
  int ehrd_offset;		/* offset to EH return data */
  int ehcr_offset;		/* offset to EH CR field data */
  int reg_size;			/* register size (4 or 8) */
  HOST_WIDE_INT vars_size;	/* variable save area size */
  int parm_size;		/* outgoing parameter size */
  int save_size;		/* save area size */
  int fixed_size;		/* fixed size of stack frame */
  int gp_size;			/* size of saved GP registers */
  int fp_size;			/* size of saved FP registers */
  int altivec_size;		/* size of saved AltiVec registers */
  int cr_size;			/* size to hold CR if not in fixed area */
  int vrsave_size;		/* size to hold VRSAVE */
  int altivec_padding_size;	/* size of altivec alignment padding */
  HOST_WIDE_INT total_size;	/* total bytes allocated for stack */
  int savres_strategy;
} rs6000_stack_t;
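
/* Rough sketch of how these fields relate (illustrative, not
   normative): rs6000_stack_info computes total_size as roughly the
   register save areas (gp_size, fp_size, altivec_size, ...) plus
   vars_size, parm_size, and the ABI's fixed frame header, rounded up
   to the stack alignment, while the *_save_offset fields locate each
   save area relative to the incoming stack pointer.  */
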
/* A C structure for machine-specific, per-function data.
   This is added to the cfun structure.  */
typedef struct GTY(()) machine_function
{
  /* Flags if __builtin_return_address (n) with n >= 1 was used.  */
  int ra_needs_full_frame;
  /* Flags if __builtin_return_address (0) was used.  */
  int ra_need_lr;
  /* Cache lr_save_p after expansion of builtin_eh_return.  */
  int lr_save_state;
  /* Whether we need to save the TOC to the reserved stack location in the
     function prologue.  */
  bool save_toc_in_prologue;
  /* Offset from virtual_stack_vars_rtx to the start of the ABI_V4
     varargs save area.  */
  HOST_WIDE_INT varargs_save_offset;
  /* Temporary stack slot to use for SDmode copies.  This slot is
     64-bits wide and is allocated early enough so that the offset
     does not overflow the 16-bit load/store offset field.  */
  rtx sdmode_stack_slot;
  /* Alternative internal arg pointer for -fsplit-stack.  */
  rtx split_stack_arg_pointer;
  bool split_stack_argp_used;
  /* Flag if r2 setup is needed with ELFv2 ABI.  */
  bool r2_setup_needed;
  /* The number of components we use for separate shrink-wrapping.  */
  int n_components;
  /* The components already handled by separate shrink-wrapping, which should
     not be considered by the prologue and epilogue.  */
  bool gpr_is_wrapped_separately[32];
  bool fpr_is_wrapped_separately[32];
  bool lr_is_wrapped_separately;
} machine_function;

/* Support targetm.vectorize.builtin_mask_for_load.  */
static GTY(()) tree altivec_builtin_mask_for_load;

/* Set to nonzero once AIX common-mode calls have been defined.  */
static GTY(()) int common_mode_defined;

/* Label number of label created for -mrelocatable, to call to so we can
   get the address of the GOT section */
static int rs6000_pic_labelno;

#ifdef USING_ELFOS_H
/* Counter for labels which are to be placed in .fixup.  */
int fixuplabelno = 0;
#endif

/* Whether to use variant of AIX ABI for PowerPC64 Linux.  */
int dot_symbols;

/* Specify the machine mode that pointers have.  After generation of rtl, the
   compiler makes no further distinction between pointers and any other objects
   of this machine mode.  The type is unsigned since not all things that
   include rs6000.h also include machmode.h.  */
unsigned rs6000_pmode;

/* Width in bits of a pointer.  */
unsigned rs6000_pointer_size;

#ifdef HAVE_AS_GNU_ATTRIBUTE
# ifndef HAVE_LD_PPC_GNU_ATTR_LONG_DOUBLE
# define HAVE_LD_PPC_GNU_ATTR_LONG_DOUBLE 0
# endif
/* Flag whether floating point values have been passed/returned.
   Note that this doesn't say whether fprs are used, since the
   Tag_GNU_Power_ABI_FP .gnu.attributes value this flag controls
   should be set for soft-float values passed in gprs and ieee128
   values passed in vsx registers.  */
static bool rs6000_passes_float;
static bool rs6000_passes_long_double;
/* Flag whether vector values have been passed/returned.  */
static bool rs6000_passes_vector;
/* Flag whether small (<= 8 byte) structures have been returned.  */
static bool rs6000_returns_struct;
#endif

/* Value is TRUE if register/mode pair is acceptable.  */
bool rs6000_hard_regno_mode_ok_p[NUM_MACHINE_MODES][FIRST_PSEUDO_REGISTER];

/* Maximum number of registers needed for a given register class and mode.  */
unsigned char rs6000_class_max_nregs[NUM_MACHINE_MODES][LIM_REG_CLASSES];

/* How many registers are needed for a given register and mode.  */
unsigned char rs6000_hard_regno_nregs[NUM_MACHINE_MODES][FIRST_PSEUDO_REGISTER];

/* Map register number to register class.  */
enum reg_class rs6000_regno_regclass[FIRST_PSEUDO_REGISTER];

static int dbg_cost_ctrl;

/* Built in types.  */
tree rs6000_builtin_types[RS6000_BTI_MAX];
tree rs6000_builtin_decls[RS6000_BUILTIN_COUNT];

/* Flag to say the TOC is initialized */
int toc_initialized, need_toc_init;
char toc_label_name[10];

/* Cached value of rs6000_variable_issue.  This is cached in
   rs6000_variable_issue hook and returned from rs6000_sched_reorder2.  */
static short cached_can_issue_more;

static GTY(()) section *read_only_data_section;
static GTY(()) section *private_data_section;
static GTY(()) section *tls_data_section;
static GTY(()) section *tls_private_data_section;
static GTY(()) section *read_only_private_data_section;
static GTY(()) section *sdata2_section;
static GTY(()) section *toc_section;

struct builtin_description
{
  const HOST_WIDE_INT mask;
  const enum insn_code icode;
  const char *const name;
  const enum rs6000_builtins code;
};

/* Describe the vector unit used for modes.  */
enum rs6000_vector rs6000_vector_unit[NUM_MACHINE_MODES];
enum rs6000_vector rs6000_vector_mem[NUM_MACHINE_MODES];

/* Register classes for various constraints that are based on the target
   switches.  */
enum reg_class rs6000_constraints[RS6000_CONSTRAINT_MAX];

/* Describe the alignment of a vector.  */
int rs6000_vector_align[NUM_MACHINE_MODES];

/* Map selected modes to types for builtins.  */
static GTY(()) tree builtin_mode_to_type[MAX_MACHINE_MODE][2];

/* What modes to automatically generate reciprocal divide estimate (fre) and
   reciprocal sqrt (frsqrte) for.  */
unsigned char rs6000_recip_bits[MAX_MACHINE_MODE];

/* Masks to determine which reciprocal estimate instructions to generate
   automatically.  */
enum rs6000_recip_mask {
  RECIP_SF_DIV		= 0x001,	/* Use divide estimate */
  RECIP_DF_DIV		= 0x002,
  RECIP_V4SF_DIV	= 0x004,
  RECIP_V2DF_DIV	= 0x008,

  RECIP_SF_RSQRT	= 0x010,	/* Use reciprocal sqrt estimate.  */
  RECIP_DF_RSQRT	= 0x020,
  RECIP_V4SF_RSQRT	= 0x040,
  RECIP_V2DF_RSQRT	= 0x080,

  /* Various combination of flags for -mrecip=xxx.  */
  RECIP_NONE		= 0,
  RECIP_ALL		= (RECIP_SF_DIV | RECIP_DF_DIV | RECIP_V4SF_DIV
			   | RECIP_V2DF_DIV | RECIP_SF_RSQRT | RECIP_DF_RSQRT
			   | RECIP_V4SF_RSQRT | RECIP_V2DF_RSQRT),

  RECIP_HIGH_PRECISION	= RECIP_ALL,

  /* On low precision machines like the power5, don't enable double precision
     reciprocal square root estimate, since it isn't accurate enough.  */
  RECIP_LOW_PRECISION	= (RECIP_ALL & ~(RECIP_DF_RSQRT | RECIP_V2DF_RSQRT))
};

/* -mrecip options.  */
static struct
{
  const char *string;		/* option name */
  unsigned int mask;		/* mask bits to set */
} recip_options[] = {
  { "all",	(RECIP_ALL) },
  { "none",	(RECIP_NONE) },
  { "div",	(RECIP_SF_DIV | RECIP_DF_DIV | RECIP_V4SF_DIV
		 | RECIP_V2DF_DIV) },
  { "divf",	(RECIP_SF_DIV | RECIP_V4SF_DIV) },
  { "divd",	(RECIP_DF_DIV | RECIP_V2DF_DIV) },
  { "rsqrt",	(RECIP_SF_RSQRT | RECIP_DF_RSQRT | RECIP_V4SF_RSQRT
		 | RECIP_V2DF_RSQRT) },
  { "rsqrtf",	(RECIP_SF_RSQRT | RECIP_V4SF_RSQRT) },
  { "rsqrtd",	(RECIP_DF_RSQRT | RECIP_V2DF_RSQRT) },
};
/* Used by __builtin_cpu_is(), mapping from PLATFORM names to values.  */
static const struct
{
  const char *cpu;
  unsigned int cpuid;
} cpu_is_info[] = {
  { "power9",	   PPC_PLATFORM_POWER9 },
  { "power8",	   PPC_PLATFORM_POWER8 },
  { "power7",	   PPC_PLATFORM_POWER7 },
  { "power6x",	   PPC_PLATFORM_POWER6X },
  { "power6",	   PPC_PLATFORM_POWER6 },
  { "power5+",	   PPC_PLATFORM_POWER5_PLUS },
  { "power5",	   PPC_PLATFORM_POWER5 },
  { "ppc970",	   PPC_PLATFORM_PPC970 },
  { "power4",	   PPC_PLATFORM_POWER4 },
  { "ppca2",	   PPC_PLATFORM_PPCA2 },
  { "ppc476",	   PPC_PLATFORM_PPC476 },
  { "ppc464",	   PPC_PLATFORM_PPC464 },
  { "ppc440",	   PPC_PLATFORM_PPC440 },
  { "ppc405",	   PPC_PLATFORM_PPC405 },
  { "ppc-cell-be", PPC_PLATFORM_CELL_BE }
};
/* Used by __builtin_cpu_supports(), mapping from HWCAP names to masks.  */
static const struct
{
  const char *hwcap;
  int mask;
  unsigned int id;
} cpu_supports_info[] = {
  /* AT_HWCAP masks.  */
  { "4xxmac",		PPC_FEATURE_HAS_4xxMAC,		0 },
  { "altivec",		PPC_FEATURE_HAS_ALTIVEC,	0 },
  { "arch_2_05",	PPC_FEATURE_ARCH_2_05,		0 },
  { "arch_2_06",	PPC_FEATURE_ARCH_2_06,		0 },
  { "archpmu",		PPC_FEATURE_PERFMON_COMPAT,	0 },
  { "booke",		PPC_FEATURE_BOOKE,		0 },
  { "cellbe",		PPC_FEATURE_CELL_BE,		0 },
  { "dfp",		PPC_FEATURE_HAS_DFP,		0 },
  { "efpdouble",	PPC_FEATURE_HAS_EFP_DOUBLE,	0 },
  { "efpsingle",	PPC_FEATURE_HAS_EFP_SINGLE,	0 },
  { "fpu",		PPC_FEATURE_HAS_FPU,		0 },
  { "ic_snoop",		PPC_FEATURE_ICACHE_SNOOP,	0 },
  { "mmu",		PPC_FEATURE_HAS_MMU,		0 },
  { "notb",		PPC_FEATURE_NO_TB,		0 },
  { "pa6t",		PPC_FEATURE_PA6T,		0 },
  { "power4",		PPC_FEATURE_POWER4,		0 },
  { "power5",		PPC_FEATURE_POWER5,		0 },
  { "power5+",		PPC_FEATURE_POWER5_PLUS,	0 },
  { "power6x",		PPC_FEATURE_POWER6_EXT,		0 },
  { "ppc32",		PPC_FEATURE_32,			0 },
  { "ppc601",		PPC_FEATURE_601_INSTR,		0 },
  { "ppc64",		PPC_FEATURE_64,			0 },
  { "ppcle",		PPC_FEATURE_PPC_LE,		0 },
  { "smt",		PPC_FEATURE_SMT,		0 },
  { "spe",		PPC_FEATURE_HAS_SPE,		0 },
  { "true_le",		PPC_FEATURE_TRUE_LE,		0 },
  { "ucache",		PPC_FEATURE_UNIFIED_CACHE,	0 },
  { "vsx",		PPC_FEATURE_HAS_VSX,		0 },

  /* AT_HWCAP2 masks.  */
  { "arch_2_07",	PPC_FEATURE2_ARCH_2_07,		1 },
  { "dscr",		PPC_FEATURE2_HAS_DSCR,		1 },
  { "ebb",		PPC_FEATURE2_HAS_EBB,		1 },
  { "htm",		PPC_FEATURE2_HAS_HTM,		1 },
  { "htm-nosc",		PPC_FEATURE2_HTM_NOSC,		1 },
  { "isel",		PPC_FEATURE2_HAS_ISEL,		1 },
  { "tar",		PPC_FEATURE2_HAS_TAR,		1 },
  { "vcrypto",		PPC_FEATURE2_HAS_VEC_CRYPTO,	1 },
  { "arch_3_00",	PPC_FEATURE2_ARCH_3_00,		1 },
  { "ieee128",		PPC_FEATURE2_HAS_IEEE128,	1 }
};
/* On PowerPC, we have a limited number of target clones that we care about
   which means we can use an array to hold the options, rather than having more
   elaborate data structures to identify each possible variation.  Order the
   clones from the default to the highest ISA.  */
enum {
  CLONE_DEFAULT		= 0,		/* default clone.  */
  CLONE_ISA_2_05,			/* ISA 2.05 (power6).  */
  CLONE_ISA_2_06,			/* ISA 2.06 (power7).  */
  CLONE_ISA_2_07,			/* ISA 2.07 (power8).  */
  CLONE_ISA_3_00,			/* ISA 3.00 (power9).  */
  CLONE_MAX
};

/* Map compiler ISA bits into HWCAP names.  */
struct clone_map {
  HOST_WIDE_INT isa_mask;	/* rs6000_isa mask */
  const char *name;		/* name to use in __builtin_cpu_supports.  */
};

static const struct clone_map rs6000_clone_map[CLONE_MAX] = {
  { 0,				"" },		/* Default options.  */
  { OPTION_MASK_CMPB,		"arch_2_05" },	/* ISA 2.05 (power6).  */
  { OPTION_MASK_POPCNTD,	"arch_2_06" },	/* ISA 2.06 (power7).  */
  { OPTION_MASK_P8_VECTOR,	"arch_2_07" },	/* ISA 2.07 (power8).  */
  { OPTION_MASK_P9_VECTOR,	"arch_3_00" },	/* ISA 3.00 (power9).  */
};

/* Newer LIBCs explicitly export this symbol to declare that they provide
   the AT_PLATFORM and AT_HWCAP/AT_HWCAP2 values in the TCB.  We emit a
   reference to this symbol whenever we expand a CPU builtin, so that
   we never link against an old LIBC.  */
const char *tcb_verification_symbol = "__parse_hwcap_and_convert_at_platform";

/* True if we have expanded a CPU builtin.  */
bool cpu_builtin_p;

/* Pointer to function (in rs6000-c.c) that can define or undefine target
   macros that have changed.  Languages that don't support the preprocessor
   don't link in rs6000-c.c, so we can't call it directly.  */
void (*rs6000_target_modify_macros_ptr) (bool, HOST_WIDE_INT, HOST_WIDE_INT);

/* Simplify register classes into simpler classifications.  We assume
   GPR_REG_TYPE - FPR_REG_TYPE are ordered so that we can use a simple range
   check for standard register classes (gpr/floating/altivec/vsx) and
   floating/vector classes (float/altivec/vsx).  */

enum rs6000_reg_type {
  NO_REG_TYPE,
  PSEUDO_REG_TYPE,
  GPR_REG_TYPE,
  VSX_REG_TYPE,
  ALTIVEC_REG_TYPE,
  FPR_REG_TYPE,
  SPR_REG_TYPE,
  CR_REG_TYPE,
};

/* Map register class to register type.  */
static enum rs6000_reg_type reg_class_to_reg_type[N_REG_CLASSES];

/* First/last register type for the 'normal' register types (i.e. general
   purpose, floating point, altivec, and VSX registers).  */
#define IS_STD_REG_TYPE(RTYPE) IN_RANGE(RTYPE, GPR_REG_TYPE, FPR_REG_TYPE)

#define IS_FP_VECT_REG_TYPE(RTYPE) IN_RANGE(RTYPE, VSX_REG_TYPE, FPR_REG_TYPE)

/* Register classes we care about in secondary reload or when checking for a
   legitimate address.  We only need to worry about GPR, FPR, and Altivec
   registers here, along with an ANY field that is the OR of the 3 register
   classes.  */

enum rs6000_reload_reg_type {
  RELOAD_REG_GPR,		/* General purpose registers.  */
  RELOAD_REG_FPR,		/* Traditional floating point regs.  */
  RELOAD_REG_VMX,		/* Altivec (VMX) registers.  */
  RELOAD_REG_ANY,		/* OR of GPR, FPR, Altivec masks.  */
  N_RELOAD_REG
};

/* For setting up register classes, loop through the 3 register classes mapping
   into real registers, and skip the ANY class, which is just an OR of the
   bits.  */
#define FIRST_RELOAD_REG_CLASS	RELOAD_REG_GPR
#define LAST_RELOAD_REG_CLASS	RELOAD_REG_VMX

/* Map reload register type to a register in the register class.  */
struct reload_reg_map_type {
  const char *name;		/* Register class name.  */
  int reg;			/* Register in the register class.  */
};

static const struct reload_reg_map_type reload_reg_map[N_RELOAD_REG] = {
  { "Gpr",	FIRST_GPR_REGNO },	/* RELOAD_REG_GPR.  */
  { "Fpr",	FIRST_FPR_REGNO },	/* RELOAD_REG_FPR.  */
  { "VMX",	FIRST_ALTIVEC_REGNO },	/* RELOAD_REG_VMX.  */
  { "Any",	-1 },			/* RELOAD_REG_ANY.  */
};

/* Mask bits for each register class, indexed per mode.  Historically the
   compiler has been more restrictive about which types can do PRE_MODIFY
   instead of PRE_INC and PRE_DEC, so keep track of separate bits for these
   two.  */
typedef unsigned char addr_mask_type;

#define RELOAD_REG_VALID	0x01	/* Mode valid in register.  */
#define RELOAD_REG_MULTIPLE	0x02	/* Mode takes multiple registers.  */
#define RELOAD_REG_INDEXED	0x04	/* Reg+reg addressing.  */
#define RELOAD_REG_OFFSET	0x08	/* Reg+offset addressing.  */
#define RELOAD_REG_PRE_INCDEC	0x10	/* PRE_INC/PRE_DEC valid.  */
#define RELOAD_REG_PRE_MODIFY	0x20	/* PRE_MODIFY valid.  */
#define RELOAD_REG_AND_M16	0x40	/* AND -16 addressing.  */
#define RELOAD_REG_QUAD_OFFSET	0x80	/* quad offset is limited.  */
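
/* Illustrative composition: a mode valid in a register with both
   reg+reg and reg+offset addressing would carry (RELOAD_REG_VALID
   | RELOAD_REG_INDEXED | RELOAD_REG_OFFSET) == 0x0d in its class's
   slot, and the RELOAD_REG_ANY slot is the OR over the three real
   register classes.  */
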
/* Masks of valid addressing modes for each register type, plus the reload
   and fusion insns to use for each mode.  */
struct rs6000_reg_addr {
  enum insn_code reload_load;	/* INSN to reload for loading.  */
  enum insn_code reload_store;	/* INSN to reload for storing.  */
  enum insn_code reload_fpr_gpr;  /* INSN to move from FPR to GPR.  */
  enum insn_code reload_gpr_vsx;  /* INSN to move from GPR to VSX.  */
  enum insn_code reload_vsx_gpr;  /* INSN to move from VSX to GPR.  */
  enum insn_code fusion_gpr_ld;	  /* INSN for fusing gpr ADDIS/loads.  */
  /* INSNs for fusing addi with loads
     or stores for each reg. class.  */
  enum insn_code fusion_addi_ld[(int)N_RELOAD_REG];
  enum insn_code fusion_addi_st[(int)N_RELOAD_REG];
  /* INSNs for fusing addis with loads
     or stores for each reg. class.  */
  enum insn_code fusion_addis_ld[(int)N_RELOAD_REG];
  enum insn_code fusion_addis_st[(int)N_RELOAD_REG];
  addr_mask_type addr_mask[(int)N_RELOAD_REG];	/* Valid address masks.  */
  bool scalar_in_vmx_p;		/* Scalar value can go in VMX.  */
  bool fused_toc;		/* Mode supports TOC fusion.  */
};

static struct rs6000_reg_addr reg_addr[NUM_MACHINE_MODES];

/* Helper function to say whether a mode supports PRE_INC or PRE_DEC.  */
static inline bool
mode_supports_pre_incdec_p (machine_mode mode)
{
  return ((reg_addr[mode].addr_mask[RELOAD_REG_ANY] & RELOAD_REG_PRE_INCDEC)
	  != 0);
}

/* Helper function to say whether a mode supports PRE_MODIFY.  */
static inline bool
mode_supports_pre_modify_p (machine_mode mode)
{
  return ((reg_addr[mode].addr_mask[RELOAD_REG_ANY] & RELOAD_REG_PRE_MODIFY)
	  != 0);
}
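
/* The same query pattern works for any RELOAD_REG_* bit; e.g. a
   hypothetical indexed-addressing check would test
   (reg_addr[mode].addr_mask[RELOAD_REG_ANY] & RELOAD_REG_INDEXED).
   Further helpers of this shape (mode_supports_vmx_dform, ...) appear
   below.  */
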
/* Given that there exists at least one variable that is set (produced)
   by OUT_INSN and read (consumed) by IN_INSN, return true iff
   IN_INSN represents one or more memory store operations and none of
   the variables set by OUT_INSN is used by IN_INSN as the address of a
   store operation.  If either IN_INSN or OUT_INSN does not represent
   a "single" RTL SET expression (as loosely defined by the
   implementation of the single_set function) or a PARALLEL with only
   SETs, CLOBBERs, and USEs inside, this function returns false.

   This rs6000-specific version of store_data_bypass_p checks for
   certain conditions that result in assertion failures (and internal
   compiler errors) in the generic store_data_bypass_p function and
   returns false rather than calling store_data_bypass_p if one of the
   problematic conditions is detected.  */

int
rs6000_store_data_bypass_p (rtx_insn *out_insn, rtx_insn *in_insn)
{
  rtx out_set, in_set;
  rtx out_pat, in_pat;
  rtx out_exp, in_exp;
  int i, j;

  in_set = single_set (in_insn);
  if (in_set)
    {
      if (MEM_P (SET_DEST (in_set)))
	{
	  out_set = single_set (out_insn);
	  if (!out_set)
	    {
	      out_pat = PATTERN (out_insn);
	      if (GET_CODE (out_pat) == PARALLEL)
		{
		  for (i = 0; i < XVECLEN (out_pat, 0); i++)
		    {
		      out_exp = XVECEXP (out_pat, 0, i);
		      if ((GET_CODE (out_exp) == CLOBBER)
			  || (GET_CODE (out_exp) == USE))
			continue;
		      else if (GET_CODE (out_exp) != SET)
			return false;
		    }
		}
	    }
	}
    }
  else
    {
      in_pat = PATTERN (in_insn);
      if (GET_CODE (in_pat) != PARALLEL)
	return false;

      for (i = 0; i < XVECLEN (in_pat, 0); i++)
	{
	  in_exp = XVECEXP (in_pat, 0, i);
	  if ((GET_CODE (in_exp) == CLOBBER) || (GET_CODE (in_exp) == USE))
	    continue;
	  else if (GET_CODE (in_exp) != SET)
	    return false;

	  if (MEM_P (SET_DEST (in_exp)))
	    {
	      out_set = single_set (out_insn);
	      if (!out_set)
		{
		  out_pat = PATTERN (out_insn);
		  if (GET_CODE (out_pat) != PARALLEL)
		    return false;
		  for (j = 0; j < XVECLEN (out_pat, 0); j++)
		    {
		      out_exp = XVECEXP (out_pat, 0, j);
		      if ((GET_CODE (out_exp) == CLOBBER)
			  || (GET_CODE (out_exp) == USE))
			continue;
		      else if (GET_CODE (out_exp) != SET)
			return false;
		    }
		}
	    }
	}
    }
  return store_data_bypass_p (out_insn, in_insn);
}
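
/* The predicate above is intended as a drop-in replacement for
   store_data_bypass_p in the machine-description bypass guards, which
   is why it keeps the same signature and ultimately defers to the
   generic function once the problematic shapes have been excluded.  */
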
/* Return true if we have D-form addressing in altivec registers.  */
static inline bool
mode_supports_vmx_dform (machine_mode mode)
{
  return ((reg_addr[mode].addr_mask[RELOAD_REG_VMX] & RELOAD_REG_OFFSET) != 0);
}

/* Return true if we have D-form addressing in VSX registers.  This addressing
   is more limited than normal d-form addressing in that the offset must be
   aligned on a 16-byte boundary.  */
static inline bool
mode_supports_vsx_dform_quad (machine_mode mode)
{
  return ((reg_addr[mode].addr_mask[RELOAD_REG_ANY] & RELOAD_REG_QUAD_OFFSET)
	  != 0);
}

/* Target cpu costs.  */

struct processor_costs {
  const int mulsi;	  /* cost of SImode multiplication.  */
  const int mulsi_const;  /* cost of SImode multiplication by constant.  */
  const int mulsi_const9; /* cost of SImode mult by short constant.  */
  const int muldi;	  /* cost of DImode multiplication.  */
  const int divsi;	  /* cost of SImode division.  */
  const int divdi;	  /* cost of DImode division.  */
  const int fp;		  /* cost of simple SFmode and DFmode insns.  */
  const int dmul;	  /* cost of DFmode multiplication (and fmadd).  */
  const int sdiv;	  /* cost of SFmode division (fdivs).  */
  const int ddiv;	  /* cost of DFmode division (fdiv).  */
  const int cache_line_size;	/* cache line size in bytes.  */
  const int l1_cache_size;	/* size of l1 cache, in kilobytes.  */
  const int l2_cache_size;	/* size of l2 cache, in kilobytes.  */
  const int simultaneous_prefetches; /* number of parallel prefetch
					operations.  */
  const int sfdf_convert;	/* cost of SF->DF conversion.  */
};

const struct processor_costs *rs6000_cost;

/* Processor costs (relative to an add) */

/* Instruction size costs on 32bit processors.  */
static const
struct processor_costs size32_cost = {
  COSTS_N_INSNS (1),	/* mulsi */
  COSTS_N_INSNS (1),	/* mulsi_const */
  COSTS_N_INSNS (1),	/* mulsi_const9 */
  COSTS_N_INSNS (1),	/* muldi */
  COSTS_N_INSNS (1),	/* divsi */
  COSTS_N_INSNS (1),	/* divdi */
  COSTS_N_INSNS (1),	/* fp */
  COSTS_N_INSNS (1),	/* dmul */
  COSTS_N_INSNS (1),	/* sdiv */
  COSTS_N_INSNS (1),	/* ddiv */
  32,			/* cache line size */
  0,			/* l1 cache */
  0,			/* l2 cache */
  0,			/* streams */
  0,			/* SF->DF convert */
};

/* Instruction size costs on 64bit processors.  */
static const
struct processor_costs size64_cost = {
  COSTS_N_INSNS (1),	/* mulsi */
  COSTS_N_INSNS (1),	/* mulsi_const */
  COSTS_N_INSNS (1),	/* mulsi_const9 */
  COSTS_N_INSNS (1),	/* muldi */
  COSTS_N_INSNS (1),	/* divsi */
  COSTS_N_INSNS (1),	/* divdi */
  COSTS_N_INSNS (1),	/* fp */
  COSTS_N_INSNS (1),	/* dmul */
  COSTS_N_INSNS (1),	/* sdiv */
  COSTS_N_INSNS (1),	/* ddiv */
  128,			/* cache line size */
  0,			/* l1 cache */
  0,			/* l2 cache */
  0,			/* streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on RS64A processors.  */
static const
struct processor_costs rs64a_cost = {
  COSTS_N_INSNS (20),	/* mulsi */
  COSTS_N_INSNS (12),	/* mulsi_const */
  COSTS_N_INSNS (8),	/* mulsi_const9 */
  COSTS_N_INSNS (34),	/* muldi */
  COSTS_N_INSNS (65),	/* divsi */
  COSTS_N_INSNS (67),	/* divdi */
  COSTS_N_INSNS (4),	/* fp */
  COSTS_N_INSNS (4),	/* dmul */
  COSTS_N_INSNS (31),	/* sdiv */
  COSTS_N_INSNS (31),	/* ddiv */
  128,			/* cache line size */
  128,			/* l1 cache */
  2048,			/* l2 cache */
  1,			/* streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on MPCCORE processors.  */
static const
struct processor_costs mpccore_cost = {
  COSTS_N_INSNS (2),	/* mulsi */
  COSTS_N_INSNS (2),	/* mulsi_const */
  COSTS_N_INSNS (2),	/* mulsi_const9 */
  COSTS_N_INSNS (2),	/* muldi */
  COSTS_N_INSNS (6),	/* divsi */
  COSTS_N_INSNS (6),	/* divdi */
  COSTS_N_INSNS (4),	/* fp */
  COSTS_N_INSNS (5),	/* dmul */
  COSTS_N_INSNS (10),	/* sdiv */
  COSTS_N_INSNS (17),	/* ddiv */
  32,			/* cache line size */
  4,			/* l1 cache */
  16,			/* l2 cache */
  1,			/* streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on PPC403 processors.  */
static const
struct processor_costs ppc403_cost = {
  COSTS_N_INSNS (4),	/* mulsi */
  COSTS_N_INSNS (4),	/* mulsi_const */
  COSTS_N_INSNS (4),	/* mulsi_const9 */
  COSTS_N_INSNS (4),	/* muldi */
  COSTS_N_INSNS (33),	/* divsi */
  COSTS_N_INSNS (33),	/* divdi */
  COSTS_N_INSNS (11),	/* fp */
  COSTS_N_INSNS (11),	/* dmul */
  COSTS_N_INSNS (11),	/* sdiv */
  COSTS_N_INSNS (11),	/* ddiv */
  32,			/* cache line size */
  4,			/* l1 cache */
  16,			/* l2 cache */
  1,			/* streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on PPC405 processors.  */
static const
struct processor_costs ppc405_cost = {
  COSTS_N_INSNS (5),	/* mulsi */
  COSTS_N_INSNS (4),	/* mulsi_const */
  COSTS_N_INSNS (3),	/* mulsi_const9 */
  COSTS_N_INSNS (5),	/* muldi */
  COSTS_N_INSNS (35),	/* divsi */
  COSTS_N_INSNS (35),	/* divdi */
  COSTS_N_INSNS (11),	/* fp */
  COSTS_N_INSNS (11),	/* dmul */
  COSTS_N_INSNS (11),	/* sdiv */
  COSTS_N_INSNS (11),	/* ddiv */
  32,			/* cache line size */
  16,			/* l1 cache */
  128,			/* l2 cache */
  1,			/* streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on PPC440 processors.  */
static const
struct processor_costs ppc440_cost = {
  COSTS_N_INSNS (3),	/* mulsi */
  COSTS_N_INSNS (2),	/* mulsi_const */
  COSTS_N_INSNS (2),	/* mulsi_const9 */
  COSTS_N_INSNS (3),	/* muldi */
  COSTS_N_INSNS (34),	/* divsi */
  COSTS_N_INSNS (34),	/* divdi */
  COSTS_N_INSNS (5),	/* fp */
  COSTS_N_INSNS (5),	/* dmul */
  COSTS_N_INSNS (19),	/* sdiv */
  COSTS_N_INSNS (33),	/* ddiv */
  32,			/* cache line size */
  32,			/* l1 cache */
  256,			/* l2 cache */
  1,			/* streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on PPC476 processors.  */
static const
struct processor_costs ppc476_cost = {
  COSTS_N_INSNS (4),	/* mulsi */
  COSTS_N_INSNS (4),	/* mulsi_const */
  COSTS_N_INSNS (4),	/* mulsi_const9 */
  COSTS_N_INSNS (4),	/* muldi */
  COSTS_N_INSNS (11),	/* divsi */
  COSTS_N_INSNS (11),	/* divdi */
  COSTS_N_INSNS (6),	/* fp */
  COSTS_N_INSNS (6),	/* dmul */
  COSTS_N_INSNS (19),	/* sdiv */
  COSTS_N_INSNS (33),	/* ddiv */
  32,			/* l1 cache line size */
  32,			/* l1 cache */
  512,			/* l2 cache */
  1,			/* streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on PPC601 processors.  */
static const
struct processor_costs ppc601_cost = {
  COSTS_N_INSNS (5),	/* mulsi */
  COSTS_N_INSNS (5),	/* mulsi_const */
  COSTS_N_INSNS (5),	/* mulsi_const9 */
  COSTS_N_INSNS (5),	/* muldi */
  COSTS_N_INSNS (36),	/* divsi */
  COSTS_N_INSNS (36),	/* divdi */
  COSTS_N_INSNS (4),	/* fp */
  COSTS_N_INSNS (5),	/* dmul */
  COSTS_N_INSNS (17),	/* sdiv */
  COSTS_N_INSNS (31),	/* ddiv */
  32,			/* cache line size */
  32,			/* l1 cache */
  256,			/* l2 cache */
  1,			/* streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on PPC603 processors.  */
static const
struct processor_costs ppc603_cost = {
  COSTS_N_INSNS (5),	/* mulsi */
  COSTS_N_INSNS (3),	/* mulsi_const */
  COSTS_N_INSNS (2),	/* mulsi_const9 */
  COSTS_N_INSNS (5),	/* muldi */
  COSTS_N_INSNS (37),	/* divsi */
  COSTS_N_INSNS (37),	/* divdi */
  COSTS_N_INSNS (3),	/* fp */
  COSTS_N_INSNS (4),	/* dmul */
  COSTS_N_INSNS (18),	/* sdiv */
  COSTS_N_INSNS (33),	/* ddiv */
  32,			/* cache line size */
  8,			/* l1 cache */
  64,			/* l2 cache */
  1,			/* streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on PPC604 processors.  */
static const
struct processor_costs ppc604_cost = {
  COSTS_N_INSNS (4),	/* mulsi */
  COSTS_N_INSNS (4),	/* mulsi_const */
  COSTS_N_INSNS (4),	/* mulsi_const9 */
  COSTS_N_INSNS (4),	/* muldi */
  COSTS_N_INSNS (20),	/* divsi */
  COSTS_N_INSNS (20),	/* divdi */
  COSTS_N_INSNS (3),	/* fp */
  COSTS_N_INSNS (3),	/* dmul */
  COSTS_N_INSNS (18),	/* sdiv */
  COSTS_N_INSNS (32),	/* ddiv */
  32,			/* cache line size */
  16,			/* l1 cache */
  512,			/* l2 cache */
  1,			/* streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on PPC604e processors.  */
static const
struct processor_costs ppc604e_cost = {
  COSTS_N_INSNS (2),	/* mulsi */
  COSTS_N_INSNS (2),	/* mulsi_const */
  COSTS_N_INSNS (2),	/* mulsi_const9 */
  COSTS_N_INSNS (2),	/* muldi */
  COSTS_N_INSNS (20),	/* divsi */
  COSTS_N_INSNS (20),	/* divdi */
  COSTS_N_INSNS (3),	/* fp */
  COSTS_N_INSNS (3),	/* dmul */
  COSTS_N_INSNS (18),	/* sdiv */
  COSTS_N_INSNS (32),	/* ddiv */
  32,			/* cache line size */
  32,			/* l1 cache */
  1024,			/* l2 cache */
  1,			/* streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on PPC620 processors.  */
static const
struct processor_costs ppc620_cost = {
  COSTS_N_INSNS (5),	/* mulsi */
  COSTS_N_INSNS (4),	/* mulsi_const */
  COSTS_N_INSNS (3),	/* mulsi_const9 */
  COSTS_N_INSNS (7),	/* muldi */
  COSTS_N_INSNS (21),	/* divsi */
  COSTS_N_INSNS (37),	/* divdi */
  COSTS_N_INSNS (3),	/* fp */
  COSTS_N_INSNS (3),	/* dmul */
  COSTS_N_INSNS (18),	/* sdiv */
  COSTS_N_INSNS (32),	/* ddiv */
  128,			/* cache line size */
  32,			/* l1 cache */
  1024,			/* l2 cache */
  1,			/* streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on PPC630 processors.  */
static const
struct processor_costs ppc630_cost = {
  COSTS_N_INSNS (5),	/* mulsi */
  COSTS_N_INSNS (4),	/* mulsi_const */
  COSTS_N_INSNS (3),	/* mulsi_const9 */
  COSTS_N_INSNS (7),	/* muldi */
  COSTS_N_INSNS (21),	/* divsi */
  COSTS_N_INSNS (37),	/* divdi */
  COSTS_N_INSNS (3),	/* fp */
  COSTS_N_INSNS (3),	/* dmul */
  COSTS_N_INSNS (17),	/* sdiv */
  COSTS_N_INSNS (21),	/* ddiv */
  128,			/* cache line size */
  64,			/* l1 cache */
  1024,			/* l2 cache */
  1,			/* streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on Cell processor.  */
/* COSTS_N_INSNS (1) ~ one add.  */
static const
struct processor_costs ppccell_cost = {
  COSTS_N_INSNS (9/2)+2,    /* mulsi */
  COSTS_N_INSNS (6/2),	    /* mulsi_const */
  COSTS_N_INSNS (6/2),	    /* mulsi_const9 */
  COSTS_N_INSNS (15/2)+2,   /* muldi */
  COSTS_N_INSNS (38/2),	    /* divsi */
  COSTS_N_INSNS (70/2),	    /* divdi */
  COSTS_N_INSNS (10/2),	    /* fp */
  COSTS_N_INSNS (10/2),	    /* dmul */
  COSTS_N_INSNS (74/2),	    /* sdiv */
  COSTS_N_INSNS (74/2),	    /* ddiv */
  128,			    /* cache line size */
  32,			    /* l1 cache */
  512,			    /* l2 cache */
  6,			    /* streams */
  0,			    /* SF->DF convert */
};

/* Instruction costs on PPC750 and PPC7400 processors.  */
static const
struct processor_costs ppc750_cost = {
  COSTS_N_INSNS (5),	/* mulsi */
  COSTS_N_INSNS (3),	/* mulsi_const */
  COSTS_N_INSNS (2),	/* mulsi_const9 */
  COSTS_N_INSNS (5),	/* muldi */
  COSTS_N_INSNS (17),	/* divsi */
  COSTS_N_INSNS (17),	/* divdi */
  COSTS_N_INSNS (3),	/* fp */
  COSTS_N_INSNS (3),	/* dmul */
  COSTS_N_INSNS (17),	/* sdiv */
  COSTS_N_INSNS (31),	/* ddiv */
  32,			/* cache line size */
  32,			/* l1 cache */
  512,			/* l2 cache */
  1,			/* streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on PPC7450 processors.  */
static const
struct processor_costs ppc7450_cost = {
  COSTS_N_INSNS (4),	/* mulsi */
  COSTS_N_INSNS (3),	/* mulsi_const */
  COSTS_N_INSNS (3),	/* mulsi_const9 */
  COSTS_N_INSNS (4),	/* muldi */
  COSTS_N_INSNS (23),	/* divsi */
  COSTS_N_INSNS (23),	/* divdi */
  COSTS_N_INSNS (5),	/* fp */
  COSTS_N_INSNS (5),	/* dmul */
  COSTS_N_INSNS (21),	/* sdiv */
  COSTS_N_INSNS (35),	/* ddiv */
  32,			/* cache line size */
  32,			/* l1 cache */
  1024,			/* l2 cache */
  1,			/* streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on PPC8540 processors.  */
static const
struct processor_costs ppc8540_cost = {
  COSTS_N_INSNS (4),	/* mulsi */
  COSTS_N_INSNS (4),	/* mulsi_const */
  COSTS_N_INSNS (4),	/* mulsi_const9 */
  COSTS_N_INSNS (4),	/* muldi */
  COSTS_N_INSNS (19),	/* divsi */
  COSTS_N_INSNS (19),	/* divdi */
  COSTS_N_INSNS (4),	/* fp */
  COSTS_N_INSNS (4),	/* dmul */
  COSTS_N_INSNS (29),	/* sdiv */
  COSTS_N_INSNS (29),	/* ddiv */
  32,			/* cache line size */
  32,			/* l1 cache */
  256,			/* l2 cache */
  1,			/* prefetch streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on E300C2 and E300C3 cores.  */
static const
struct processor_costs ppce300c2c3_cost = {
  COSTS_N_INSNS (4),	/* mulsi */
  COSTS_N_INSNS (4),	/* mulsi_const */
  COSTS_N_INSNS (4),	/* mulsi_const9 */
  COSTS_N_INSNS (4),	/* muldi */
  COSTS_N_INSNS (19),	/* divsi */
  COSTS_N_INSNS (19),	/* divdi */
  COSTS_N_INSNS (3),	/* fp */
  COSTS_N_INSNS (4),	/* dmul */
  COSTS_N_INSNS (18),	/* sdiv */
  COSTS_N_INSNS (33),	/* ddiv */
  32,			/* cache line size */
  16,			/* l1 cache */
  16,			/* l2 cache */
  1,			/* prefetch streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on PPCE500MC processors.  */
static const
struct processor_costs ppce500mc_cost = {
  COSTS_N_INSNS (4),	/* mulsi */
  COSTS_N_INSNS (4),	/* mulsi_const */
  COSTS_N_INSNS (4),	/* mulsi_const9 */
  COSTS_N_INSNS (4),	/* muldi */
  COSTS_N_INSNS (14),	/* divsi */
  COSTS_N_INSNS (14),	/* divdi */
  COSTS_N_INSNS (8),	/* fp */
  COSTS_N_INSNS (10),	/* dmul */
  COSTS_N_INSNS (36),	/* sdiv */
  COSTS_N_INSNS (66),	/* ddiv */
  64,			/* cache line size */
  32,			/* l1 cache */
  128,			/* l2 cache */
  1,			/* prefetch streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on PPCE500MC64 processors.  */
static const
struct processor_costs ppce500mc64_cost = {
  COSTS_N_INSNS (4),	/* mulsi */
  COSTS_N_INSNS (4),	/* mulsi_const */
  COSTS_N_INSNS (4),	/* mulsi_const9 */
  COSTS_N_INSNS (4),	/* muldi */
  COSTS_N_INSNS (14),	/* divsi */
  COSTS_N_INSNS (14),	/* divdi */
  COSTS_N_INSNS (4),	/* fp */
  COSTS_N_INSNS (10),	/* dmul */
  COSTS_N_INSNS (36),	/* sdiv */
  COSTS_N_INSNS (66),	/* ddiv */
  64,			/* cache line size */
  32,			/* l1 cache */
  128,			/* l2 cache */
  1,			/* prefetch streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on PPCE5500 processors.  */
static const
struct processor_costs ppce5500_cost = {
  COSTS_N_INSNS (5),	/* mulsi */
  COSTS_N_INSNS (5),	/* mulsi_const */
  COSTS_N_INSNS (4),	/* mulsi_const9 */
  COSTS_N_INSNS (5),	/* muldi */
  COSTS_N_INSNS (14),	/* divsi */
  COSTS_N_INSNS (14),	/* divdi */
  COSTS_N_INSNS (7),	/* fp */
  COSTS_N_INSNS (10),	/* dmul */
  COSTS_N_INSNS (36),	/* sdiv */
  COSTS_N_INSNS (66),	/* ddiv */
  64,			/* cache line size */
  32,			/* l1 cache */
  128,			/* l2 cache */
  1,			/* prefetch streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on PPCE6500 processors.  */
static const
struct processor_costs ppce6500_cost = {
  COSTS_N_INSNS (5),	/* mulsi */
  COSTS_N_INSNS (5),	/* mulsi_const */
  COSTS_N_INSNS (4),	/* mulsi_const9 */
  COSTS_N_INSNS (5),	/* muldi */
  COSTS_N_INSNS (14),	/* divsi */
  COSTS_N_INSNS (14),	/* divdi */
  COSTS_N_INSNS (7),	/* fp */
  COSTS_N_INSNS (10),	/* dmul */
  COSTS_N_INSNS (36),	/* sdiv */
  COSTS_N_INSNS (66),	/* ddiv */
  64,			/* cache line size */
  32,			/* l1 cache */
  128,			/* l2 cache */
  1,			/* prefetch streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on AppliedMicro Titan processors.  */
static const
struct processor_costs titan_cost = {
  COSTS_N_INSNS (5),	/* mulsi */
  COSTS_N_INSNS (5),	/* mulsi_const */
  COSTS_N_INSNS (5),	/* mulsi_const9 */
  COSTS_N_INSNS (5),	/* muldi */
  COSTS_N_INSNS (18),	/* divsi */
  COSTS_N_INSNS (18),	/* divdi */
  COSTS_N_INSNS (10),	/* fp */
  COSTS_N_INSNS (10),	/* dmul */
  COSTS_N_INSNS (46),	/* sdiv */
  COSTS_N_INSNS (72),	/* ddiv */
  32,			/* cache line size */
  32,			/* l1 cache */
  512,			/* l2 cache */
  1,			/* prefetch streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on POWER4 and POWER5 processors.  */
static const
struct processor_costs power4_cost = {
  COSTS_N_INSNS (3),	/* mulsi */
  COSTS_N_INSNS (2),	/* mulsi_const */
  COSTS_N_INSNS (2),	/* mulsi_const9 */
  COSTS_N_INSNS (4),	/* muldi */
  COSTS_N_INSNS (18),	/* divsi */
  COSTS_N_INSNS (34),	/* divdi */
  COSTS_N_INSNS (3),	/* fp */
  COSTS_N_INSNS (3),	/* dmul */
  COSTS_N_INSNS (17),	/* sdiv */
  COSTS_N_INSNS (17),	/* ddiv */
  128,			/* cache line size */
  32,			/* l1 cache */
  1024,			/* l2 cache */
  8,			/* prefetch streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on POWER6 processors.  */
static const
struct processor_costs power6_cost = {
  COSTS_N_INSNS (8),	/* mulsi */
  COSTS_N_INSNS (8),	/* mulsi_const */
  COSTS_N_INSNS (8),	/* mulsi_const9 */
  COSTS_N_INSNS (8),	/* muldi */
  COSTS_N_INSNS (22),	/* divsi */
  COSTS_N_INSNS (28),	/* divdi */
  COSTS_N_INSNS (3),	/* fp */
  COSTS_N_INSNS (3),	/* dmul */
  COSTS_N_INSNS (13),	/* sdiv */
  COSTS_N_INSNS (16),	/* ddiv */
  128,			/* cache line size */
  64,			/* l1 cache */
  2048,			/* l2 cache */
  16,			/* prefetch streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on POWER7 processors.  */
static const
struct processor_costs power7_cost = {
  COSTS_N_INSNS (2),	/* mulsi */
  COSTS_N_INSNS (2),	/* mulsi_const */
  COSTS_N_INSNS (2),	/* mulsi_const9 */
  COSTS_N_INSNS (2),	/* muldi */
  COSTS_N_INSNS (18),	/* divsi */
  COSTS_N_INSNS (34),	/* divdi */
  COSTS_N_INSNS (3),	/* fp */
  COSTS_N_INSNS (3),	/* dmul */
  COSTS_N_INSNS (13),	/* sdiv */
  COSTS_N_INSNS (16),	/* ddiv */
  128,			/* cache line size */
  32,			/* l1 cache */
  256,			/* l2 cache */
  12,			/* prefetch streams */
  COSTS_N_INSNS (3),	/* SF->DF convert */
};

/* Instruction costs on POWER8 processors.  */
static const
struct processor_costs power8_cost = {
  COSTS_N_INSNS (3),	/* mulsi */
  COSTS_N_INSNS (3),	/* mulsi_const */
  COSTS_N_INSNS (3),	/* mulsi_const9 */
  COSTS_N_INSNS (3),	/* muldi */
  COSTS_N_INSNS (19),	/* divsi */
  COSTS_N_INSNS (35),	/* divdi */
  COSTS_N_INSNS (3),	/* fp */
  COSTS_N_INSNS (3),	/* dmul */
  COSTS_N_INSNS (14),	/* sdiv */
  COSTS_N_INSNS (17),	/* ddiv */
  128,			/* cache line size */
  32,			/* l1 cache */
  256,			/* l2 cache */
  12,			/* prefetch streams */
  COSTS_N_INSNS (3),	/* SF->DF convert */
};

/* Instruction costs on POWER9 processors.  */
static const
struct processor_costs power9_cost = {
  COSTS_N_INSNS (3),	/* mulsi */
  COSTS_N_INSNS (3),	/* mulsi_const */
  COSTS_N_INSNS (3),	/* mulsi_const9 */
  COSTS_N_INSNS (3),	/* muldi */
  COSTS_N_INSNS (8),	/* divsi */
  COSTS_N_INSNS (12),	/* divdi */
  COSTS_N_INSNS (3),	/* fp */
  COSTS_N_INSNS (3),	/* dmul */
  COSTS_N_INSNS (13),	/* sdiv */
  COSTS_N_INSNS (18),	/* ddiv */
  128,			/* cache line size */
  32,			/* l1 cache */
  512,			/* l2 cache */
  8,			/* prefetch streams */
  COSTS_N_INSNS (3),	/* SF->DF convert */
};

/* Instruction costs on POWER A2 processors.  */
static const
struct processor_costs ppca2_cost = {
  COSTS_N_INSNS (16),	/* mulsi */
  COSTS_N_INSNS (16),	/* mulsi_const */
  COSTS_N_INSNS (16),	/* mulsi_const9 */
  COSTS_N_INSNS (16),	/* muldi */
  COSTS_N_INSNS (22),	/* divsi */
  COSTS_N_INSNS (28),	/* divdi */
  COSTS_N_INSNS (3),	/* fp */
  COSTS_N_INSNS (3),	/* dmul */
  COSTS_N_INSNS (59),	/* sdiv */
  COSTS_N_INSNS (72),	/* ddiv */
  64,			/* cache line size */
  16,			/* l1 cache */
  2048,			/* l2 cache */
  16,			/* prefetch streams */
  0,			/* SF->DF convert */
};

/* Table that classifies rs6000 builtin functions (pure, const, etc.).  */
#undef RS6000_BUILTIN_0
#undef RS6000_BUILTIN_1
#undef RS6000_BUILTIN_2
#undef RS6000_BUILTIN_3
#undef RS6000_BUILTIN_A
#undef RS6000_BUILTIN_D
#undef RS6000_BUILTIN_H
#undef RS6000_BUILTIN_P
#undef RS6000_BUILTIN_Q
#undef RS6000_BUILTIN_X

#define RS6000_BUILTIN_0(ENUM, NAME, MASK, ATTR, ICODE) \
  { NAME, ICODE, MASK, ATTR },

#define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE) \
  { NAME, ICODE, MASK, ATTR },

#define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE) \
  { NAME, ICODE, MASK, ATTR },

#define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE) \
  { NAME, ICODE, MASK, ATTR },

#define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE) \
  { NAME, ICODE, MASK, ATTR },

#define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE) \
  { NAME, ICODE, MASK, ATTR },

#define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE) \
  { NAME, ICODE, MASK, ATTR },

#define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE) \
  { NAME, ICODE, MASK, ATTR },

#define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE) \
  { NAME, ICODE, MASK, ATTR },

#define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE) \
  { NAME, ICODE, MASK, ATTR },

struct rs6000_builtin_info_type {
  const char *name;
  const enum insn_code icode;
  const HOST_WIDE_INT mask;
  const unsigned attr;
};

static const struct rs6000_builtin_info_type rs6000_builtin_info[] =
{
#include "rs6000-builtin.def"
};

#undef RS6000_BUILTIN_0
#undef RS6000_BUILTIN_1
#undef RS6000_BUILTIN_2
#undef RS6000_BUILTIN_3
#undef RS6000_BUILTIN_A
#undef RS6000_BUILTIN_D
#undef RS6000_BUILTIN_H
#undef RS6000_BUILTIN_P
#undef RS6000_BUILTIN_Q
#undef RS6000_BUILTIN_X
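
/* Sketch of the X-macro expansion above: a line in rs6000-builtin.def
   such as (schematically)

     RS6000_BUILTIN_2 (ENUM, "__builtin_name", MASK, ATTR, CODE_FOR_insn)

   expands, under the definitions above, to the initializer
   { "__builtin_name", CODE_FOR_insn, MASK, ATTR } in
   rs6000_builtin_info; the macros are then #undef'd so the .def file
   can be re-expanded elsewhere with different definitions.  */
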
/* Support for -mveclibabi=<xxx> to control which vector library to use.  */
static tree (*rs6000_veclib_handler) (combined_fn, tree, tree);

static bool rs6000_debug_legitimate_address_p (machine_mode, rtx, bool);
static struct machine_function * rs6000_init_machine_status (void);
static int rs6000_ra_ever_killed (void);
static tree rs6000_handle_longcall_attribute (tree *, tree, tree, int, bool *);
static tree rs6000_handle_altivec_attribute (tree *, tree, tree, int, bool *);
static tree rs6000_handle_struct_attribute (tree *, tree, tree, int, bool *);
static tree rs6000_builtin_vectorized_libmass (combined_fn, tree, tree);
static void rs6000_emit_set_long_const (rtx, HOST_WIDE_INT);
static int rs6000_memory_move_cost (machine_mode, reg_class_t, bool);
static bool rs6000_debug_rtx_costs (rtx, machine_mode, int, int, int *, bool);
static int rs6000_debug_address_cost (rtx, machine_mode, addr_space_t,
				      bool);
static int rs6000_debug_adjust_cost (rtx_insn *, int, rtx_insn *, int,
				     unsigned int);
static bool is_microcoded_insn (rtx_insn *);
static bool is_nonpipeline_insn (rtx_insn *);
static bool is_cracked_insn (rtx_insn *);
static bool is_load_insn (rtx, rtx *);
static bool is_store_insn (rtx, rtx *);
static bool set_to_load_agen (rtx_insn *, rtx_insn *);
static bool insn_terminates_group_p (rtx_insn *, enum group_termination);
static bool insn_must_be_first_in_group (rtx_insn *);
static bool insn_must_be_last_in_group (rtx_insn *);
static void altivec_init_builtins (void);
static tree builtin_function_type (machine_mode, machine_mode,
				   machine_mode, machine_mode,
				   enum rs6000_builtins, const char *name);
static void rs6000_common_init_builtins (void);
static void paired_init_builtins (void);
static rtx paired_expand_predicate_builtin (enum insn_code, tree, rtx);
static void htm_init_builtins (void);
static int rs6000_emit_int_cmove (rtx, rtx, rtx, rtx);
static rs6000_stack_t *rs6000_stack_info (void);
static void is_altivec_return_reg (rtx, void *);
int easy_vector_constant (rtx, machine_mode);
static rtx rs6000_debug_legitimize_address (rtx, rtx, machine_mode);
static rtx rs6000_legitimize_tls_address (rtx, enum tls_model);
static rtx rs6000_darwin64_record_arg (CUMULATIVE_ARGS *, const_tree,
				       bool, bool);
#if TARGET_MACHO
static void macho_branch_islands (void);
#endif
static rtx rs6000_legitimize_reload_address (rtx, machine_mode, int, int,
					     int, int *);
static rtx rs6000_debug_legitimize_reload_address (rtx, machine_mode, int,
						   int, int, int *);
static bool rs6000_mode_dependent_address (const_rtx);
static bool rs6000_debug_mode_dependent_address (const_rtx);
static enum reg_class rs6000_secondary_reload_class (enum reg_class,
						     machine_mode, rtx);
static enum reg_class rs6000_debug_secondary_reload_class (enum reg_class,
							   machine_mode,
							   rtx);
static enum reg_class rs6000_preferred_reload_class (rtx, enum reg_class);
static enum reg_class rs6000_debug_preferred_reload_class (rtx,
							   enum reg_class);
static bool rs6000_secondary_memory_needed (enum reg_class, enum reg_class,
					    machine_mode);
static bool rs6000_debug_secondary_memory_needed (enum reg_class,
						  enum reg_class,
						  machine_mode);
static bool rs6000_cannot_change_mode_class (machine_mode,
					     machine_mode,
					     enum reg_class);
static bool rs6000_debug_cannot_change_mode_class (machine_mode,
						   machine_mode,
						   enum reg_class);
static bool rs6000_save_toc_in_prologue_p (void);
static rtx rs6000_internal_arg_pointer (void);

rtx (*rs6000_legitimize_reload_address_ptr) (rtx, machine_mode, int, int,
					     int, int *)
  = rs6000_legitimize_reload_address;

static bool (*rs6000_mode_dependent_address_ptr) (const_rtx)
  = rs6000_mode_dependent_address;

enum reg_class (*rs6000_secondary_reload_class_ptr) (enum reg_class,
						     machine_mode, rtx)
  = rs6000_secondary_reload_class;

enum reg_class (*rs6000_preferred_reload_class_ptr) (rtx, enum reg_class)
  = rs6000_preferred_reload_class;

bool (*rs6000_secondary_memory_needed_ptr) (enum reg_class, enum reg_class,
					    machine_mode)
  = rs6000_secondary_memory_needed;

bool (*rs6000_cannot_change_mode_class_ptr) (machine_mode,
					     machine_mode,
					     enum reg_class)
  = rs6000_cannot_change_mode_class;

const int INSN_NOT_AVAILABLE = -1;

static void rs6000_print_isa_options (FILE *, int, const char *,
				      HOST_WIDE_INT);
static void rs6000_print_builtin_options (FILE *, int, const char *,
					  HOST_WIDE_INT);
static HOST_WIDE_INT rs6000_disable_incompatible_switches (void);

static enum rs6000_reg_type register_to_reg_type (rtx, bool *);
static bool rs6000_secondary_reload_move (enum rs6000_reg_type,
					  enum rs6000_reg_type,
					  machine_mode,
					  secondary_reload_info *,
					  bool);
rtl_opt_pass *make_pass_analyze_swaps (gcc::context*);
static bool rs6000_keep_leaf_when_profiled () __attribute__ ((unused));
static tree rs6000_fold_builtin (tree, int, tree *, bool);

/* Hash table stuff for keeping track of TOC entries.  */

struct GTY((for_user)) toc_hash_struct
{
  /* `key' will satisfy CONSTANT_P; in fact, it will satisfy
     ASM_OUTPUT_SPECIAL_POOL_ENTRY_P.  */
  rtx key;
  machine_mode key_mode;
  int labelno;
};

struct toc_hasher : ggc_ptr_hash<toc_hash_struct>
{
  static hashval_t hash (toc_hash_struct *);
  static bool equal (toc_hash_struct *, toc_hash_struct *);
};

static GTY (()) hash_table<toc_hasher> *toc_hash_table;

/* Hash table to keep track of the argument types for builtin functions.  */

struct GTY((for_user)) builtin_hash_struct
{
  tree type;
  machine_mode mode[4];		/* return value + 3 arguments.  */
  unsigned char uns_p[4];	/* and whether the types are unsigned.  */
};

struct builtin_hasher : ggc_ptr_hash<builtin_hash_struct>
{
  static hashval_t hash (builtin_hash_struct *);
  static bool equal (builtin_hash_struct *, builtin_hash_struct *);
};

static GTY (()) hash_table<builtin_hasher> *builtin_hash_table;

/* Default register names.  */
char rs6000_reg_names[][8] =
{
   "0",  "1",  "2",  "3",  "4",  "5",  "6",  "7",
   "8",  "9", "10", "11", "12", "13", "14", "15",
  "16", "17", "18", "19", "20", "21", "22", "23",
  "24", "25", "26", "27", "28", "29", "30", "31",
   "0",  "1",  "2",  "3",  "4",  "5",  "6",  "7",
   "8",  "9", "10", "11", "12", "13", "14", "15",
  "16", "17", "18", "19", "20", "21", "22", "23",
  "24", "25", "26", "27", "28", "29", "30", "31",
  "mq", "lr", "ctr", "ap",
   "0",  "1",  "2",  "3",  "4",  "5",  "6",  "7",
  "ca",
  /* AltiVec registers.  */
   "0",  "1",  "2",  "3",  "4",  "5",  "6",  "7",
   "8",  "9", "10", "11", "12", "13", "14", "15",
  "16", "17", "18", "19", "20", "21", "22", "23",
  "24", "25", "26", "27", "28", "29", "30", "31",
  "vrsave", "vscr",
  /* Soft frame pointer.  */
  "sfp",
  /* HTM SPR registers.  */
  "tfhar", "tfiar", "texasr"
};

#ifdef TARGET_REGNAMES
static const char alt_reg_names[][8] =
{
   "%r0",  "%r1",  "%r2",  "%r3",  "%r4",  "%r5",  "%r6",  "%r7",
   "%r8",  "%r9", "%r10", "%r11", "%r12", "%r13", "%r14", "%r15",
  "%r16", "%r17", "%r18", "%r19", "%r20", "%r21", "%r22", "%r23",
  "%r24", "%r25", "%r26", "%r27", "%r28", "%r29", "%r30", "%r31",
   "%f0",  "%f1",  "%f2",  "%f3",  "%f4",  "%f5",  "%f6",  "%f7",
   "%f8",  "%f9", "%f10", "%f11", "%f12", "%f13", "%f14", "%f15",
  "%f16", "%f17", "%f18", "%f19", "%f20", "%f21", "%f22", "%f23",
  "%f24", "%f25", "%f26", "%f27", "%f28", "%f29", "%f30", "%f31",
    "mq",    "lr",  "ctr",   "ap",
  "%cr0",  "%cr1", "%cr2", "%cr3", "%cr4", "%cr5", "%cr6", "%cr7",
   "ca",
  /* AltiVec registers.  */
   "%v0",  "%v1",  "%v2",  "%v3",  "%v4",  "%v5",  "%v6",  "%v7",
   "%v8",  "%v9", "%v10", "%v11", "%v12", "%v13", "%v14", "%v15",
  "%v16", "%v17", "%v18", "%v19", "%v20", "%v21", "%v22", "%v23",
  "%v24", "%v25", "%v26", "%v27", "%v28", "%v29", "%v30", "%v31",
  "vrsave", "vscr",
  /* Soft frame pointer.  */
  "sfp",
  /* HTM SPR registers.  */
  "tfhar", "tfiar", "texasr"
};
#endif

/* Table of valid machine attributes.  */

static const struct attribute_spec rs6000_attribute_table[] =
{
  /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
       affects_type_identity } */
  { "altivec",   1, 1, false, true,  false, rs6000_handle_altivec_attribute,
    false },
  { "longcall",  0, 0, false, true,  true,  rs6000_handle_longcall_attribute,
    false },
  { "shortcall", 0, 0, false, true,  true,  rs6000_handle_longcall_attribute,
    false },
  { "ms_struct", 0, 0, false, false, false, rs6000_handle_struct_attribute,
    false },
  { "gcc_struct", 0, 0, false, false, false, rs6000_handle_struct_attribute,
    false },
#ifdef SUBTARGET_ATTRIBUTE_TABLE
  SUBTARGET_ATTRIBUTE_TABLE,
#endif
  { NULL,        0, 0, false, false, false, NULL, false }
};

#ifndef TARGET_PROFILE_KERNEL
#define TARGET_PROFILE_KERNEL 0
#endif

/* The VRSAVE bitmask puts bit %v0 as the most significant bit.  */
#define ALTIVEC_REG_BIT(REGNO) (0x80000000 >> ((REGNO) - FIRST_ALTIVEC_REGNO))
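
/* For example, ALTIVEC_REG_BIT (FIRST_ALTIVEC_REGNO) is 0x80000000
   (%v0) and ALTIVEC_REG_BIT (FIRST_ALTIVEC_REGNO + 31) is 0x00000001
   (%v31).  */
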
/* Initialize the GCC target structure.  */
#undef TARGET_ATTRIBUTE_TABLE
#define TARGET_ATTRIBUTE_TABLE rs6000_attribute_table
#undef TARGET_SET_DEFAULT_TYPE_ATTRIBUTES
#define TARGET_SET_DEFAULT_TYPE_ATTRIBUTES rs6000_set_default_type_attributes
#undef TARGET_ATTRIBUTE_TAKES_IDENTIFIER_P
#define TARGET_ATTRIBUTE_TAKES_IDENTIFIER_P rs6000_attribute_takes_identifier_p

#undef TARGET_ASM_ALIGNED_DI_OP
#define TARGET_ASM_ALIGNED_DI_OP DOUBLE_INT_ASM_OP

/* Default unaligned ops are only provided for ELF.  Find the ops needed
   for non-ELF systems.  */
#ifndef OBJECT_FORMAT_ELF
#if TARGET_XCOFF
/* For XCOFF.  rs6000_assemble_integer will handle unaligned DIs on
   64-bit targets.  */
#undef TARGET_ASM_UNALIGNED_HI_OP
#define TARGET_ASM_UNALIGNED_HI_OP "\t.vbyte\t2,"
#undef TARGET_ASM_UNALIGNED_SI_OP
#define TARGET_ASM_UNALIGNED_SI_OP "\t.vbyte\t4,"
#undef TARGET_ASM_UNALIGNED_DI_OP
#define TARGET_ASM_UNALIGNED_DI_OP "\t.vbyte\t8,"
#else
/* For Darwin.  */
#undef TARGET_ASM_UNALIGNED_HI_OP
#define TARGET_ASM_UNALIGNED_HI_OP "\t.short\t"
#undef TARGET_ASM_UNALIGNED_SI_OP
#define TARGET_ASM_UNALIGNED_SI_OP "\t.long\t"
#undef TARGET_ASM_UNALIGNED_DI_OP
#define TARGET_ASM_UNALIGNED_DI_OP "\t.quad\t"
#undef TARGET_ASM_ALIGNED_DI_OP
#define TARGET_ASM_ALIGNED_DI_OP "\t.quad\t"
#endif
#endif

/* This hook deals with fixups for relocatable code and DI-mode objects
   in 64-bit code.  */
#undef TARGET_ASM_INTEGER
#define TARGET_ASM_INTEGER rs6000_assemble_integer

#if defined (HAVE_GAS_HIDDEN) && !TARGET_MACHO
#undef TARGET_ASM_ASSEMBLE_VISIBILITY
#define TARGET_ASM_ASSEMBLE_VISIBILITY rs6000_assemble_visibility
#endif

#undef TARGET_SET_UP_BY_PROLOGUE
#define TARGET_SET_UP_BY_PROLOGUE rs6000_set_up_by_prologue

#undef TARGET_SHRINK_WRAP_GET_SEPARATE_COMPONENTS
#define TARGET_SHRINK_WRAP_GET_SEPARATE_COMPONENTS rs6000_get_separate_components
#undef TARGET_SHRINK_WRAP_COMPONENTS_FOR_BB
#define TARGET_SHRINK_WRAP_COMPONENTS_FOR_BB rs6000_components_for_bb
#undef TARGET_SHRINK_WRAP_DISQUALIFY_COMPONENTS
#define TARGET_SHRINK_WRAP_DISQUALIFY_COMPONENTS rs6000_disqualify_components
#undef TARGET_SHRINK_WRAP_EMIT_PROLOGUE_COMPONENTS
#define TARGET_SHRINK_WRAP_EMIT_PROLOGUE_COMPONENTS rs6000_emit_prologue_components
#undef TARGET_SHRINK_WRAP_EMIT_EPILOGUE_COMPONENTS
#define TARGET_SHRINK_WRAP_EMIT_EPILOGUE_COMPONENTS rs6000_emit_epilogue_components
#undef TARGET_SHRINK_WRAP_SET_HANDLED_COMPONENTS
#define TARGET_SHRINK_WRAP_SET_HANDLED_COMPONENTS rs6000_set_handled_components

#undef TARGET_EXTRA_LIVE_ON_ENTRY
#define TARGET_EXTRA_LIVE_ON_ENTRY rs6000_live_on_entry

#undef TARGET_INTERNAL_ARG_POINTER
#define TARGET_INTERNAL_ARG_POINTER rs6000_internal_arg_pointer

#undef TARGET_HAVE_TLS
#define TARGET_HAVE_TLS HAVE_AS_TLS

#undef TARGET_CANNOT_FORCE_CONST_MEM
#define TARGET_CANNOT_FORCE_CONST_MEM rs6000_cannot_force_const_mem

#undef TARGET_DELEGITIMIZE_ADDRESS
#define TARGET_DELEGITIMIZE_ADDRESS rs6000_delegitimize_address

#undef TARGET_CONST_NOT_OK_FOR_DEBUG_P
#define TARGET_CONST_NOT_OK_FOR_DEBUG_P rs6000_const_not_ok_for_debug_p

#undef TARGET_LEGITIMATE_COMBINED_INSN
#define TARGET_LEGITIMATE_COMBINED_INSN rs6000_legitimate_combined_insn

#undef TARGET_ASM_FUNCTION_PROLOGUE
#define TARGET_ASM_FUNCTION_PROLOGUE rs6000_output_function_prologue
#undef TARGET_ASM_FUNCTION_EPILOGUE
#define TARGET_ASM_FUNCTION_EPILOGUE rs6000_output_function_epilogue

#undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
#define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA rs6000_output_addr_const_extra

#undef TARGET_LEGITIMIZE_ADDRESS
#define TARGET_LEGITIMIZE_ADDRESS rs6000_legitimize_address

#undef TARGET_SCHED_VARIABLE_ISSUE
#define TARGET_SCHED_VARIABLE_ISSUE rs6000_variable_issue

#undef TARGET_SCHED_ISSUE_RATE
#define TARGET_SCHED_ISSUE_RATE rs6000_issue_rate
#undef TARGET_SCHED_ADJUST_COST
#define TARGET_SCHED_ADJUST_COST rs6000_adjust_cost
#undef TARGET_SCHED_ADJUST_PRIORITY
#define TARGET_SCHED_ADJUST_PRIORITY rs6000_adjust_priority
#undef TARGET_SCHED_IS_COSTLY_DEPENDENCE
#define TARGET_SCHED_IS_COSTLY_DEPENDENCE rs6000_is_costly_dependence
#undef TARGET_SCHED_INIT
#define TARGET_SCHED_INIT rs6000_sched_init
#undef TARGET_SCHED_FINISH
#define TARGET_SCHED_FINISH rs6000_sched_finish
#undef TARGET_SCHED_REORDER
#define TARGET_SCHED_REORDER rs6000_sched_reorder
#undef TARGET_SCHED_REORDER2
#define TARGET_SCHED_REORDER2 rs6000_sched_reorder2

#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD rs6000_use_sched_lookahead

#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD
#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD rs6000_use_sched_lookahead_guard

#undef TARGET_SCHED_ALLOC_SCHED_CONTEXT
#define TARGET_SCHED_ALLOC_SCHED_CONTEXT rs6000_alloc_sched_context
#undef TARGET_SCHED_INIT_SCHED_CONTEXT
#define TARGET_SCHED_INIT_SCHED_CONTEXT rs6000_init_sched_context
#undef TARGET_SCHED_SET_SCHED_CONTEXT
#define TARGET_SCHED_SET_SCHED_CONTEXT rs6000_set_sched_context
#undef TARGET_SCHED_FREE_SCHED_CONTEXT
#define TARGET_SCHED_FREE_SCHED_CONTEXT rs6000_free_sched_context

#undef TARGET_SCHED_CAN_SPECULATE_INSN
#define TARGET_SCHED_CAN_SPECULATE_INSN rs6000_sched_can_speculate_insn

#undef TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD
#define TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD rs6000_builtin_mask_for_load
#undef TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT
#define TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT \
1702 rs6000_builtin_support_vector_misalignment
1703 #undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
1704 #define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE rs6000_vector_alignment_reachable
1705 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
1706 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
1707 rs6000_builtin_vectorization_cost
1708 #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
1709 #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE \
1710 rs6000_preferred_simd_mode
1711 #undef TARGET_VECTORIZE_INIT_COST
1712 #define TARGET_VECTORIZE_INIT_COST rs6000_init_cost
1713 #undef TARGET_VECTORIZE_ADD_STMT_COST
1714 #define TARGET_VECTORIZE_ADD_STMT_COST rs6000_add_stmt_cost
1715 #undef TARGET_VECTORIZE_FINISH_COST
1716 #define TARGET_VECTORIZE_FINISH_COST rs6000_finish_cost
1717 #undef TARGET_VECTORIZE_DESTROY_COST_DATA
1718 #define TARGET_VECTORIZE_DESTROY_COST_DATA rs6000_destroy_cost_data
1720 #undef TARGET_INIT_BUILTINS
1721 #define TARGET_INIT_BUILTINS rs6000_init_builtins
1722 #undef TARGET_BUILTIN_DECL
1723 #define TARGET_BUILTIN_DECL rs6000_builtin_decl
1725 #undef TARGET_FOLD_BUILTIN
1726 #define TARGET_FOLD_BUILTIN rs6000_fold_builtin
1727 #undef TARGET_GIMPLE_FOLD_BUILTIN
1728 #define TARGET_GIMPLE_FOLD_BUILTIN rs6000_gimple_fold_builtin
1730 #undef TARGET_EXPAND_BUILTIN
1731 #define TARGET_EXPAND_BUILTIN rs6000_expand_builtin
1733 #undef TARGET_MANGLE_TYPE
1734 #define TARGET_MANGLE_TYPE rs6000_mangle_type
1736 #undef TARGET_INIT_LIBFUNCS
1737 #define TARGET_INIT_LIBFUNCS rs6000_init_libfuncs
1739 #if TARGET_MACHO
1740 #undef TARGET_BINDS_LOCAL_P
1741 #define TARGET_BINDS_LOCAL_P darwin_binds_local_p
1742 #endif
1744 #undef TARGET_MS_BITFIELD_LAYOUT_P
1745 #define TARGET_MS_BITFIELD_LAYOUT_P rs6000_ms_bitfield_layout_p
1747 #undef TARGET_ASM_OUTPUT_MI_THUNK
1748 #define TARGET_ASM_OUTPUT_MI_THUNK rs6000_output_mi_thunk
1750 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
1751 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK hook_bool_const_tree_hwi_hwi_const_tree_true
1753 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
1754 #define TARGET_FUNCTION_OK_FOR_SIBCALL rs6000_function_ok_for_sibcall
1756 #undef TARGET_REGISTER_MOVE_COST
1757 #define TARGET_REGISTER_MOVE_COST rs6000_register_move_cost
1758 #undef TARGET_MEMORY_MOVE_COST
1759 #define TARGET_MEMORY_MOVE_COST rs6000_memory_move_cost
1760 #undef TARGET_CANNOT_COPY_INSN_P
1761 #define TARGET_CANNOT_COPY_INSN_P rs6000_cannot_copy_insn_p
1762 #undef TARGET_RTX_COSTS
1763 #define TARGET_RTX_COSTS rs6000_rtx_costs
1764 #undef TARGET_ADDRESS_COST
1765 #define TARGET_ADDRESS_COST hook_int_rtx_mode_as_bool_0
1767 #undef TARGET_INIT_DWARF_REG_SIZES_EXTRA
1768 #define TARGET_INIT_DWARF_REG_SIZES_EXTRA rs6000_init_dwarf_reg_sizes_extra
1770 #undef TARGET_PROMOTE_FUNCTION_MODE
1771 #define TARGET_PROMOTE_FUNCTION_MODE rs6000_promote_function_mode
1773 #undef TARGET_RETURN_IN_MEMORY
1774 #define TARGET_RETURN_IN_MEMORY rs6000_return_in_memory
1776 #undef TARGET_RETURN_IN_MSB
1777 #define TARGET_RETURN_IN_MSB rs6000_return_in_msb
1779 #undef TARGET_SETUP_INCOMING_VARARGS
1780 #define TARGET_SETUP_INCOMING_VARARGS setup_incoming_varargs
1782 /* Always strict argument naming on rs6000. */
1783 #undef TARGET_STRICT_ARGUMENT_NAMING
1784 #define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
1785 #undef TARGET_PRETEND_OUTGOING_VARARGS_NAMED
1786 #define TARGET_PRETEND_OUTGOING_VARARGS_NAMED hook_bool_CUMULATIVE_ARGS_true
1787 #undef TARGET_SPLIT_COMPLEX_ARG
1788 #define TARGET_SPLIT_COMPLEX_ARG hook_bool_const_tree_true
1789 #undef TARGET_MUST_PASS_IN_STACK
1790 #define TARGET_MUST_PASS_IN_STACK rs6000_must_pass_in_stack
1791 #undef TARGET_PASS_BY_REFERENCE
1792 #define TARGET_PASS_BY_REFERENCE rs6000_pass_by_reference
1793 #undef TARGET_ARG_PARTIAL_BYTES
1794 #define TARGET_ARG_PARTIAL_BYTES rs6000_arg_partial_bytes
1795 #undef TARGET_FUNCTION_ARG_ADVANCE
1796 #define TARGET_FUNCTION_ARG_ADVANCE rs6000_function_arg_advance
1797 #undef TARGET_FUNCTION_ARG
1798 #define TARGET_FUNCTION_ARG rs6000_function_arg
1799 #undef TARGET_FUNCTION_ARG_BOUNDARY
1800 #define TARGET_FUNCTION_ARG_BOUNDARY rs6000_function_arg_boundary
1802 #undef TARGET_BUILD_BUILTIN_VA_LIST
1803 #define TARGET_BUILD_BUILTIN_VA_LIST rs6000_build_builtin_va_list
1805 #undef TARGET_EXPAND_BUILTIN_VA_START
1806 #define TARGET_EXPAND_BUILTIN_VA_START rs6000_va_start
1808 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
1809 #define TARGET_GIMPLIFY_VA_ARG_EXPR rs6000_gimplify_va_arg
1811 #undef TARGET_EH_RETURN_FILTER_MODE
1812 #define TARGET_EH_RETURN_FILTER_MODE rs6000_eh_return_filter_mode
1814 #undef TARGET_SCALAR_MODE_SUPPORTED_P
1815 #define TARGET_SCALAR_MODE_SUPPORTED_P rs6000_scalar_mode_supported_p
1817 #undef TARGET_VECTOR_MODE_SUPPORTED_P
1818 #define TARGET_VECTOR_MODE_SUPPORTED_P rs6000_vector_mode_supported_p
1820 #undef TARGET_FLOATN_MODE
1821 #define TARGET_FLOATN_MODE rs6000_floatn_mode
1823 #undef TARGET_INVALID_ARG_FOR_UNPROTOTYPED_FN
1824 #define TARGET_INVALID_ARG_FOR_UNPROTOTYPED_FN invalid_arg_for_unprototyped_fn
1826 #undef TARGET_ASM_LOOP_ALIGN_MAX_SKIP
1827 #define TARGET_ASM_LOOP_ALIGN_MAX_SKIP rs6000_loop_align_max_skip
1829 #undef TARGET_MD_ASM_ADJUST
1830 #define TARGET_MD_ASM_ADJUST rs6000_md_asm_adjust
1832 #undef TARGET_OPTION_OVERRIDE
1833 #define TARGET_OPTION_OVERRIDE rs6000_option_override
1835 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
1836 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
1837 rs6000_builtin_vectorized_function
1839 #undef TARGET_VECTORIZE_BUILTIN_MD_VECTORIZED_FUNCTION
1840 #define TARGET_VECTORIZE_BUILTIN_MD_VECTORIZED_FUNCTION \
1841 rs6000_builtin_md_vectorized_function
1843 #undef TARGET_STACK_PROTECT_GUARD
1844 #define TARGET_STACK_PROTECT_GUARD rs6000_init_stack_protect_guard
1846 #if !TARGET_MACHO
1847 #undef TARGET_STACK_PROTECT_FAIL
1848 #define TARGET_STACK_PROTECT_FAIL rs6000_stack_protect_fail
1849 #endif
1851 #ifdef HAVE_AS_TLS
1852 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
1853 #define TARGET_ASM_OUTPUT_DWARF_DTPREL rs6000_output_dwarf_dtprel
1854 #endif
1856 /* Use a 32-bit anchor range. This leads to sequences like:
1858 addis tmp,anchor,high
1859 add dest,tmp,low
1861 where tmp itself acts as an anchor, and can be shared between
1862 accesses to the same 64k page. */
1863 #undef TARGET_MIN_ANCHOR_OFFSET
1864 #define TARGET_MIN_ANCHOR_OFFSET -0x7fffffff - 1
1865 #undef TARGET_MAX_ANCHOR_OFFSET
1866 #define TARGET_MAX_ANCHOR_OFFSET 0x7fffffff
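/* Editor's sketch (not part of the original file): a standalone model of the
   high/low split behind the addis/add sequence above, assuming a 32-bit
   unsigned int and 16-bit short.  The 0x8000 rounding compensates for the
   low half being sign-extended by the add.  */
#if 0
static unsigned int
anchor_high_part (unsigned int offset)
{
  return (offset + 0x8000) >> 16;
}

static int
anchor_low_part (unsigned int offset)
{
  return (short) (offset & 0xffff);
}

/* (anchor_high_part (off) << 16) + anchor_low_part (off) == off (mod 2^32);
   e.g. off = 0x12348000 gives high 0x1235 and low -0x8000.  */
#endif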
1867 #undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
1868 #define TARGET_USE_BLOCKS_FOR_CONSTANT_P rs6000_use_blocks_for_constant_p
1869 #undef TARGET_USE_BLOCKS_FOR_DECL_P
1870 #define TARGET_USE_BLOCKS_FOR_DECL_P rs6000_use_blocks_for_decl_p
1872 #undef TARGET_BUILTIN_RECIPROCAL
1873 #define TARGET_BUILTIN_RECIPROCAL rs6000_builtin_reciprocal
1875 #undef TARGET_EXPAND_TO_RTL_HOOK
1876 #define TARGET_EXPAND_TO_RTL_HOOK rs6000_alloc_sdmode_stack_slot
1878 #undef TARGET_INSTANTIATE_DECLS
1879 #define TARGET_INSTANTIATE_DECLS rs6000_instantiate_decls
1881 #undef TARGET_SECONDARY_RELOAD
1882 #define TARGET_SECONDARY_RELOAD rs6000_secondary_reload
1884 #undef TARGET_LEGITIMATE_ADDRESS_P
1885 #define TARGET_LEGITIMATE_ADDRESS_P rs6000_legitimate_address_p
1887 #undef TARGET_MODE_DEPENDENT_ADDRESS_P
1888 #define TARGET_MODE_DEPENDENT_ADDRESS_P rs6000_mode_dependent_address_p
1890 #undef TARGET_COMPUTE_PRESSURE_CLASSES
1891 #define TARGET_COMPUTE_PRESSURE_CLASSES rs6000_compute_pressure_classes
1893 #undef TARGET_CAN_ELIMINATE
1894 #define TARGET_CAN_ELIMINATE rs6000_can_eliminate
1896 #undef TARGET_CONDITIONAL_REGISTER_USAGE
1897 #define TARGET_CONDITIONAL_REGISTER_USAGE rs6000_conditional_register_usage
1899 #undef TARGET_SCHED_REASSOCIATION_WIDTH
1900 #define TARGET_SCHED_REASSOCIATION_WIDTH rs6000_reassociation_width
1902 #undef TARGET_TRAMPOLINE_INIT
1903 #define TARGET_TRAMPOLINE_INIT rs6000_trampoline_init
1905 #undef TARGET_FUNCTION_VALUE
1906 #define TARGET_FUNCTION_VALUE rs6000_function_value
1908 #undef TARGET_OPTION_VALID_ATTRIBUTE_P
1909 #define TARGET_OPTION_VALID_ATTRIBUTE_P rs6000_valid_attribute_p
1911 #undef TARGET_OPTION_SAVE
1912 #define TARGET_OPTION_SAVE rs6000_function_specific_save
1914 #undef TARGET_OPTION_RESTORE
1915 #define TARGET_OPTION_RESTORE rs6000_function_specific_restore
1917 #undef TARGET_OPTION_PRINT
1918 #define TARGET_OPTION_PRINT rs6000_function_specific_print
1920 #undef TARGET_CAN_INLINE_P
1921 #define TARGET_CAN_INLINE_P rs6000_can_inline_p
1923 #undef TARGET_SET_CURRENT_FUNCTION
1924 #define TARGET_SET_CURRENT_FUNCTION rs6000_set_current_function
1926 #undef TARGET_LEGITIMATE_CONSTANT_P
1927 #define TARGET_LEGITIMATE_CONSTANT_P rs6000_legitimate_constant_p
1929 #undef TARGET_VECTORIZE_VEC_PERM_CONST_OK
1930 #define TARGET_VECTORIZE_VEC_PERM_CONST_OK rs6000_vectorize_vec_perm_const_ok
1932 #undef TARGET_CAN_USE_DOLOOP_P
1933 #define TARGET_CAN_USE_DOLOOP_P can_use_doloop_if_innermost
1935 #undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
1936 #define TARGET_ATOMIC_ASSIGN_EXPAND_FENV rs6000_atomic_assign_expand_fenv
1938 #undef TARGET_LIBGCC_CMP_RETURN_MODE
1939 #define TARGET_LIBGCC_CMP_RETURN_MODE rs6000_abi_word_mode
1940 #undef TARGET_LIBGCC_SHIFT_COUNT_MODE
1941 #define TARGET_LIBGCC_SHIFT_COUNT_MODE rs6000_abi_word_mode
1942 #undef TARGET_UNWIND_WORD_MODE
1943 #define TARGET_UNWIND_WORD_MODE rs6000_abi_word_mode
1945 #undef TARGET_OFFLOAD_OPTIONS
1946 #define TARGET_OFFLOAD_OPTIONS rs6000_offload_options
1948 #undef TARGET_C_MODE_FOR_SUFFIX
1949 #define TARGET_C_MODE_FOR_SUFFIX rs6000_c_mode_for_suffix
1951 #undef TARGET_INVALID_BINARY_OP
1952 #define TARGET_INVALID_BINARY_OP rs6000_invalid_binary_op
1954 #undef TARGET_OPTAB_SUPPORTED_P
1955 #define TARGET_OPTAB_SUPPORTED_P rs6000_optab_supported_p
1957 #undef TARGET_CUSTOM_FUNCTION_DESCRIPTORS
1958 #define TARGET_CUSTOM_FUNCTION_DESCRIPTORS 1
1960 #undef TARGET_COMPARE_VERSION_PRIORITY
1961 #define TARGET_COMPARE_VERSION_PRIORITY rs6000_compare_version_priority
1963 #undef TARGET_GENERATE_VERSION_DISPATCHER_BODY
1964 #define TARGET_GENERATE_VERSION_DISPATCHER_BODY \
1965 rs6000_generate_version_dispatcher_body
1967 #undef TARGET_GET_FUNCTION_VERSIONS_DISPATCHER
1968 #define TARGET_GET_FUNCTION_VERSIONS_DISPATCHER \
1969 rs6000_get_function_versions_dispatcher
1971 #undef TARGET_OPTION_FUNCTION_VERSIONS
1972 #define TARGET_OPTION_FUNCTION_VERSIONS common_function_versions
1976 /* Processor table. */
1977 struct rs6000_ptt
1978 {
1979 const char *const name; /* Canonical processor name. */
1980 const enum processor_type processor; /* Processor type enum value. */
1981 const HOST_WIDE_INT target_enable; /* Target flags to enable. */
1982 };
1984 static struct rs6000_ptt const processor_target_table[] =
1985 {
1986 #define RS6000_CPU(NAME, CPU, FLAGS) { NAME, CPU, FLAGS },
1987 #include "rs6000-cpus.def"
1988 #undef RS6000_CPU
1989 };
1991 /* Look up a processor name for -mcpu=xxx and -mtune=xxx. Return -1 if the
1992 name is invalid. */
1994 static int
1995 rs6000_cpu_name_lookup (const char *name)
1996 {
1997 size_t i;
1999 if (name != NULL)
2000 {
2001 for (i = 0; i < ARRAY_SIZE (processor_target_table); i++)
2002 if (! strcmp (name, processor_target_table[i].name))
2003 return (int)i;
2004 }
2006 return -1;
2007 }
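/* Editor's sketch (not part of the original file): a standalone model of the
   lookup above; the "power8"/"power9" names are hypothetical stand-ins for
   entries generated from rs6000-cpus.def.  */
#if 0
#include <string.h>

static const char *const toy_cpu_names[] = { "power8", "power9" };

static int
toy_cpu_lookup (const char *name)
{
  size_t i;

  if (name != NULL)
    for (i = 0; i < sizeof (toy_cpu_names) / sizeof (toy_cpu_names[0]); i++)
      if (! strcmp (name, toy_cpu_names[i]))
	return (int) i;

  return -1;
}

/* toy_cpu_lookup ("power9") == 1; toy_cpu_lookup ("unknown") == -1.  */
#endif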
2010 /* Return number of consecutive hard regs needed starting at reg REGNO
2011 to hold something of mode MODE.
2012 This is ordinarily the length in words of a value of mode MODE
2013 but can be less for certain modes in special long registers.
2015 POWER and PowerPC GPRs hold 32 bits worth;
2016 PowerPC64 GPRs and FPRs hold 64 bits worth. */
2018 static int
2019 rs6000_hard_regno_nregs_internal (int regno, machine_mode mode)
2020 {
2021 unsigned HOST_WIDE_INT reg_size;
2023 /* 128-bit floating point usually takes 2 registers, unless it is IEEE
2024 128-bit floating point that can go in vector registers, which has VSX
2025 memory addressing. */
2026 if (FP_REGNO_P (regno))
2027 reg_size = (VECTOR_MEM_VSX_P (mode) || FLOAT128_VECTOR_P (mode)
2028 ? UNITS_PER_VSX_WORD
2029 : UNITS_PER_FP_WORD);
2031 else if (ALTIVEC_REGNO_P (regno))
2032 reg_size = UNITS_PER_ALTIVEC_WORD;
2034 else
2035 reg_size = UNITS_PER_WORD;
2037 return (GET_MODE_SIZE (mode) + reg_size - 1) / reg_size;
2038 }
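/* Editor's sketch (not part of the original file): the return expression is a
   ceiling division, so a mode larger than one register rounds up to the next
   whole register count.  */
#if 0
static int
toy_ceil_nregs (int mode_size, int reg_size)
{
  return (mode_size + reg_size - 1) / reg_size;
}

/* toy_ceil_nregs (16, 8) == 2, toy_ceil_nregs (12, 8) == 2,
   toy_ceil_nregs (4, 8) == 1.  */
#endif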
2040 /* Value is 1 if hard register REGNO can hold a value of machine-mode
2041 MODE. */
2042 static int
2043 rs6000_hard_regno_mode_ok (int regno, machine_mode mode)
2044 {
2045 int last_regno = regno + rs6000_hard_regno_nregs[mode][regno] - 1;
2047 if (COMPLEX_MODE_P (mode))
2048 mode = GET_MODE_INNER (mode);
2050 /* PTImode can only go in GPRs. Quad word memory operations require even/odd
2051 register combinations, and we use PTImode where we need to deal with quad
2052 word memory operations. Don't allow quad words in the argument or frame
2053 pointer registers, just registers 0..31. */
2054 if (mode == PTImode)
2055 return (IN_RANGE (regno, FIRST_GPR_REGNO, LAST_GPR_REGNO)
2056 && IN_RANGE (last_regno, FIRST_GPR_REGNO, LAST_GPR_REGNO)
2057 && ((regno & 1) == 0));
2059 /* VSX registers that overlap the FPR registers are larger than on non-VSX
2060 implementations. Don't allow an item to be split between a FP register
2061 and an Altivec register. Allow TImode in all VSX registers if the user
2062 asked for it. */
2063 if (TARGET_VSX && VSX_REGNO_P (regno)
2064 && (VECTOR_MEM_VSX_P (mode)
2065 || FLOAT128_VECTOR_P (mode)
2066 || reg_addr[mode].scalar_in_vmx_p
2067 || (TARGET_VSX_TIMODE && mode == TImode)
2068 || (TARGET_VADDUQM && mode == V1TImode)))
2069 {
2070 if (FP_REGNO_P (regno))
2071 return FP_REGNO_P (last_regno);
2073 if (ALTIVEC_REGNO_P (regno))
2074 {
2075 if (GET_MODE_SIZE (mode) != 16 && !reg_addr[mode].scalar_in_vmx_p)
2076 return 0;
2078 return ALTIVEC_REGNO_P (last_regno);
2079 }
2080 }
2082 /* The GPRs can hold any mode, but values bigger than one register
2083 cannot go past R31. */
2084 if (INT_REGNO_P (regno))
2085 return INT_REGNO_P (last_regno);
2087 /* The float registers (except for VSX vector modes) can only hold floating
2088 modes and DImode. */
2089 if (FP_REGNO_P (regno))
2090 {
2091 if (FLOAT128_VECTOR_P (mode))
2092 return false;
2094 if (SCALAR_FLOAT_MODE_P (mode)
2095 && (mode != TDmode || (regno % 2) == 0)
2096 && FP_REGNO_P (last_regno))
2097 return 1;
2099 if (GET_MODE_CLASS (mode) == MODE_INT)
2100 {
2101 if (GET_MODE_SIZE (mode) == UNITS_PER_FP_WORD)
2102 return 1;
2104 if (TARGET_P8_VECTOR && (mode == SImode))
2105 return 1;
2107 if (TARGET_P9_VECTOR && (mode == QImode || mode == HImode))
2108 return 1;
2109 }
2111 if (PAIRED_SIMD_REGNO_P (regno) && TARGET_PAIRED_FLOAT
2112 && PAIRED_VECTOR_MODE (mode))
2113 return 1;
2115 return 0;
2116 }
2118 /* The CR register can only hold CC modes. */
2119 if (CR_REGNO_P (regno))
2120 return GET_MODE_CLASS (mode) == MODE_CC;
2122 if (CA_REGNO_P (regno))
2123 return mode == Pmode || mode == SImode;
2125 /* AltiVec only in AltiVec registers. */
2126 if (ALTIVEC_REGNO_P (regno))
2127 return (VECTOR_MEM_ALTIVEC_OR_VSX_P (mode)
2128 || mode == V1TImode);
2130 /* We cannot put non-VSX TImode or PTImode anywhere except in the general
2131 registers, and it must be able to fit within the register set. */
2133 return GET_MODE_SIZE (mode) <= UNITS_PER_WORD;
2134 }
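/* Editor's sketch (not part of the original file): the even/odd pairing test
   applied above to PTImode (GPRs) and TDmode (FPRs), in isolation.  */
#if 0
static int
toy_even_pair_ok (int regno, int nregs, int first, int last)
{
  return (regno >= first
	  && regno + nregs - 1 <= last
	  && (regno & 1) == 0);
}

/* toy_even_pair_ok (4, 2, 0, 31) is true; toy_even_pair_ok (5, 2, 0, 31)
   and toy_even_pair_ok (31, 2, 0, 31) are false.  */
#endif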
2136 /* Print interesting facts about registers. */
2137 static void
2138 rs6000_debug_reg_print (int first_regno, int last_regno, const char *reg_name)
2139 {
2140 int r, m;
2142 for (r = first_regno; r <= last_regno; ++r)
2143 {
2144 const char *comma = "";
2145 int len;
2147 if (first_regno == last_regno)
2148 fprintf (stderr, "%s:\t", reg_name);
2149 else
2150 fprintf (stderr, "%s%d:\t", reg_name, r - first_regno);
2152 len = 8;
2153 for (m = 0; m < NUM_MACHINE_MODES; ++m)
2154 if (rs6000_hard_regno_mode_ok_p[m][r] && rs6000_hard_regno_nregs[m][r])
2155 {
2156 if (len > 70)
2157 {
2158 fprintf (stderr, ",\n\t");
2159 len = 8;
2160 comma = "";
2161 }
2163 if (rs6000_hard_regno_nregs[m][r] > 1)
2164 len += fprintf (stderr, "%s%s/%d", comma, GET_MODE_NAME (m),
2165 rs6000_hard_regno_nregs[m][r]);
2166 else
2167 len += fprintf (stderr, "%s%s", comma, GET_MODE_NAME (m));
2169 comma = ", ";
2170 }
2172 if (call_used_regs[r])
2173 {
2174 if (len > 70)
2175 {
2176 fprintf (stderr, ",\n\t");
2177 len = 8;
2178 comma = "";
2179 }
2181 len += fprintf (stderr, "%s%s", comma, "call-used");
2182 comma = ", ";
2183 }
2185 if (fixed_regs[r])
2186 {
2187 if (len > 70)
2188 {
2189 fprintf (stderr, ",\n\t");
2190 len = 8;
2191 comma = "";
2192 }
2194 len += fprintf (stderr, "%s%s", comma, "fixed");
2195 comma = ", ";
2196 }
2198 if (len > 70)
2199 {
2200 fprintf (stderr, ",\n\t");
2201 comma = "";
2202 }
2204 len += fprintf (stderr, "%sreg-class = %s", comma,
2205 reg_class_names[(int)rs6000_regno_regclass[r]]);
2206 comma = ", ";
2208 if (len > 70)
2209 {
2210 fprintf (stderr, ",\n\t");
2211 comma = "";
2212 }
2214 fprintf (stderr, "%sregno = %d\n", comma, r);
2215 }
2216 }
2218 static const char *
2219 rs6000_debug_vector_unit (enum rs6000_vector v)
2220 {
2221 const char *ret;
2223 switch (v)
2224 {
2225 case VECTOR_NONE: ret = "none"; break;
2226 case VECTOR_ALTIVEC: ret = "altivec"; break;
2227 case VECTOR_VSX: ret = "vsx"; break;
2228 case VECTOR_P8_VECTOR: ret = "p8_vector"; break;
2229 case VECTOR_PAIRED: ret = "paired"; break;
2230 case VECTOR_OTHER: ret = "other"; break;
2231 default: ret = "unknown"; break;
2232 }
2234 return ret;
2235 }
2237 /* Inner function printing just the address mask for a particular reload
2238 register class. */
2239 DEBUG_FUNCTION char *
2240 rs6000_debug_addr_mask (addr_mask_type mask, bool keep_spaces)
2241 {
2242 static char ret[8];
2243 char *p = ret;
2245 if ((mask & RELOAD_REG_VALID) != 0)
2246 *p++ = 'v';
2247 else if (keep_spaces)
2248 *p++ = ' ';
2250 if ((mask & RELOAD_REG_MULTIPLE) != 0)
2251 *p++ = 'm';
2252 else if (keep_spaces)
2253 *p++ = ' ';
2255 if ((mask & RELOAD_REG_INDEXED) != 0)
2256 *p++ = 'i';
2257 else if (keep_spaces)
2258 *p++ = ' ';
2260 if ((mask & RELOAD_REG_QUAD_OFFSET) != 0)
2261 *p++ = 'O';
2262 else if ((mask & RELOAD_REG_OFFSET) != 0)
2263 *p++ = 'o';
2264 else if (keep_spaces)
2265 *p++ = ' ';
2267 if ((mask & RELOAD_REG_PRE_INCDEC) != 0)
2268 *p++ = '+';
2269 else if (keep_spaces)
2270 *p++ = ' ';
2272 if ((mask & RELOAD_REG_PRE_MODIFY) != 0)
2273 *p++ = '+';
2274 else if (keep_spaces)
2275 *p++ = ' ';
2277 if ((mask & RELOAD_REG_AND_M16) != 0)
2278 *p++ = '&';
2279 else if (keep_spaces)
2280 *p++ = ' ';
2282 *p = '\0';
2284 return ret;
2285 }
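/* Editor's note (illustrative, not part of the original file): with
   keep_spaces set, the result is a fixed seven-column string in the order
   v m i o + + &, so RELOAD_REG_VALID | RELOAD_REG_INDEXED | RELOAD_REG_OFFSET
   renders as "v io   ", and RELOAD_REG_QUAD_OFFSET puts 'O' in the 'o'
   column instead of 'o'.  */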
2287 /* Print the address masks in a human readable fashion. */
2288 DEBUG_FUNCTION void
2289 rs6000_debug_print_mode (ssize_t m)
2290 {
2291 ssize_t rc;
2292 int spaces = 0;
2293 bool fuse_extra_p;
2295 fprintf (stderr, "Mode: %-5s", GET_MODE_NAME (m));
2296 for (rc = 0; rc < N_RELOAD_REG; rc++)
2297 fprintf (stderr, " %s: %s", reload_reg_map[rc].name,
2298 rs6000_debug_addr_mask (reg_addr[m].addr_mask[rc], true));
2300 if ((reg_addr[m].reload_store != CODE_FOR_nothing)
2301 || (reg_addr[m].reload_load != CODE_FOR_nothing))
2302 fprintf (stderr, " Reload=%c%c",
2303 (reg_addr[m].reload_store != CODE_FOR_nothing) ? 's' : '*',
2304 (reg_addr[m].reload_load != CODE_FOR_nothing) ? 'l' : '*');
2305 else
2306 spaces += sizeof (" Reload=sl") - 1;
2308 if (reg_addr[m].scalar_in_vmx_p)
2309 {
2310 fprintf (stderr, "%*s Upper=y", spaces, "");
2311 spaces = 0;
2312 }
2313 else
2314 spaces += sizeof (" Upper=y") - 1;
2316 fuse_extra_p = ((reg_addr[m].fusion_gpr_ld != CODE_FOR_nothing)
2317 || reg_addr[m].fused_toc);
2318 if (!fuse_extra_p)
2319 {
2320 for (rc = 0; rc < N_RELOAD_REG; rc++)
2321 {
2322 if (rc != RELOAD_REG_ANY)
2323 {
2324 if (reg_addr[m].fusion_addi_ld[rc] != CODE_FOR_nothing
2326 || reg_addr[m].fusion_addi_st[rc] != CODE_FOR_nothing
2327 || reg_addr[m].fusion_addis_ld[rc] != CODE_FOR_nothing
2328 || reg_addr[m].fusion_addis_st[rc] != CODE_FOR_nothing)
2329 {
2330 fuse_extra_p = true;
2331 break;
2332 }
2333 }
2334 }
2335 }
2337 if (fuse_extra_p)
2338 {
2339 fprintf (stderr, "%*s Fuse:", spaces, "");
2340 spaces = 0;
2342 for (rc = 0; rc < N_RELOAD_REG; rc++)
2343 {
2344 if (rc != RELOAD_REG_ANY)
2345 {
2346 char load, store;
2348 if (reg_addr[m].fusion_addis_ld[rc] != CODE_FOR_nothing)
2349 load = 'l';
2350 else if (reg_addr[m].fusion_addi_ld[rc] != CODE_FOR_nothing)
2351 load = 'L';
2352 else
2353 load = '-';
2355 if (reg_addr[m].fusion_addis_st[rc] != CODE_FOR_nothing)
2356 store = 's';
2357 else if (reg_addr[m].fusion_addi_st[rc] != CODE_FOR_nothing)
2358 store = 'S';
2359 else
2360 store = '-';
2362 if (load == '-' && store == '-')
2363 spaces += 5;
2364 else
2365 {
2366 fprintf (stderr, "%*s%c=%c%c", (spaces + 1), "",
2367 reload_reg_map[rc].name[0], load, store);
2368 spaces = 0;
2369 }
2370 }
2371 }
2373 if (reg_addr[m].fusion_gpr_ld != CODE_FOR_nothing)
2374 {
2375 fprintf (stderr, "%*sP8gpr", (spaces + 1), "");
2376 spaces = 0;
2377 }
2378 else
2379 spaces += sizeof (" P8gpr") - 1;
2381 if (reg_addr[m].fused_toc)
2382 {
2383 fprintf (stderr, "%*sToc", (spaces + 1), "");
2384 spaces = 0;
2385 }
2386 else
2387 spaces += sizeof (" Toc") - 1;
2388 }
2389 else
2390 spaces += sizeof (" Fuse: G=ls F=ls v=ls P8gpr Toc") - 1;
2392 if (rs6000_vector_unit[m] != VECTOR_NONE
2393 || rs6000_vector_mem[m] != VECTOR_NONE)
2394 {
2395 fprintf (stderr, "%*s vector: arith=%-10s mem=%s",
2396 spaces, "",
2397 rs6000_debug_vector_unit (rs6000_vector_unit[m]),
2398 rs6000_debug_vector_unit (rs6000_vector_mem[m]));
2399 }
2401 fputs ("\n", stderr);
2402 }
2404 #define DEBUG_FMT_ID "%-32s= "
2405 #define DEBUG_FMT_D DEBUG_FMT_ID "%d\n"
2406 #define DEBUG_FMT_WX DEBUG_FMT_ID "%#.12" HOST_WIDE_INT_PRINT "x: "
2407 #define DEBUG_FMT_S DEBUG_FMT_ID "%s\n"
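/* Editor's note (illustrative, not part of the original file): DEBUG_FMT_ID
   left-justifies the key in a 32-column field, so a call such as
   fprintf (stderr, DEBUG_FMT_S, "cmodel", "small") prints the key padded to
   32 columns followed by "= small".  */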
2409 /* Print various interesting information with -mdebug=reg. */
2410 static void
2411 rs6000_debug_reg_global (void)
2412 {
2413 static const char *const tf[2] = { "false", "true" };
2414 const char *nl = (const char *)0;
2415 int m;
2416 size_t m1, m2, v;
2417 char costly_num[20];
2418 char nop_num[20];
2419 char flags_buffer[40];
2420 const char *costly_str;
2421 const char *nop_str;
2422 const char *trace_str;
2423 const char *abi_str;
2424 const char *cmodel_str;
2425 struct cl_target_option cl_opts;
2427 /* Modes we want tieable information on. */
2428 static const machine_mode print_tieable_modes[] = {
2429 QImode,
2430 HImode,
2431 SImode,
2432 DImode,
2433 TImode,
2434 PTImode,
2435 SFmode,
2436 DFmode,
2437 TFmode,
2438 IFmode,
2439 KFmode,
2440 SDmode,
2441 DDmode,
2442 TDmode,
2443 V2SImode,
2444 V16QImode,
2445 V8HImode,
2446 V4SImode,
2447 V2DImode,
2448 V1TImode,
2449 V32QImode,
2450 V16HImode,
2451 V8SImode,
2452 V4DImode,
2453 V2TImode,
2454 V2SFmode,
2455 V4SFmode,
2456 V2DFmode,
2457 V8SFmode,
2458 V4DFmode,
2459 CCmode,
2460 CCUNSmode,
2461 CCEQmode,
2462 };
2464 /* Virtual regs we are interested in. */
2465 const static struct {
2466 int regno; /* register number. */
2467 const char *name; /* register name. */
2468 } virtual_regs[] = {
2469 { STACK_POINTER_REGNUM, "stack pointer:" },
2470 { TOC_REGNUM, "toc: " },
2471 { STATIC_CHAIN_REGNUM, "static chain: " },
2472 { RS6000_PIC_OFFSET_TABLE_REGNUM, "pic offset: " },
2473 { HARD_FRAME_POINTER_REGNUM, "hard frame: " },
2474 { ARG_POINTER_REGNUM, "arg pointer: " },
2475 { FRAME_POINTER_REGNUM, "frame pointer:" },
2476 { FIRST_PSEUDO_REGISTER, "first pseudo: " },
2477 { FIRST_VIRTUAL_REGISTER, "first virtual:" },
2478 { VIRTUAL_INCOMING_ARGS_REGNUM, "incoming_args:" },
2479 { VIRTUAL_STACK_VARS_REGNUM, "stack_vars: " },
2480 { VIRTUAL_STACK_DYNAMIC_REGNUM, "stack_dynamic:" },
2481 { VIRTUAL_OUTGOING_ARGS_REGNUM, "outgoing_args:" },
2482 { VIRTUAL_CFA_REGNUM, "cfa (frame): " },
2483 { VIRTUAL_PREFERRED_STACK_BOUNDARY_REGNUM, "stack boundary:" },
2484 { LAST_VIRTUAL_REGISTER, "last virtual: " },
2485 };
2487 fputs ("\nHard register information:\n", stderr);
2488 rs6000_debug_reg_print (FIRST_GPR_REGNO, LAST_GPR_REGNO, "gr");
2489 rs6000_debug_reg_print (FIRST_FPR_REGNO, LAST_FPR_REGNO, "fp");
2490 rs6000_debug_reg_print (FIRST_ALTIVEC_REGNO,
2491 LAST_ALTIVEC_REGNO,
2492 "vs");
2493 rs6000_debug_reg_print (LR_REGNO, LR_REGNO, "lr");
2494 rs6000_debug_reg_print (CTR_REGNO, CTR_REGNO, "ctr");
2495 rs6000_debug_reg_print (CR0_REGNO, CR7_REGNO, "cr");
2496 rs6000_debug_reg_print (CA_REGNO, CA_REGNO, "ca");
2497 rs6000_debug_reg_print (VRSAVE_REGNO, VRSAVE_REGNO, "vrsave");
2498 rs6000_debug_reg_print (VSCR_REGNO, VSCR_REGNO, "vscr");
2500 fputs ("\nVirtual/stack/frame registers:\n", stderr);
2501 for (v = 0; v < ARRAY_SIZE (virtual_regs); v++)
2502 fprintf (stderr, "%s regno = %3d\n", virtual_regs[v].name, virtual_regs[v].regno);
2504 fprintf (stderr,
2505 "\n"
2506 "d reg_class = %s\n"
2507 "f reg_class = %s\n"
2508 "v reg_class = %s\n"
2509 "wa reg_class = %s\n"
2510 "wb reg_class = %s\n"
2511 "wd reg_class = %s\n"
2512 "we reg_class = %s\n"
2513 "wf reg_class = %s\n"
2514 "wg reg_class = %s\n"
2515 "wh reg_class = %s\n"
2516 "wi reg_class = %s\n"
2517 "wj reg_class = %s\n"
2518 "wk reg_class = %s\n"
2519 "wl reg_class = %s\n"
2520 "wm reg_class = %s\n"
2521 "wo reg_class = %s\n"
2522 "wp reg_class = %s\n"
2523 "wq reg_class = %s\n"
2524 "wr reg_class = %s\n"
2525 "ws reg_class = %s\n"
2526 "wt reg_class = %s\n"
2527 "wu reg_class = %s\n"
2528 "wv reg_class = %s\n"
2529 "ww reg_class = %s\n"
2530 "wx reg_class = %s\n"
2531 "wy reg_class = %s\n"
2532 "wz reg_class = %s\n"
2533 "wA reg_class = %s\n"
2534 "wH reg_class = %s\n"
2535 "wI reg_class = %s\n"
2536 "wJ reg_class = %s\n"
2537 "wK reg_class = %s\n"
2538 "\n",
2539 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_d]],
2540 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_f]],
2541 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_v]],
2542 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wa]],
2543 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wb]],
2544 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wd]],
2545 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_we]],
2546 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wf]],
2547 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wg]],
2548 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wh]],
2549 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wi]],
2550 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wj]],
2551 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wk]],
2552 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wl]],
2553 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wm]],
2554 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wo]],
2555 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wp]],
2556 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wq]],
2557 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wr]],
2558 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_ws]],
2559 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wt]],
2560 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wu]],
2561 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wv]],
2562 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_ww]],
2563 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wx]],
2564 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wy]],
2565 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wz]],
2566 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wA]],
2567 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wH]],
2568 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wI]],
2569 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wJ]],
2570 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wK]]);
2572 nl = "\n";
2573 for (m = 0; m < NUM_MACHINE_MODES; ++m)
2574 rs6000_debug_print_mode (m);
2576 fputs ("\n", stderr);
2578 for (m1 = 0; m1 < ARRAY_SIZE (print_tieable_modes); m1++)
2579 {
2580 machine_mode mode1 = print_tieable_modes[m1];
2581 bool first_time = true;
2583 nl = (const char *)0;
2584 for (m2 = 0; m2 < ARRAY_SIZE (print_tieable_modes); m2++)
2585 {
2586 machine_mode mode2 = print_tieable_modes[m2];
2587 if (mode1 != mode2 && MODES_TIEABLE_P (mode1, mode2))
2588 {
2589 if (first_time)
2590 {
2591 fprintf (stderr, "Tieable modes %s:", GET_MODE_NAME (mode1));
2592 nl = "\n";
2593 first_time = false;
2594 }
2596 fprintf (stderr, " %s", GET_MODE_NAME (mode2));
2597 }
2598 }
2600 if (!first_time)
2601 fputs ("\n", stderr);
2602 }
2604 if (nl)
2605 fputs (nl, stderr);
2607 if (rs6000_recip_control)
2608 {
2609 fprintf (stderr, "\nReciprocal mask = 0x%x\n", rs6000_recip_control);
2611 for (m = 0; m < NUM_MACHINE_MODES; ++m)
2612 if (rs6000_recip_bits[m])
2613 {
2614 fprintf (stderr,
2615 "Reciprocal estimate mode: %-5s divide: %s rsqrt: %s\n",
2616 GET_MODE_NAME (m),
2617 (RS6000_RECIP_AUTO_RE_P (m)
2618 ? "auto"
2619 : (RS6000_RECIP_HAVE_RE_P (m) ? "have" : "none")),
2620 (RS6000_RECIP_AUTO_RSQRTE_P (m)
2621 ? "auto"
2622 : (RS6000_RECIP_HAVE_RSQRTE_P (m) ? "have" : "none")));
2623 }
2625 fputs ("\n", stderr);
2626 }
2628 if (rs6000_cpu_index >= 0)
2629 {
2630 const char *name = processor_target_table[rs6000_cpu_index].name;
2631 HOST_WIDE_INT flags
2632 = processor_target_table[rs6000_cpu_index].target_enable;
2634 sprintf (flags_buffer, "-mcpu=%s flags", name);
2635 rs6000_print_isa_options (stderr, 0, flags_buffer, flags);
2636 }
2637 else
2638 fprintf (stderr, DEBUG_FMT_S, "cpu", "<none>");
2640 if (rs6000_tune_index >= 0)
2641 {
2642 const char *name = processor_target_table[rs6000_tune_index].name;
2643 HOST_WIDE_INT flags
2644 = processor_target_table[rs6000_tune_index].target_enable;
2646 sprintf (flags_buffer, "-mtune=%s flags", name);
2647 rs6000_print_isa_options (stderr, 0, flags_buffer, flags);
2648 }
2649 else
2650 fprintf (stderr, DEBUG_FMT_S, "tune", "<none>");
2652 cl_target_option_save (&cl_opts, &global_options);
2653 rs6000_print_isa_options (stderr, 0, "rs6000_isa_flags",
2654 rs6000_isa_flags);
2656 rs6000_print_isa_options (stderr, 0, "rs6000_isa_flags_explicit",
2657 rs6000_isa_flags_explicit);
2659 rs6000_print_builtin_options (stderr, 0, "rs6000_builtin_mask",
2660 rs6000_builtin_mask);
2662 rs6000_print_isa_options (stderr, 0, "TARGET_DEFAULT", TARGET_DEFAULT);
2664 fprintf (stderr, DEBUG_FMT_S, "--with-cpu default",
2665 OPTION_TARGET_CPU_DEFAULT ? OPTION_TARGET_CPU_DEFAULT : "<none>");
2667 switch (rs6000_sched_costly_dep)
2668 {
2669 case max_dep_latency:
2670 costly_str = "max_dep_latency";
2671 break;
2673 case no_dep_costly:
2674 costly_str = "no_dep_costly";
2675 break;
2677 case all_deps_costly:
2678 costly_str = "all_deps_costly";
2679 break;
2681 case true_store_to_load_dep_costly:
2682 costly_str = "true_store_to_load_dep_costly";
2683 break;
2685 case store_to_load_dep_costly:
2686 costly_str = "store_to_load_dep_costly";
2687 break;
2689 default:
2690 costly_str = costly_num;
2691 sprintf (costly_num, "%d", (int)rs6000_sched_costly_dep);
2692 break;
2693 }
2695 fprintf (stderr, DEBUG_FMT_S, "sched_costly_dep", costly_str);
2697 switch (rs6000_sched_insert_nops)
2698 {
2699 case sched_finish_regroup_exact:
2700 nop_str = "sched_finish_regroup_exact";
2701 break;
2703 case sched_finish_pad_groups:
2704 nop_str = "sched_finish_pad_groups";
2705 break;
2707 case sched_finish_none:
2708 nop_str = "sched_finish_none";
2709 break;
2711 default:
2712 nop_str = nop_num;
2713 sprintf (nop_num, "%d", (int)rs6000_sched_insert_nops);
2714 break;
2715 }
2717 fprintf (stderr, DEBUG_FMT_S, "sched_insert_nops", nop_str);
2719 switch (rs6000_sdata)
2720 {
2721 default:
2722 case SDATA_NONE:
2723 break;
2725 case SDATA_DATA:
2726 fprintf (stderr, DEBUG_FMT_S, "sdata", "data");
2727 break;
2729 case SDATA_SYSV:
2730 fprintf (stderr, DEBUG_FMT_S, "sdata", "sysv");
2731 break;
2733 case SDATA_EABI:
2734 fprintf (stderr, DEBUG_FMT_S, "sdata", "eabi");
2735 break;
2736 }
2739 switch (rs6000_traceback)
2740 {
2741 case traceback_default: trace_str = "default"; break;
2742 case traceback_none: trace_str = "none"; break;
2743 case traceback_part: trace_str = "part"; break;
2744 case traceback_full: trace_str = "full"; break;
2745 default: trace_str = "unknown"; break;
2746 }
2748 fprintf (stderr, DEBUG_FMT_S, "traceback", trace_str);
2750 switch (rs6000_current_cmodel)
2751 {
2752 case CMODEL_SMALL: cmodel_str = "small"; break;
2753 case CMODEL_MEDIUM: cmodel_str = "medium"; break;
2754 case CMODEL_LARGE: cmodel_str = "large"; break;
2755 default: cmodel_str = "unknown"; break;
2756 }
2758 fprintf (stderr, DEBUG_FMT_S, "cmodel", cmodel_str);
2760 switch (rs6000_current_abi)
2761 {
2762 case ABI_NONE: abi_str = "none"; break;
2763 case ABI_AIX: abi_str = "aix"; break;
2764 case ABI_ELFv2: abi_str = "ELFv2"; break;
2765 case ABI_V4: abi_str = "V4"; break;
2766 case ABI_DARWIN: abi_str = "darwin"; break;
2767 default: abi_str = "unknown"; break;
2768 }
2770 fprintf (stderr, DEBUG_FMT_S, "abi", abi_str);
2772 if (rs6000_altivec_abi)
2773 fprintf (stderr, DEBUG_FMT_S, "altivec_abi", "true");
2775 if (rs6000_darwin64_abi)
2776 fprintf (stderr, DEBUG_FMT_S, "darwin64_abi", "true");
2778 fprintf (stderr, DEBUG_FMT_S, "single_float",
2779 (TARGET_SINGLE_FLOAT ? "true" : "false"));
2781 fprintf (stderr, DEBUG_FMT_S, "double_float",
2782 (TARGET_DOUBLE_FLOAT ? "true" : "false"));
2784 fprintf (stderr, DEBUG_FMT_S, "soft_float",
2785 (TARGET_SOFT_FLOAT ? "true" : "false"));
2787 if (TARGET_LINK_STACK)
2788 fprintf (stderr, DEBUG_FMT_S, "link_stack", "true");
2790 if (TARGET_P8_FUSION)
2791 {
2792 char options[80];
2794 strcpy (options, (TARGET_P9_FUSION) ? "power9" : "power8");
2795 if (TARGET_TOC_FUSION)
2796 strcat (options, ", toc");
2798 if (TARGET_P8_FUSION_SIGN)
2799 strcat (options, ", sign");
2801 fprintf (stderr, DEBUG_FMT_S, "fusion", options);
2804 fprintf (stderr, DEBUG_FMT_S, "plt-format",
2805 TARGET_SECURE_PLT ? "secure" : "bss");
2806 fprintf (stderr, DEBUG_FMT_S, "struct-return",
2807 aix_struct_return ? "aix" : "sysv");
2808 fprintf (stderr, DEBUG_FMT_S, "always_hint", tf[!!rs6000_always_hint]);
2809 fprintf (stderr, DEBUG_FMT_S, "sched_groups", tf[!!rs6000_sched_groups]);
2810 fprintf (stderr, DEBUG_FMT_S, "align_branch",
2811 tf[!!rs6000_align_branch_targets]);
2812 fprintf (stderr, DEBUG_FMT_D, "tls_size", rs6000_tls_size);
2813 fprintf (stderr, DEBUG_FMT_D, "long_double_size",
2814 rs6000_long_double_type_size);
2815 fprintf (stderr, DEBUG_FMT_D, "sched_restricted_insns_priority",
2816 (int)rs6000_sched_restricted_insns_priority);
2817 fprintf (stderr, DEBUG_FMT_D, "Number of standard builtins",
2818 (int)END_BUILTINS);
2819 fprintf (stderr, DEBUG_FMT_D, "Number of rs6000 builtins",
2820 (int)RS6000_BUILTIN_COUNT);
2822 fprintf (stderr, DEBUG_FMT_D, "Enable float128 on VSX",
2823 (int)TARGET_FLOAT128_ENABLE_TYPE);
2825 if (TARGET_VSX)
2826 fprintf (stderr, DEBUG_FMT_D, "VSX easy 64-bit scalar element",
2827 (int)VECTOR_ELEMENT_SCALAR_64BIT);
2829 if (TARGET_DIRECT_MOVE_128)
2830 fprintf (stderr, DEBUG_FMT_D, "VSX easy 64-bit mfvsrld element",
2831 (int)VECTOR_ELEMENT_MFVSRLD_64BIT);
2832 }
2835 /* Update the addr mask bits in reg_addr to help secondary reload and the
2836 legitimate address support figure out the appropriate addressing to
2837 use. */
2839 static void
2840 rs6000_setup_reg_addr_masks (void)
2841 {
2842 ssize_t rc, reg, m, nregs;
2843 addr_mask_type any_addr_mask, addr_mask;
2845 for (m = 0; m < NUM_MACHINE_MODES; ++m)
2846 {
2847 machine_mode m2 = (machine_mode) m;
2848 bool complex_p = false;
2849 bool small_int_p = (m2 == QImode || m2 == HImode || m2 == SImode);
2850 size_t msize;
2852 if (COMPLEX_MODE_P (m2))
2853 {
2854 complex_p = true;
2855 m2 = GET_MODE_INNER (m2);
2856 }
2858 msize = GET_MODE_SIZE (m2);
2860 /* SDmode is special in that we want to access it only via REG+REG
2861 addressing on power7 and above, since we want to use the LFIWZX and
2862 STFIWX instructions to load it. */
2863 bool indexed_only_p = (m == SDmode && TARGET_NO_SDMODE_STACK);
2865 any_addr_mask = 0;
2866 for (rc = FIRST_RELOAD_REG_CLASS; rc <= LAST_RELOAD_REG_CLASS; rc++)
2867 {
2868 addr_mask = 0;
2869 reg = reload_reg_map[rc].reg;
2871 /* Can mode values go in the GPR/FPR/Altivec registers? */
2872 if (reg >= 0 && rs6000_hard_regno_mode_ok_p[m][reg])
2873 {
2874 bool small_int_vsx_p = (small_int_p
2875 && (rc == RELOAD_REG_FPR
2876 || rc == RELOAD_REG_VMX));
2878 nregs = rs6000_hard_regno_nregs[m][reg];
2879 addr_mask |= RELOAD_REG_VALID;
2881 /* Indicate if the mode takes more than 1 physical register. If
2882 it takes a single register, indicate it can do REG+REG
2883 addressing. Small integers in VSX registers can only do
2884 REG+REG addressing. */
2885 if (small_int_vsx_p)
2886 addr_mask |= RELOAD_REG_INDEXED;
2887 else if (nregs > 1 || m == BLKmode || complex_p)
2888 addr_mask |= RELOAD_REG_MULTIPLE;
2889 else
2890 addr_mask |= RELOAD_REG_INDEXED;
2892 /* Figure out if we can do PRE_INC, PRE_DEC, or PRE_MODIFY
2893 addressing. If we allow scalars into Altivec registers,
2894 don't allow PRE_INC, PRE_DEC, or PRE_MODIFY. */
2896 if (TARGET_UPDATE
2897 && (rc == RELOAD_REG_GPR || rc == RELOAD_REG_FPR)
2898 && msize <= 8
2899 && !VECTOR_MODE_P (m2)
2900 && !FLOAT128_VECTOR_P (m2)
2901 && !complex_p
2902 && !small_int_vsx_p)
2903 {
2904 addr_mask |= RELOAD_REG_PRE_INCDEC;
2906 /* PRE_MODIFY is more restricted than PRE_INC/PRE_DEC in that
2907 we don't allow PRE_MODIFY for some multi-register
2908 operations. */
2909 switch (m)
2910 {
2911 default:
2912 addr_mask |= RELOAD_REG_PRE_MODIFY;
2913 break;
2915 case DImode:
2916 if (TARGET_POWERPC64)
2917 addr_mask |= RELOAD_REG_PRE_MODIFY;
2918 break;
2920 case DFmode:
2921 case DDmode:
2922 if (TARGET_DF_INSN)
2923 addr_mask |= RELOAD_REG_PRE_MODIFY;
2924 break;
2925 }
2926 }
2927 }
2929 /* GPR and FPR registers can do REG+OFFSET addressing, except
2930 possibly for SDmode. ISA 3.0 (i.e. power9) adds D-form addressing
2931 for 64-bit scalars and 32-bit SFmode to altivec registers. */
2932 if ((addr_mask != 0) && !indexed_only_p
2933 && msize <= 8
2934 && (rc == RELOAD_REG_GPR
2935 || ((msize == 8 || m2 == SFmode)
2936 && (rc == RELOAD_REG_FPR
2937 || (rc == RELOAD_REG_VMX
2938 && TARGET_P9_DFORM_SCALAR)))))
2939 addr_mask |= RELOAD_REG_OFFSET;
2941 /* VSX registers can do REG+OFFSET addressing if ISA 3.0
2942 instructions are enabled. The offset for 128-bit VSX registers is
2943 only 12 bits. While GPRs can handle the full offset range, VSX
2944 registers can only handle the restricted range. */
2945 else if ((addr_mask != 0) && !indexed_only_p
2946 && msize == 16 && TARGET_P9_DFORM_VECTOR
2947 && (ALTIVEC_OR_VSX_VECTOR_MODE (m2)
2948 || (m2 == TImode && TARGET_VSX_TIMODE)))
2949 {
2950 addr_mask |= RELOAD_REG_OFFSET;
2951 if (rc == RELOAD_REG_FPR || rc == RELOAD_REG_VMX)
2952 addr_mask |= RELOAD_REG_QUAD_OFFSET;
2953 }
2955 /* VMX registers can do (REG & -16) and ((REG+REG) & -16)
2956 addressing on 128-bit types. */
2957 if (rc == RELOAD_REG_VMX && msize == 16
2958 && (addr_mask & RELOAD_REG_VALID) != 0)
2959 addr_mask |= RELOAD_REG_AND_M16;
2961 reg_addr[m].addr_mask[rc] = addr_mask;
2962 any_addr_mask |= addr_mask;
2963 }
2965 reg_addr[m].addr_mask[RELOAD_REG_ANY] = any_addr_mask;
2966 }
2967 }
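/* Editor's sketch (not part of the original file): the loop above simply ORs
   capability bits into one mask per reload register class and then unions
   the per-class masks for the RELOAD_REG_ANY slot, e.g.: */
#if 0
static unsigned char
toy_union_masks (const unsigned char *per_class, int n)
{
  unsigned char any = 0;
  int rc;

  for (rc = 0; rc < n; rc++)
    any |= per_class[rc];

  return any;	/* what the RELOAD_REG_ANY slot receives */
}
#endif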
2970 /* Initialize the various global tables that are based on register size. */
2971 static void
2972 rs6000_init_hard_regno_mode_ok (bool global_init_p)
2973 {
2974 ssize_t r, m, c;
2975 int align64;
2976 int align32;
2978 /* Precalculate REGNO_REG_CLASS. */
2979 rs6000_regno_regclass[0] = GENERAL_REGS;
2980 for (r = 1; r < 32; ++r)
2981 rs6000_regno_regclass[r] = BASE_REGS;
2983 for (r = 32; r < 64; ++r)
2984 rs6000_regno_regclass[r] = FLOAT_REGS;
2986 for (r = 64; r < FIRST_PSEUDO_REGISTER; ++r)
2987 rs6000_regno_regclass[r] = NO_REGS;
2989 for (r = FIRST_ALTIVEC_REGNO; r <= LAST_ALTIVEC_REGNO; ++r)
2990 rs6000_regno_regclass[r] = ALTIVEC_REGS;
2992 rs6000_regno_regclass[CR0_REGNO] = CR0_REGS;
2993 for (r = CR1_REGNO; r <= CR7_REGNO; ++r)
2994 rs6000_regno_regclass[r] = CR_REGS;
2996 rs6000_regno_regclass[LR_REGNO] = LINK_REGS;
2997 rs6000_regno_regclass[CTR_REGNO] = CTR_REGS;
2998 rs6000_regno_regclass[CA_REGNO] = NO_REGS;
2999 rs6000_regno_regclass[VRSAVE_REGNO] = VRSAVE_REGS;
3000 rs6000_regno_regclass[VSCR_REGNO] = VRSAVE_REGS;
3001 rs6000_regno_regclass[TFHAR_REGNO] = SPR_REGS;
3002 rs6000_regno_regclass[TFIAR_REGNO] = SPR_REGS;
3003 rs6000_regno_regclass[TEXASR_REGNO] = SPR_REGS;
3004 rs6000_regno_regclass[ARG_POINTER_REGNUM] = BASE_REGS;
3005 rs6000_regno_regclass[FRAME_POINTER_REGNUM] = BASE_REGS;
3007 /* Precalculate register class to simpler reload register class. We don't
3008 need all of the register classes that are combinations of different
3009 classes, just the simple ones that have constraint letters. */
3010 for (c = 0; c < N_REG_CLASSES; c++)
3011 reg_class_to_reg_type[c] = NO_REG_TYPE;
3013 reg_class_to_reg_type[(int)GENERAL_REGS] = GPR_REG_TYPE;
3014 reg_class_to_reg_type[(int)BASE_REGS] = GPR_REG_TYPE;
3015 reg_class_to_reg_type[(int)VSX_REGS] = VSX_REG_TYPE;
3016 reg_class_to_reg_type[(int)VRSAVE_REGS] = SPR_REG_TYPE;
3017 reg_class_to_reg_type[(int)VSCR_REGS] = SPR_REG_TYPE;
3018 reg_class_to_reg_type[(int)LINK_REGS] = SPR_REG_TYPE;
3019 reg_class_to_reg_type[(int)CTR_REGS] = SPR_REG_TYPE;
3020 reg_class_to_reg_type[(int)LINK_OR_CTR_REGS] = SPR_REG_TYPE;
3021 reg_class_to_reg_type[(int)CR_REGS] = CR_REG_TYPE;
3022 reg_class_to_reg_type[(int)CR0_REGS] = CR_REG_TYPE;
3024 if (TARGET_VSX)
3026 reg_class_to_reg_type[(int)FLOAT_REGS] = VSX_REG_TYPE;
3027 reg_class_to_reg_type[(int)ALTIVEC_REGS] = VSX_REG_TYPE;
3029 else
3031 reg_class_to_reg_type[(int)FLOAT_REGS] = FPR_REG_TYPE;
3032 reg_class_to_reg_type[(int)ALTIVEC_REGS] = ALTIVEC_REG_TYPE;
3035 /* Precalculate the valid memory formats as well as the vector information;
3036 this must be set up before the rs6000_hard_regno_nregs_internal calls
3037 below. */
3038 gcc_assert ((int)VECTOR_NONE == 0);
3039 memset ((void *) &rs6000_vector_unit[0], '\0', sizeof (rs6000_vector_unit));
3040 memset ((void *) &rs6000_vector_mem[0], '\0', sizeof (rs6000_vector_unit));
3042 gcc_assert ((int)CODE_FOR_nothing == 0);
3043 memset ((void *) &reg_addr[0], '\0', sizeof (reg_addr));
3045 gcc_assert ((int)NO_REGS == 0);
3046 memset ((void *) &rs6000_constraints[0], '\0', sizeof (rs6000_constraints));
3048 /* The VSX hardware allows native alignment for vectors, but TARGET_VSX_ALIGN_128
3049 controls whether the compiler assumes native alignment or still uses 128-bit alignment. */
3050 if (TARGET_VSX && !TARGET_VSX_ALIGN_128)
3051 {
3052 align64 = 64;
3053 align32 = 32;
3054 }
3055 else
3056 {
3057 align64 = 128;
3058 align32 = 128;
3059 }
3061 /* KF mode (IEEE 128-bit in VSX registers). We do not have arithmetic, so
3062 only set the memory modes. Include TFmode if -mabi=ieeelongdouble. */
3063 if (TARGET_FLOAT128_TYPE)
3064 {
3065 rs6000_vector_mem[KFmode] = VECTOR_VSX;
3066 rs6000_vector_align[KFmode] = 128;
3068 if (FLOAT128_IEEE_P (TFmode))
3069 {
3070 rs6000_vector_mem[TFmode] = VECTOR_VSX;
3071 rs6000_vector_align[TFmode] = 128;
3072 }
3073 }
3075 /* V2DF mode, VSX only. */
3076 if (TARGET_VSX)
3077 {
3078 rs6000_vector_unit[V2DFmode] = VECTOR_VSX;
3079 rs6000_vector_mem[V2DFmode] = VECTOR_VSX;
3080 rs6000_vector_align[V2DFmode] = align64;
3081 }
3083 /* V4SF mode, either VSX or Altivec. */
3084 if (TARGET_VSX)
3085 {
3086 rs6000_vector_unit[V4SFmode] = VECTOR_VSX;
3087 rs6000_vector_mem[V4SFmode] = VECTOR_VSX;
3088 rs6000_vector_align[V4SFmode] = align32;
3089 }
3090 else if (TARGET_ALTIVEC)
3091 {
3092 rs6000_vector_unit[V4SFmode] = VECTOR_ALTIVEC;
3093 rs6000_vector_mem[V4SFmode] = VECTOR_ALTIVEC;
3094 rs6000_vector_align[V4SFmode] = align32;
3095 }
3097 /* V16QImode, V8HImode, V4SImode are Altivec only, but possibly do VSX loads
3098 and stores. */
3099 if (TARGET_ALTIVEC)
3100 {
3101 rs6000_vector_unit[V4SImode] = VECTOR_ALTIVEC;
3102 rs6000_vector_unit[V8HImode] = VECTOR_ALTIVEC;
3103 rs6000_vector_unit[V16QImode] = VECTOR_ALTIVEC;
3104 rs6000_vector_align[V4SImode] = align32;
3105 rs6000_vector_align[V8HImode] = align32;
3106 rs6000_vector_align[V16QImode] = align32;
3108 if (TARGET_VSX)
3109 {
3110 rs6000_vector_mem[V4SImode] = VECTOR_VSX;
3111 rs6000_vector_mem[V8HImode] = VECTOR_VSX;
3112 rs6000_vector_mem[V16QImode] = VECTOR_VSX;
3113 }
3114 else
3115 {
3116 rs6000_vector_mem[V4SImode] = VECTOR_ALTIVEC;
3117 rs6000_vector_mem[V8HImode] = VECTOR_ALTIVEC;
3118 rs6000_vector_mem[V16QImode] = VECTOR_ALTIVEC;
3119 }
3120 }
3122 /* V2DImode, full mode depends on ISA 2.07 vector mode. Allow under VSX to
3123 do insert/splat/extract. Altivec doesn't have 64-bit integer support. */
3124 if (TARGET_VSX)
3125 {
3126 rs6000_vector_mem[V2DImode] = VECTOR_VSX;
3127 rs6000_vector_unit[V2DImode]
3128 = (TARGET_P8_VECTOR) ? VECTOR_P8_VECTOR : VECTOR_NONE;
3129 rs6000_vector_align[V2DImode] = align64;
3131 rs6000_vector_mem[V1TImode] = VECTOR_VSX;
3132 rs6000_vector_unit[V1TImode]
3133 = (TARGET_P8_VECTOR) ? VECTOR_P8_VECTOR : VECTOR_NONE;
3134 rs6000_vector_align[V1TImode] = 128;
3135 }
3137 /* DFmode, see if we want to use the VSX unit. Memory is handled
3138 differently, so don't set rs6000_vector_mem. */
3139 if (TARGET_VSX && TARGET_VSX_SCALAR_DOUBLE)
3140 {
3141 rs6000_vector_unit[DFmode] = VECTOR_VSX;
3142 rs6000_vector_align[DFmode] = 64;
3143 }
3145 /* SFmode, see if we want to use the VSX unit. */
3146 if (TARGET_P8_VECTOR && TARGET_VSX_SCALAR_FLOAT)
3147 {
3148 rs6000_vector_unit[SFmode] = VECTOR_VSX;
3149 rs6000_vector_align[SFmode] = 32;
3150 }
3152 /* Allow TImode in VSX register and set the VSX memory macros. */
3153 if (TARGET_VSX && TARGET_VSX_TIMODE)
3154 {
3155 rs6000_vector_mem[TImode] = VECTOR_VSX;
3156 rs6000_vector_align[TImode] = align64;
3157 }
3159 /* TODO add paired floating point vector support. */
3161 /* Register class constraints for the constraints that depend on compile
3162 switches. When the VSX code was added, different constraints were added
3163 based on the type (DFmode, V2DFmode, V4SFmode). For the vector types, all
3164 of the VSX registers are used. The register classes for scalar floating
3165 point types are set based on whether we allow that type into the upper
3166 (Altivec) registers. GCC has register classes to target the Altivec
3167 registers for load/store operations, to select using a VSX memory
3168 operation instead of the traditional floating point operation. The
3169 constraints are:
3171 d - Register class to use with traditional DFmode instructions.
3172 f - Register class to use with traditional SFmode instructions.
3173 v - Altivec register.
3174 wa - Any VSX register.
3175 wc - Reserved to represent individual CR bits (used in LLVM).
3176 wd - Preferred register class for V2DFmode.
3177 wf - Preferred register class for V4SFmode.
3178 wg - Float register for power6x move insns.
3179 wh - FP register for direct move instructions.
3180 wi - FP or VSX register to hold 64-bit integers for VSX insns.
3181 wj - FP or VSX register to hold 64-bit integers for direct moves.
3182 wk - FP or VSX register to hold 64-bit doubles for direct moves.
3183 wl - Float register if we can do 32-bit signed int loads.
3184 wm - VSX register for ISA 2.07 direct move operations.
3185 wn - always NO_REGS.
3186 wr - GPR if 64-bit mode is permitted.
3187 ws - Register class to do ISA 2.06 DF operations.
3188 wt - VSX register for TImode in VSX registers.
3189 wu - Altivec register for ISA 2.07 VSX SF/SI load/stores.
3190 wv - Altivec register for ISA 2.06 VSX DF/DI load/stores.
3191 ww - Register class to do SF conversions in with VSX operations.
3192 wx - Float register if we can do 32-bit int stores.
3193 wy - Register class to do ISA 2.07 SF operations.
3194 wz - Float register if we can do 32-bit unsigned int loads.
3195 wH - Altivec register if SImode is allowed in VSX registers.
3196 wI - VSX register if SImode is allowed in VSX registers.
3197 wJ - VSX register if QImode/HImode are allowed in VSX registers.
3198 wK - Altivec register if QImode/HImode are allowed in VSX registers. */
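/* Editor's note (illustrative, not part of the original file): these letters
   appear as operand constraints in the machine description; a hypothetical
   pattern operand such as (match_operand:V2DF 0 "vsx_register_operand" "=wd")
   would request the preferred V2DFmode class chosen below, with "wa"
   accepting any VSX register.  */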
3200 if (TARGET_HARD_FLOAT)
3201 rs6000_constraints[RS6000_CONSTRAINT_f] = FLOAT_REGS; /* SFmode */
3203 if (TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT)
3204 rs6000_constraints[RS6000_CONSTRAINT_d] = FLOAT_REGS; /* DFmode */
3206 if (TARGET_VSX)
3207 {
3208 rs6000_constraints[RS6000_CONSTRAINT_wa] = VSX_REGS;
3209 rs6000_constraints[RS6000_CONSTRAINT_wd] = VSX_REGS; /* V2DFmode */
3210 rs6000_constraints[RS6000_CONSTRAINT_wf] = VSX_REGS; /* V4SFmode */
3211 rs6000_constraints[RS6000_CONSTRAINT_ws] = VSX_REGS; /* DFmode */
3212 rs6000_constraints[RS6000_CONSTRAINT_wv] = ALTIVEC_REGS; /* DFmode */
3213 rs6000_constraints[RS6000_CONSTRAINT_wi] = VSX_REGS; /* DImode */
3215 if (TARGET_VSX_TIMODE)
3216 rs6000_constraints[RS6000_CONSTRAINT_wt] = VSX_REGS; /* TImode */
3217 }
3219 /* Add conditional constraints based on various options, to allow us to
3220 collapse multiple insn patterns. */
3221 if (TARGET_ALTIVEC)
3222 rs6000_constraints[RS6000_CONSTRAINT_v] = ALTIVEC_REGS;
3224 if (TARGET_MFPGPR) /* DFmode */
3225 rs6000_constraints[RS6000_CONSTRAINT_wg] = FLOAT_REGS;
3227 if (TARGET_LFIWAX)
3228 rs6000_constraints[RS6000_CONSTRAINT_wl] = FLOAT_REGS; /* DImode */
3230 if (TARGET_DIRECT_MOVE)
3231 {
3232 rs6000_constraints[RS6000_CONSTRAINT_wh] = FLOAT_REGS;
3233 rs6000_constraints[RS6000_CONSTRAINT_wj] /* DImode */
3234 = rs6000_constraints[RS6000_CONSTRAINT_wi];
3235 rs6000_constraints[RS6000_CONSTRAINT_wk] /* DFmode */
3236 = rs6000_constraints[RS6000_CONSTRAINT_ws];
3237 rs6000_constraints[RS6000_CONSTRAINT_wm] = VSX_REGS;
3238 }
3240 if (TARGET_POWERPC64)
3241 {
3242 rs6000_constraints[RS6000_CONSTRAINT_wr] = GENERAL_REGS;
3243 rs6000_constraints[RS6000_CONSTRAINT_wA] = BASE_REGS;
3244 }
3246 if (TARGET_P8_VECTOR) /* SFmode */
3247 {
3248 rs6000_constraints[RS6000_CONSTRAINT_wu] = ALTIVEC_REGS;
3249 rs6000_constraints[RS6000_CONSTRAINT_wy] = VSX_REGS;
3250 rs6000_constraints[RS6000_CONSTRAINT_ww] = VSX_REGS;
3251 }
3252 else if (TARGET_P8_VECTOR)
3253 {
3254 rs6000_constraints[RS6000_CONSTRAINT_wy] = FLOAT_REGS;
3255 rs6000_constraints[RS6000_CONSTRAINT_ww] = FLOAT_REGS;
3256 }
3257 else if (TARGET_VSX)
3258 rs6000_constraints[RS6000_CONSTRAINT_ww] = FLOAT_REGS;
3260 if (TARGET_STFIWX)
3261 rs6000_constraints[RS6000_CONSTRAINT_wx] = FLOAT_REGS; /* DImode */
3263 if (TARGET_LFIWZX)
3264 rs6000_constraints[RS6000_CONSTRAINT_wz] = FLOAT_REGS; /* DImode */
3266 if (TARGET_FLOAT128_TYPE)
3267 {
3268 rs6000_constraints[RS6000_CONSTRAINT_wq] = VSX_REGS; /* KFmode */
3269 if (FLOAT128_IEEE_P (TFmode))
3270 rs6000_constraints[RS6000_CONSTRAINT_wp] = VSX_REGS; /* TFmode */
3271 }
3273 /* Support for new D-form instructions. */
3274 if (TARGET_P9_DFORM_SCALAR)
3275 rs6000_constraints[RS6000_CONSTRAINT_wb] = ALTIVEC_REGS;
3277 /* Support for ISA 3.0 (power9) vectors. */
3278 if (TARGET_P9_VECTOR)
3279 rs6000_constraints[RS6000_CONSTRAINT_wo] = VSX_REGS;
3281 /* Support for new direct moves (ISA 3.0 + 64bit). */
3282 if (TARGET_DIRECT_MOVE_128)
3283 rs6000_constraints[RS6000_CONSTRAINT_we] = VSX_REGS;
3285 /* Support small integers in VSX registers. */
3286 if (TARGET_P8_VECTOR)
3287 {
3288 rs6000_constraints[RS6000_CONSTRAINT_wH] = ALTIVEC_REGS;
3289 rs6000_constraints[RS6000_CONSTRAINT_wI] = FLOAT_REGS;
3290 if (TARGET_P9_VECTOR)
3291 {
3292 rs6000_constraints[RS6000_CONSTRAINT_wJ] = FLOAT_REGS;
3293 rs6000_constraints[RS6000_CONSTRAINT_wK] = ALTIVEC_REGS;
3294 }
3295 }
3297 /* Set up the reload helper and direct move functions. */
3298 if (TARGET_VSX || TARGET_ALTIVEC)
3299 {
3300 if (TARGET_64BIT)
3301 {
3302 reg_addr[V16QImode].reload_store = CODE_FOR_reload_v16qi_di_store;
3303 reg_addr[V16QImode].reload_load = CODE_FOR_reload_v16qi_di_load;
3304 reg_addr[V8HImode].reload_store = CODE_FOR_reload_v8hi_di_store;
3305 reg_addr[V8HImode].reload_load = CODE_FOR_reload_v8hi_di_load;
3306 reg_addr[V4SImode].reload_store = CODE_FOR_reload_v4si_di_store;
3307 reg_addr[V4SImode].reload_load = CODE_FOR_reload_v4si_di_load;
3308 reg_addr[V2DImode].reload_store = CODE_FOR_reload_v2di_di_store;
3309 reg_addr[V2DImode].reload_load = CODE_FOR_reload_v2di_di_load;
3310 reg_addr[V1TImode].reload_store = CODE_FOR_reload_v1ti_di_store;
3311 reg_addr[V1TImode].reload_load = CODE_FOR_reload_v1ti_di_load;
3312 reg_addr[V4SFmode].reload_store = CODE_FOR_reload_v4sf_di_store;
3313 reg_addr[V4SFmode].reload_load = CODE_FOR_reload_v4sf_di_load;
3314 reg_addr[V2DFmode].reload_store = CODE_FOR_reload_v2df_di_store;
3315 reg_addr[V2DFmode].reload_load = CODE_FOR_reload_v2df_di_load;
3316 reg_addr[DFmode].reload_store = CODE_FOR_reload_df_di_store;
3317 reg_addr[DFmode].reload_load = CODE_FOR_reload_df_di_load;
3318 reg_addr[DDmode].reload_store = CODE_FOR_reload_dd_di_store;
3319 reg_addr[DDmode].reload_load = CODE_FOR_reload_dd_di_load;
3320 reg_addr[SFmode].reload_store = CODE_FOR_reload_sf_di_store;
3321 reg_addr[SFmode].reload_load = CODE_FOR_reload_sf_di_load;
3323 if (FLOAT128_VECTOR_P (KFmode))
3324 {
3325 reg_addr[KFmode].reload_store = CODE_FOR_reload_kf_di_store;
3326 reg_addr[KFmode].reload_load = CODE_FOR_reload_kf_di_load;
3327 }
3329 if (FLOAT128_VECTOR_P (TFmode))
3330 {
3331 reg_addr[TFmode].reload_store = CODE_FOR_reload_tf_di_store;
3332 reg_addr[TFmode].reload_load = CODE_FOR_reload_tf_di_load;
3333 }
3335 /* Only provide a reload handler for SDmode if lfiwzx/stfiwx are
3336 available. */
3337 if (TARGET_NO_SDMODE_STACK)
3338 {
3339 reg_addr[SDmode].reload_store = CODE_FOR_reload_sd_di_store;
3340 reg_addr[SDmode].reload_load = CODE_FOR_reload_sd_di_load;
3341 }
3343 if (TARGET_VSX_TIMODE)
3344 {
3345 reg_addr[TImode].reload_store = CODE_FOR_reload_ti_di_store;
3346 reg_addr[TImode].reload_load = CODE_FOR_reload_ti_di_load;
3347 }
3349 if (TARGET_DIRECT_MOVE && !TARGET_DIRECT_MOVE_128)
3350 {
3351 reg_addr[TImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxti;
3352 reg_addr[V1TImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv1ti;
3353 reg_addr[V2DFmode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv2df;
3354 reg_addr[V2DImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv2di;
3355 reg_addr[V4SFmode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv4sf;
3356 reg_addr[V4SImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv4si;
3357 reg_addr[V8HImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv8hi;
3358 reg_addr[V16QImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv16qi;
3359 reg_addr[SFmode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxsf;
3361 reg_addr[TImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprti;
3362 reg_addr[V1TImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv1ti;
3363 reg_addr[V2DFmode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv2df;
3364 reg_addr[V2DImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv2di;
3365 reg_addr[V4SFmode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv4sf;
3366 reg_addr[V4SImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv4si;
3367 reg_addr[V8HImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv8hi;
3368 reg_addr[V16QImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv16qi;
3369 reg_addr[SFmode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprsf;
3371 if (FLOAT128_VECTOR_P (KFmode))
3373 reg_addr[KFmode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxkf;
3374 reg_addr[KFmode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprkf;
3377 if (FLOAT128_VECTOR_P (TFmode))
3379 reg_addr[TFmode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxtf;
3380 reg_addr[TFmode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprtf;
3384 else
3386 reg_addr[V16QImode].reload_store = CODE_FOR_reload_v16qi_si_store;
3387 reg_addr[V16QImode].reload_load = CODE_FOR_reload_v16qi_si_load;
3388 reg_addr[V8HImode].reload_store = CODE_FOR_reload_v8hi_si_store;
3389 reg_addr[V8HImode].reload_load = CODE_FOR_reload_v8hi_si_load;
3390 reg_addr[V4SImode].reload_store = CODE_FOR_reload_v4si_si_store;
3391 reg_addr[V4SImode].reload_load = CODE_FOR_reload_v4si_si_load;
3392 reg_addr[V2DImode].reload_store = CODE_FOR_reload_v2di_si_store;
3393 reg_addr[V2DImode].reload_load = CODE_FOR_reload_v2di_si_load;
3394 reg_addr[V1TImode].reload_store = CODE_FOR_reload_v1ti_si_store;
3395 reg_addr[V1TImode].reload_load = CODE_FOR_reload_v1ti_si_load;
3396 reg_addr[V4SFmode].reload_store = CODE_FOR_reload_v4sf_si_store;
3397 reg_addr[V4SFmode].reload_load = CODE_FOR_reload_v4sf_si_load;
3398 reg_addr[V2DFmode].reload_store = CODE_FOR_reload_v2df_si_store;
3399 reg_addr[V2DFmode].reload_load = CODE_FOR_reload_v2df_si_load;
3400 reg_addr[DFmode].reload_store = CODE_FOR_reload_df_si_store;
3401 reg_addr[DFmode].reload_load = CODE_FOR_reload_df_si_load;
3402 reg_addr[DDmode].reload_store = CODE_FOR_reload_dd_si_store;
3403 reg_addr[DDmode].reload_load = CODE_FOR_reload_dd_si_load;
3404 reg_addr[SFmode].reload_store = CODE_FOR_reload_sf_si_store;
3405 reg_addr[SFmode].reload_load = CODE_FOR_reload_sf_si_load;
3407 if (FLOAT128_VECTOR_P (KFmode))
3409 reg_addr[KFmode].reload_store = CODE_FOR_reload_kf_si_store;
3410 reg_addr[KFmode].reload_load = CODE_FOR_reload_kf_si_load;
3413 if (FLOAT128_IEEE_P (TFmode))
3415 reg_addr[TFmode].reload_store = CODE_FOR_reload_tf_si_store;
3416 reg_addr[TFmode].reload_load = CODE_FOR_reload_tf_si_load;
3419 /* Only provide a reload handler for SDmode if lfiwzx/stfiwx are
3420 available. */
3421 if (TARGET_NO_SDMODE_STACK)
3423 reg_addr[SDmode].reload_store = CODE_FOR_reload_sd_si_store;
3424 reg_addr[SDmode].reload_load = CODE_FOR_reload_sd_si_load;
3427 if (TARGET_VSX_TIMODE)
3429 reg_addr[TImode].reload_store = CODE_FOR_reload_ti_si_store;
3430 reg_addr[TImode].reload_load = CODE_FOR_reload_ti_si_load;
3433 if (TARGET_DIRECT_MOVE)
3435 reg_addr[DImode].reload_fpr_gpr = CODE_FOR_reload_fpr_from_gprdi;
3436 reg_addr[DDmode].reload_fpr_gpr = CODE_FOR_reload_fpr_from_gprdd;
3437 reg_addr[DFmode].reload_fpr_gpr = CODE_FOR_reload_fpr_from_gprdf;
3441 reg_addr[DFmode].scalar_in_vmx_p = true;
3442 reg_addr[DImode].scalar_in_vmx_p = true;
3444 if (TARGET_P8_VECTOR)
3446 reg_addr[SFmode].scalar_in_vmx_p = true;
3447 reg_addr[SImode].scalar_in_vmx_p = true;
3449 if (TARGET_P9_VECTOR)
3451 reg_addr[HImode].scalar_in_vmx_p = true;
3452 reg_addr[QImode].scalar_in_vmx_p = true;
3457   /* Set up the fusion operations.  */
3458 if (TARGET_P8_FUSION)
3460 reg_addr[QImode].fusion_gpr_ld = CODE_FOR_fusion_gpr_load_qi;
3461 reg_addr[HImode].fusion_gpr_ld = CODE_FOR_fusion_gpr_load_hi;
3462 reg_addr[SImode].fusion_gpr_ld = CODE_FOR_fusion_gpr_load_si;
3463 if (TARGET_64BIT)
3464 reg_addr[DImode].fusion_gpr_ld = CODE_FOR_fusion_gpr_load_di;
3467 if (TARGET_P9_FUSION)
3469 struct fuse_insns {
3470 enum machine_mode mode; /* mode of the fused type. */
3471 enum machine_mode pmode; /* pointer mode. */
3472 enum rs6000_reload_reg_type rtype; /* register type. */
3473 enum insn_code load; /* load insn. */
3474 enum insn_code store; /* store insn. */
3477 static const struct fuse_insns addis_insns[] = {
3478 { SFmode, DImode, RELOAD_REG_FPR,
3479 CODE_FOR_fusion_vsx_di_sf_load,
3480 CODE_FOR_fusion_vsx_di_sf_store },
3482 { SFmode, SImode, RELOAD_REG_FPR,
3483 CODE_FOR_fusion_vsx_si_sf_load,
3484 CODE_FOR_fusion_vsx_si_sf_store },
3486 { DFmode, DImode, RELOAD_REG_FPR,
3487 CODE_FOR_fusion_vsx_di_df_load,
3488 CODE_FOR_fusion_vsx_di_df_store },
3490 { DFmode, SImode, RELOAD_REG_FPR,
3491 CODE_FOR_fusion_vsx_si_df_load,
3492 CODE_FOR_fusion_vsx_si_df_store },
3494 { DImode, DImode, RELOAD_REG_FPR,
3495 CODE_FOR_fusion_vsx_di_di_load,
3496 CODE_FOR_fusion_vsx_di_di_store },
3498 { DImode, SImode, RELOAD_REG_FPR,
3499 CODE_FOR_fusion_vsx_si_di_load,
3500 CODE_FOR_fusion_vsx_si_di_store },
3502 { QImode, DImode, RELOAD_REG_GPR,
3503 CODE_FOR_fusion_gpr_di_qi_load,
3504 CODE_FOR_fusion_gpr_di_qi_store },
3506 { QImode, SImode, RELOAD_REG_GPR,
3507 CODE_FOR_fusion_gpr_si_qi_load,
3508 CODE_FOR_fusion_gpr_si_qi_store },
3510 { HImode, DImode, RELOAD_REG_GPR,
3511 CODE_FOR_fusion_gpr_di_hi_load,
3512 CODE_FOR_fusion_gpr_di_hi_store },
3514 { HImode, SImode, RELOAD_REG_GPR,
3515 CODE_FOR_fusion_gpr_si_hi_load,
3516 CODE_FOR_fusion_gpr_si_hi_store },
3518 { SImode, DImode, RELOAD_REG_GPR,
3519 CODE_FOR_fusion_gpr_di_si_load,
3520 CODE_FOR_fusion_gpr_di_si_store },
3522 { SImode, SImode, RELOAD_REG_GPR,
3523 CODE_FOR_fusion_gpr_si_si_load,
3524 CODE_FOR_fusion_gpr_si_si_store },
3526 { SFmode, DImode, RELOAD_REG_GPR,
3527 CODE_FOR_fusion_gpr_di_sf_load,
3528 CODE_FOR_fusion_gpr_di_sf_store },
3530 { SFmode, SImode, RELOAD_REG_GPR,
3531 CODE_FOR_fusion_gpr_si_sf_load,
3532 CODE_FOR_fusion_gpr_si_sf_store },
3534 { DImode, DImode, RELOAD_REG_GPR,
3535 CODE_FOR_fusion_gpr_di_di_load,
3536 CODE_FOR_fusion_gpr_di_di_store },
3538 { DFmode, DImode, RELOAD_REG_GPR,
3539 CODE_FOR_fusion_gpr_di_df_load,
3540 CODE_FOR_fusion_gpr_di_df_store },
3543 machine_mode cur_pmode = Pmode;
3544 size_t i;
3546 for (i = 0; i < ARRAY_SIZE (addis_insns); i++)
3548 machine_mode xmode = addis_insns[i].mode;
3549 enum rs6000_reload_reg_type rtype = addis_insns[i].rtype;
3551 if (addis_insns[i].pmode != cur_pmode)
3552 continue;
3554 if (rtype == RELOAD_REG_FPR && !TARGET_HARD_FLOAT)
3555 continue;
3557 reg_addr[xmode].fusion_addis_ld[rtype] = addis_insns[i].load;
3558 reg_addr[xmode].fusion_addis_st[rtype] = addis_insns[i].store;
3560 if (rtype == RELOAD_REG_FPR && TARGET_P9_DFORM_SCALAR)
3562 reg_addr[xmode].fusion_addis_ld[RELOAD_REG_VMX]
3563 = addis_insns[i].load;
3564 reg_addr[xmode].fusion_addis_st[RELOAD_REG_VMX]
3565 = addis_insns[i].store;
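	  /* As an illustration: on a 64-bit target where Pmode is DImode and
	     hard floating point is enabled, the SFmode RELOAD_REG_FPR entry
	     above registers CODE_FOR_fusion_vsx_di_sf_load and
	     CODE_FOR_fusion_vsx_di_sf_store, while every SImode-pointer row
	     is skipped by the pmode != cur_pmode test (a sketch of the
	     expected table walk, not an exhaustive trace).  */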
3570   /* Note which types support fusing a TOC setup with a memory insn.  We only
3571      do fused TOCs for medium/large code models.  */
3572 if (TARGET_P8_FUSION && TARGET_TOC_FUSION && TARGET_POWERPC64
3573 && (TARGET_CMODEL != CMODEL_SMALL))
3575 reg_addr[QImode].fused_toc = true;
3576 reg_addr[HImode].fused_toc = true;
3577 reg_addr[SImode].fused_toc = true;
3578 reg_addr[DImode].fused_toc = true;
3579 if (TARGET_HARD_FLOAT)
3581 if (TARGET_SINGLE_FLOAT)
3582 reg_addr[SFmode].fused_toc = true;
3583 if (TARGET_DOUBLE_FLOAT)
3584 reg_addr[DFmode].fused_toc = true;
3588 /* Precalculate HARD_REGNO_NREGS. */
3589 for (r = 0; r < FIRST_PSEUDO_REGISTER; ++r)
3590 for (m = 0; m < NUM_MACHINE_MODES; ++m)
3591 rs6000_hard_regno_nregs[m][r]
3592 = rs6000_hard_regno_nregs_internal (r, (machine_mode)m);
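  /* For example, an 8-byte DFmode value needs two consecutive GPRs on a
     32-bit target but only one on a 64-bit target, so the table built above
     would record 2 and 1 respectively for a GPR (a sketch; the exact values
     come from rs6000_hard_regno_nregs_internal).  */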
3594 /* Precalculate HARD_REGNO_MODE_OK. */
3595 for (r = 0; r < FIRST_PSEUDO_REGISTER; ++r)
3596 for (m = 0; m < NUM_MACHINE_MODES; ++m)
3597 if (rs6000_hard_regno_mode_ok (r, (machine_mode)m))
3598 rs6000_hard_regno_mode_ok_p[m][r] = true;
3600 /* Precalculate CLASS_MAX_NREGS sizes. */
3601 for (c = 0; c < LIM_REG_CLASSES; ++c)
3603 int reg_size;
3605 if (TARGET_VSX && VSX_REG_CLASS_P (c))
3606 reg_size = UNITS_PER_VSX_WORD;
3608 else if (c == ALTIVEC_REGS)
3609 reg_size = UNITS_PER_ALTIVEC_WORD;
3611 else if (c == FLOAT_REGS)
3612 reg_size = UNITS_PER_FP_WORD;
3614 else
3615 reg_size = UNITS_PER_WORD;
3617 for (m = 0; m < NUM_MACHINE_MODES; ++m)
3619 machine_mode m2 = (machine_mode)m;
3620 int reg_size2 = reg_size;
3622 	  /* TDmode and IBM 128-bit floating point always take 2 registers, even
3623 	     in VSX.  */
3624 if (TARGET_VSX && VSX_REG_CLASS_P (c) && FLOAT128_2REG_P (m))
3625 reg_size2 = UNITS_PER_FP_WORD;
3627 rs6000_class_max_nregs[m][c]
3628 = (GET_MODE_SIZE (m2) + reg_size2 - 1) / reg_size2;
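	  /* A worked example of the formula above, assuming UNITS_PER_VSX_WORD
	     is 16 and UNITS_PER_FP_WORD is 8: a 16-byte V4SImode value in a
	     VSX class needs (16 + 16 - 1) / 16 == 1 register, while a 16-byte
	     TDmode value is forced to reg_size2 == 8 and needs
	     (16 + 8 - 1) / 8 == 2, matching the FLOAT128_2REG_P special
	     case.  */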
3632   /* Calculate which modes should automatically use the reciprocal divide and
3633      square root estimate instructions.  In the future, we may automatically
3634      generate the instructions even if the user did not specify -mrecip; the
3635      older machines' double-precision reciprocal square root estimate is not
3636      accurate enough.  */
3637 memset (rs6000_recip_bits, 0, sizeof (rs6000_recip_bits));
3638 if (TARGET_FRES)
3639 rs6000_recip_bits[SFmode] = RS6000_RECIP_MASK_HAVE_RE;
3640 if (TARGET_FRE)
3641 rs6000_recip_bits[DFmode] = RS6000_RECIP_MASK_HAVE_RE;
3642 if (VECTOR_UNIT_ALTIVEC_OR_VSX_P (V4SFmode))
3643 rs6000_recip_bits[V4SFmode] = RS6000_RECIP_MASK_HAVE_RE;
3644 if (VECTOR_UNIT_VSX_P (V2DFmode))
3645 rs6000_recip_bits[V2DFmode] = RS6000_RECIP_MASK_HAVE_RE;
3647 if (TARGET_FRSQRTES)
3648 rs6000_recip_bits[SFmode] |= RS6000_RECIP_MASK_HAVE_RSQRTE;
3649 if (TARGET_FRSQRTE)
3650 rs6000_recip_bits[DFmode] |= RS6000_RECIP_MASK_HAVE_RSQRTE;
3651 if (VECTOR_UNIT_ALTIVEC_OR_VSX_P (V4SFmode))
3652 rs6000_recip_bits[V4SFmode] |= RS6000_RECIP_MASK_HAVE_RSQRTE;
3653 if (VECTOR_UNIT_VSX_P (V2DFmode))
3654 rs6000_recip_bits[V2DFmode] |= RS6000_RECIP_MASK_HAVE_RSQRTE;
3656 if (rs6000_recip_control)
3658 if (!flag_finite_math_only)
3659 warning (0, "-mrecip requires -ffinite-math or -ffast-math");
3660 if (flag_trapping_math)
3661 warning (0, "-mrecip requires -fno-trapping-math or -ffast-math");
3662 if (!flag_reciprocal_math)
3663 warning (0, "-mrecip requires -freciprocal-math or -ffast-math");
3664 if (flag_finite_math_only && !flag_trapping_math && flag_reciprocal_math)
3666 if (RS6000_RECIP_HAVE_RE_P (SFmode)
3667 && (rs6000_recip_control & RECIP_SF_DIV) != 0)
3668 rs6000_recip_bits[SFmode] |= RS6000_RECIP_MASK_AUTO_RE;
3670 if (RS6000_RECIP_HAVE_RE_P (DFmode)
3671 && (rs6000_recip_control & RECIP_DF_DIV) != 0)
3672 rs6000_recip_bits[DFmode] |= RS6000_RECIP_MASK_AUTO_RE;
3674 if (RS6000_RECIP_HAVE_RE_P (V4SFmode)
3675 && (rs6000_recip_control & RECIP_V4SF_DIV) != 0)
3676 rs6000_recip_bits[V4SFmode] |= RS6000_RECIP_MASK_AUTO_RE;
3678 if (RS6000_RECIP_HAVE_RE_P (V2DFmode)
3679 && (rs6000_recip_control & RECIP_V2DF_DIV) != 0)
3680 rs6000_recip_bits[V2DFmode] |= RS6000_RECIP_MASK_AUTO_RE;
3682 if (RS6000_RECIP_HAVE_RSQRTE_P (SFmode)
3683 && (rs6000_recip_control & RECIP_SF_RSQRT) != 0)
3684 rs6000_recip_bits[SFmode] |= RS6000_RECIP_MASK_AUTO_RSQRTE;
3686 if (RS6000_RECIP_HAVE_RSQRTE_P (DFmode)
3687 && (rs6000_recip_control & RECIP_DF_RSQRT) != 0)
3688 rs6000_recip_bits[DFmode] |= RS6000_RECIP_MASK_AUTO_RSQRTE;
3690 if (RS6000_RECIP_HAVE_RSQRTE_P (V4SFmode)
3691 && (rs6000_recip_control & RECIP_V4SF_RSQRT) != 0)
3692 rs6000_recip_bits[V4SFmode] |= RS6000_RECIP_MASK_AUTO_RSQRTE;
3694 if (RS6000_RECIP_HAVE_RSQRTE_P (V2DFmode)
3695 && (rs6000_recip_control & RECIP_V2DF_RSQRT) != 0)
3696 rs6000_recip_bits[V2DFmode] |= RS6000_RECIP_MASK_AUTO_RSQRTE;
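  /* Putting the two groups of settings together: under -mrecip -ffast-math
     on a VSX target, rs6000_recip_bits[V2DFmode] would be expected to end up
     with HAVE_RE, HAVE_RSQRTE, AUTO_RE and AUTO_RSQRTE all set (a sketch,
     assuming the default RECIP_* division and rsqrt bits are enabled in
     rs6000_recip_control).  */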
3700   /* Update the addr mask bits in reg_addr to help the secondary reload and
3701      GO_IF_LEGITIMATE_ADDRESS support figure out the appropriate addressing
3702      to use.  */
3703 rs6000_setup_reg_addr_masks ();
3705 if (global_init_p || TARGET_DEBUG_TARGET)
3707 if (TARGET_DEBUG_REG)
3708 rs6000_debug_reg_global ();
3710 if (TARGET_DEBUG_COST || TARGET_DEBUG_REG)
3711 fprintf (stderr,
3712 "SImode variable mult cost = %d\n"
3713 "SImode constant mult cost = %d\n"
3714 "SImode short constant mult cost = %d\n"
3715 "DImode multipliciation cost = %d\n"
3716 "SImode division cost = %d\n"
3717 "DImode division cost = %d\n"
3718 "Simple fp operation cost = %d\n"
3719 "DFmode multiplication cost = %d\n"
3720 "SFmode division cost = %d\n"
3721 "DFmode division cost = %d\n"
3722 "cache line size = %d\n"
3723 "l1 cache size = %d\n"
3724 "l2 cache size = %d\n"
3725 "simultaneous prefetches = %d\n"
3726 "\n",
3727 rs6000_cost->mulsi,
3728 rs6000_cost->mulsi_const,
3729 rs6000_cost->mulsi_const9,
3730 rs6000_cost->muldi,
3731 rs6000_cost->divsi,
3732 rs6000_cost->divdi,
3733 rs6000_cost->fp,
3734 rs6000_cost->dmul,
3735 rs6000_cost->sdiv,
3736 rs6000_cost->ddiv,
3737 rs6000_cost->cache_line_size,
3738 rs6000_cost->l1_cache_size,
3739 rs6000_cost->l2_cache_size,
3740 rs6000_cost->simultaneous_prefetches);
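  /* This dump can be requested from the command line with something like
     "gcc -mcpu=power8 -mdebug=cost ..." (assuming the usual mapping of
     TARGET_DEBUG_COST to the -mdebug=cost sub-option).  */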
3744 #if TARGET_MACHO
3745 /* The Darwin version of SUBTARGET_OVERRIDE_OPTIONS. */
3747 static void
3748 darwin_rs6000_override_options (void)
3750   /* The Darwin ABI always includes AltiVec; it can't be (validly) turned
3751      off.  */
3752 rs6000_altivec_abi = 1;
3753 TARGET_ALTIVEC_VRSAVE = 1;
3754 rs6000_current_abi = ABI_DARWIN;
3756 if (DEFAULT_ABI == ABI_DARWIN
3757 && TARGET_64BIT)
3758 darwin_one_byte_bool = 1;
3760 if (TARGET_64BIT && ! TARGET_POWERPC64)
3762 rs6000_isa_flags |= OPTION_MASK_POWERPC64;
3763 warning (0, "-m64 requires PowerPC64 architecture, enabling");
3765 if (flag_mkernel)
3767 rs6000_default_long_calls = 1;
3768 rs6000_isa_flags |= OPTION_MASK_SOFT_FLOAT;
3771 /* Make -m64 imply -maltivec. Darwin's 64-bit ABI includes
3772 Altivec. */
3773 if (!flag_mkernel && !flag_apple_kext
3774 && TARGET_64BIT
3775 && ! (rs6000_isa_flags_explicit & OPTION_MASK_ALTIVEC))
3776 rs6000_isa_flags |= OPTION_MASK_ALTIVEC;
3778 /* Unless the user (not the configurer) has explicitly overridden
3779 it with -mcpu=G3 or -mno-altivec, then 10.5+ targets default to
3780 G4 unless targeting the kernel. */
3781 if (!flag_mkernel
3782 && !flag_apple_kext
3783 && strverscmp (darwin_macosx_version_min, "10.5") >= 0
3784 && ! (rs6000_isa_flags_explicit & OPTION_MASK_ALTIVEC)
3785 && ! global_options_set.x_rs6000_cpu_index)
3787 rs6000_isa_flags |= OPTION_MASK_ALTIVEC;
3790 #endif
3792 /* If not otherwise specified by a target, make 'long double' equivalent to
3793 'double'. */
3795 #ifndef RS6000_DEFAULT_LONG_DOUBLE_SIZE
3796 #define RS6000_DEFAULT_LONG_DOUBLE_SIZE 64
3797 #endif
3799 /* Return the builtin mask of the various options that could affect which
3800    builtins are enabled.  In the past we used target_flags, but we've run out
3801    of bits, and some options like PAIRED are no longer in target_flags.  */
3803 HOST_WIDE_INT
3804 rs6000_builtin_mask_calculate (void)
3806 return (((TARGET_ALTIVEC) ? RS6000_BTM_ALTIVEC : 0)
3807 | ((TARGET_CMPB) ? RS6000_BTM_CMPB : 0)
3808 | ((TARGET_VSX) ? RS6000_BTM_VSX : 0)
3809 | ((TARGET_PAIRED_FLOAT) ? RS6000_BTM_PAIRED : 0)
3810 | ((TARGET_FRE) ? RS6000_BTM_FRE : 0)
3811 | ((TARGET_FRES) ? RS6000_BTM_FRES : 0)
3812 | ((TARGET_FRSQRTE) ? RS6000_BTM_FRSQRTE : 0)
3813 | ((TARGET_FRSQRTES) ? RS6000_BTM_FRSQRTES : 0)
3814 | ((TARGET_POPCNTD) ? RS6000_BTM_POPCNTD : 0)
3815 | ((rs6000_cpu == PROCESSOR_CELL) ? RS6000_BTM_CELL : 0)
3816 | ((TARGET_P8_VECTOR) ? RS6000_BTM_P8_VECTOR : 0)
3817 | ((TARGET_P9_VECTOR) ? RS6000_BTM_P9_VECTOR : 0)
3818 | ((TARGET_P9_MISC) ? RS6000_BTM_P9_MISC : 0)
3819 | ((TARGET_MODULO) ? RS6000_BTM_MODULO : 0)
3820 | ((TARGET_64BIT) ? RS6000_BTM_64BIT : 0)
3821 | ((TARGET_CRYPTO) ? RS6000_BTM_CRYPTO : 0)
3822 | ((TARGET_HTM) ? RS6000_BTM_HTM : 0)
3823 | ((TARGET_DFP) ? RS6000_BTM_DFP : 0)
3824 | ((TARGET_HARD_FLOAT) ? RS6000_BTM_HARD_FLOAT : 0)
3825 | ((TARGET_LONG_DOUBLE_128) ? RS6000_BTM_LDBL128 : 0)
3826 | ((TARGET_FLOAT128_TYPE) ? RS6000_BTM_FLOAT128 : 0));
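/* For instance, compiling with -mcpu=power8 normally enables TARGET_ALTIVEC,
   TARGET_VSX and TARGET_P8_VECTOR, so the mask computed above would include
   at least RS6000_BTM_ALTIVEC | RS6000_BTM_VSX | RS6000_BTM_P8_VECTOR (an
   expected combination, not an exhaustive list).  */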
3829 /* Implement TARGET_MD_ASM_ADJUST. All asm statements are considered
3830 to clobber the XER[CA] bit because clobbering that bit without telling
3831 the compiler worked just fine with versions of GCC before GCC 5, and
3832 breaking a lot of older code in ways that are hard to track down is
3833 not such a great idea. */
3835 static rtx_insn *
3836 rs6000_md_asm_adjust (vec<rtx> &/*outputs*/, vec<rtx> &/*inputs*/,
3837 vec<const char *> &/*constraints*/,
3838 vec<rtx> &clobbers, HARD_REG_SET &clobbered_regs)
3840 clobbers.safe_push (gen_rtx_REG (SImode, CA_REGNO));
3841 SET_HARD_REG_BIT (clobbered_regs, CA_REGNO);
3842 return NULL;
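/* A sketch of the kind of user code this policy protects:

     __asm__ ("addic %0,%1,-1" : "=r" (x) : "r" (y));

   addic silently updates XER[CA] without listing it as a clobber, so the
   compiler must assume any asm may do the same.  */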
3845 /* Override command line options.
3847 Combine build-specific configuration information with options
3848 specified on the command line to set various state variables which
3849 influence code generation, optimization, and expansion of built-in
3850 functions. Assure that command-line configuration preferences are
3851 compatible with each other and with the build configuration; issue
3852 warnings while adjusting configuration or error messages while
3853 rejecting configuration.
3855 Upon entry to this function:
3857 This function is called once at the beginning of
3858 compilation, and then again at the start and end of compiling
3859 each section of code that has a different configuration, as
3860 indicated, for example, by adding the
3862 __attribute__((__target__("cpu=power9")))
3864 qualifier to a function definition or, for example, by bracketing
3865 code between
3867 #pragma GCC target("altivec")
3871 #pragma GCC reset_options
3873 directives. Parameter global_init_p is true for the initial
3874 invocation, which initializes global variables, and false for all
3875 subsequent invocations.
3878 Various global state information is assumed to be valid. This
3879 includes OPTION_TARGET_CPU_DEFAULT, representing the name of the
3880 default CPU specified at build configure time, TARGET_DEFAULT,
3881 representing the default set of option flags for the default
3882 target, and global_options_set.x_rs6000_isa_flags, representing
3883 which options were requested on the command line.
3885 Upon return from this function:
3887 rs6000_isa_flags_explicit has a non-zero bit for each flag that
3888 was set by name on the command line. Additionally, if certain
3889 attributes are automatically enabled or disabled by this function
3890 in order to assure compatibility between options and
3891 configuration, the flags associated with those attributes are
3892 also set. By setting these "explicit bits", we avoid the risk
3893 that other code might accidentally overwrite these particular
3894 attributes with "default values".
3896 The various bits of rs6000_isa_flags are set to indicate the
3897 target options that have been selected for the most current
3898 compilation efforts. This has the effect of also turning on the
3899 associated TARGET_XXX values since these are macros which are
3900 generally defined to test the corresponding bit of the
3901 rs6000_isa_flags variable.
3903 The variable rs6000_builtin_mask is set to represent the target
3904 options for the most current compilation efforts, consistent with
3905 the current contents of rs6000_isa_flags. This variable controls
3906 expansion of built-in functions.
3908 Various other global variables and fields of global structures
3909 (over 50 in all) are initialized to reflect the desired options
3910 for the most current compilation efforts. */
3912 static bool
3913 rs6000_option_override_internal (bool global_init_p)
3915 bool ret = true;
3916 bool have_cpu = false;
3918 /* The default cpu requested at configure time, if any. */
3919 const char *implicit_cpu = OPTION_TARGET_CPU_DEFAULT;
3921 HOST_WIDE_INT set_masks;
3922 HOST_WIDE_INT ignore_masks;
3923 int cpu_index;
3924 int tune_index;
3925 struct cl_target_option *main_target_opt
3926 = ((global_init_p || target_option_default_node == NULL)
3927 ? NULL : TREE_TARGET_OPTION (target_option_default_node));
3929 /* Print defaults. */
3930 if ((TARGET_DEBUG_REG || TARGET_DEBUG_TARGET) && global_init_p)
3931 rs6000_print_isa_options (stderr, 0, "TARGET_DEFAULT", TARGET_DEFAULT);
3933 /* Remember the explicit arguments. */
3934 if (global_init_p)
3935 rs6000_isa_flags_explicit = global_options_set.x_rs6000_isa_flags;
3937 /* On 64-bit Darwin, power alignment is ABI-incompatible with some C
3938 library functions, so warn about it. The flag may be useful for
3939 performance studies from time to time though, so don't disable it
3940 entirely. */
3941 if (global_options_set.x_rs6000_alignment_flags
3942 && rs6000_alignment_flags == MASK_ALIGN_POWER
3943 && DEFAULT_ABI == ABI_DARWIN
3944 && TARGET_64BIT)
3945 warning (0, "-malign-power is not supported for 64-bit Darwin;"
3946 " it is incompatible with the installed C and C++ libraries");
3948   /* Numerous experiments show that IRA-based loop pressure
3949      calculation works better for RTL loop-invariant motion on targets
3950      with enough (>= 32) registers.  It is an expensive optimization,
3951      so it is only enabled when optimizing for peak performance.  */
3952 if (optimize >= 3 && global_init_p
3953 && !global_options_set.x_flag_ira_loop_pressure)
3954 flag_ira_loop_pressure = 1;
3956   /* -fsanitize=address needs to turn on -fasynchronous-unwind-tables in order
3957      for tracebacks to be complete, but not if an -fasynchronous-unwind-tables
3958      option was already specified.  */
3959 if (flag_sanitize & SANITIZE_USER_ADDRESS
3960 && !global_options_set.x_flag_asynchronous_unwind_tables)
3961 flag_asynchronous_unwind_tables = 1;
3963 /* Set the pointer size. */
3964 if (TARGET_64BIT)
3966 rs6000_pmode = (int)DImode;
3967 rs6000_pointer_size = 64;
3969 else
3971 rs6000_pmode = (int)SImode;
3972 rs6000_pointer_size = 32;
3975 /* Some OSs don't support saving the high part of 64-bit registers on context
3976 switch. Other OSs don't support saving Altivec registers. On those OSs,
3977 we don't touch the OPTION_MASK_POWERPC64 or OPTION_MASK_ALTIVEC settings;
3978 if the user wants either, the user must explicitly specify them and we
3979 won't interfere with the user's specification. */
3981 set_masks = POWERPC_MASKS;
3982 #ifdef OS_MISSING_POWERPC64
3983 if (OS_MISSING_POWERPC64)
3984 set_masks &= ~OPTION_MASK_POWERPC64;
3985 #endif
3986 #ifdef OS_MISSING_ALTIVEC
3987 if (OS_MISSING_ALTIVEC)
3988 set_masks &= ~(OPTION_MASK_ALTIVEC | OPTION_MASK_VSX
3989 | OTHER_VSX_VECTOR_MASKS);
3990 #endif
3992   /* Don't let the processor default override flags that were given
       explicitly.  */
3993 set_masks &= ~rs6000_isa_flags_explicit;
3995   /* Process the -mcpu=<xxx> and -mtune=<xxx> arguments.  If the user changed
3996      the cpu in a target attribute or pragma, but did not specify a tuning
3997      option, use the cpu for the tuning option rather than the option specified
3998      with -mtune on the command line.  Process a '--with-cpu' configuration
3999      request as an implicit -mcpu.  */
4000 if (rs6000_cpu_index >= 0)
4002 cpu_index = rs6000_cpu_index;
4003 have_cpu = true;
4005 else if (main_target_opt != NULL && main_target_opt->x_rs6000_cpu_index >= 0)
4007 rs6000_cpu_index = cpu_index = main_target_opt->x_rs6000_cpu_index;
4008 have_cpu = true;
4010 else if (implicit_cpu)
4012 rs6000_cpu_index = cpu_index = rs6000_cpu_name_lookup (implicit_cpu);
4013 have_cpu = true;
4015 else
4017 /* PowerPC 64-bit LE requires at least ISA 2.07. */
4018 const char *default_cpu = ((!TARGET_POWERPC64)
4019 ? "powerpc"
4020 : ((BYTES_BIG_ENDIAN)
4021 ? "powerpc64"
4022 : "powerpc64le"));
4024 rs6000_cpu_index = cpu_index = rs6000_cpu_name_lookup (default_cpu);
4025 have_cpu = false;
4028 gcc_assert (cpu_index >= 0);
4030 if (have_cpu)
4032 #ifndef HAVE_AS_POWER9
4033 if (processor_target_table[rs6000_cpu_index].processor
4034 == PROCESSOR_POWER9)
4036 have_cpu = false;
4037 warning (0, "will not generate power9 instructions because "
4038 "assembler lacks power9 support");
4040 #endif
4041 #ifndef HAVE_AS_POWER8
4042 if (processor_target_table[rs6000_cpu_index].processor
4043 == PROCESSOR_POWER8)
4045 have_cpu = false;
4046 warning (0, "will not generate power8 instructions because "
4047 "assembler lacks power8 support");
4049 #endif
4050 #ifndef HAVE_AS_POPCNTD
4051 if (processor_target_table[rs6000_cpu_index].processor
4052 == PROCESSOR_POWER7)
4054 have_cpu = false;
4055 warning (0, "will not generate power7 instructions because "
4056 "assembler lacks power7 support");
4058 #endif
4059 #ifndef HAVE_AS_DFP
4060 if (processor_target_table[rs6000_cpu_index].processor
4061 == PROCESSOR_POWER6)
4063 have_cpu = false;
4064 warning (0, "will not generate power6 instructions because "
4065 "assembler lacks power6 support");
4067 #endif
4068 #ifndef HAVE_AS_POPCNTB
4069 if (processor_target_table[rs6000_cpu_index].processor
4070 == PROCESSOR_POWER5)
4072 have_cpu = false;
4073 warning (0, "will not generate power5 instructions because "
4074 "assembler lacks power5 support");
4076 #endif
4078 if (!have_cpu)
4080 /* PowerPC 64-bit LE requires at least ISA 2.07. */
4081 const char *default_cpu = (!TARGET_POWERPC64
4082 ? "powerpc"
4083 : (BYTES_BIG_ENDIAN
4084 ? "powerpc64"
4085 : "powerpc64le"));
4087 rs6000_cpu_index = cpu_index = rs6000_cpu_name_lookup (default_cpu);
4091 /* If we have a cpu, either through an explicit -mcpu=<xxx> or if the
4092 compiler was configured with --with-cpu=<xxx>, replace all of the ISA bits
4093 with those from the cpu, except for options that were explicitly set. If
4094 we don't have a cpu, do not override the target bits set in
4095 TARGET_DEFAULT. */
4096 if (have_cpu)
4098 rs6000_isa_flags &= ~set_masks;
4099 rs6000_isa_flags |= (processor_target_table[cpu_index].target_enable
4100 & set_masks);
4102 else
4104 /* If no -mcpu=<xxx>, inherit any default options that were cleared via
4105 POWERPC_MASKS. Originally, TARGET_DEFAULT was used to initialize
4106 target_flags via the TARGET_DEFAULT_TARGET_FLAGS hook. When we switched
4107 to using rs6000_isa_flags, we need to do the initialization here.
4109 If there is a TARGET_DEFAULT, use that. Otherwise fall back to using
4110 -mcpu=powerpc, -mcpu=powerpc64, or -mcpu=powerpc64le defaults. */
4111 HOST_WIDE_INT flags = ((TARGET_DEFAULT) ? TARGET_DEFAULT
4112 : processor_target_table[cpu_index].target_enable);
4113 rs6000_isa_flags |= (flags & ~rs6000_isa_flags_explicit);
4116 if (rs6000_tune_index >= 0)
4117 tune_index = rs6000_tune_index;
4118 else if (have_cpu)
4119 rs6000_tune_index = tune_index = cpu_index;
4120 else
4122 size_t i;
4123 enum processor_type tune_proc
4124 = (TARGET_POWERPC64 ? PROCESSOR_DEFAULT64 : PROCESSOR_DEFAULT);
4126 tune_index = -1;
4127 for (i = 0; i < ARRAY_SIZE (processor_target_table); i++)
4128 if (processor_target_table[i].processor == tune_proc)
4130 rs6000_tune_index = tune_index = i;
4131 break;
4135 gcc_assert (tune_index >= 0);
4136 rs6000_cpu = processor_target_table[tune_index].processor;
4138 if (rs6000_cpu == PROCESSOR_PPCE300C2 || rs6000_cpu == PROCESSOR_PPCE300C3
4139 || rs6000_cpu == PROCESSOR_PPCE500MC || rs6000_cpu == PROCESSOR_PPCE500MC64
4140 || rs6000_cpu == PROCESSOR_PPCE5500)
4142 if (TARGET_ALTIVEC)
4143 error ("AltiVec not supported in this target");
4146 /* If we are optimizing big endian systems for space, use the load/store
4147 multiple and string instructions. */
4148 if (BYTES_BIG_ENDIAN && optimize_size)
4149 rs6000_isa_flags |= ~rs6000_isa_flags_explicit & (OPTION_MASK_MULTIPLE
4150 | OPTION_MASK_STRING);
4152   /* Don't allow -mmultiple or -mstring on little endian systems
4153      unless the cpu is a 750, because the hardware doesn't support the
4154      instructions used in little endian mode, and they cause an alignment
4155      trap.  The 750 does not cause an alignment trap (except when the
4156      target is unaligned).  */
4158 if (!BYTES_BIG_ENDIAN && rs6000_cpu != PROCESSOR_PPC750)
4160 if (TARGET_MULTIPLE)
4162 rs6000_isa_flags &= ~OPTION_MASK_MULTIPLE;
4163 if ((rs6000_isa_flags_explicit & OPTION_MASK_MULTIPLE) != 0)
4164 warning (0, "-mmultiple is not supported on little endian systems");
4167 if (TARGET_STRING)
4169 rs6000_isa_flags &= ~OPTION_MASK_STRING;
4170 if ((rs6000_isa_flags_explicit & OPTION_MASK_STRING) != 0)
4171 warning (0, "-mstring is not supported on little endian systems");
4175 /* If little-endian, default to -mstrict-align on older processors.
4176 Testing for htm matches power8 and later. */
4177 if (!BYTES_BIG_ENDIAN
4178 && !(processor_target_table[tune_index].target_enable & OPTION_MASK_HTM))
4179 rs6000_isa_flags |= ~rs6000_isa_flags_explicit & OPTION_MASK_STRICT_ALIGN;
4181 /* -maltivec={le,be} implies -maltivec. */
4182 if (rs6000_altivec_element_order != 0)
4183 rs6000_isa_flags |= OPTION_MASK_ALTIVEC;
4185 /* Disallow -maltivec=le in big endian mode for now. This is not
4186 known to be useful for anyone. */
4187 if (BYTES_BIG_ENDIAN && rs6000_altivec_element_order == 1)
4189 warning (0, N_("-maltivec=le not allowed for big-endian targets"));
4190 rs6000_altivec_element_order = 0;
4193 /* Add some warnings for VSX. */
4194 if (TARGET_VSX)
4196 const char *msg = NULL;
4197 if (!TARGET_HARD_FLOAT || !TARGET_SINGLE_FLOAT || !TARGET_DOUBLE_FLOAT)
4199 if (rs6000_isa_flags_explicit & OPTION_MASK_VSX)
4200 msg = N_("-mvsx requires hardware floating point");
4201 else
4203 rs6000_isa_flags &= ~ OPTION_MASK_VSX;
4204 rs6000_isa_flags_explicit |= OPTION_MASK_VSX;
4207 else if (TARGET_PAIRED_FLOAT)
4208 msg = N_("-mvsx and -mpaired are incompatible");
4209 else if (TARGET_AVOID_XFORM > 0)
4210 msg = N_("-mvsx needs indexed addressing");
4211 else if (!TARGET_ALTIVEC && (rs6000_isa_flags_explicit
4212 & OPTION_MASK_ALTIVEC))
4214 if (rs6000_isa_flags_explicit & OPTION_MASK_VSX)
4215 msg = N_("-mvsx and -mno-altivec are incompatible");
4216 else
4217 msg = N_("-mno-altivec disables vsx");
4220 if (msg)
4222 warning (0, msg);
4223 rs6000_isa_flags &= ~ OPTION_MASK_VSX;
4224 rs6000_isa_flags_explicit |= OPTION_MASK_VSX;
4228 /* If hard-float/altivec/vsx were explicitly turned off then don't allow
4229 the -mcpu setting to enable options that conflict. */
4230 if ((!TARGET_HARD_FLOAT || !TARGET_ALTIVEC || !TARGET_VSX)
4231 && (rs6000_isa_flags_explicit & (OPTION_MASK_SOFT_FLOAT
4232 | OPTION_MASK_ALTIVEC
4233 | OPTION_MASK_VSX)) != 0)
4234 rs6000_isa_flags &= ~((OPTION_MASK_P8_VECTOR | OPTION_MASK_CRYPTO
4235 | OPTION_MASK_DIRECT_MOVE)
4236 & ~rs6000_isa_flags_explicit);
4238 if (TARGET_DEBUG_REG || TARGET_DEBUG_TARGET)
4239 rs6000_print_isa_options (stderr, 0, "before defaults", rs6000_isa_flags);
4241 /* Handle explicit -mno-{altivec,vsx,power8-vector,power9-vector} and turn
4242 off all of the options that depend on those flags. */
4243 ignore_masks = rs6000_disable_incompatible_switches ();
4245 /* For the newer switches (vsx, dfp, etc.) set some of the older options,
4246      unless the user explicitly used -mno-<option> to disable the code.  */
4247 if (TARGET_P9_VECTOR || TARGET_MODULO || TARGET_P9_DFORM_SCALAR
4248 || TARGET_P9_DFORM_VECTOR || TARGET_P9_DFORM_BOTH > 0)
4249 rs6000_isa_flags |= (ISA_3_0_MASKS_SERVER & ~ignore_masks);
4250 else if (TARGET_P9_MINMAX)
4252 if (have_cpu)
4254 if (cpu_index == PROCESSOR_POWER9)
4256 /* legacy behavior: allow -mcpu=power9 with certain
4257 capabilities explicitly disabled. */
4258 rs6000_isa_flags |= (ISA_3_0_MASKS_SERVER & ~ignore_masks);
4260 else
4261 error ("Power9 target option is incompatible with -mcpu=<xxx> for "
4262 "<xxx> less than power9");
4264 else if ((ISA_3_0_MASKS_SERVER & rs6000_isa_flags_explicit)
4265 != (ISA_3_0_MASKS_SERVER & rs6000_isa_flags
4266 & rs6000_isa_flags_explicit))
4267 /* Enforce that none of the ISA_3_0_MASKS_SERVER flags
4268 were explicitly cleared. */
4269 error ("-mpower9-minmax incompatible with explicitly disabled options");
4270 else
4271 rs6000_isa_flags |= ISA_3_0_MASKS_SERVER;
4273 else if (TARGET_P8_VECTOR || TARGET_DIRECT_MOVE || TARGET_CRYPTO)
4274 rs6000_isa_flags |= (ISA_2_7_MASKS_SERVER & ~ignore_masks);
4275 else if (TARGET_VSX)
4276 rs6000_isa_flags |= (ISA_2_6_MASKS_SERVER & ~ignore_masks);
4277 else if (TARGET_POPCNTD)
4278 rs6000_isa_flags |= (ISA_2_6_MASKS_EMBEDDED & ~ignore_masks);
4279 else if (TARGET_DFP)
4280 rs6000_isa_flags |= (ISA_2_5_MASKS_SERVER & ~ignore_masks);
4281 else if (TARGET_CMPB)
4282 rs6000_isa_flags |= (ISA_2_5_MASKS_EMBEDDED & ~ignore_masks);
4283 else if (TARGET_FPRND)
4284 rs6000_isa_flags |= (ISA_2_4_MASKS & ~ignore_masks);
4285 else if (TARGET_POPCNTB)
4286 rs6000_isa_flags |= (ISA_2_2_MASKS & ~ignore_masks);
4287 else if (TARGET_ALTIVEC)
4288 rs6000_isa_flags |= (OPTION_MASK_PPC_GFXOPT & ~ignore_masks);
4290 if (TARGET_CRYPTO && !TARGET_ALTIVEC)
4292 if (rs6000_isa_flags_explicit & OPTION_MASK_CRYPTO)
4293 error ("-mcrypto requires -maltivec");
4294 rs6000_isa_flags &= ~OPTION_MASK_CRYPTO;
4297 if (TARGET_DIRECT_MOVE && !TARGET_VSX)
4299 if (rs6000_isa_flags_explicit & OPTION_MASK_DIRECT_MOVE)
4300 error ("-mdirect-move requires -mvsx");
4301 rs6000_isa_flags &= ~OPTION_MASK_DIRECT_MOVE;
4304 if (TARGET_P8_VECTOR && !TARGET_ALTIVEC)
4306 if (rs6000_isa_flags_explicit & OPTION_MASK_P8_VECTOR)
4307 error ("-mpower8-vector requires -maltivec");
4308 rs6000_isa_flags &= ~OPTION_MASK_P8_VECTOR;
4311 if (TARGET_P8_VECTOR && !TARGET_VSX)
4313 if ((rs6000_isa_flags_explicit & OPTION_MASK_P8_VECTOR)
4314 && (rs6000_isa_flags_explicit & OPTION_MASK_VSX))
4315 error ("-mpower8-vector requires -mvsx");
4316 else if ((rs6000_isa_flags_explicit & OPTION_MASK_P8_VECTOR) == 0)
4318 rs6000_isa_flags &= ~OPTION_MASK_P8_VECTOR;
4319 if (rs6000_isa_flags_explicit & OPTION_MASK_VSX)
4320 rs6000_isa_flags_explicit |= OPTION_MASK_P8_VECTOR;
4322 else
4324 /* OPTION_MASK_P8_VECTOR is explicit, and OPTION_MASK_VSX is
4325 not explicit. */
4326 rs6000_isa_flags |= OPTION_MASK_VSX;
4327 rs6000_isa_flags_explicit |= OPTION_MASK_VSX;
4331 if (TARGET_VSX_TIMODE && !TARGET_VSX)
4333 if (rs6000_isa_flags_explicit & OPTION_MASK_VSX_TIMODE)
4334 error ("-mvsx-timode requires -mvsx");
4335 rs6000_isa_flags &= ~OPTION_MASK_VSX_TIMODE;
4338 if (TARGET_DFP && !TARGET_HARD_FLOAT)
4340 if (rs6000_isa_flags_explicit & OPTION_MASK_DFP)
4341 error ("-mhard-dfp requires -mhard-float");
4342 rs6000_isa_flags &= ~OPTION_MASK_DFP;
4345   /* The quad memory instructions only work in 64-bit mode.  In 32-bit mode,
4346      silently turn off quad memory mode.  */
4347 if ((TARGET_QUAD_MEMORY || TARGET_QUAD_MEMORY_ATOMIC) && !TARGET_POWERPC64)
4349 if ((rs6000_isa_flags_explicit & OPTION_MASK_QUAD_MEMORY) != 0)
4350 warning (0, N_("-mquad-memory requires 64-bit mode"));
4352 if ((rs6000_isa_flags_explicit & OPTION_MASK_QUAD_MEMORY_ATOMIC) != 0)
4353 warning (0, N_("-mquad-memory-atomic requires 64-bit mode"));
4355 rs6000_isa_flags &= ~(OPTION_MASK_QUAD_MEMORY
4356 | OPTION_MASK_QUAD_MEMORY_ATOMIC);
4359   /* Non-atomic quad memory load/store instructions are disabled for little
4360      endian, since the words are reversed, but atomic operations can still be
4361      done by swapping the words.  */
4362 if (TARGET_QUAD_MEMORY && !WORDS_BIG_ENDIAN)
4364 if ((rs6000_isa_flags_explicit & OPTION_MASK_QUAD_MEMORY) != 0)
4365 warning (0, N_("-mquad-memory is not available in little endian mode"));
4367 rs6000_isa_flags &= ~OPTION_MASK_QUAD_MEMORY;
4370   /* Assume that if the user asked for normal quad memory instructions, they
4371      want the atomic versions as well, unless they explicitly told us not to
4372      use quad-word atomic instructions.  */
4373 if (TARGET_QUAD_MEMORY
4374 && !TARGET_QUAD_MEMORY_ATOMIC
4375 && ((rs6000_isa_flags_explicit & OPTION_MASK_QUAD_MEMORY_ATOMIC) == 0))
4376 rs6000_isa_flags |= OPTION_MASK_QUAD_MEMORY_ATOMIC;
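  /* So, on a 64-bit big-endian target, "-mquad-memory" by itself is expected
     to behave like "-mquad-memory -mquad-memory-atomic", while an explicit
     -mno-quad-memory-atomic keeps the atomic forms off.  */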
4378 /* Enable power8 fusion if we are tuning for power8, even if we aren't
4379 generating power8 instructions. */
4380 if (!(rs6000_isa_flags_explicit & OPTION_MASK_P8_FUSION))
4381 rs6000_isa_flags |= (processor_target_table[tune_index].target_enable
4382 & OPTION_MASK_P8_FUSION);
4384 /* Setting additional fusion flags turns on base fusion. */
4385 if (!TARGET_P8_FUSION && (TARGET_P8_FUSION_SIGN || TARGET_TOC_FUSION))
4387 if (rs6000_isa_flags_explicit & OPTION_MASK_P8_FUSION)
4389 if (TARGET_P8_FUSION_SIGN)
4390 error ("-mpower8-fusion-sign requires -mpower8-fusion");
4392 if (TARGET_TOC_FUSION)
4393 error ("-mtoc-fusion requires -mpower8-fusion");
4395 rs6000_isa_flags &= ~OPTION_MASK_P8_FUSION;
4397 else
4398 rs6000_isa_flags |= OPTION_MASK_P8_FUSION;
4401   /* Power9 fusion is a superset of power8 fusion.  */
4402 if (TARGET_P9_FUSION && !TARGET_P8_FUSION)
4404 if (rs6000_isa_flags_explicit & OPTION_MASK_P8_FUSION)
4406 /* We prefer to not mention undocumented options in
4407 error messages. However, if users have managed to select
4408 power9-fusion without selecting power8-fusion, they
4409 already know about undocumented flags. */
4410 error ("-mpower9-fusion requires -mpower8-fusion");
4411 rs6000_isa_flags &= ~OPTION_MASK_P9_FUSION;
4413 else
4414 rs6000_isa_flags |= OPTION_MASK_P8_FUSION;
4417 /* Enable power9 fusion if we are tuning for power9, even if we aren't
4418 generating power9 instructions. */
4419 if (!(rs6000_isa_flags_explicit & OPTION_MASK_P9_FUSION))
4420 rs6000_isa_flags |= (processor_target_table[tune_index].target_enable
4421 & OPTION_MASK_P9_FUSION);
4423   /* Power8 does not fuse sign-extended loads with the addis instruction.  If
4424      we are optimizing at high levels for speed, convert a sign-extended load
4425      into a zero-extending load and an explicit sign extension.  */
4426 if (TARGET_P8_FUSION
4427 && !(rs6000_isa_flags_explicit & OPTION_MASK_P8_FUSION_SIGN)
4428 && optimize_function_for_speed_p (cfun)
4429 && optimize >= 3)
4430 rs6000_isa_flags |= OPTION_MASK_P8_FUSION_SIGN;
4432   /* TOC fusion requires 64-bit mode and a medium/large code model.  */
4433 if (TARGET_TOC_FUSION && !TARGET_POWERPC64)
4435 rs6000_isa_flags &= ~OPTION_MASK_TOC_FUSION;
4436 if ((rs6000_isa_flags_explicit & OPTION_MASK_TOC_FUSION) != 0)
4437 warning (0, N_("-mtoc-fusion requires 64-bit"));
4440 if (TARGET_TOC_FUSION && (TARGET_CMODEL == CMODEL_SMALL))
4442 rs6000_isa_flags &= ~OPTION_MASK_TOC_FUSION;
4443 if ((rs6000_isa_flags_explicit & OPTION_MASK_TOC_FUSION) != 0)
4444 warning (0, N_("-mtoc-fusion requires medium/large code model"));
4447   /* Turn on -mtoc-fusion by default if we have p8-fusion and a 64-bit
4448      medium/large code model.  */
4449 if (TARGET_P8_FUSION && !TARGET_TOC_FUSION && TARGET_POWERPC64
4450 && (TARGET_CMODEL != CMODEL_SMALL)
4451 && !(rs6000_isa_flags_explicit & OPTION_MASK_TOC_FUSION))
4452 rs6000_isa_flags |= OPTION_MASK_TOC_FUSION;
4454 /* ISA 3.0 vector instructions include ISA 2.07. */
4455 if (TARGET_P9_VECTOR && !TARGET_P8_VECTOR)
4457 /* We prefer to not mention undocumented options in
4458 error messages. However, if users have managed to select
4459 power9-vector without selecting power8-vector, they
4460 already know about undocumented flags. */
4461 if ((rs6000_isa_flags_explicit & OPTION_MASK_P9_VECTOR) &&
4462 (rs6000_isa_flags_explicit & OPTION_MASK_P8_VECTOR))
4463 error ("-mpower9-vector requires -mpower8-vector");
4464 else if ((rs6000_isa_flags_explicit & OPTION_MASK_P9_VECTOR) == 0)
4466 rs6000_isa_flags &= ~OPTION_MASK_P9_VECTOR;
4467 if (rs6000_isa_flags_explicit & OPTION_MASK_P8_VECTOR)
4468 rs6000_isa_flags_explicit |= OPTION_MASK_P9_VECTOR;
4470 else
4472 /* OPTION_MASK_P9_VECTOR is explicit and
4473 OPTION_MASK_P8_VECTOR is not explicit. */
4474 rs6000_isa_flags |= OPTION_MASK_P8_VECTOR;
4475 rs6000_isa_flags_explicit |= OPTION_MASK_P8_VECTOR;
4479 /* -mpower9-dform turns on both -mpower9-dform-scalar and
4480 -mpower9-dform-vector. */
4481 if (TARGET_P9_DFORM_BOTH > 0)
4483 if (!(rs6000_isa_flags_explicit & OPTION_MASK_P9_DFORM_VECTOR))
4484 rs6000_isa_flags |= OPTION_MASK_P9_DFORM_VECTOR;
4486 if (!(rs6000_isa_flags_explicit & OPTION_MASK_P9_DFORM_SCALAR))
4487 rs6000_isa_flags |= OPTION_MASK_P9_DFORM_SCALAR;
4489 else if (TARGET_P9_DFORM_BOTH == 0)
4491 if (!(rs6000_isa_flags_explicit & OPTION_MASK_P9_DFORM_VECTOR))
4492 rs6000_isa_flags &= ~OPTION_MASK_P9_DFORM_VECTOR;
4494 if (!(rs6000_isa_flags_explicit & OPTION_MASK_P9_DFORM_SCALAR))
4495 rs6000_isa_flags &= ~OPTION_MASK_P9_DFORM_SCALAR;
4498 /* ISA 3.0 D-form instructions require p9-vector and upper-regs. */
4499 if ((TARGET_P9_DFORM_SCALAR || TARGET_P9_DFORM_VECTOR) && !TARGET_P9_VECTOR)
4501 /* We prefer to not mention undocumented options in
4502 error messages. However, if users have managed to select
4503 power9-dform without selecting power9-vector, they
4504 already know about undocumented flags. */
4505 if ((rs6000_isa_flags_explicit & OPTION_MASK_P9_VECTOR)
4506 && (rs6000_isa_flags_explicit & (OPTION_MASK_P9_DFORM_SCALAR
4507 | OPTION_MASK_P9_DFORM_VECTOR)))
4508 error ("-mpower9-dform requires -mpower9-vector");
4509 else if (rs6000_isa_flags_explicit & OPTION_MASK_P9_VECTOR)
4511 rs6000_isa_flags &=
4512 ~(OPTION_MASK_P9_DFORM_SCALAR | OPTION_MASK_P9_DFORM_VECTOR);
4513 rs6000_isa_flags_explicit |=
4514 (OPTION_MASK_P9_DFORM_SCALAR | OPTION_MASK_P9_DFORM_VECTOR);
4516 else
4518 /* We know that OPTION_MASK_P9_VECTOR is not explicit and
4519 	     OPTION_MASK_P9_DFORM_SCALAR or OPTION_MASK_P9_DFORM_VECTOR
4520 may be explicit. */
4521 rs6000_isa_flags |= OPTION_MASK_P9_VECTOR;
4522 rs6000_isa_flags_explicit |= OPTION_MASK_P9_VECTOR;
4526 if ((TARGET_P9_DFORM_SCALAR || TARGET_P9_DFORM_VECTOR)
4527 && !TARGET_DIRECT_MOVE)
4529 /* We prefer to not mention undocumented options in
4530 error messages. However, if users have managed to select
4531 power9-dform without selecting direct-move, they
4532 already know about undocumented flags. */
4533 if ((rs6000_isa_flags_explicit & OPTION_MASK_DIRECT_MOVE)
4534 && ((rs6000_isa_flags_explicit & OPTION_MASK_P9_DFORM_VECTOR) ||
4535 (rs6000_isa_flags_explicit & OPTION_MASK_P9_DFORM_SCALAR) ||
4536 (TARGET_P9_DFORM_BOTH == 1)))
4537 error ("-mpower9-dform, -mpower9-dform-vector, -mpower9-dform-scalar"
4538 " require -mdirect-move");
4539 else if ((rs6000_isa_flags_explicit & OPTION_MASK_DIRECT_MOVE) == 0)
4541 rs6000_isa_flags |= OPTION_MASK_DIRECT_MOVE;
4542 rs6000_isa_flags_explicit |= OPTION_MASK_DIRECT_MOVE;
4544 else
4546 rs6000_isa_flags &=
4547 ~(OPTION_MASK_P9_DFORM_SCALAR | OPTION_MASK_P9_DFORM_VECTOR);
4548 rs6000_isa_flags_explicit |=
4549 (OPTION_MASK_P9_DFORM_SCALAR | OPTION_MASK_P9_DFORM_VECTOR);
4553 /* Enable -mvsx-timode by default if VSX. */
4554 if (TARGET_VSX && !TARGET_VSX_TIMODE
4555 && (rs6000_isa_flags_explicit & OPTION_MASK_VSX_TIMODE) == 0)
4556 rs6000_isa_flags |= OPTION_MASK_VSX_TIMODE;
4558   /* Explicitly set -mallow-movmisalign on if we have full ISA 2.07
4559      support.  If we only have ISA 2.06 support, and the user did not specify
4560      the switch, leave it set to -1 so the movmisalign patterns are enabled,
4561      but we don't enable the full vectorization support.  */
4562 if (TARGET_ALLOW_MOVMISALIGN == -1 && TARGET_P8_VECTOR && TARGET_DIRECT_MOVE)
4563 TARGET_ALLOW_MOVMISALIGN = 1;
4565 else if (TARGET_ALLOW_MOVMISALIGN && !TARGET_VSX)
4567 if (TARGET_ALLOW_MOVMISALIGN > 0
4568 && global_options_set.x_TARGET_ALLOW_MOVMISALIGN)
4569 error ("-mallow-movmisalign requires -mvsx");
4571 TARGET_ALLOW_MOVMISALIGN = 0;
4574 /* Determine when unaligned vector accesses are permitted, and when
4575 they are preferred over masked Altivec loads. Note that if
4576 TARGET_ALLOW_MOVMISALIGN has been disabled by the user, then
4577 TARGET_EFFICIENT_UNALIGNED_VSX must be as well. The converse is
4578 not true. */
4579 if (TARGET_EFFICIENT_UNALIGNED_VSX)
4581 if (!TARGET_VSX)
4583 if (rs6000_isa_flags_explicit & OPTION_MASK_EFFICIENT_UNALIGNED_VSX)
4584 error ("-mefficient-unaligned-vsx requires -mvsx");
4586 rs6000_isa_flags &= ~OPTION_MASK_EFFICIENT_UNALIGNED_VSX;
4589 else if (!TARGET_ALLOW_MOVMISALIGN)
4591 if (rs6000_isa_flags_explicit & OPTION_MASK_EFFICIENT_UNALIGNED_VSX)
4592 error ("-mefficient-unaligned-vsx requires -mallow-movmisalign");
4594 rs6000_isa_flags &= ~OPTION_MASK_EFFICIENT_UNALIGNED_VSX;
4598 /* Set long double size before the IEEE 128-bit tests. */
4599 if (!global_options_set.x_rs6000_long_double_type_size)
4601 if (main_target_opt != NULL
4602 && (main_target_opt->x_rs6000_long_double_type_size
4603 != RS6000_DEFAULT_LONG_DOUBLE_SIZE))
4604 error ("target attribute or pragma changes long double size");
4605 else
4606 rs6000_long_double_type_size = RS6000_DEFAULT_LONG_DOUBLE_SIZE;
4609 /* Set -mabi=ieeelongdouble on some old targets. Note, AIX and Darwin
4610 explicitly redefine TARGET_IEEEQUAD to 0, so those systems will not
4611 pick up this default. */
4612 #if !defined (POWERPC_LINUX) && !defined (POWERPC_FREEBSD)
4613 if (!global_options_set.x_rs6000_ieeequad)
4614 rs6000_ieeequad = 1;
4615 #endif
4617 /* Enable the default support for IEEE 128-bit floating point on Linux VSX
4618      systems, but don't enable the __float128 keyword.  */
4619 if (TARGET_VSX && TARGET_LONG_DOUBLE_128
4620 && (TARGET_FLOAT128_ENABLE_TYPE || TARGET_IEEEQUAD)
4621 && ((rs6000_isa_flags_explicit & OPTION_MASK_FLOAT128_TYPE) == 0))
4622 rs6000_isa_flags |= OPTION_MASK_FLOAT128_TYPE;
4624 /* IEEE 128-bit floating point requires VSX support. */
4625 if (!TARGET_VSX)
4627 if (TARGET_FLOAT128_KEYWORD)
4629 if ((rs6000_isa_flags_explicit & OPTION_MASK_FLOAT128_KEYWORD) != 0)
4630 error ("-mfloat128 requires VSX support");
4632 rs6000_isa_flags &= ~(OPTION_MASK_FLOAT128_TYPE
4633 | OPTION_MASK_FLOAT128_KEYWORD
4634 | OPTION_MASK_FLOAT128_HW);
4637 else if (TARGET_FLOAT128_TYPE)
4639 if ((rs6000_isa_flags_explicit & OPTION_MASK_FLOAT128_TYPE) != 0)
4640 error ("-mfloat128-type requires VSX support");
4642 rs6000_isa_flags &= ~(OPTION_MASK_FLOAT128_TYPE
4643 | OPTION_MASK_FLOAT128_KEYWORD
4644 | OPTION_MASK_FLOAT128_HW);
4648 /* -mfloat128 and -mfloat128-hardware internally require the underlying IEEE
4649 128-bit floating point support to be enabled. */
4650 if (!TARGET_FLOAT128_TYPE)
4652 if (TARGET_FLOAT128_KEYWORD)
4654 if ((rs6000_isa_flags_explicit & OPTION_MASK_FLOAT128_KEYWORD) != 0)
4656 error ("-mfloat128 requires -mfloat128-type");
4657 rs6000_isa_flags &= ~(OPTION_MASK_FLOAT128_TYPE
4658 | OPTION_MASK_FLOAT128_KEYWORD
4659 | OPTION_MASK_FLOAT128_HW);
4661 else
4662 rs6000_isa_flags |= OPTION_MASK_FLOAT128_TYPE;
4665 if (TARGET_FLOAT128_HW)
4667 if ((rs6000_isa_flags_explicit & OPTION_MASK_FLOAT128_HW) != 0)
4669 error ("-mfloat128-hardware requires -mfloat128-type");
4670 rs6000_isa_flags &= ~OPTION_MASK_FLOAT128_HW;
4672 else
4673 rs6000_isa_flags &= ~(OPTION_MASK_FLOAT128_TYPE
4674 | OPTION_MASK_FLOAT128_KEYWORD
4675 | OPTION_MASK_FLOAT128_HW);
4679 /* If we have -mfloat128-type and full ISA 3.0 support, enable
4680 -mfloat128-hardware by default. However, don't enable the __float128
4681 keyword. If the user explicitly turned on -mfloat128-hardware, enable the
4682 -mfloat128 option as well if it was not already set. */
4683 if (TARGET_FLOAT128_TYPE && !TARGET_FLOAT128_HW
4684 && (rs6000_isa_flags & ISA_3_0_MASKS_IEEE) == ISA_3_0_MASKS_IEEE
4685 && !(rs6000_isa_flags_explicit & OPTION_MASK_FLOAT128_HW))
4686 rs6000_isa_flags |= OPTION_MASK_FLOAT128_HW;
4688 if (TARGET_FLOAT128_HW
4689 && (rs6000_isa_flags & ISA_3_0_MASKS_IEEE) != ISA_3_0_MASKS_IEEE)
4691 if ((rs6000_isa_flags_explicit & OPTION_MASK_FLOAT128_HW) != 0)
4692 error ("-mfloat128-hardware requires full ISA 3.0 support");
4694 rs6000_isa_flags &= ~OPTION_MASK_FLOAT128_HW;
4697 if (TARGET_FLOAT128_HW && !TARGET_64BIT)
4699 if ((rs6000_isa_flags_explicit & OPTION_MASK_FLOAT128_HW) != 0)
4700 error ("-mfloat128-hardware requires -m64");
4702 rs6000_isa_flags &= ~OPTION_MASK_FLOAT128_HW;
4705 if (TARGET_FLOAT128_HW && !TARGET_FLOAT128_KEYWORD
4706 && (rs6000_isa_flags_explicit & OPTION_MASK_FLOAT128_HW) != 0
4707 && (rs6000_isa_flags_explicit & OPTION_MASK_FLOAT128_KEYWORD) == 0)
4708 rs6000_isa_flags |= OPTION_MASK_FLOAT128_KEYWORD;
4710 /* Print the options after updating the defaults. */
4711 if (TARGET_DEBUG_REG || TARGET_DEBUG_TARGET)
4712 rs6000_print_isa_options (stderr, 0, "after defaults", rs6000_isa_flags);
4714 /* E500mc does "better" if we inline more aggressively. Respect the
4715 user's opinion, though. */
4716 if (rs6000_block_move_inline_limit == 0
4717 && (rs6000_cpu == PROCESSOR_PPCE500MC
4718 || rs6000_cpu == PROCESSOR_PPCE500MC64
4719 || rs6000_cpu == PROCESSOR_PPCE5500
4720 || rs6000_cpu == PROCESSOR_PPCE6500))
4721 rs6000_block_move_inline_limit = 128;
4723 /* store_one_arg depends on expand_block_move to handle at least the
4724 size of reg_parm_stack_space. */
4725 if (rs6000_block_move_inline_limit < (TARGET_POWERPC64 ? 64 : 32))
4726 rs6000_block_move_inline_limit = (TARGET_POWERPC64 ? 64 : 32);
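  /* E.g., assuming the option defaults to 0 when not given, plain
     -mcpu=e500mc gets a limit of 128 bytes from the test above, and any
     smaller request is raised here to the 64-byte (or 32-byte in 32-bit
     mode) floor.  */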
4728 if (global_init_p)
4730 /* If the appropriate debug option is enabled, replace the target hooks
4731 	 with debug versions that call the real version and then print
4732 debugging information. */
4733 if (TARGET_DEBUG_COST)
4735 targetm.rtx_costs = rs6000_debug_rtx_costs;
4736 targetm.address_cost = rs6000_debug_address_cost;
4737 targetm.sched.adjust_cost = rs6000_debug_adjust_cost;
4740 if (TARGET_DEBUG_ADDR)
4742 targetm.legitimate_address_p = rs6000_debug_legitimate_address_p;
4743 targetm.legitimize_address = rs6000_debug_legitimize_address;
4744 rs6000_secondary_reload_class_ptr
4745 = rs6000_debug_secondary_reload_class;
4746 rs6000_secondary_memory_needed_ptr
4747 = rs6000_debug_secondary_memory_needed;
4748 rs6000_cannot_change_mode_class_ptr
4749 = rs6000_debug_cannot_change_mode_class;
4750 rs6000_preferred_reload_class_ptr
4751 = rs6000_debug_preferred_reload_class;
4752 rs6000_legitimize_reload_address_ptr
4753 = rs6000_debug_legitimize_reload_address;
4754 rs6000_mode_dependent_address_ptr
4755 = rs6000_debug_mode_dependent_address;
4758 if (rs6000_veclibabi_name)
4760 if (strcmp (rs6000_veclibabi_name, "mass") == 0)
4761 rs6000_veclib_handler = rs6000_builtin_vectorized_libmass;
4762 else
4764 error ("unknown vectorization library ABI type (%s) for "
4765 "-mveclibabi= switch", rs6000_veclibabi_name);
4766 ret = false;
4771 /* Disable VSX and Altivec silently if the user switched cpus to power7 in a
4772 target attribute or pragma which automatically enables both options,
4773 unless the altivec ABI was set. This is set by default for 64-bit, but
4774 not for 32-bit. */
4775 if (main_target_opt != NULL && !main_target_opt->x_rs6000_altivec_abi)
4776 rs6000_isa_flags &= ~((OPTION_MASK_VSX | OPTION_MASK_ALTIVEC
4777 | OPTION_MASK_FLOAT128_TYPE
4778 | OPTION_MASK_FLOAT128_KEYWORD)
4779 & ~rs6000_isa_flags_explicit);
4781 /* Enable Altivec ABI for AIX -maltivec. */
4782 if (TARGET_XCOFF && (TARGET_ALTIVEC || TARGET_VSX))
4784 if (main_target_opt != NULL && !main_target_opt->x_rs6000_altivec_abi)
4785 error ("target attribute or pragma changes AltiVec ABI");
4786 else
4787 rs6000_altivec_abi = 1;
4790 /* The AltiVec ABI is the default for PowerPC-64 GNU/Linux. For
4791 PowerPC-32 GNU/Linux, -maltivec implies the AltiVec ABI. It can
4792 be explicitly overridden in either case. */
4793 if (TARGET_ELF)
4795 if (!global_options_set.x_rs6000_altivec_abi
4796 && (TARGET_64BIT || TARGET_ALTIVEC || TARGET_VSX))
4798 if (main_target_opt != NULL &&
4799 !main_target_opt->x_rs6000_altivec_abi)
4800 error ("target attribute or pragma changes AltiVec ABI");
4801 else
4802 rs6000_altivec_abi = 1;
4806 /* Set the Darwin64 ABI as default for 64-bit Darwin.
4807 So far, the only darwin64 targets are also MACH-O. */
4808 if (TARGET_MACHO
4809 && DEFAULT_ABI == ABI_DARWIN
4810 && TARGET_64BIT)
4812 if (main_target_opt != NULL && !main_target_opt->x_rs6000_darwin64_abi)
4813 error ("target attribute or pragma changes darwin64 ABI");
4814 else
4816 rs6000_darwin64_abi = 1;
4817 /* Default to natural alignment, for better performance. */
4818 rs6000_alignment_flags = MASK_ALIGN_NATURAL;
4822   /* Place FP constants in the constant pool instead of the TOC
4823      if section anchors are enabled.  */
4824 if (flag_section_anchors
4825 && !global_options_set.x_TARGET_NO_FP_IN_TOC)
4826 TARGET_NO_FP_IN_TOC = 1;
4828 if (TARGET_DEBUG_REG || TARGET_DEBUG_TARGET)
4829 rs6000_print_isa_options (stderr, 0, "before subtarget", rs6000_isa_flags);
4831 #ifdef SUBTARGET_OVERRIDE_OPTIONS
4832 SUBTARGET_OVERRIDE_OPTIONS;
4833 #endif
4834 #ifdef SUBSUBTARGET_OVERRIDE_OPTIONS
4835 SUBSUBTARGET_OVERRIDE_OPTIONS;
4836 #endif
4837 #ifdef SUB3TARGET_OVERRIDE_OPTIONS
4838 SUB3TARGET_OVERRIDE_OPTIONS;
4839 #endif
4841 if (TARGET_DEBUG_REG || TARGET_DEBUG_TARGET)
4842 rs6000_print_isa_options (stderr, 0, "after subtarget", rs6000_isa_flags);
4844 /* For the E500 family of cores, reset the single/double FP flags to let us
4845 check that they remain constant across attributes or pragmas. Also,
4846      clear a possible request for string instructions, which are not supported
4847      and which we might have silently enabled above for -Os.
4849 For other families, clear ISEL in case it was set implicitly.
4852 switch (rs6000_cpu)
4854 case PROCESSOR_PPC8540:
4855 case PROCESSOR_PPC8548:
4856 case PROCESSOR_PPCE500MC:
4857 case PROCESSOR_PPCE500MC64:
4858 case PROCESSOR_PPCE5500:
4859 case PROCESSOR_PPCE6500:
4861 rs6000_single_float = 0;
4862 rs6000_double_float = 0;
4864 rs6000_isa_flags &= ~OPTION_MASK_STRING;
4866 break;
4868 default:
4870 if (have_cpu && !(rs6000_isa_flags_explicit & OPTION_MASK_ISEL))
4871 rs6000_isa_flags &= ~OPTION_MASK_ISEL;
4873 break;
4876 if (main_target_opt)
4878 if (main_target_opt->x_rs6000_single_float != rs6000_single_float)
4879 error ("target attribute or pragma changes single precision floating "
4880 "point");
4881 if (main_target_opt->x_rs6000_double_float != rs6000_double_float)
4882 error ("target attribute or pragma changes double precision floating "
4883 "point");
4886 rs6000_always_hint = (rs6000_cpu != PROCESSOR_POWER4
4887 && rs6000_cpu != PROCESSOR_POWER5
4888 && rs6000_cpu != PROCESSOR_POWER6
4889 && rs6000_cpu != PROCESSOR_POWER7
4890 && rs6000_cpu != PROCESSOR_POWER8
4891 && rs6000_cpu != PROCESSOR_POWER9
4892 && rs6000_cpu != PROCESSOR_PPCA2
4893 && rs6000_cpu != PROCESSOR_CELL
4894 && rs6000_cpu != PROCESSOR_PPC476);
4895 rs6000_sched_groups = (rs6000_cpu == PROCESSOR_POWER4
4896 || rs6000_cpu == PROCESSOR_POWER5
4897 || rs6000_cpu == PROCESSOR_POWER7
4898 || rs6000_cpu == PROCESSOR_POWER8);
4899 rs6000_align_branch_targets = (rs6000_cpu == PROCESSOR_POWER4
4900 || rs6000_cpu == PROCESSOR_POWER5
4901 || rs6000_cpu == PROCESSOR_POWER6
4902 || rs6000_cpu == PROCESSOR_POWER7
4903 || rs6000_cpu == PROCESSOR_POWER8
4904 || rs6000_cpu == PROCESSOR_POWER9
4905 || rs6000_cpu == PROCESSOR_PPCE500MC
4906 || rs6000_cpu == PROCESSOR_PPCE500MC64
4907 || rs6000_cpu == PROCESSOR_PPCE5500
4908 || rs6000_cpu == PROCESSOR_PPCE6500);
4910 /* Allow debug switches to override the above settings. These are set to -1
4911 in rs6000.opt to indicate the user hasn't directly set the switch. */
4912 if (TARGET_ALWAYS_HINT >= 0)
4913 rs6000_always_hint = TARGET_ALWAYS_HINT;
4915 if (TARGET_SCHED_GROUPS >= 0)
4916 rs6000_sched_groups = TARGET_SCHED_GROUPS;
4918 if (TARGET_ALIGN_BRANCH_TARGETS >= 0)
4919 rs6000_align_branch_targets = TARGET_ALIGN_BRANCH_TARGETS;
4921 rs6000_sched_restricted_insns_priority
4922 = (rs6000_sched_groups ? 1 : 0);
4924 /* Handle -msched-costly-dep option. */
4925 rs6000_sched_costly_dep
4926 = (rs6000_sched_groups ? true_store_to_load_dep_costly : no_dep_costly);
4928 if (rs6000_sched_costly_dep_str)
4930 if (! strcmp (rs6000_sched_costly_dep_str, "no"))
4931 rs6000_sched_costly_dep = no_dep_costly;
4932 else if (! strcmp (rs6000_sched_costly_dep_str, "all"))
4933 rs6000_sched_costly_dep = all_deps_costly;
4934 else if (! strcmp (rs6000_sched_costly_dep_str, "true_store_to_load"))
4935 rs6000_sched_costly_dep = true_store_to_load_dep_costly;
4936 else if (! strcmp (rs6000_sched_costly_dep_str, "store_to_load"))
4937 rs6000_sched_costly_dep = store_to_load_dep_costly;
4938 else
4939 rs6000_sched_costly_dep = ((enum rs6000_dependence_cost)
4940 atoi (rs6000_sched_costly_dep_str));
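  /* Per the strcmp chain above, -msched-costly-dep=all maps to
     all_deps_costly, while a numeric spelling such as -msched-costly-dep=3
     is taken as a raw rs6000_dependence_cost value via atoi.  */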
4943 /* Handle -minsert-sched-nops option. */
4944 rs6000_sched_insert_nops
4945 = (rs6000_sched_groups ? sched_finish_regroup_exact : sched_finish_none);
4947 if (rs6000_sched_insert_nops_str)
4949 if (! strcmp (rs6000_sched_insert_nops_str, "no"))
4950 rs6000_sched_insert_nops = sched_finish_none;
4951 else if (! strcmp (rs6000_sched_insert_nops_str, "pad"))
4952 rs6000_sched_insert_nops = sched_finish_pad_groups;
4953 else if (! strcmp (rs6000_sched_insert_nops_str, "regroup_exact"))
4954 rs6000_sched_insert_nops = sched_finish_regroup_exact;
4955 else
4956 rs6000_sched_insert_nops = ((enum rs6000_nop_insertion)
4957 atoi (rs6000_sched_insert_nops_str));
4960   /* Handle the stack protector options.  */
4961 if (!global_options_set.x_rs6000_stack_protector_guard)
4962 #ifdef TARGET_THREAD_SSP_OFFSET
4963 rs6000_stack_protector_guard = SSP_TLS;
4964 #else
4965 rs6000_stack_protector_guard = SSP_GLOBAL;
4966 #endif
4968 #ifdef TARGET_THREAD_SSP_OFFSET
4969 rs6000_stack_protector_guard_offset = TARGET_THREAD_SSP_OFFSET;
4970 rs6000_stack_protector_guard_reg = TARGET_64BIT ? 13 : 2;
4971 #endif
4973 if (global_options_set.x_rs6000_stack_protector_guard_offset_str)
4975 char *endp;
4976 const char *str = rs6000_stack_protector_guard_offset_str;
4978 errno = 0;
4979 long offset = strtol (str, &endp, 0);
4980 if (!*str || *endp || errno)
4981 error ("%qs is not a valid number "
4982 "in -mstack-protector-guard-offset=", str);
4984 if (!IN_RANGE (offset, -0x8000, 0x7fff)
4985 || (TARGET_64BIT && (offset & 3)))
4986 error ("%qs is not a valid offset "
4987 "in -mstack-protector-guard-offset=", str);
4989 rs6000_stack_protector_guard_offset = offset;
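/* For illustration (a sketch, not part of the original source): the strtol
   idiom above rejects every malformed input in one pass.  On a sample value,
   assuming a 64-bit target:

     errno = 0;
     long off = strtol ("0x7008", &endp, 0);   // off = 28680
     // *str != 0, *endp == 0, errno == 0   -> parse succeeded
     // IN_RANGE (28680, -0x8000, 0x7fff)   -> fits a signed 16-bit displacement
     // (28680 & 3) == 0                    -> word aligned, as the 64-bit
     //                                        DS-form ld/std insns require

   A value such as "0x7009" would fail the alignment test on 64-bit, and
   "40000" would fail the range test.  */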
4992 if (global_options_set.x_rs6000_stack_protector_guard_reg_str)
4994 const char *str = rs6000_stack_protector_guard_reg_str;
4995 int reg = decode_reg_name (str);
4997 if (!IN_RANGE (reg, 1, 31))
4998 error ("%qs is not a valid base register "
4999 "in -mstack-protector-guard-reg=", str);
5001 rs6000_stack_protector_guard_reg = reg;
5004 if (rs6000_stack_protector_guard == SSP_TLS
5005 && !IN_RANGE (rs6000_stack_protector_guard_reg, 1, 31))
5006 error ("-mstack-protector-guard=tls needs a valid base register");
5008 if (global_init_p)
5010 #ifdef TARGET_REGNAMES
5011 /* If the user desires alternate register names, copy in the
5012 alternate names now. */
5013 if (TARGET_REGNAMES)
5014 memcpy (rs6000_reg_names, alt_reg_names, sizeof (rs6000_reg_names));
5015 #endif
5017 /* Set aix_struct_return last, after the ABI is determined.
5018 If -maix-struct-return or -msvr4-struct-return was explicitly
5019 used, don't override with the ABI default. */
5020 if (!global_options_set.x_aix_struct_return)
5021 aix_struct_return = (DEFAULT_ABI != ABI_V4 || DRAFT_V4_STRUCT_RET);
5023 #if 0
5024 /* IBM XL compiler defaults to unsigned bitfields. */
5025 if (TARGET_XL_COMPAT)
5026 flag_signed_bitfields = 0;
5027 #endif
5029 if (TARGET_LONG_DOUBLE_128 && !TARGET_IEEEQUAD)
5030 REAL_MODE_FORMAT (TFmode) = &ibm_extended_format;
5032 ASM_GENERATE_INTERNAL_LABEL (toc_label_name, "LCTOC", 1);
5034 /* We can only guarantee the availability of DI pseudo-ops when
5035 assembling for 64-bit targets. */
5036 if (!TARGET_64BIT)
5038 targetm.asm_out.aligned_op.di = NULL;
5039 targetm.asm_out.unaligned_op.di = NULL;
5043 /* Set branch target alignment, if not optimizing for size. */
5044 if (!optimize_size)
5046 /* Cell wants 8-byte alignment for dual issue. Titan wants 8-byte
5047 alignment to avoid misprediction by the branch predictor. */
5048 if (rs6000_cpu == PROCESSOR_TITAN
5049 || rs6000_cpu == PROCESSOR_CELL)
5051 if (align_functions <= 0)
5052 align_functions = 8;
5053 if (align_jumps <= 0)
5054 align_jumps = 8;
5055 if (align_loops <= 0)
5056 align_loops = 8;
5058 if (rs6000_align_branch_targets)
5060 if (align_functions <= 0)
5061 align_functions = 16;
5062 if (align_jumps <= 0)
5063 align_jumps = 16;
5064 if (align_loops <= 0)
5066 can_override_loop_align = 1;
5067 align_loops = 16;
5070 if (align_jumps_max_skip <= 0)
5071 align_jumps_max_skip = 15;
5072 if (align_loops_max_skip <= 0)
5073 align_loops_max_skip = 15;
5076 /* Arrange to save and restore machine status around nested functions. */
5077 init_machine_status = rs6000_init_machine_status;
5079 /* We should always be splitting complex arguments, but we can't break
5080 Linux and Darwin ABIs at the moment. For now, only AIX is fixed. */
5081 if (DEFAULT_ABI == ABI_V4 || DEFAULT_ABI == ABI_DARWIN)
5082 targetm.calls.split_complex_arg = NULL;
5084 /* The AIX and ELFv1 ABIs define standard function descriptors. */
5085 if (DEFAULT_ABI == ABI_AIX)
5086 targetm.calls.custom_function_descriptors = 0;
5089 /* Initialize rs6000_cost with the appropriate target costs. */
5090 if (optimize_size)
5091 rs6000_cost = TARGET_POWERPC64 ? &size64_cost : &size32_cost;
5092 else
5093 switch (rs6000_cpu)
5095 case PROCESSOR_RS64A:
5096 rs6000_cost = &rs64a_cost;
5097 break;
5099 case PROCESSOR_MPCCORE:
5100 rs6000_cost = &mpccore_cost;
5101 break;
5103 case PROCESSOR_PPC403:
5104 rs6000_cost = &ppc403_cost;
5105 break;
5107 case PROCESSOR_PPC405:
5108 rs6000_cost = &ppc405_cost;
5109 break;
5111 case PROCESSOR_PPC440:
5112 rs6000_cost = &ppc440_cost;
5113 break;
5115 case PROCESSOR_PPC476:
5116 rs6000_cost = &ppc476_cost;
5117 break;
5119 case PROCESSOR_PPC601:
5120 rs6000_cost = &ppc601_cost;
5121 break;
5123 case PROCESSOR_PPC603:
5124 rs6000_cost = &ppc603_cost;
5125 break;
5127 case PROCESSOR_PPC604:
5128 rs6000_cost = &ppc604_cost;
5129 break;
5131 case PROCESSOR_PPC604e:
5132 rs6000_cost = &ppc604e_cost;
5133 break;
5135 case PROCESSOR_PPC620:
5136 rs6000_cost = &ppc620_cost;
5137 break;
5139 case PROCESSOR_PPC630:
5140 rs6000_cost = &ppc630_cost;
5141 break;
5143 case PROCESSOR_CELL:
5144 rs6000_cost = &ppccell_cost;
5145 break;
5147 case PROCESSOR_PPC750:
5148 case PROCESSOR_PPC7400:
5149 rs6000_cost = &ppc750_cost;
5150 break;
5152 case PROCESSOR_PPC7450:
5153 rs6000_cost = &ppc7450_cost;
5154 break;
5156 case PROCESSOR_PPC8540:
5157 case PROCESSOR_PPC8548:
5158 rs6000_cost = &ppc8540_cost;
5159 break;
5161 case PROCESSOR_PPCE300C2:
5162 case PROCESSOR_PPCE300C3:
5163 rs6000_cost = &ppce300c2c3_cost;
5164 break;
5166 case PROCESSOR_PPCE500MC:
5167 rs6000_cost = &ppce500mc_cost;
5168 break;
5170 case PROCESSOR_PPCE500MC64:
5171 rs6000_cost = &ppce500mc64_cost;
5172 break;
5174 case PROCESSOR_PPCE5500:
5175 rs6000_cost = &ppce5500_cost;
5176 break;
5178 case PROCESSOR_PPCE6500:
5179 rs6000_cost = &ppce6500_cost;
5180 break;
5182 case PROCESSOR_TITAN:
5183 rs6000_cost = &titan_cost;
5184 break;
5186 case PROCESSOR_POWER4:
5187 case PROCESSOR_POWER5:
5188 rs6000_cost = &power4_cost;
5189 break;
5191 case PROCESSOR_POWER6:
5192 rs6000_cost = &power6_cost;
5193 break;
5195 case PROCESSOR_POWER7:
5196 rs6000_cost = &power7_cost;
5197 break;
5199 case PROCESSOR_POWER8:
5200 rs6000_cost = &power8_cost;
5201 break;
5203 case PROCESSOR_POWER9:
5204 rs6000_cost = &power9_cost;
5205 break;
5207 case PROCESSOR_PPCA2:
5208 rs6000_cost = &ppca2_cost;
5209 break;
5211 default:
5212 gcc_unreachable ();
5215 if (global_init_p)
5217 maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES,
5218 rs6000_cost->simultaneous_prefetches,
5219 global_options.x_param_values,
5220 global_options_set.x_param_values);
5221 maybe_set_param_value (PARAM_L1_CACHE_SIZE, rs6000_cost->l1_cache_size,
5222 global_options.x_param_values,
5223 global_options_set.x_param_values);
5224 maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE,
5225 rs6000_cost->cache_line_size,
5226 global_options.x_param_values,
5227 global_options_set.x_param_values);
5228 maybe_set_param_value (PARAM_L2_CACHE_SIZE, rs6000_cost->l2_cache_size,
5229 global_options.x_param_values,
5230 global_options_set.x_param_values);
5232 /* Increase loop peeling limits based on performance analysis. */
5233 maybe_set_param_value (PARAM_MAX_PEELED_INSNS, 400,
5234 global_options.x_param_values,
5235 global_options_set.x_param_values);
5236 maybe_set_param_value (PARAM_MAX_COMPLETELY_PEELED_INSNS, 400,
5237 global_options.x_param_values,
5238 global_options_set.x_param_values);
5240 /* Use the 'model' -fsched-pressure algorithm by default. */
5241 maybe_set_param_value (PARAM_SCHED_PRESSURE_ALGORITHM,
5242 SCHED_PRESSURE_MODEL,
5243 global_options.x_param_values,
5244 global_options_set.x_param_values);
5246 /* If using typedef char *va_list, signal that
5247 __builtin_va_start (&ap, 0) can be optimized to
5248 ap = __builtin_next_arg (0). */
5249 if (DEFAULT_ABI != ABI_V4)
5250 targetm.expand_builtin_va_start = NULL;
5253 /* Set up single/double float flags.
5254 If TARGET_HARD_FLOAT is set, but neither single nor double is set,
5255 then set both flags. */
5256 if (TARGET_HARD_FLOAT && rs6000_single_float == 0 && rs6000_double_float == 0)
5257 rs6000_single_float = rs6000_double_float = 1;
5259 /* If not explicitly specified via option, decide whether to generate indexed
5260 load/store instructions. A value of -1 indicates that the
5261 initial value of this variable has not been overwritten. During
5262 compilation, TARGET_AVOID_XFORM is either 0 or 1. */
5263 if (TARGET_AVOID_XFORM == -1)
5264 /* Avoid indexed addressing when targeting Power6 in order to avoid the
5265 DERAT mispredict penalty. However the LVE and STVE altivec instructions
5266 need indexed accesses and the type used is the scalar type of the element
5267 being loaded or stored. */
5268 TARGET_AVOID_XFORM = (rs6000_cpu == PROCESSOR_POWER6 && TARGET_CMPB
5269 && !TARGET_ALTIVEC);
5271 /* Set the -mrecip options. */
5272 if (rs6000_recip_name)
5274 char *p = ASTRDUP (rs6000_recip_name);
5275 char *q;
5276 unsigned int mask, i;
5277 bool invert;
5279 while ((q = strtok (p, ",")) != NULL)
5281 p = NULL;
5282 if (*q == '!')
5284 invert = true;
5285 q++;
5287 else
5288 invert = false;
5290 if (!strcmp (q, "default"))
5291 mask = ((TARGET_RECIP_PRECISION)
5292 ? RECIP_HIGH_PRECISION : RECIP_LOW_PRECISION);
5293 else
5295 for (i = 0; i < ARRAY_SIZE (recip_options); i++)
5296 if (!strcmp (q, recip_options[i].string))
5298 mask = recip_options[i].mask;
5299 break;
5302 if (i == ARRAY_SIZE (recip_options))
5304 error ("unknown option for -mrecip=%s", q);
5305 invert = false;
5306 mask = 0;
5307 ret = false;
5311 if (invert)
5312 rs6000_recip_control &= ~mask;
5313 else
5314 rs6000_recip_control |= mask;
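/* For example (assuming the usual rs6000 keywords in recip_options, such as
   "all" and "rsqrtd"), -mrecip=all,!rsqrtd is tokenized into "all" and
   "!rsqrtd": the first iteration ORs in the mask bits for every reciprocal
   estimate, and the second, with INVERT set by the leading '!', clears the
   bits for double-precision rsqrt again.  */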
5318 /* Set the builtin mask of the various options used that could affect which
5319 builtins are enabled. In the past we used target_flags, but we've run out
5320 of bits, and some options like PAIRED are no longer in target_flags. */
5321 rs6000_builtin_mask = rs6000_builtin_mask_calculate ();
5322 if (TARGET_DEBUG_BUILTIN || TARGET_DEBUG_TARGET)
5323 rs6000_print_builtin_options (stderr, 0, "builtin mask",
5324 rs6000_builtin_mask);
5326 /* Initialize all of the registers. */
5327 rs6000_init_hard_regno_mode_ok (global_init_p);
5330 /* Save the initial options in case the user uses function-specific options. */
5330 if (global_init_p)
5331 target_option_default_node = target_option_current_node
5332 = build_target_option_node (&global_options);
5334 /* If not explicitly specified via option, decide whether to generate the
5335 extra blr's required to preserve the link stack on some cpus (eg, 476). */
5336 if (TARGET_LINK_STACK == -1)
5337 SET_TARGET_LINK_STACK (rs6000_cpu == PROCESSOR_PPC476 && flag_pic);
5339 return ret;
5342 /* Implement TARGET_OPTION_OVERRIDE. On the RS/6000 this is used to
5343 define the target cpu type. */
5345 static void
5346 rs6000_option_override (void)
5348 (void) rs6000_option_override_internal (true);
5352 /* Implement targetm.vectorize.builtin_mask_for_load. */
5353 static tree
5354 rs6000_builtin_mask_for_load (void)
5356 /* Don't use lvsl/vperm for P8 and similarly efficient machines. */
5357 if ((TARGET_ALTIVEC && !TARGET_VSX)
5358 || (TARGET_VSX && !TARGET_EFFICIENT_UNALIGNED_VSX))
5359 return altivec_builtin_mask_for_load;
5360 else
5361 return 0;
5364 /* Implement LOOP_ALIGN. */
5366 int rs6000_loop_align (rtx label)
5368 basic_block bb;
5369 int ninsns;
5371 /* Don't override loop alignment if -falign-loops was specified. */
5372 if (!can_override_loop_align)
5373 return align_loops_log;
5375 bb = BLOCK_FOR_INSN (label);
5376 ninsns = num_loop_insns(bb->loop_father);
5378 /* Align small loops to 32 bytes to fit in an icache sector, otherwise return default. */
5379 if (ninsns > 4 && ninsns <= 8
5380 && (rs6000_cpu == PROCESSOR_POWER4
5381 || rs6000_cpu == PROCESSOR_POWER5
5382 || rs6000_cpu == PROCESSOR_POWER6
5383 || rs6000_cpu == PROCESSOR_POWER7
5384 || rs6000_cpu == PROCESSOR_POWER8
5385 || rs6000_cpu == PROCESSOR_POWER9))
5386 return 5;
5387 else
5388 return align_loops_log;
5391 /* Implement TARGET_LOOP_ALIGN_MAX_SKIP. */
5392 static int
5393 rs6000_loop_align_max_skip (rtx_insn *label)
5395 return (1 << rs6000_loop_align (label)) - 1;
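/* Worked example: when rs6000_loop_align returns 5 for a small loop on a
   Power4..Power9 tune, the loop start is aligned to 1 << 5 = 32 bytes, and
   the maximum number of padding bytes the assembler may insert to reach
   that boundary is (1 << 5) - 1 = 31.  */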
5398 /* Return true iff a data reference of TYPE can reach vector alignment (16)
5399 after applying N iterations. This routine does not determine
5400 how many iterations are required to reach the desired alignment. */
5402 static bool
5403 rs6000_vector_alignment_reachable (const_tree type ATTRIBUTE_UNUSED, bool is_packed)
5405 if (is_packed)
5406 return false;
5408 if (TARGET_32BIT)
5410 if (rs6000_alignment_flags == MASK_ALIGN_NATURAL)
5411 return true;
5413 if (rs6000_alignment_flags == MASK_ALIGN_POWER)
5414 return true;
5416 return false;
5418 else
5420 if (TARGET_MACHO)
5421 return false;
5423 /* Assume that all other types are naturally aligned. CHECKME! */
5424 return true;
5428 /* Return true if the vector misalignment factor is supported by the
5429 target. */
5430 static bool
5431 rs6000_builtin_support_vector_misalignment (machine_mode mode,
5432 const_tree type,
5433 int misalignment,
5434 bool is_packed)
5436 if (TARGET_VSX)
5438 if (TARGET_EFFICIENT_UNALIGNED_VSX)
5439 return true;
5441 /* Return if movmisalign pattern is not supported for this mode. */
5442 if (optab_handler (movmisalign_optab, mode) == CODE_FOR_nothing)
5443 return false;
5445 if (misalignment == -1)
5447 /* Misalignment factor is unknown at compile time but we know
5448 it's word aligned. */
5449 if (rs6000_vector_alignment_reachable (type, is_packed))
5451 int element_size = TREE_INT_CST_LOW (TYPE_SIZE (type));
5453 if (element_size == 64 || element_size == 32)
5454 return true;
5457 return false;
5460 /* VSX supports word-aligned vector. */
5461 if (misalignment % 4 == 0)
5462 return true;
5464 return false;
5467 /* Implement targetm.vectorize.builtin_vectorization_cost. */
5468 static int
5469 rs6000_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
5470 tree vectype, int misalign)
5472 unsigned elements;
5473 tree elem_type;
5475 switch (type_of_cost)
5477 case scalar_stmt:
5478 case scalar_load:
5479 case scalar_store:
5480 case vector_stmt:
5481 case vector_load:
5482 case vector_store:
5483 case vec_to_scalar:
5484 case scalar_to_vec:
5485 case cond_branch_not_taken:
5486 return 1;
5488 case vec_perm:
5489 if (TARGET_VSX)
5490 return 3;
5491 else
5492 return 1;
5494 case vec_promote_demote:
5495 if (TARGET_VSX)
5496 return 4;
5497 else
5498 return 1;
5500 case cond_branch_taken:
5501 return 3;
5503 case unaligned_load:
5504 if (TARGET_P9_VECTOR)
5505 return 3;
5507 if (TARGET_EFFICIENT_UNALIGNED_VSX)
5508 return 1;
5510 if (TARGET_VSX && TARGET_ALLOW_MOVMISALIGN)
5512 elements = TYPE_VECTOR_SUBPARTS (vectype);
5513 if (elements == 2)
5514 /* Double word aligned. */
5515 return 2;
5517 if (elements == 4)
5519 switch (misalign)
5521 case 8:
5522 /* Double word aligned. */
5523 return 2;
5525 case -1:
5526 /* Unknown misalignment. */
5527 case 4:
5528 case 12:
5529 /* Word aligned. */
5530 return 22;
5532 default:
5533 gcc_unreachable ();
5538 if (TARGET_ALTIVEC)
5539 /* Misaligned loads are not supported. */
5540 gcc_unreachable ();
5542 return 2;
5544 case unaligned_store:
5545 if (TARGET_EFFICIENT_UNALIGNED_VSX)
5546 return 1;
5548 if (TARGET_VSX && TARGET_ALLOW_MOVMISALIGN)
5550 elements = TYPE_VECTOR_SUBPARTS (vectype);
5551 if (elements == 2)
5552 /* Double word aligned. */
5553 return 2;
5555 if (elements == 4)
5557 switch (misalign)
5559 case 8:
5560 /* Double word aligned. */
5561 return 2;
5563 case -1:
5564 /* Unknown misalignment. */
5565 case 4:
5566 case 12:
5567 /* Word aligned. */
5568 return 23;
5570 default:
5571 gcc_unreachable ();
5576 if (TARGET_ALTIVEC)
5577 /* Misaligned stores are not supported. */
5578 gcc_unreachable ();
5580 return 2;
5582 case vec_construct:
5583 /* This is a rough approximation assuming non-constant elements
5584 constructed into a vector via element insertion. FIXME:
5585 vec_construct is not granular enough for uniformly good
5586 decisions. If the initialization is a splat, this is
5587 cheaper than we estimate. Improve this someday. */
5588 elem_type = TREE_TYPE (vectype);
5589 /* 32-bit vectors loaded into registers are stored as double
5590 precision, so we need 2 permutes, 2 converts, and 1 merge
5591 to construct a vector of short floats from them. */
5592 if (SCALAR_FLOAT_TYPE_P (elem_type)
5593 && TYPE_PRECISION (elem_type) == 32)
5594 return 5;
5595 /* On POWER9, integer vector types are built up in GPRs and then
5596 use a direct move (2 cycles). For POWER8 this is even worse,
5597 as we need two direct moves and a merge, and the direct moves
5598 are five cycles. */
5599 else if (INTEGRAL_TYPE_P (elem_type))
5601 if (TARGET_P9_VECTOR)
5602 return TYPE_VECTOR_SUBPARTS (vectype) - 1 + 2;
5603 else
5604 return TYPE_VECTOR_SUBPARTS (vectype) - 1 + 5;
5606 else
5607 /* V2DFmode doesn't need a direct move. */
5608 return 2;
5610 default:
5611 gcc_unreachable ();
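/* To make the vec_construct costs above concrete: building a V4SImode vector
   from four distinct GPR values is costed at 4 - 1 + 2 = 5 when
   TARGET_P9_VECTOR (one direct move) and 4 - 1 + 5 = 8 otherwise (two direct
   moves plus a merge), while a V4SFmode construction from 32-bit floats is a
   flat 5 and a V2DFmode construction only 2.  */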
5615 /* Implement targetm.vectorize.preferred_simd_mode. */
5617 static machine_mode
5618 rs6000_preferred_simd_mode (machine_mode mode)
5620 if (TARGET_VSX)
5621 switch (mode)
5623 case DFmode:
5624 return V2DFmode;
5625 default:;
5627 if (TARGET_ALTIVEC || TARGET_VSX)
5628 switch (mode)
5630 case SFmode:
5631 return V4SFmode;
5632 case TImode:
5633 return V1TImode;
5634 case DImode:
5635 return V2DImode;
5636 case SImode:
5637 return V4SImode;
5638 case HImode:
5639 return V8HImode;
5640 case QImode:
5641 return V16QImode;
5642 default:;
5644 if (TARGET_PAIRED_FLOAT
5645 && mode == SFmode)
5646 return V2SFmode;
5647 return word_mode;
5650 typedef struct _rs6000_cost_data
5652 struct loop *loop_info;
5653 unsigned cost[3];
5654 } rs6000_cost_data;
5656 /* Test for likely overcommitment of vector hardware resources. If a
5657 loop iteration is relatively large, and too large a percentage of
5658 instructions in the loop are vectorized, the cost model may not
5659 adequately reflect delays from unavailable vector resources.
5660 Penalize the loop body cost for this case. */
5662 static void
5663 rs6000_density_test (rs6000_cost_data *data)
5665 const int DENSITY_PCT_THRESHOLD = 85;
5666 const int DENSITY_SIZE_THRESHOLD = 70;
5667 const int DENSITY_PENALTY = 10;
5668 struct loop *loop = data->loop_info;
5669 basic_block *bbs = get_loop_body (loop);
5670 int nbbs = loop->num_nodes;
5671 int vec_cost = data->cost[vect_body], not_vec_cost = 0;
5672 int i, density_pct;
5674 for (i = 0; i < nbbs; i++)
5676 basic_block bb = bbs[i];
5677 gimple_stmt_iterator gsi;
5679 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
5681 gimple *stmt = gsi_stmt (gsi);
5682 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
5684 if (!STMT_VINFO_RELEVANT_P (stmt_info)
5685 && !STMT_VINFO_IN_PATTERN_P (stmt_info))
5686 not_vec_cost++;
5690 free (bbs);
5691 density_pct = (vec_cost * 100) / (vec_cost + not_vec_cost);
5693 if (density_pct > DENSITY_PCT_THRESHOLD
5694 && vec_cost + not_vec_cost > DENSITY_SIZE_THRESHOLD)
5696 data->cost[vect_body] = vec_cost * (100 + DENSITY_PENALTY) / 100;
5697 if (dump_enabled_p ())
5698 dump_printf_loc (MSG_NOTE, vect_location,
5699 "density %d%%, cost %d exceeds threshold, penalizing "
5700 "loop body cost by %d%%", density_pct,
5701 vec_cost + not_vec_cost, DENSITY_PENALTY);
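/* Worked example: a loop body with vec_cost = 90 and not_vec_cost = 10 has
   density 90% > 85% and size 100 > 70, so the body cost is raised to
   90 * (100 + 10) / 100 = 99; a small or mostly-scalar loop is left
   untouched.  */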
5705 /* Implement targetm.vectorize.init_cost. */
5707 /* For each vectorized loop, this var holds TRUE iff a non-memory vector
5708 instruction is needed by the vectorization. */
5709 static bool rs6000_vect_nonmem;
5711 static void *
5712 rs6000_init_cost (struct loop *loop_info)
5714 rs6000_cost_data *data = XNEW (struct _rs6000_cost_data);
5715 data->loop_info = loop_info;
5716 data->cost[vect_prologue] = 0;
5717 data->cost[vect_body] = 0;
5718 data->cost[vect_epilogue] = 0;
5719 rs6000_vect_nonmem = false;
5720 return data;
5723 /* Implement targetm.vectorize.add_stmt_cost. */
5725 static unsigned
5726 rs6000_add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind,
5727 struct _stmt_vec_info *stmt_info, int misalign,
5728 enum vect_cost_model_location where)
5730 rs6000_cost_data *cost_data = (rs6000_cost_data*) data;
5731 unsigned retval = 0;
5733 if (flag_vect_cost_model)
5735 tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
5736 int stmt_cost = rs6000_builtin_vectorization_cost (kind, vectype,
5737 misalign);
5738 /* Statements in an inner loop relative to the loop being
5739 vectorized are weighted more heavily. The value here is
5740 arbitrary and could potentially be improved with analysis. */
5741 if (where == vect_body && stmt_info && stmt_in_inner_loop_p (stmt_info))
5742 count *= 50; /* FIXME. */
5744 retval = (unsigned) (count * stmt_cost);
5745 cost_data->cost[where] += retval;
5747 /* Check whether we're doing something other than just a copy loop.
5748 Not all such loops may be profitably vectorized; see
5749 rs6000_finish_cost. */
5750 if ((kind == vec_to_scalar || kind == vec_perm
5751 || kind == vec_promote_demote || kind == vec_construct
5752 || kind == scalar_to_vec)
5753 || (where == vect_body && kind == vector_stmt))
5754 rs6000_vect_nonmem = true;
5757 return retval;
5760 /* Implement targetm.vectorize.finish_cost. */
5762 static void
5763 rs6000_finish_cost (void *data, unsigned *prologue_cost,
5764 unsigned *body_cost, unsigned *epilogue_cost)
5766 rs6000_cost_data *cost_data = (rs6000_cost_data*) data;
5768 if (cost_data->loop_info)
5769 rs6000_density_test (cost_data);
5771 /* Don't vectorize minimum-vectorization-factor, simple copy loops
5772 that require versioning for any reason. The vectorization is at
5773 best a wash inside the loop, and the versioning checks make
5774 profitability highly unlikely and potentially quite harmful. */
5775 if (cost_data->loop_info)
5777 loop_vec_info vec_info = loop_vec_info_for_loop (cost_data->loop_info);
5778 if (!rs6000_vect_nonmem
5779 && LOOP_VINFO_VECT_FACTOR (vec_info) == 2
5780 && LOOP_REQUIRES_VERSIONING (vec_info))
5781 cost_data->cost[vect_body] += 10000;
5784 *prologue_cost = cost_data->cost[vect_prologue];
5785 *body_cost = cost_data->cost[vect_body];
5786 *epilogue_cost = cost_data->cost[vect_epilogue];
5789 /* Implement targetm.vectorize.destroy_cost_data. */
5791 static void
5792 rs6000_destroy_cost_data (void *data)
5794 free (data);
5797 /* Handler for the Mathematical Acceleration Subsystem (mass) interface to a
5798 library with vectorized intrinsics. */
5800 static tree
5801 rs6000_builtin_vectorized_libmass (combined_fn fn, tree type_out,
5802 tree type_in)
5804 char name[32];
5805 const char *suffix = NULL;
5806 tree fntype, new_fndecl, bdecl = NULL_TREE;
5807 int n_args = 1;
5808 const char *bname;
5809 machine_mode el_mode, in_mode;
5810 int n, in_n;
5812 /* Libmass is suitable for unsafe math only as it does not correctly support
5813 parts of IEEE with the required precision such as denormals. Only support
5814 it if we have VSX to use the simd d2 or f4 functions.
5815 XXX: Add variable length support. */
5816 if (!flag_unsafe_math_optimizations || !TARGET_VSX)
5817 return NULL_TREE;
5819 el_mode = TYPE_MODE (TREE_TYPE (type_out));
5820 n = TYPE_VECTOR_SUBPARTS (type_out);
5821 in_mode = TYPE_MODE (TREE_TYPE (type_in));
5822 in_n = TYPE_VECTOR_SUBPARTS (type_in);
5823 if (el_mode != in_mode
5824 || n != in_n)
5825 return NULL_TREE;
5827 switch (fn)
5829 CASE_CFN_ATAN2:
5830 CASE_CFN_HYPOT:
5831 CASE_CFN_POW:
5832 n_args = 2;
5833 gcc_fallthrough ();
5835 CASE_CFN_ACOS:
5836 CASE_CFN_ACOSH:
5837 CASE_CFN_ASIN:
5838 CASE_CFN_ASINH:
5839 CASE_CFN_ATAN:
5840 CASE_CFN_ATANH:
5841 CASE_CFN_CBRT:
5842 CASE_CFN_COS:
5843 CASE_CFN_COSH:
5844 CASE_CFN_ERF:
5845 CASE_CFN_ERFC:
5846 CASE_CFN_EXP2:
5847 CASE_CFN_EXP:
5848 CASE_CFN_EXPM1:
5849 CASE_CFN_LGAMMA:
5850 CASE_CFN_LOG10:
5851 CASE_CFN_LOG1P:
5852 CASE_CFN_LOG2:
5853 CASE_CFN_LOG:
5854 CASE_CFN_SIN:
5855 CASE_CFN_SINH:
5856 CASE_CFN_SQRT:
5857 CASE_CFN_TAN:
5858 CASE_CFN_TANH:
5859 if (el_mode == DFmode && n == 2)
5861 bdecl = mathfn_built_in (double_type_node, fn);
5862 suffix = "d2"; /* pow -> powd2 */
5864 else if (el_mode == SFmode && n == 4)
5866 bdecl = mathfn_built_in (float_type_node, fn);
5867 suffix = "4"; /* powf -> powf4 */
5869 else
5870 return NULL_TREE;
5871 if (!bdecl)
5872 return NULL_TREE;
5873 break;
5875 default:
5876 return NULL_TREE;
5879 gcc_assert (suffix != NULL);
5880 bname = IDENTIFIER_POINTER (DECL_NAME (bdecl));
5881 if (!bname)
5882 return NULL_TREE;
5884 strcpy (name, bname + sizeof ("__builtin_") - 1);
5885 strcat (name, suffix);
5887 if (n_args == 1)
5888 fntype = build_function_type_list (type_out, type_in, NULL);
5889 else if (n_args == 2)
5890 fntype = build_function_type_list (type_out, type_in, type_in, NULL);
5891 else
5892 gcc_unreachable ();
5894 /* Build a function declaration for the vectorized function. */
5895 new_fndecl = build_decl (BUILTINS_LOCATION,
5896 FUNCTION_DECL, get_identifier (name), fntype);
5897 TREE_PUBLIC (new_fndecl) = 1;
5898 DECL_EXTERNAL (new_fndecl) = 1;
5899 DECL_IS_NOVOPS (new_fndecl) = 1;
5900 TREE_READONLY (new_fndecl) = 1;
5902 return new_fndecl;
5905 /* Returns a function decl for a vectorized version of the builtin function
5906 with builtin function code FN and the result vector type TYPE, or NULL_TREE
5907 if it is not available. */
5909 static tree
5910 rs6000_builtin_vectorized_function (unsigned int fn, tree type_out,
5911 tree type_in)
5913 machine_mode in_mode, out_mode;
5914 int in_n, out_n;
5916 if (TARGET_DEBUG_BUILTIN)
5917 fprintf (stderr, "rs6000_builtin_vectorized_function (%s, %s, %s)\n",
5918 combined_fn_name (combined_fn (fn)),
5919 GET_MODE_NAME (TYPE_MODE (type_out)),
5920 GET_MODE_NAME (TYPE_MODE (type_in)));
5922 if (TREE_CODE (type_out) != VECTOR_TYPE
5923 || TREE_CODE (type_in) != VECTOR_TYPE
5924 || !TARGET_VECTORIZE_BUILTINS)
5925 return NULL_TREE;
5927 out_mode = TYPE_MODE (TREE_TYPE (type_out));
5928 out_n = TYPE_VECTOR_SUBPARTS (type_out);
5929 in_mode = TYPE_MODE (TREE_TYPE (type_in));
5930 in_n = TYPE_VECTOR_SUBPARTS (type_in);
5932 switch (fn)
5934 CASE_CFN_COPYSIGN:
5935 if (VECTOR_UNIT_VSX_P (V2DFmode)
5936 && out_mode == DFmode && out_n == 2
5937 && in_mode == DFmode && in_n == 2)
5938 return rs6000_builtin_decls[VSX_BUILTIN_CPSGNDP];
5939 if (VECTOR_UNIT_VSX_P (V4SFmode)
5940 && out_mode == SFmode && out_n == 4
5941 && in_mode == SFmode && in_n == 4)
5942 return rs6000_builtin_decls[VSX_BUILTIN_CPSGNSP];
5943 if (VECTOR_UNIT_ALTIVEC_P (V4SFmode)
5944 && out_mode == SFmode && out_n == 4
5945 && in_mode == SFmode && in_n == 4)
5946 return rs6000_builtin_decls[ALTIVEC_BUILTIN_COPYSIGN_V4SF];
5947 break;
5948 CASE_CFN_CEIL:
5949 if (VECTOR_UNIT_VSX_P (V2DFmode)
5950 && out_mode == DFmode && out_n == 2
5951 && in_mode == DFmode && in_n == 2)
5952 return rs6000_builtin_decls[VSX_BUILTIN_XVRDPIP];
5953 if (VECTOR_UNIT_VSX_P (V4SFmode)
5954 && out_mode == SFmode && out_n == 4
5955 && in_mode == SFmode && in_n == 4)
5956 return rs6000_builtin_decls[VSX_BUILTIN_XVRSPIP];
5957 if (VECTOR_UNIT_ALTIVEC_P (V4SFmode)
5958 && out_mode == SFmode && out_n == 4
5959 && in_mode == SFmode && in_n == 4)
5960 return rs6000_builtin_decls[ALTIVEC_BUILTIN_VRFIP];
5961 break;
5962 CASE_CFN_FLOOR:
5963 if (VECTOR_UNIT_VSX_P (V2DFmode)
5964 && out_mode == DFmode && out_n == 2
5965 && in_mode == DFmode && in_n == 2)
5966 return rs6000_builtin_decls[VSX_BUILTIN_XVRDPIM];
5967 if (VECTOR_UNIT_VSX_P (V4SFmode)
5968 && out_mode == SFmode && out_n == 4
5969 && in_mode == SFmode && in_n == 4)
5970 return rs6000_builtin_decls[VSX_BUILTIN_XVRSPIM];
5971 if (VECTOR_UNIT_ALTIVEC_P (V4SFmode)
5972 && out_mode == SFmode && out_n == 4
5973 && in_mode == SFmode && in_n == 4)
5974 return rs6000_builtin_decls[ALTIVEC_BUILTIN_VRFIM];
5975 break;
5976 CASE_CFN_FMA:
5977 if (VECTOR_UNIT_VSX_P (V2DFmode)
5978 && out_mode == DFmode && out_n == 2
5979 && in_mode == DFmode && in_n == 2)
5980 return rs6000_builtin_decls[VSX_BUILTIN_XVMADDDP];
5981 if (VECTOR_UNIT_VSX_P (V4SFmode)
5982 && out_mode == SFmode && out_n == 4
5983 && in_mode == SFmode && in_n == 4)
5984 return rs6000_builtin_decls[VSX_BUILTIN_XVMADDSP];
5985 if (VECTOR_UNIT_ALTIVEC_P (V4SFmode)
5986 && out_mode == SFmode && out_n == 4
5987 && in_mode == SFmode && in_n == 4)
5988 return rs6000_builtin_decls[ALTIVEC_BUILTIN_VMADDFP];
5989 break;
5990 CASE_CFN_TRUNC:
5991 if (VECTOR_UNIT_VSX_P (V2DFmode)
5992 && out_mode == DFmode && out_n == 2
5993 && in_mode == DFmode && in_n == 2)
5994 return rs6000_builtin_decls[VSX_BUILTIN_XVRDPIZ];
5995 if (VECTOR_UNIT_VSX_P (V4SFmode)
5996 && out_mode == SFmode && out_n == 4
5997 && in_mode == SFmode && in_n == 4)
5998 return rs6000_builtin_decls[VSX_BUILTIN_XVRSPIZ];
5999 if (VECTOR_UNIT_ALTIVEC_P (V4SFmode)
6000 && out_mode == SFmode && out_n == 4
6001 && in_mode == SFmode && in_n == 4)
6002 return rs6000_builtin_decls[ALTIVEC_BUILTIN_VRFIZ];
6003 break;
6004 CASE_CFN_NEARBYINT:
6005 if (VECTOR_UNIT_VSX_P (V2DFmode)
6006 && flag_unsafe_math_optimizations
6007 && out_mode == DFmode && out_n == 2
6008 && in_mode == DFmode && in_n == 2)
6009 return rs6000_builtin_decls[VSX_BUILTIN_XVRDPI];
6010 if (VECTOR_UNIT_VSX_P (V4SFmode)
6011 && flag_unsafe_math_optimizations
6012 && out_mode == SFmode && out_n == 4
6013 && in_mode == SFmode && in_n == 4)
6014 return rs6000_builtin_decls[VSX_BUILTIN_XVRSPI];
6015 break;
6016 CASE_CFN_RINT:
6017 if (VECTOR_UNIT_VSX_P (V2DFmode)
6018 && !flag_trapping_math
6019 && out_mode == DFmode && out_n == 2
6020 && in_mode == DFmode && in_n == 2)
6021 return rs6000_builtin_decls[VSX_BUILTIN_XVRDPIC];
6022 if (VECTOR_UNIT_VSX_P (V4SFmode)
6023 && !flag_trapping_math
6024 && out_mode == SFmode && out_n == 4
6025 && in_mode == SFmode && in_n == 4)
6026 return rs6000_builtin_decls[VSX_BUILTIN_XVRSPIC];
6027 break;
6028 default:
6029 break;
6032 /* Generate calls to libmass if appropriate. */
6033 if (rs6000_veclib_handler)
6034 return rs6000_veclib_handler (combined_fn (fn), type_out, type_in);
6036 return NULL_TREE;
6039 /* Implement TARGET_VECTORIZE_BUILTIN_MD_VECTORIZED_FUNCTION. */
6041 static tree
6042 rs6000_builtin_md_vectorized_function (tree fndecl, tree type_out,
6043 tree type_in)
6045 machine_mode in_mode, out_mode;
6046 int in_n, out_n;
6048 if (TARGET_DEBUG_BUILTIN)
6049 fprintf (stderr, "rs6000_builtin_md_vectorized_function (%s, %s, %s)\n",
6050 IDENTIFIER_POINTER (DECL_NAME (fndecl)),
6051 GET_MODE_NAME (TYPE_MODE (type_out)),
6052 GET_MODE_NAME (TYPE_MODE (type_in)));
6054 if (TREE_CODE (type_out) != VECTOR_TYPE
6055 || TREE_CODE (type_in) != VECTOR_TYPE
6056 || !TARGET_VECTORIZE_BUILTINS)
6057 return NULL_TREE;
6059 out_mode = TYPE_MODE (TREE_TYPE (type_out));
6060 out_n = TYPE_VECTOR_SUBPARTS (type_out);
6061 in_mode = TYPE_MODE (TREE_TYPE (type_in));
6062 in_n = TYPE_VECTOR_SUBPARTS (type_in);
6064 enum rs6000_builtins fn
6065 = (enum rs6000_builtins) DECL_FUNCTION_CODE (fndecl);
6066 switch (fn)
6068 case RS6000_BUILTIN_RSQRTF:
6069 if (VECTOR_UNIT_ALTIVEC_OR_VSX_P (V4SFmode)
6070 && out_mode == SFmode && out_n == 4
6071 && in_mode == SFmode && in_n == 4)
6072 return rs6000_builtin_decls[ALTIVEC_BUILTIN_VRSQRTFP];
6073 break;
6074 case RS6000_BUILTIN_RSQRT:
6075 if (VECTOR_UNIT_VSX_P (V2DFmode)
6076 && out_mode == DFmode && out_n == 2
6077 && in_mode == DFmode && in_n == 2)
6078 return rs6000_builtin_decls[VSX_BUILTIN_RSQRT_2DF];
6079 break;
6080 case RS6000_BUILTIN_RECIPF:
6081 if (VECTOR_UNIT_ALTIVEC_OR_VSX_P (V4SFmode)
6082 && out_mode == SFmode && out_n == 4
6083 && in_mode == SFmode && in_n == 4)
6084 return rs6000_builtin_decls[ALTIVEC_BUILTIN_VRECIPFP];
6085 break;
6086 case RS6000_BUILTIN_RECIP:
6087 if (VECTOR_UNIT_VSX_P (V2DFmode)
6088 && out_mode == DFmode && out_n == 2
6089 && in_mode == DFmode && in_n == 2)
6090 return rs6000_builtin_decls[VSX_BUILTIN_RECIP_V2DF];
6091 break;
6092 default:
6093 break;
6095 return NULL_TREE;
6098 /* Default CPU string for rs6000*_file_start functions. */
6099 static const char *rs6000_default_cpu;
6101 /* Do anything needed at the start of the asm file. */
6103 static void
6104 rs6000_file_start (void)
6106 char buffer[80];
6107 const char *start = buffer;
6108 FILE *file = asm_out_file;
6110 rs6000_default_cpu = TARGET_CPU_DEFAULT;
6112 default_file_start ();
6114 if (flag_verbose_asm)
6116 sprintf (buffer, "\n%s rs6000/powerpc options:", ASM_COMMENT_START);
6118 if (rs6000_default_cpu != 0 && rs6000_default_cpu[0] != '\0')
6120 fprintf (file, "%s --with-cpu=%s", start, rs6000_default_cpu);
6121 start = "";
6124 if (global_options_set.x_rs6000_cpu_index)
6126 fprintf (file, "%s -mcpu=%s", start,
6127 processor_target_table[rs6000_cpu_index].name);
6128 start = "";
6131 if (global_options_set.x_rs6000_tune_index)
6133 fprintf (file, "%s -mtune=%s", start,
6134 processor_target_table[rs6000_tune_index].name);
6135 start = "";
6138 if (PPC405_ERRATUM77)
6140 fprintf (file, "%s PPC405CR_ERRATUM77", start);
6141 start = "";
6144 #ifdef USING_ELFOS_H
6145 switch (rs6000_sdata)
6147 case SDATA_NONE: fprintf (file, "%s -msdata=none", start); start = ""; break;
6148 case SDATA_DATA: fprintf (file, "%s -msdata=data", start); start = ""; break;
6149 case SDATA_SYSV: fprintf (file, "%s -msdata=sysv", start); start = ""; break;
6150 case SDATA_EABI: fprintf (file, "%s -msdata=eabi", start); start = ""; break;
6153 if (rs6000_sdata && g_switch_value)
6155 fprintf (file, "%s -G %d", start,
6156 g_switch_value);
6157 start = "";
6159 #endif
6161 if (*start == '\0')
6162 putc ('\n', file);
6165 #ifdef USING_ELFOS_H
6166 if (!(rs6000_default_cpu && rs6000_default_cpu[0])
6167 && !global_options_set.x_rs6000_cpu_index)
6169 fputs ("\t.machine ", asm_out_file);
6170 if ((rs6000_isa_flags & OPTION_MASK_MODULO) != 0)
6171 fputs ("power9\n", asm_out_file);
6172 else if ((rs6000_isa_flags & OPTION_MASK_DIRECT_MOVE) != 0)
6173 fputs ("power8\n", asm_out_file);
6174 else if ((rs6000_isa_flags & OPTION_MASK_POPCNTD) != 0)
6175 fputs ("power7\n", asm_out_file);
6176 else if ((rs6000_isa_flags & OPTION_MASK_CMPB) != 0)
6177 fputs ("power6\n", asm_out_file);
6178 else if ((rs6000_isa_flags & OPTION_MASK_POPCNTB) != 0)
6179 fputs ("power5\n", asm_out_file);
6180 else if ((rs6000_isa_flags & OPTION_MASK_MFCRF) != 0)
6181 fputs ("power4\n", asm_out_file);
6182 else if ((rs6000_isa_flags & OPTION_MASK_POWERPC64) != 0)
6183 fputs ("ppc64\n", asm_out_file);
6184 else
6185 fputs ("ppc\n", asm_out_file);
6187 #endif
6189 if (DEFAULT_ABI == ABI_ELFv2)
6190 fprintf (file, "\t.abiversion 2\n");
6194 /* Return nonzero if this function is known to have a null epilogue. */
6197 int direct_return (void)
6199 if (reload_completed)
6201 rs6000_stack_t *info = rs6000_stack_info ();
6203 if (info->first_gp_reg_save == 32
6204 && info->first_fp_reg_save == 64
6205 && info->first_altivec_reg_save == LAST_ALTIVEC_REGNO + 1
6206 && ! info->lr_save_p
6207 && ! info->cr_save_p
6208 && info->vrsave_size == 0
6209 && ! info->push_p)
6210 return 1;
6213 return 0;
6216 /* Return the number of instructions it takes to form a constant in an
6217 integer register. */
6220 static int num_insns_constant_wide (HOST_WIDE_INT value)
6222 /* signed constant loadable with addi */
6223 if (((unsigned HOST_WIDE_INT) value + 0x8000) < 0x10000)
6224 return 1;
6226 /* constant loadable with addis */
6227 else if ((value & 0xffff) == 0
6228 && (value >> 31 == -1 || value >> 31 == 0))
6229 return 1;
6231 else if (TARGET_POWERPC64)
6233 HOST_WIDE_INT low = ((value & 0xffffffff) ^ 0x80000000) - 0x80000000;
6234 HOST_WIDE_INT high = value >> 31;
6236 if (high == 0 || high == -1)
6237 return 2;
6239 high >>= 1;
6241 if (low == 0)
6242 return num_insns_constant_wide (high) + 1;
6243 else if (high == 0)
6244 return num_insns_constant_wide (low) + 1;
6245 else
6246 return (num_insns_constant_wide (high)
6247 + num_insns_constant_wide (low) + 1);
6250 else
6251 return 2;
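/* Worked example: for the 64-bit constant 0x123456789abcdef0, the low word
   sign-extends to a negative 32-bit value and the high word is 0x12345678,
   so neither half comes for free; each half costs 2 insns, and with the
   extra shift-and-merge insn the total is 2 + 2 + 1 = 5, the classic
   five-instruction sequence (lis, ori, sldi 32, oris, ori).  */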
6255 int num_insns_constant (rtx op, machine_mode mode)
6257 HOST_WIDE_INT low, high;
6259 switch (GET_CODE (op))
6261 case CONST_INT:
6262 if ((INTVAL (op) >> 31) != 0 && (INTVAL (op) >> 31) != -1
6263 && rs6000_is_valid_and_mask (op, mode))
6264 return 2;
6265 else
6266 return num_insns_constant_wide (INTVAL (op));
6268 case CONST_WIDE_INT:
6270 int i;
6271 int ins = CONST_WIDE_INT_NUNITS (op) - 1;
6272 for (i = 0; i < CONST_WIDE_INT_NUNITS (op); i++)
6273 ins += num_insns_constant_wide (CONST_WIDE_INT_ELT (op, i));
6274 return ins;
6277 case CONST_DOUBLE:
6278 if (mode == SFmode || mode == SDmode)
6280 long l;
6282 if (DECIMAL_FLOAT_MODE_P (mode))
6283 REAL_VALUE_TO_TARGET_DECIMAL32
6284 (*CONST_DOUBLE_REAL_VALUE (op), l);
6285 else
6286 REAL_VALUE_TO_TARGET_SINGLE (*CONST_DOUBLE_REAL_VALUE (op), l);
6287 return num_insns_constant_wide ((HOST_WIDE_INT) l);
6290 long l[2];
6291 if (DECIMAL_FLOAT_MODE_P (mode))
6292 REAL_VALUE_TO_TARGET_DECIMAL64 (*CONST_DOUBLE_REAL_VALUE (op), l);
6293 else
6294 REAL_VALUE_TO_TARGET_DOUBLE (*CONST_DOUBLE_REAL_VALUE (op), l);
6295 high = l[WORDS_BIG_ENDIAN == 0];
6296 low = l[WORDS_BIG_ENDIAN != 0];
6298 if (TARGET_32BIT)
6299 return (num_insns_constant_wide (low)
6300 + num_insns_constant_wide (high));
6301 else
6303 if ((high == 0 && low >= 0)
6304 || (high == -1 && low < 0))
6305 return num_insns_constant_wide (low);
6307 else if (rs6000_is_valid_and_mask (op, mode))
6308 return 2;
6310 else if (low == 0)
6311 return num_insns_constant_wide (high) + 1;
6313 else
6314 return (num_insns_constant_wide (high)
6315 + num_insns_constant_wide (low) + 1);
6318 default:
6319 gcc_unreachable ();
6323 /* Interpret element ELT of the CONST_VECTOR OP as an integer value.
6324 If the mode of OP is MODE_VECTOR_INT, this simply returns the
6325 corresponding element of the vector, but for V4SFmode and V2SFmode,
6326 the corresponding "float" is interpreted as an SImode integer. */
6328 HOST_WIDE_INT
6329 const_vector_elt_as_int (rtx op, unsigned int elt)
6331 rtx tmp;
6333 /* We can't handle V2DImode and V2DFmode vector constants here yet. */
6334 gcc_assert (GET_MODE (op) != V2DImode
6335 && GET_MODE (op) != V2DFmode);
6337 tmp = CONST_VECTOR_ELT (op, elt);
6338 if (GET_MODE (op) == V4SFmode
6339 || GET_MODE (op) == V2SFmode)
6340 tmp = gen_lowpart (SImode, tmp);
6341 return INTVAL (tmp);
6344 /* Return true if OP can be synthesized with a particular vspltisb, vspltish
6345 or vspltisw instruction. OP is a CONST_VECTOR. Which instruction is used
6346 depends on STEP and COPIES, one of which will be 1. If COPIES > 1,
6347 all items are set to the same value and contain COPIES replicas of the
6348 vsplt's operand; if STEP > 1, one in STEP elements is set to the vsplt's
6349 operand and the others are set to the value of the operand's msb. */
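/* Two concrete cases: the V8HImode constant { -1, -1, -1, -1, -1, -1, -1, -1 }
   is accepted with STEP 1, COPIES 2, since each halfword holds two replicas
   of the byte -1, and can be generated as vspltisb -1; the V8HImode constant
   { 0, 5, 0, 5, 0, 5, 0, 5 } (big-endian element order) is accepted with
   STEP 2, COPIES 1, since every second halfword is 5 and the rest hold its
   msb fill (0), and can be generated as vspltisw 5.  */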
6351 static bool
6352 vspltis_constant (rtx op, unsigned step, unsigned copies)
6354 machine_mode mode = GET_MODE (op);
6355 machine_mode inner = GET_MODE_INNER (mode);
6357 unsigned i;
6358 unsigned nunits;
6359 unsigned bitsize;
6360 unsigned mask;
6362 HOST_WIDE_INT val;
6363 HOST_WIDE_INT splat_val;
6364 HOST_WIDE_INT msb_val;
6366 if (mode == V2DImode || mode == V2DFmode || mode == V1TImode)
6367 return false;
6369 nunits = GET_MODE_NUNITS (mode);
6370 bitsize = GET_MODE_BITSIZE (inner);
6371 mask = GET_MODE_MASK (inner);
6373 val = const_vector_elt_as_int (op, BYTES_BIG_ENDIAN ? nunits - 1 : 0);
6374 splat_val = val;
6375 msb_val = val >= 0 ? 0 : -1;
6377 /* Construct the value to be splatted, if possible. If not, return 0. */
6378 for (i = 2; i <= copies; i *= 2)
6380 HOST_WIDE_INT small_val;
6381 bitsize /= 2;
6382 small_val = splat_val >> bitsize;
6383 mask >>= bitsize;
6384 if (splat_val != ((HOST_WIDE_INT)
6385 ((unsigned HOST_WIDE_INT) small_val << bitsize)
6386 | (small_val & mask)))
6387 return false;
6388 splat_val = small_val;
6391 /* Check if SPLAT_VAL can really be the operand of a vspltis[bhw]. */
6392 if (EASY_VECTOR_15 (splat_val))
6395 /* Also check if we can splat, and then add the result to itself. Do so if
6396 the value is positive, or if the splat instruction is using OP's mode;
6397 for splat_val < 0, the splat and the add should use the same mode. */
6398 else if (EASY_VECTOR_15_ADD_SELF (splat_val)
6399 && (splat_val >= 0 || (step == 1 && copies == 1)))
6402 /* Also check if we are loading up the most significant bit, which can be done by
6403 loading up -1 and shifting the value left by -1. */
6404 else if (EASY_VECTOR_MSB (splat_val, inner))
6407 else
6408 return false;
6410 /* Check if VAL is present in every STEP-th element, and the
6411 other elements are filled with its most significant bit. */
6412 for (i = 1; i < nunits; ++i)
6414 HOST_WIDE_INT desired_val;
6415 unsigned elt = BYTES_BIG_ENDIAN ? nunits - 1 - i : i;
6416 if ((i & (step - 1)) == 0)
6417 desired_val = val;
6418 else
6419 desired_val = msb_val;
6421 if (desired_val != const_vector_elt_as_int (op, elt))
6422 return false;
6425 return true;
6428 /* Like vspltis_constant, but allow the value to be shifted left with a VSLDOI
6429 instruction, filling in the bottom elements with 0 or -1.
6431 Return 0 if the constant cannot be generated with VSLDOI. Return positive
6432 for the number of zeroes to shift in, or negative for the number of 0xff
6433 bytes to shift in.
6435 OP is a CONST_VECTOR. */
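/* For example, the V4SImode constant { 5, 0, 0, 0 } (big-endian element
   order) is vspltisw 5 followed by a VSLDOI that shifts in twelve zero
   bytes, so this function returns (4 - 1) * 4 = 12; an all-elements-equal
   vector returns 0 because no shift is needed.  */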
6438 int vspltis_shifted (rtx op)
6440 machine_mode mode = GET_MODE (op);
6441 machine_mode inner = GET_MODE_INNER (mode);
6443 unsigned i, j;
6444 unsigned nunits;
6445 unsigned mask;
6447 HOST_WIDE_INT val;
6449 if (mode != V16QImode && mode != V8HImode && mode != V4SImode)
6450 return false;
6452 /* We need to create pseudo registers to do the shift, so don't recognize
6453 shift vector constants after reload. */
6454 if (!can_create_pseudo_p ())
6455 return false;
6457 nunits = GET_MODE_NUNITS (mode);
6458 mask = GET_MODE_MASK (inner);
6460 val = const_vector_elt_as_int (op, BYTES_BIG_ENDIAN ? 0 : nunits - 1);
6462 /* Check if the value can really be the operand of a vspltis[bhw]. */
6463 if (EASY_VECTOR_15 (val))
6466 /* Also check if we are loading up the most significant bit which can be done
6467 by loading up -1 and shifting the value left by -1. */
6468 else if (EASY_VECTOR_MSB (val, inner))
6471 else
6472 return 0;
6474 /* Check if VAL is present in every STEP-th element until we find elements
6475 that are 0 or all 1 bits. */
6476 for (i = 1; i < nunits; ++i)
6478 unsigned elt = BYTES_BIG_ENDIAN ? i : nunits - 1 - i;
6479 HOST_WIDE_INT elt_val = const_vector_elt_as_int (op, elt);
6481 /* If the value isn't the splat value, check for the remaining elements
6482 being 0/-1. */
6483 if (val != elt_val)
6485 if (elt_val == 0)
6487 for (j = i+1; j < nunits; ++j)
6489 unsigned elt2 = BYTES_BIG_ENDIAN ? j : nunits - 1 - j;
6490 if (const_vector_elt_as_int (op, elt2) != 0)
6491 return 0;
6494 return (nunits - i) * GET_MODE_SIZE (inner);
6497 else if ((elt_val & mask) == mask)
6499 for (j = i+1; j < nunits; ++j)
6501 unsigned elt2 = BYTES_BIG_ENDIAN ? j : nunits - 1 - j;
6502 if ((const_vector_elt_as_int (op, elt2) & mask) != mask)
6503 return 0;
6506 return -((nunits - i) * GET_MODE_SIZE (inner));
6509 else
6510 return 0;
6514 /* If all elements are equal, we don't need to do VSLDOI. */
6515 return 0;
6519 /* Return true if OP is of the given MODE and can be synthesized
6520 with a vspltisb, vspltish or vspltisw. */
6522 bool
6523 easy_altivec_constant (rtx op, machine_mode mode)
6525 unsigned step, copies;
6527 if (mode == VOIDmode)
6528 mode = GET_MODE (op);
6529 else if (mode != GET_MODE (op))
6530 return false;
6532 /* V2DI/V2DF was added with VSX. Only allow 0 and all 1's as easy
6533 constants. */
6534 if (mode == V2DFmode)
6535 return zero_constant (op, mode);
6537 else if (mode == V2DImode)
6539 if (GET_CODE (CONST_VECTOR_ELT (op, 0)) != CONST_INT
6540 || GET_CODE (CONST_VECTOR_ELT (op, 1)) != CONST_INT)
6541 return false;
6543 if (zero_constant (op, mode))
6544 return true;
6546 if (INTVAL (CONST_VECTOR_ELT (op, 0)) == -1
6547 && INTVAL (CONST_VECTOR_ELT (op, 1)) == -1)
6548 return true;
6550 return false;
6553 /* V1TImode is a special container for TImode. Ignore for now. */
6554 else if (mode == V1TImode)
6555 return false;
6557 /* Start with a vspltisw. */
6558 step = GET_MODE_NUNITS (mode) / 4;
6559 copies = 1;
6561 if (vspltis_constant (op, step, copies))
6562 return true;
6564 /* Then try with a vspltish. */
6565 if (step == 1)
6566 copies <<= 1;
6567 else
6568 step >>= 1;
6570 if (vspltis_constant (op, step, copies))
6571 return true;
6573 /* And finally a vspltisb. */
6574 if (step == 1)
6575 copies <<= 1;
6576 else
6577 step >>= 1;
6579 if (vspltis_constant (op, step, copies))
6580 return true;
6582 if (vspltis_shifted (op) != 0)
6583 return true;
6585 return false;
6588 /* Generate a VEC_DUPLICATE representing a vspltis[bhw] instruction whose
6589 result is OP. Abort if it is not possible. */
6592 rtx gen_easy_altivec_constant (rtx op)
6594 machine_mode mode = GET_MODE (op);
6595 int nunits = GET_MODE_NUNITS (mode);
6596 rtx val = CONST_VECTOR_ELT (op, BYTES_BIG_ENDIAN ? nunits - 1 : 0);
6597 unsigned step = nunits / 4;
6598 unsigned copies = 1;
6600 /* Start with a vspltisw. */
6601 if (vspltis_constant (op, step, copies))
6602 return gen_rtx_VEC_DUPLICATE (V4SImode, gen_lowpart (SImode, val));
6604 /* Then try with a vspltish. */
6605 if (step == 1)
6606 copies <<= 1;
6607 else
6608 step >>= 1;
6610 if (vspltis_constant (op, step, copies))
6611 return gen_rtx_VEC_DUPLICATE (V8HImode, gen_lowpart (HImode, val));
6613 /* And finally a vspltisb. */
6614 if (step == 1)
6615 copies <<= 1;
6616 else
6617 step >>= 1;
6619 if (vspltis_constant (op, step, copies))
6620 return gen_rtx_VEC_DUPLICATE (V16QImode, gen_lowpart (QImode, val));
6622 gcc_unreachable ();
6625 /* Return true if OP is of the given MODE and can be synthesized with ISA 3.0
6626 instructions (xxspltib, vupkhsb/vextsb2w/vextb2d).
6628 Return the number of instructions needed (1 or 2) through the address
6629 pointed to by NUM_INSNS_PTR.
6631 Return the constant that is being split via CONSTANT_PTR. */
6633 bool
6634 xxspltib_constant_p (rtx op,
6635 machine_mode mode,
6636 int *num_insns_ptr,
6637 int *constant_ptr)
6639 size_t nunits = GET_MODE_NUNITS (mode);
6640 size_t i;
6641 HOST_WIDE_INT value;
6642 rtx element;
6644 /* Set the returned values to out of bound values. */
6645 *num_insns_ptr = -1;
6646 *constant_ptr = 256;
6648 if (!TARGET_P9_VECTOR)
6649 return false;
6651 if (mode == VOIDmode)
6652 mode = GET_MODE (op);
6654 else if (mode != GET_MODE (op) && GET_MODE (op) != VOIDmode)
6655 return false;
6657 /* Handle (vec_duplicate <constant>). */
6658 if (GET_CODE (op) == VEC_DUPLICATE)
6660 if (mode != V16QImode && mode != V8HImode && mode != V4SImode
6661 && mode != V2DImode)
6662 return false;
6664 element = XEXP (op, 0);
6665 if (!CONST_INT_P (element))
6666 return false;
6668 value = INTVAL (element);
6669 if (!IN_RANGE (value, -128, 127))
6670 return false;
6673 /* Handle (const_vector [...]). */
6674 else if (GET_CODE (op) == CONST_VECTOR)
6676 if (mode != V16QImode && mode != V8HImode && mode != V4SImode
6677 && mode != V2DImode)
6678 return false;
6680 element = CONST_VECTOR_ELT (op, 0);
6681 if (!CONST_INT_P (element))
6682 return false;
6684 value = INTVAL (element);
6685 if (!IN_RANGE (value, -128, 127))
6686 return false;
6688 for (i = 1; i < nunits; i++)
6690 element = CONST_VECTOR_ELT (op, i);
6691 if (!CONST_INT_P (element))
6692 return false;
6694 if (value != INTVAL (element))
6695 return false;
6699 /* Handle integer constants being loaded into the upper part of the VSX
6700 register as a scalar. If the value isn't 0/-1, only allow it if the mode
6701 can go in Altivec registers. Prefer VSPLTISW/VUPKHSW over XXSPLTIB. */
6702 else if (CONST_INT_P (op))
6704 if (!SCALAR_INT_MODE_P (mode))
6705 return false;
6707 value = INTVAL (op);
6708 if (!IN_RANGE (value, -128, 127))
6709 return false;
6711 if (!IN_RANGE (value, -1, 0))
6713 if (!(reg_addr[mode].addr_mask[RELOAD_REG_VMX] & RELOAD_REG_VALID))
6714 return false;
6716 if (EASY_VECTOR_15 (value))
6717 return false;
6721 else
6722 return false;
6724 /* See if we could generate vspltisw/vspltish directly instead of xxspltib +
6725 sign extend. Special case 0/-1 to allow getting any VSX register instead
6726 of an Altivec register. */
6727 if ((mode == V4SImode || mode == V8HImode) && !IN_RANGE (value, -1, 0)
6728 && EASY_VECTOR_15 (value))
6729 return false;
6731 /* Return # of instructions and the constant byte for XXSPLTIB. */
6732 if (mode == V16QImode)
6733 *num_insns_ptr = 1;
6735 else if (IN_RANGE (value, -1, 0))
6736 *num_insns_ptr = 1;
6738 else
6739 *num_insns_ptr = 2;
6741 *constant_ptr = (int) value;
6742 return true;
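/* Worked example: splatting 100 across V4SImode passes the checks above
   (100 fits in [-128, 127] but is not a vspltisw immediate, as
   EASY_VECTOR_15 only covers [-16, 15]), so *num_insns_ptr is 2 and
   *constant_ptr is 100: an xxspltib of 100 followed by a vextsb2w sign
   extension.  */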
6745 const char *
6746 output_vec_const_move (rtx *operands)
6748 int shift;
6749 machine_mode mode;
6750 rtx dest, vec;
6752 dest = operands[0];
6753 vec = operands[1];
6754 mode = GET_MODE (dest);
6756 if (TARGET_VSX)
6758 bool dest_vmx_p = ALTIVEC_REGNO_P (REGNO (dest));
6759 int xxspltib_value = 256;
6760 int num_insns = -1;
6762 if (zero_constant (vec, mode))
6764 if (TARGET_P9_VECTOR)
6765 return "xxspltib %x0,0";
6767 else if (dest_vmx_p)
6768 return "vspltisw %0,0";
6770 else
6771 return "xxlxor %x0,%x0,%x0";
6774 if (all_ones_constant (vec, mode))
6776 if (TARGET_P9_VECTOR)
6777 return "xxspltib %x0,255";
6779 else if (dest_vmx_p)
6780 return "vspltisw %0,-1";
6782 else if (TARGET_P8_VECTOR)
6783 return "xxlorc %x0,%x0,%x0";
6785 else
6786 gcc_unreachable ();
6789 if (TARGET_P9_VECTOR
6790 && xxspltib_constant_p (vec, mode, &num_insns, &xxspltib_value))
6792 if (num_insns == 1)
6794 operands[2] = GEN_INT (xxspltib_value & 0xff);
6795 return "xxspltib %x0,%2";
6798 return "#";
6802 if (TARGET_ALTIVEC)
6804 rtx splat_vec;
6806 gcc_assert (ALTIVEC_REGNO_P (REGNO (dest)));
6807 if (zero_constant (vec, mode))
6808 return "vspltisw %0,0";
6810 if (all_ones_constant (vec, mode))
6811 return "vspltisw %0,-1";
6813 /* Do we need to construct a value using VSLDOI? */
6814 shift = vspltis_shifted (vec);
6815 if (shift != 0)
6816 return "#";
6818 splat_vec = gen_easy_altivec_constant (vec);
6819 gcc_assert (GET_CODE (splat_vec) == VEC_DUPLICATE);
6820 operands[1] = XEXP (splat_vec, 0);
6821 if (!EASY_VECTOR_15 (INTVAL (operands[1])))
6822 return "#";
6824 switch (GET_MODE (splat_vec))
6826 case V4SImode:
6827 return "vspltisw %0,%1";
6829 case V8HImode:
6830 return "vspltish %0,%1";
6832 case V16QImode:
6833 return "vspltisb %0,%1";
6835 default:
6836 gcc_unreachable ();
6840 gcc_unreachable ();
6843 /* Initialize the paired-float vector TARGET to VALS. */
6845 void
6846 paired_expand_vector_init (rtx target, rtx vals)
6848 machine_mode mode = GET_MODE (target);
6849 int n_elts = GET_MODE_NUNITS (mode);
6850 int n_var = 0;
6851 rtx x, new_rtx, tmp, constant_op, op1, op2;
6852 int i;
6854 for (i = 0; i < n_elts; ++i)
6856 x = XVECEXP (vals, 0, i);
6857 if (!(CONST_SCALAR_INT_P (x) || CONST_DOUBLE_P (x) || CONST_FIXED_P (x)))
6858 ++n_var;
6860 if (n_var == 0)
6862 /* Load from constant pool. */
6863 emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
6864 return;
6867 if (n_var == 2)
6869 /* The vector is initialized only with non-constants. */
6870 new_rtx = gen_rtx_VEC_CONCAT (V2SFmode, XVECEXP (vals, 0, 0),
6871 XVECEXP (vals, 0, 1));
6873 emit_move_insn (target, new_rtx);
6874 return;
6877 /* One field is non-constant and the other one is a constant. Load the
6878 constant from the constant pool and use the ps_merge instruction to
6879 construct the whole vector. */
6880 op1 = XVECEXP (vals, 0, 0);
6881 op2 = XVECEXP (vals, 0, 1);
6883 constant_op = (CONSTANT_P (op1)) ? op1 : op2;
6885 tmp = gen_reg_rtx (GET_MODE (constant_op));
6886 emit_move_insn (tmp, constant_op);
6888 if (CONSTANT_P (op1))
6889 new_rtx = gen_rtx_VEC_CONCAT (V2SFmode, tmp, op2);
6890 else
6891 new_rtx = gen_rtx_VEC_CONCAT (V2SFmode, op1, tmp);
6893 emit_move_insn (target, new_rtx);
6896 void
6897 paired_expand_vector_move (rtx operands[])
6899 rtx op0 = operands[0], op1 = operands[1];
6901 emit_move_insn (op0, op1);
6904 /* Emit vector compare for code RCODE. DEST is destination, OP1 and
6905 OP2 are two VEC_COND_EXPR operands, CC_OP0 and CC_OP1 are the two
6906 operands for the relation operation COND. This is a recursive
6907 function. */
6909 static void
6910 paired_emit_vector_compare (enum rtx_code rcode,
6911 rtx dest, rtx op0, rtx op1,
6912 rtx cc_op0, rtx cc_op1)
6914 rtx tmp = gen_reg_rtx (V2SFmode);
6915 rtx tmp1, max, min;
6917 gcc_assert (TARGET_PAIRED_FLOAT);
6918 gcc_assert (GET_MODE (op0) == GET_MODE (op1));
6920 switch (rcode)
6922 case LT:
6923 case LTU:
6924 paired_emit_vector_compare (GE, dest, op1, op0, cc_op0, cc_op1);
6925 return;
6926 case GE:
6927 case GEU:
6928 emit_insn (gen_subv2sf3 (tmp, cc_op0, cc_op1));
6929 emit_insn (gen_selv2sf4 (dest, tmp, op0, op1, CONST0_RTX (SFmode)));
6930 return;
6931 case LE:
6932 case LEU:
6933 paired_emit_vector_compare (GE, dest, op0, op1, cc_op1, cc_op0);
6934 return;
6935 case GT:
6936 paired_emit_vector_compare (LE, dest, op1, op0, cc_op0, cc_op1);
6937 return;
6938 case EQ:
6939 tmp1 = gen_reg_rtx (V2SFmode);
6940 max = gen_reg_rtx (V2SFmode);
6941 min = gen_reg_rtx (V2SFmode);
6942 gen_reg_rtx (V2SFmode);
6944 emit_insn (gen_subv2sf3 (tmp, cc_op0, cc_op1));
6945 emit_insn (gen_selv2sf4
6946 (max, tmp, cc_op0, cc_op1, CONST0_RTX (SFmode)));
6947 emit_insn (gen_subv2sf3 (tmp, cc_op1, cc_op0));
6948 emit_insn (gen_selv2sf4
6949 (min, tmp, cc_op0, cc_op1, CONST0_RTX (SFmode)));
6950 emit_insn (gen_subv2sf3 (tmp1, min, max));
6951 emit_insn (gen_selv2sf4 (dest, tmp1, op0, op1, CONST0_RTX (SFmode)));
6952 return;
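/* The EQ case above is a min/max trick (assuming gen_selv2sf4 picks its
   second operand per lane when the first is >= 0): max is sel (a - b, a, b)
   and min is sel (b - a, a, b), so min - max is zero exactly where a == b
   and negative elsewhere, and the final sel picks op0 only in the equal
   lanes.  */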
6953 case NE:
6954 paired_emit_vector_compare (EQ, dest, op1, op0, cc_op0, cc_op1);
6955 return;
6956 case UNLE:
6957 paired_emit_vector_compare (LE, dest, op1, op0, cc_op0, cc_op1);
6958 return;
6959 case UNLT:
6960 paired_emit_vector_compare (LT, dest, op1, op0, cc_op0, cc_op1);
6961 return;
6962 case UNGE:
6963 paired_emit_vector_compare (GE, dest, op1, op0, cc_op0, cc_op1);
6964 return;
6965 case UNGT:
6966 paired_emit_vector_compare (GT, dest, op1, op0, cc_op0, cc_op1);
6967 return;
6968 default:
6969 gcc_unreachable ();
6972 return;
6975 /* Emit vector conditional expression.
6976 DEST is destination. OP1 and OP2 are two VEC_COND_EXPR operands.
6977 CC_OP0 and CC_OP1 are the two operands for the relation operation COND. */
6980 int paired_emit_vector_cond_expr (rtx dest, rtx op1, rtx op2,
6981 rtx cond, rtx cc_op0, rtx cc_op1)
6983 enum rtx_code rcode = GET_CODE (cond);
6985 if (!TARGET_PAIRED_FLOAT)
6986 return 0;
6988 paired_emit_vector_compare (rcode, dest, op1, op2, cc_op0, cc_op1);
6990 return 1;
6993 /* Initialize vector TARGET to VALS. */
6995 void
6996 rs6000_expand_vector_init (rtx target, rtx vals)
6998 machine_mode mode = GET_MODE (target);
6999 machine_mode inner_mode = GET_MODE_INNER (mode);
7000 int n_elts = GET_MODE_NUNITS (mode);
7001 int n_var = 0, one_var = -1;
7002 bool all_same = true, all_const_zero = true;
7003 rtx x, mem;
7004 int i;
7006 for (i = 0; i < n_elts; ++i)
7008 x = XVECEXP (vals, 0, i);
7009 if (!(CONST_SCALAR_INT_P (x) || CONST_DOUBLE_P (x) || CONST_FIXED_P (x)))
7010 ++n_var, one_var = i;
7011 else if (x != CONST0_RTX (inner_mode))
7012 all_const_zero = false;
7014 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
7015 all_same = false;
7018 if (n_var == 0)
7020 rtx const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0));
7021 bool int_vector_p = (GET_MODE_CLASS (mode) == MODE_VECTOR_INT);
7022 if ((int_vector_p || TARGET_VSX) && all_const_zero)
7024 /* Zero register. */
7025 emit_move_insn (target, CONST0_RTX (mode));
7026 return;
7028 else if (int_vector_p && easy_vector_constant (const_vec, mode))
7030 /* Splat immediate. */
7031 emit_insn (gen_rtx_SET (target, const_vec));
7032 return;
7034 else
7036 /* Load from constant pool. */
7037 emit_move_insn (target, const_vec);
7038 return;
7042 /* Double word values on VSX can use xxpermdi or lxvdsx. */
7043 if (VECTOR_MEM_VSX_P (mode) && (mode == V2DFmode || mode == V2DImode))
7045 rtx op[2];
7046 size_t i;
7047 size_t num_elements = all_same ? 1 : 2;
7048 for (i = 0; i < num_elements; i++)
7050 op[i] = XVECEXP (vals, 0, i);
7051 /* Just in case there is a SUBREG with a smaller mode, do a
7052 conversion. */
7053 if (GET_MODE (op[i]) != inner_mode)
7055 rtx tmp = gen_reg_rtx (inner_mode);
7056 convert_move (tmp, op[i], 0);
7057 op[i] = tmp;
7059 /* Allow load with splat double word. */
7060 else if (MEM_P (op[i]))
7062 if (!all_same)
7063 op[i] = force_reg (inner_mode, op[i]);
7065 else if (!REG_P (op[i]))
7066 op[i] = force_reg (inner_mode, op[i]);
7069 if (all_same)
7071 if (mode == V2DFmode)
7072 emit_insn (gen_vsx_splat_v2df (target, op[0]));
7073 else
7074 emit_insn (gen_vsx_splat_v2di (target, op[0]));
7076 else
7078 if (mode == V2DFmode)
7079 emit_insn (gen_vsx_concat_v2df (target, op[0], op[1]));
7080 else
7081 emit_insn (gen_vsx_concat_v2di (target, op[0], op[1]));
7083 return;
7086 /* Special case initializing vector int if we are on 64-bit systems with
7087 direct move or we have the ISA 3.0 instructions. */
7088 if (mode == V4SImode && VECTOR_MEM_VSX_P (V4SImode)
7089 && TARGET_DIRECT_MOVE_64BIT)
7091 if (all_same)
7093 rtx element0 = XVECEXP (vals, 0, 0);
7094 if (MEM_P (element0))
7095 element0 = rs6000_address_for_fpconvert (element0);
7096 else
7097 element0 = force_reg (SImode, element0);
7099 if (TARGET_P9_VECTOR)
7100 emit_insn (gen_vsx_splat_v4si (target, element0));
7101 else
7103 rtx tmp = gen_reg_rtx (DImode);
7104 emit_insn (gen_zero_extendsidi2 (tmp, element0));
7105 emit_insn (gen_vsx_splat_v4si_di (target, tmp));
7107 return;
7109 else
7111 rtx elements[4];
7112 size_t i;
7114 for (i = 0; i < 4; i++)
7116 elements[i] = XVECEXP (vals, 0, i);
7117 if (!CONST_INT_P (elements[i]) && !REG_P (elements[i]))
7118 elements[i] = copy_to_mode_reg (SImode, elements[i]);
7121 emit_insn (gen_vsx_init_v4si (target, elements[0], elements[1],
7122 elements[2], elements[3]));
7123 return;
7127 /* With single-precision floating point on VSX, we know that internally
7128 single precision is actually represented as a double.  Either make two
7129 V2DF vectors and convert those vectors to single precision, or do one
7130 conversion and splat the result to the other elements. */
7131 if (mode == V4SFmode && VECTOR_MEM_VSX_P (V4SFmode))
7133 if (all_same)
7135 rtx element0 = XVECEXP (vals, 0, 0);
7137 if (TARGET_P9_VECTOR)
7139 if (MEM_P (element0))
7140 element0 = rs6000_address_for_fpconvert (element0);
7142 emit_insn (gen_vsx_splat_v4sf (target, element0));
7145 else
7147 rtx freg = gen_reg_rtx (V4SFmode);
7148 rtx sreg = force_reg (SFmode, element0);
7149 rtx cvt = (TARGET_XSCVDPSPN
7150 ? gen_vsx_xscvdpspn_scalar (freg, sreg)
7151 : gen_vsx_xscvdpsp_scalar (freg, sreg));
7153 emit_insn (cvt);
7154 emit_insn (gen_vsx_xxspltw_v4sf_direct (target, freg,
7155 const0_rtx));
7158 else
7160 rtx dbl_even = gen_reg_rtx (V2DFmode);
7161 rtx dbl_odd = gen_reg_rtx (V2DFmode);
7162 rtx flt_even = gen_reg_rtx (V4SFmode);
7163 rtx flt_odd = gen_reg_rtx (V4SFmode);
7164 rtx op0 = force_reg (SFmode, XVECEXP (vals, 0, 0));
7165 rtx op1 = force_reg (SFmode, XVECEXP (vals, 0, 1));
7166 rtx op2 = force_reg (SFmode, XVECEXP (vals, 0, 2));
7167 rtx op3 = force_reg (SFmode, XVECEXP (vals, 0, 3));
7169 /* Use VMRGEW if we can instead of doing a permute. */
7170 if (TARGET_P8_VECTOR)
7172 emit_insn (gen_vsx_concat_v2sf (dbl_even, op0, op2));
7173 emit_insn (gen_vsx_concat_v2sf (dbl_odd, op1, op3));
7174 emit_insn (gen_vsx_xvcvdpsp (flt_even, dbl_even));
7175 emit_insn (gen_vsx_xvcvdpsp (flt_odd, dbl_odd));
7176 if (BYTES_BIG_ENDIAN)
7177 emit_insn (gen_p8_vmrgew_v4sf_direct (target, flt_even, flt_odd));
7178 else
7179 emit_insn (gen_p8_vmrgew_v4sf_direct (target, flt_odd, flt_even));
7181 else
7183 emit_insn (gen_vsx_concat_v2sf (dbl_even, op0, op1));
7184 emit_insn (gen_vsx_concat_v2sf (dbl_odd, op2, op3));
7185 emit_insn (gen_vsx_xvcvdpsp (flt_even, dbl_even));
7186 emit_insn (gen_vsx_xvcvdpsp (flt_odd, dbl_odd));
7187 rs6000_expand_extract_even (target, flt_even, flt_odd);
7190 return;
7193 /* Special case splat initialization of vector short/char if we are on
7194 64-bit systems with direct move. */
7195 if (all_same && TARGET_DIRECT_MOVE_64BIT
7196 && (mode == V16QImode || mode == V8HImode))
7198 rtx op0 = XVECEXP (vals, 0, 0);
7199 rtx di_tmp = gen_reg_rtx (DImode);
7201 if (!REG_P (op0))
7202 op0 = force_reg (GET_MODE_INNER (mode), op0);
7204 if (mode == V16QImode)
7206 emit_insn (gen_zero_extendqidi2 (di_tmp, op0));
7207 emit_insn (gen_vsx_vspltb_di (target, di_tmp));
7208 return;
7211 if (mode == V8HImode)
7213 emit_insn (gen_zero_extendhidi2 (di_tmp, op0));
7214 emit_insn (gen_vsx_vsplth_di (target, di_tmp));
7215 return;
7219 /* Store value to stack temp.  Load vector element.  Splat.  However, splat
7220 of 64-bit items is not supported on AltiVec. */
7221 if (all_same && GET_MODE_SIZE (inner_mode) <= 4)
7223 mem = assign_stack_temp (mode, GET_MODE_SIZE (inner_mode));
7224 emit_move_insn (adjust_address_nv (mem, inner_mode, 0),
7225 XVECEXP (vals, 0, 0));
7226 x = gen_rtx_UNSPEC (VOIDmode,
7227 gen_rtvec (1, const0_rtx), UNSPEC_LVE);
7228 emit_insn (gen_rtx_PARALLEL (VOIDmode,
7229 gen_rtvec (2,
7230 gen_rtx_SET (target, mem),
7231 x)));
7232 x = gen_rtx_VEC_SELECT (inner_mode, target,
7233 gen_rtx_PARALLEL (VOIDmode,
7234 gen_rtvec (1, const0_rtx)));
7235 emit_insn (gen_rtx_SET (target, gen_rtx_VEC_DUPLICATE (mode, x)));
7236 return;
7239 /* One field is non-constant. Load constant then overwrite
7240 varying field. */
7241 if (n_var == 1)
7243 rtx copy = copy_rtx (vals);
7245 /* Load constant part of vector, substitute neighboring value for
7246 varying element. */
7247 XVECEXP (copy, 0, one_var) = XVECEXP (vals, 0, (one_var + 1) % n_elts);
7248 rs6000_expand_vector_init (target, copy);
7250 /* Insert variable. */
7251 rs6000_expand_vector_set (target, XVECEXP (vals, 0, one_var), one_var);
7252 return;
7255 /* Construct the vector in memory one field at a time
7256 and load the whole vector. */
7257 mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
7258 for (i = 0; i < n_elts; i++)
7259 emit_move_insn (adjust_address_nv (mem, inner_mode,
7260 i * GET_MODE_SIZE (inner_mode)),
7261 XVECEXP (vals, 0, i));
7262 emit_move_insn (target, mem);
7265 /* Set field ELT of TARGET to VAL. */
7267 void
7268 rs6000_expand_vector_set (rtx target, rtx val, int elt)
7270 machine_mode mode = GET_MODE (target);
7271 machine_mode inner_mode = GET_MODE_INNER (mode);
7272 rtx reg = gen_reg_rtx (mode);
7273 rtx mask, mem, x;
7274 int width = GET_MODE_SIZE (inner_mode);
7275 int i;
7277 val = force_reg (GET_MODE (val), val);
7279 if (VECTOR_MEM_VSX_P (mode))
7281 rtx insn = NULL_RTX;
7282 rtx elt_rtx = GEN_INT (elt);
7284 if (mode == V2DFmode)
7285 insn = gen_vsx_set_v2df (target, target, val, elt_rtx);
7287 else if (mode == V2DImode)
7288 insn = gen_vsx_set_v2di (target, target, val, elt_rtx);
7290 else if (TARGET_P9_VECTOR && TARGET_POWERPC64)
7292 if (mode == V4SImode)
7293 insn = gen_vsx_set_v4si_p9 (target, target, val, elt_rtx);
7294 else if (mode == V8HImode)
7295 insn = gen_vsx_set_v8hi_p9 (target, target, val, elt_rtx);
7296 else if (mode == V16QImode)
7297 insn = gen_vsx_set_v16qi_p9 (target, target, val, elt_rtx);
7298 else if (mode == V4SFmode)
7299 insn = gen_vsx_set_v4sf_p9 (target, target, val, elt_rtx);
7302 if (insn)
7304 emit_insn (insn);
7305 return;
7309 /* Simplify setting single element vectors like V1TImode. */
7310 if (GET_MODE_SIZE (mode) == GET_MODE_SIZE (inner_mode) && elt == 0)
7312 emit_move_insn (target, gen_lowpart (mode, val));
7313 return;
7316 /* Load single variable value. */
7317 mem = assign_stack_temp (mode, GET_MODE_SIZE (inner_mode));
7318 emit_move_insn (adjust_address_nv (mem, inner_mode, 0), val);
7319 x = gen_rtx_UNSPEC (VOIDmode,
7320 gen_rtvec (1, const0_rtx), UNSPEC_LVE);
7321 emit_insn (gen_rtx_PARALLEL (VOIDmode,
7322 gen_rtvec (2,
7323 gen_rtx_SET (reg, mem),
7324 x)));
7326 /* Linear sequence. */
7327 mask = gen_rtx_PARALLEL (V16QImode, rtvec_alloc (16));
7328 for (i = 0; i < 16; ++i)
7329 XVECEXP (mask, 0, i) = GEN_INT (i);
7331 /* Set permute mask to insert element into target. */
7332 for (i = 0; i < width; ++i)
7333 XVECEXP (mask, 0, elt*width + i)
7334 = GEN_INT (i + 0x10);
7335 x = gen_rtx_CONST_VECTOR (V16QImode, XVEC (mask, 0));
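/* For example (illustrative, big-endian VPERM numbering): inserting
   element 1 of a V4SI vector yields the byte selector
   { 0,1,2,3, 16,17,18,19, 8,...,15 }; selector values 16-19 pick up
   the first four bytes of REG (the new value), everything else
   copies TARGET through unchanged.  */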
7337 if (BYTES_BIG_ENDIAN)
7338 x = gen_rtx_UNSPEC (mode,
7339 gen_rtvec (3, target, reg,
7340 force_reg (V16QImode, x)),
7341 UNSPEC_VPERM);
7342 else
7344 if (TARGET_P9_VECTOR)
7345 x = gen_rtx_UNSPEC (mode,
7346 gen_rtvec (3, target, reg,
7347 force_reg (V16QImode, x)),
7348 UNSPEC_VPERMR);
7349 else
7351 /* Invert selector. We prefer to generate VNAND on P8 so
7352 that future fusion opportunities can kick in, but must
7353 generate VNOR elsewhere. */
7354 rtx notx = gen_rtx_NOT (V16QImode, force_reg (V16QImode, x));
7355 rtx iorx = (TARGET_P8_VECTOR
7356 ? gen_rtx_IOR (V16QImode, notx, notx)
7357 : gen_rtx_AND (V16QImode, notx, notx));
7358 rtx tmp = gen_reg_rtx (V16QImode);
7359 emit_insn (gen_rtx_SET (tmp, iorx));
7361 /* Permute with operands reversed and adjusted selector. */
7362 x = gen_rtx_UNSPEC (mode, gen_rtvec (3, reg, target, tmp),
7363 UNSPEC_VPERM);
7367 emit_insn (gen_rtx_SET (target, x));
7370 /* Extract field ELT from VEC into TARGET. */
7372 void
7373 rs6000_expand_vector_extract (rtx target, rtx vec, rtx elt)
7375 machine_mode mode = GET_MODE (vec);
7376 machine_mode inner_mode = GET_MODE_INNER (mode);
7377 rtx mem;
7379 if (VECTOR_MEM_VSX_P (mode) && CONST_INT_P (elt))
7381 switch (mode)
7383 default:
7384 break;
7385 case V1TImode:
7386 gcc_assert (INTVAL (elt) == 0 && inner_mode == TImode);
7387 emit_move_insn (target, gen_lowpart (TImode, vec));
7388 break;
7389 case V2DFmode:
7390 emit_insn (gen_vsx_extract_v2df (target, vec, elt));
7391 return;
7392 case V2DImode:
7393 emit_insn (gen_vsx_extract_v2di (target, vec, elt));
7394 return;
7395 case V4SFmode:
7396 emit_insn (gen_vsx_extract_v4sf (target, vec, elt));
7397 return;
7398 case V16QImode:
7399 if (TARGET_DIRECT_MOVE_64BIT)
7401 emit_insn (gen_vsx_extract_v16qi (target, vec, elt));
7402 return;
7404 else
7405 break;
7406 case V8HImode:
7407 if (TARGET_DIRECT_MOVE_64BIT)
7409 emit_insn (gen_vsx_extract_v8hi (target, vec, elt));
7410 return;
7412 else
7413 break;
7414 case V4SImode:
7415 if (TARGET_DIRECT_MOVE_64BIT)
7417 emit_insn (gen_vsx_extract_v4si (target, vec, elt));
7418 return;
7420 break;
7423 else if (VECTOR_MEM_VSX_P (mode) && !CONST_INT_P (elt)
7424 && TARGET_DIRECT_MOVE_64BIT)
7426 if (GET_MODE (elt) != DImode)
7428 rtx tmp = gen_reg_rtx (DImode);
7429 convert_move (tmp, elt, 0);
7430 elt = tmp;
7432 else if (!REG_P (elt))
7433 elt = force_reg (DImode, elt);
7435 switch (mode)
7437 case V2DFmode:
7438 emit_insn (gen_vsx_extract_v2df_var (target, vec, elt));
7439 return;
7441 case V2DImode:
7442 emit_insn (gen_vsx_extract_v2di_var (target, vec, elt));
7443 return;
7445 case V4SFmode:
7446 emit_insn (gen_vsx_extract_v4sf_var (target, vec, elt));
7447 return;
7449 case V4SImode:
7450 emit_insn (gen_vsx_extract_v4si_var (target, vec, elt));
7451 return;
7453 case V8HImode:
7454 emit_insn (gen_vsx_extract_v8hi_var (target, vec, elt));
7455 return;
7457 case V16QImode:
7458 emit_insn (gen_vsx_extract_v16qi_var (target, vec, elt));
7459 return;
7461 default:
7462 gcc_unreachable ();
7466 gcc_assert (CONST_INT_P (elt));
7468 /* Allocate mode-sized buffer. */
7469 mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
7471 emit_move_insn (mem, vec);
7473 /* Add offset to field within buffer matching vector element. */
7474 mem = adjust_address_nv (mem, inner_mode,
7475 INTVAL (elt) * GET_MODE_SIZE (inner_mode));
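/* E.g. extracting element 3 of a V4SI vector reads the SImode word at
   byte offset 3 * 4 = 12 of the spilled vector.  */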
7477 emit_move_insn (target, adjust_address_nv (mem, inner_mode, 0));
7480 /* Helper function to return the register number of a RTX. */
7481 static inline int
7482 regno_or_subregno (rtx op)
7484 if (REG_P (op))
7485 return REGNO (op);
7486 else if (SUBREG_P (op))
7487 return subreg_regno (op);
7488 else
7489 gcc_unreachable ();
7492 /* Adjust a memory address (MEM) of a vector type to point to a scalar field
7493 within the vector (ELEMENT) with a mode (SCALAR_MODE). Use a base register
7494 temporary (BASE_TMP) to fixup the address. Return the new memory address
7495 that is valid for reads or writes to a given register (SCALAR_REG). */
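/* A sketch: for a V4SI value in memory at address (reg R), constant
   ELEMENT 2 and SCALAR_MODE SImode give the address (plus R 8); a
   variable ELEMENT is instead shifted left by log2 (4) into BASE_TMP
   and the sum is used.  */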
7498 rs6000_adjust_vec_address (rtx scalar_reg,
7499 rtx mem,
7500 rtx element,
7501 rtx base_tmp,
7502 machine_mode scalar_mode)
7504 unsigned scalar_size = GET_MODE_SIZE (scalar_mode);
7505 rtx addr = XEXP (mem, 0);
7506 rtx element_offset;
7507 rtx new_addr;
7508 bool valid_addr_p;
7510 /* Vector addresses should not have PRE_INC, PRE_DEC, or PRE_MODIFY. */
7511 gcc_assert (GET_RTX_CLASS (GET_CODE (addr)) != RTX_AUTOINC);
7513 /* Calculate what we need to add to the address to get the element
7514 address. */
7515 if (CONST_INT_P (element))
7516 element_offset = GEN_INT (INTVAL (element) * scalar_size);
7517 else
7519 int byte_shift = exact_log2 (scalar_size);
7520 gcc_assert (byte_shift >= 0);
7522 if (byte_shift == 0)
7523 element_offset = element;
7525 else
7527 if (TARGET_POWERPC64)
7528 emit_insn (gen_ashldi3 (base_tmp, element, GEN_INT (byte_shift)));
7529 else
7530 emit_insn (gen_ashlsi3 (base_tmp, element, GEN_INT (byte_shift)));
7532 element_offset = base_tmp;
7536 /* Create the new address pointing to the element within the vector. If we
7537 are adding 0, we don't have to change the address. */
7538 if (element_offset == const0_rtx)
7539 new_addr = addr;
7541 /* A simple indirect address can be converted into a reg + offset
7542 address. */
7543 else if (REG_P (addr) || SUBREG_P (addr))
7544 new_addr = gen_rtx_PLUS (Pmode, addr, element_offset);
7546 /* Optimize D-FORM addresses with constant offset with a constant element, to
7547 include the element offset in the address directly. */
7548 else if (GET_CODE (addr) == PLUS)
7550 rtx op0 = XEXP (addr, 0);
7551 rtx op1 = XEXP (addr, 1);
7552 rtx insn;
7554 gcc_assert (REG_P (op0) || SUBREG_P (op0));
7555 if (CONST_INT_P (op1) && CONST_INT_P (element_offset))
7557 HOST_WIDE_INT offset = INTVAL (op1) + INTVAL (element_offset);
7558 rtx offset_rtx = GEN_INT (offset);
7560 if (IN_RANGE (offset, -32768, 32767)
7561 && (scalar_size < 8 || (offset & 0x3) == 0))
7562 new_addr = gen_rtx_PLUS (Pmode, op0, offset_rtx);
7563 else
7565 emit_move_insn (base_tmp, offset_rtx);
7566 new_addr = gen_rtx_PLUS (Pmode, op0, base_tmp);
7569 else
7571 bool op1_reg_p = (REG_P (op1) || SUBREG_P (op1));
7572 bool ele_reg_p = (REG_P (element_offset) || SUBREG_P (element_offset));
7574 /* Note, ADDI requires the register being added to be a base
7575 register. If the register was R0, load it up into the temporary
7576 and do the add. */
7577 if (op1_reg_p
7578 && (ele_reg_p || reg_or_subregno (op1) != FIRST_GPR_REGNO))
7580 insn = gen_add3_insn (base_tmp, op1, element_offset);
7581 gcc_assert (insn != NULL_RTX);
7582 emit_insn (insn);
7585 else if (ele_reg_p
7586 && reg_or_subregno (element_offset) != FIRST_GPR_REGNO)
7588 insn = gen_add3_insn (base_tmp, element_offset, op1);
7589 gcc_assert (insn != NULL_RTX);
7590 emit_insn (insn);
7593 else
7595 emit_move_insn (base_tmp, op1);
7596 emit_insn (gen_add2_insn (base_tmp, element_offset));
7599 new_addr = gen_rtx_PLUS (Pmode, op0, base_tmp);
7603 else
7605 emit_move_insn (base_tmp, addr);
7606 new_addr = gen_rtx_PLUS (Pmode, base_tmp, element_offset);
7609 /* If we have a PLUS, we need to see whether the particular register class
7610 allows for D-FORM or X-FORM addressing. */
7611 if (GET_CODE (new_addr) == PLUS)
7613 rtx op1 = XEXP (new_addr, 1);
7614 addr_mask_type addr_mask;
7615 int scalar_regno = regno_or_subregno (scalar_reg);
7617 gcc_assert (scalar_regno < FIRST_PSEUDO_REGISTER);
7618 if (INT_REGNO_P (scalar_regno))
7619 addr_mask = reg_addr[scalar_mode].addr_mask[RELOAD_REG_GPR];
7621 else if (FP_REGNO_P (scalar_regno))
7622 addr_mask = reg_addr[scalar_mode].addr_mask[RELOAD_REG_FPR];
7624 else if (ALTIVEC_REGNO_P (scalar_regno))
7625 addr_mask = reg_addr[scalar_mode].addr_mask[RELOAD_REG_VMX];
7627 else
7628 gcc_unreachable ();
7630 if (REG_P (op1) || SUBREG_P (op1))
7631 valid_addr_p = (addr_mask & RELOAD_REG_INDEXED) != 0;
7632 else
7633 valid_addr_p = (addr_mask & RELOAD_REG_OFFSET) != 0;
7636 else if (REG_P (new_addr) || SUBREG_P (new_addr))
7637 valid_addr_p = true;
7639 else
7640 valid_addr_p = false;
7642 if (!valid_addr_p)
7644 emit_move_insn (base_tmp, new_addr);
7645 new_addr = base_tmp;
7648 return change_address (mem, scalar_mode, new_addr);
7651 /* Split a variable vec_extract operation into the component instructions. */
7653 void
7654 rs6000_split_vec_extract_var (rtx dest, rtx src, rtx element, rtx tmp_gpr,
7655 rtx tmp_altivec)
7657 machine_mode mode = GET_MODE (src);
7658 machine_mode scalar_mode = GET_MODE (dest);
7659 unsigned scalar_size = GET_MODE_SIZE (scalar_mode);
7660 int byte_shift = exact_log2 (scalar_size);
7662 gcc_assert (byte_shift >= 0);
7664 /* If we are given a memory address, optimize to load just the element. We
7665 don't have to adjust the vector element number on little endian
7666 systems. */
7667 if (MEM_P (src))
7669 gcc_assert (REG_P (tmp_gpr));
7670 emit_move_insn (dest, rs6000_adjust_vec_address (dest, src, element,
7671 tmp_gpr, scalar_mode));
7672 return;
7675 else if (REG_P (src) || SUBREG_P (src))
7677 int bit_shift = byte_shift + 3;
7678 rtx element2;
7679 int dest_regno = regno_or_subregno (dest);
7680 int src_regno = regno_or_subregno (src);
7681 int element_regno = regno_or_subregno (element);
7683 gcc_assert (REG_P (tmp_gpr));
7685 /* See if we want to generate VEXTU{B,H,W}{L,R}X if the destination is in
7686 a general purpose register. */
7687 if (TARGET_P9_VECTOR
7688 && (mode == V16QImode || mode == V8HImode || mode == V4SImode)
7689 && INT_REGNO_P (dest_regno)
7690 && ALTIVEC_REGNO_P (src_regno)
7691 && INT_REGNO_P (element_regno))
7693 rtx dest_si = gen_rtx_REG (SImode, dest_regno);
7694 rtx element_si = gen_rtx_REG (SImode, element_regno);
7696 if (mode == V16QImode)
7697 emit_insn (VECTOR_ELT_ORDER_BIG
7698 ? gen_vextublx (dest_si, element_si, src)
7699 : gen_vextubrx (dest_si, element_si, src));
7701 else if (mode == V8HImode)
7703 rtx tmp_gpr_si = gen_rtx_REG (SImode, REGNO (tmp_gpr));
7704 emit_insn (gen_ashlsi3 (tmp_gpr_si, element_si, const1_rtx));
7705 emit_insn (VECTOR_ELT_ORDER_BIG
7706 ? gen_vextuhlx (dest_si, tmp_gpr_si, src)
7707 : gen_vextuhrx (dest_si, tmp_gpr_si, src));
7711 else
7713 rtx tmp_gpr_si = gen_rtx_REG (SImode, REGNO (tmp_gpr));
7714 emit_insn (gen_ashlsi3 (tmp_gpr_si, element_si, const2_rtx));
7715 emit_insn (VECTOR_ELT_ORDER_BIG
7716 ? gen_vextuwlx (dest_si, tmp_gpr_si, src)
7717 : gen_vextuwrx (dest_si, tmp_gpr_si, src));
7720 return;
7724 gcc_assert (REG_P (tmp_altivec));
7726 /* For little endian, adjust the element ordering.  For V2DI/V2DF we can
7727 use an XOR; otherwise we need to subtract.  The shift amount is chosen
7728 so that VSLO will shift the element into the upper position (adding 3
7729 converts a byte shift into a bit shift). */
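/* Worked example (illustrative): for little-endian V16QI, element N
   lives at big-endian byte position 15 - (N & 15); the AND/SUB pair
   below computes exactly that before the index is converted into the
   bit count VSLO expects.  */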
7730 if (scalar_size == 8)
7732 if (!VECTOR_ELT_ORDER_BIG)
7734 emit_insn (gen_xordi3 (tmp_gpr, element, const1_rtx));
7735 element2 = tmp_gpr;
7737 else
7738 element2 = element;
7740 /* Generate RLDIC directly to shift left 6 bits and retrieve 1
7741 bit. */
7742 emit_insn (gen_rtx_SET (tmp_gpr,
7743 gen_rtx_AND (DImode,
7744 gen_rtx_ASHIFT (DImode,
7745 element2,
7746 GEN_INT (6)),
7747 GEN_INT (64))));
7749 else
7751 if (!VECTOR_ELT_ORDER_BIG)
7753 rtx num_ele_m1 = GEN_INT (GET_MODE_NUNITS (mode) - 1);
7755 emit_insn (gen_anddi3 (tmp_gpr, element, num_ele_m1));
7756 emit_insn (gen_subdi3 (tmp_gpr, num_ele_m1, tmp_gpr));
7757 element2 = tmp_gpr;
7759 else
7760 element2 = element;
7762 emit_insn (gen_ashldi3 (tmp_gpr, element2, GEN_INT (bit_shift)));
7765 /* Get the value into the lower byte of the Altivec register where VSLO
7766 expects it. */
7767 if (TARGET_P9_VECTOR)
7768 emit_insn (gen_vsx_splat_v2di (tmp_altivec, tmp_gpr));
7769 else if (can_create_pseudo_p ())
7770 emit_insn (gen_vsx_concat_v2di (tmp_altivec, tmp_gpr, tmp_gpr));
7771 else
7773 rtx tmp_di = gen_rtx_REG (DImode, REGNO (tmp_altivec));
7774 emit_move_insn (tmp_di, tmp_gpr);
7775 emit_insn (gen_vsx_concat_v2di (tmp_altivec, tmp_di, tmp_di));
7778 /* Do the VSLO to get the value into the final location. */
7779 switch (mode)
7781 case V2DFmode:
7782 emit_insn (gen_vsx_vslo_v2df (dest, src, tmp_altivec));
7783 return;
7785 case V2DImode:
7786 emit_insn (gen_vsx_vslo_v2di (dest, src, tmp_altivec));
7787 return;
7789 case V4SFmode:
7791 rtx tmp_altivec_di = gen_rtx_REG (DImode, REGNO (tmp_altivec));
7792 rtx tmp_altivec_v4sf = gen_rtx_REG (V4SFmode, REGNO (tmp_altivec));
7793 rtx src_v2di = gen_rtx_REG (V2DImode, REGNO (src));
7794 emit_insn (gen_vsx_vslo_v2di (tmp_altivec_di, src_v2di,
7795 tmp_altivec));
7797 emit_insn (gen_vsx_xscvspdp_scalar2 (dest, tmp_altivec_v4sf));
7798 return;
7801 case V4SImode:
7802 case V8HImode:
7803 case V16QImode:
7805 rtx tmp_altivec_di = gen_rtx_REG (DImode, REGNO (tmp_altivec));
7806 rtx src_v2di = gen_rtx_REG (V2DImode, REGNO (src));
7807 rtx tmp_gpr_di = gen_rtx_REG (DImode, REGNO (dest));
7808 emit_insn (gen_vsx_vslo_v2di (tmp_altivec_di, src_v2di,
7809 tmp_altivec));
7810 emit_move_insn (tmp_gpr_di, tmp_altivec_di);
7811 emit_insn (gen_ashrdi3 (tmp_gpr_di, tmp_gpr_di,
7812 GEN_INT (64 - (8 * scalar_size))));
7813 return;
7816 default:
7817 gcc_unreachable ();
7820 return;
7822 else
7823 gcc_unreachable ();
7826 /* Helper function for rs6000_split_v4si_init to build up a DImode value from
7827 two SImode values. */
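/* E.g. SI1 = 0x12345678 and SI2 = 0x9abcdef0 combine into the DImode
   value 0x123456789abcdef0: SI1 lands in the upper 32 bits, SI2 in
   the lower 32 bits.  */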
7829 static void
7830 rs6000_split_v4si_init_di_reg (rtx dest, rtx si1, rtx si2, rtx tmp)
7832 const unsigned HOST_WIDE_INT mask_32bit = HOST_WIDE_INT_C (0xffffffff);
7834 if (CONST_INT_P (si1) && CONST_INT_P (si2))
7836 unsigned HOST_WIDE_INT const1 = (UINTVAL (si1) & mask_32bit) << 32;
7837 unsigned HOST_WIDE_INT const2 = UINTVAL (si2) & mask_32bit;
7839 emit_move_insn (dest, GEN_INT (const1 | const2));
7840 return;
7843 /* Put si1 into upper 32-bits of dest. */
7844 if (CONST_INT_P (si1))
7845 emit_move_insn (dest, GEN_INT ((UINTVAL (si1) & mask_32bit) << 32));
7846 else
7848 /* Generate RLDIC. */
7849 rtx si1_di = gen_rtx_REG (DImode, regno_or_subregno (si1));
7850 rtx shift_rtx = gen_rtx_ASHIFT (DImode, si1_di, GEN_INT (32));
7851 rtx mask_rtx = GEN_INT (mask_32bit << 32);
7852 rtx and_rtx = gen_rtx_AND (DImode, shift_rtx, mask_rtx);
7853 gcc_assert (!reg_overlap_mentioned_p (dest, si1));
7854 emit_insn (gen_rtx_SET (dest, and_rtx));
7857 /* Put si2 into the temporary. */
7858 gcc_assert (!reg_overlap_mentioned_p (dest, tmp));
7859 if (CONST_INT_P (si2))
7860 emit_move_insn (tmp, GEN_INT (UINTVAL (si2) & mask_32bit));
7861 else
7862 emit_insn (gen_zero_extendsidi2 (tmp, si2));
7864 /* Combine the two parts. */
7865 emit_insn (gen_iordi3 (dest, dest, tmp));
7866 return;
7869 /* Split a V4SI initialization. */
7871 void
7872 rs6000_split_v4si_init (rtx operands[])
7874 rtx dest = operands[0];
7876 /* Destination is a GPR, build up the two DImode parts in place. */
7877 if (REG_P (dest) || SUBREG_P (dest))
7879 int d_regno = regno_or_subregno (dest);
7880 rtx scalar1 = operands[1];
7881 rtx scalar2 = operands[2];
7882 rtx scalar3 = operands[3];
7883 rtx scalar4 = operands[4];
7884 rtx tmp1 = operands[5];
7885 rtx tmp2 = operands[6];
7887 /* Even though we only need one temporary (plus the destination, which
7888 has an early clobber constraint), try to use two temporaries, one for
7889 each double word created.  That way the 2nd insn scheduling pass can
7890 rearrange things so the two parts are done in parallel. */
7891 if (BYTES_BIG_ENDIAN)
7893 rtx di_lo = gen_rtx_REG (DImode, d_regno);
7894 rtx di_hi = gen_rtx_REG (DImode, d_regno + 1);
7895 rs6000_split_v4si_init_di_reg (di_lo, scalar1, scalar2, tmp1);
7896 rs6000_split_v4si_init_di_reg (di_hi, scalar3, scalar4, tmp2);
7898 else
7900 rtx di_lo = gen_rtx_REG (DImode, d_regno + 1);
7901 rtx di_hi = gen_rtx_REG (DImode, d_regno);
7902 gcc_assert (!VECTOR_ELT_ORDER_BIG);
7903 rs6000_split_v4si_init_di_reg (di_lo, scalar4, scalar3, tmp1);
7904 rs6000_split_v4si_init_di_reg (di_hi, scalar2, scalar1, tmp2);
7906 return;
7909 else
7910 gcc_unreachable ();
7913 /* Return alignment of TYPE.  Existing alignment is ALIGN.  HOW
7914 selects whether the alignment is ABI-mandated, optional, or
7915 both ABI-mandated and optional alignment. */
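/* Illustrative results: an AltiVec/VSX vector type is raised to
   128-bit alignment (64-bit for paired-float vectors), and, when
   optional alignment is requested, an array of QImode elements is
   raised to BITS_PER_WORD.  */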
7917 unsigned int
7918 rs6000_data_alignment (tree type, unsigned int align, enum data_align how)
7920 if (how != align_opt)
7922 if (TREE_CODE (type) == VECTOR_TYPE)
7924 if (TARGET_PAIRED_FLOAT && PAIRED_VECTOR_MODE (TYPE_MODE (type)))
7926 if (align < 64)
7927 align = 64;
7929 else if (align < 128)
7930 align = 128;
7934 if (how != align_abi)
7936 if (TREE_CODE (type) == ARRAY_TYPE
7937 && TYPE_MODE (TREE_TYPE (type)) == QImode)
7939 if (align < BITS_PER_WORD)
7940 align = BITS_PER_WORD;
7944 return align;
7947 /* Previous GCC releases forced all vector types to have 16-byte alignment. */
7949 bool
7950 rs6000_special_adjust_field_align_p (tree type, unsigned int computed)
7952 if (TARGET_ALTIVEC && TREE_CODE (type) == VECTOR_TYPE)
7954 if (computed != 128)
7956 static bool warned;
7957 if (!warned && warn_psabi)
7959 warned = true;
7960 inform (input_location,
7961 "the layout of aggregates containing vectors with"
7962 " %d-byte alignment has changed in GCC 5",
7963 computed / BITS_PER_UNIT);
7966 /* In current GCC there is no special case. */
7967 return false;
7970 return false;
7973 /* AIX increases natural record alignment to doubleword if the first
7974 field is an FP double, while the FP fields remain word-aligned. */
7976 unsigned int
7977 rs6000_special_round_type_align (tree type, unsigned int computed,
7978 unsigned int specified)
7980 unsigned int align = MAX (computed, specified);
7981 tree field = TYPE_FIELDS (type);
7983 /* Skip all non-field decls.  */
7984 while (field != NULL && TREE_CODE (field) != FIELD_DECL)
7985 field = DECL_CHAIN (field);
7987 if (field != NULL && field != type)
7989 type = TREE_TYPE (field);
7990 while (TREE_CODE (type) == ARRAY_TYPE)
7991 type = TREE_TYPE (type);
7993 if (type != error_mark_node && TYPE_MODE (type) == DFmode)
7994 align = MAX (align, 64);
7997 return align;
8000 /* Darwin increases record alignment to the natural alignment of
8001 the first field. */
8003 unsigned int
8004 darwin_rs6000_special_round_type_align (tree type, unsigned int computed,
8005 unsigned int specified)
8007 unsigned int align = MAX (computed, specified);
8009 if (TYPE_PACKED (type))
8010 return align;
8012 /* Find the first field, looking down into aggregates. */
8013 do {
8014 tree field = TYPE_FIELDS (type);
8015 /* Skip all non-field decls.  */
8016 while (field != NULL && TREE_CODE (field) != FIELD_DECL)
8017 field = DECL_CHAIN (field);
8018 if (! field)
8019 break;
8020 /* A packed field does not contribute any extra alignment. */
8021 if (DECL_PACKED (field))
8022 return align;
8023 type = TREE_TYPE (field);
8024 while (TREE_CODE (type) == ARRAY_TYPE)
8025 type = TREE_TYPE (type);
8026 } while (AGGREGATE_TYPE_P (type));
8028 if (! AGGREGATE_TYPE_P (type) && type != error_mark_node)
8029 align = MAX (align, TYPE_ALIGN (type));
8031 return align;
8034 /* Return 1 for an operand in small memory on V.4/eabi. */
8037 small_data_operand (rtx op ATTRIBUTE_UNUSED,
8038 machine_mode mode ATTRIBUTE_UNUSED)
8040 #if TARGET_ELF
8041 rtx sym_ref;
8043 if (rs6000_sdata == SDATA_NONE || rs6000_sdata == SDATA_DATA)
8044 return 0;
8046 if (DEFAULT_ABI != ABI_V4)
8047 return 0;
8049 if (GET_CODE (op) == SYMBOL_REF)
8050 sym_ref = op;
8052 else if (GET_CODE (op) != CONST
8053 || GET_CODE (XEXP (op, 0)) != PLUS
8054 || GET_CODE (XEXP (XEXP (op, 0), 0)) != SYMBOL_REF
8055 || GET_CODE (XEXP (XEXP (op, 0), 1)) != CONST_INT)
8056 return 0;
8058 else
8060 rtx sum = XEXP (op, 0);
8061 HOST_WIDE_INT summand;
8063 /* We have to be careful here, because it is the referenced address
8064 that must be 32k from _SDA_BASE_, not just the symbol. */
8065 summand = INTVAL (XEXP (sum, 1));
8066 if (summand < 0 || summand > g_switch_value)
8067 return 0;
8069 sym_ref = XEXP (sum, 0);
8072 return SYMBOL_REF_SMALL_P (sym_ref);
8073 #else
8074 return 0;
8075 #endif
8078 /* Return true if either operand is a general purpose register. */
8080 bool
8081 gpr_or_gpr_p (rtx op0, rtx op1)
8083 return ((REG_P (op0) && INT_REGNO_P (REGNO (op0)))
8084 || (REG_P (op1) && INT_REGNO_P (REGNO (op1))));
8087 /* Return true if this is a move direct operation between GPR registers and
8088 floating point/VSX registers. */
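/* Roughly: with ISA 2.07 (power8) direct move these are the
   mtvsrd/mfvsrd family of instructions; TARGET_MFPGPR instead covers
   the older power6x mffgpr/mftgpr moves between GPRs and FPRs.  */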
8090 bool
8091 direct_move_p (rtx op0, rtx op1)
8093 int regno0, regno1;
8095 if (!REG_P (op0) || !REG_P (op1))
8096 return false;
8098 if (!TARGET_DIRECT_MOVE && !TARGET_MFPGPR)
8099 return false;
8101 regno0 = REGNO (op0);
8102 regno1 = REGNO (op1);
8103 if (regno0 >= FIRST_PSEUDO_REGISTER || regno1 >= FIRST_PSEUDO_REGISTER)
8104 return false;
8106 if (INT_REGNO_P (regno0))
8107 return (TARGET_DIRECT_MOVE) ? VSX_REGNO_P (regno1) : FP_REGNO_P (regno1);
8109 else if (INT_REGNO_P (regno1))
8111 if (TARGET_MFPGPR && FP_REGNO_P (regno0))
8112 return true;
8114 else if (TARGET_DIRECT_MOVE && VSX_REGNO_P (regno0))
8115 return true;
8118 return false;
8121 /* Return true if the OFFSET is valid for the quad address instructions that
8122 use d-form (register + offset) addressing. */
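/* I.e. the offset must be a 16-bit signed multiple of 16: 32752 is
   acceptable, 32760 (not a multiple of 16) and 32768 (out of range)
   are not.  */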
8124 static inline bool
8125 quad_address_offset_p (HOST_WIDE_INT offset)
8127 return (IN_RANGE (offset, -32768, 32767) && ((offset) & 0xf) == 0);
8130 /* Return true if ADDR is an acceptable address for a quad memory
8131 operation of mode MODE (either LQ/STQ for general purpose registers,
8132 or LXV/STXV for vector registers under ISA 3.0).  STRICT says whether
8133 strict register checking should be applied when validating the base
8134 register. */
8136 bool
8137 quad_address_p (rtx addr, machine_mode mode, bool strict)
8139 rtx op0, op1;
8141 if (GET_MODE_SIZE (mode) != 16)
8142 return false;
8144 if (legitimate_indirect_address_p (addr, strict))
8145 return true;
8147 if (VECTOR_MODE_P (mode) && !mode_supports_vsx_dform_quad (mode))
8148 return false;
8150 if (GET_CODE (addr) != PLUS)
8151 return false;
8153 op0 = XEXP (addr, 0);
8154 if (!REG_P (op0) || !INT_REG_OK_FOR_BASE_P (op0, strict))
8155 return false;
8157 op1 = XEXP (addr, 1);
8158 if (!CONST_INT_P (op1))
8159 return false;
8161 return quad_address_offset_p (INTVAL (op1));
8164 /* Return true if this is a load or store quad operation. This function does
8165 not handle the atomic quad memory instructions. */
8167 bool
8168 quad_load_store_p (rtx op0, rtx op1)
8170 bool ret;
8172 if (!TARGET_QUAD_MEMORY)
8173 ret = false;
8175 else if (REG_P (op0) && MEM_P (op1))
8176 ret = (quad_int_reg_operand (op0, GET_MODE (op0))
8177 && quad_memory_operand (op1, GET_MODE (op1))
8178 && !reg_overlap_mentioned_p (op0, op1));
8180 else if (MEM_P (op0) && REG_P (op1))
8181 ret = (quad_memory_operand (op0, GET_MODE (op0))
8182 && quad_int_reg_operand (op1, GET_MODE (op1)));
8184 else
8185 ret = false;
8187 if (TARGET_DEBUG_ADDR)
8189 fprintf (stderr, "\n========== quad_load_store, return %s\n",
8190 ret ? "true" : "false");
8191 debug_rtx (gen_rtx_SET (op0, op1));
8194 return ret;
8197 /* Given an address, return a constant offset term if one exists. */
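/* E.g. (plus (reg) (const_int 8)) yields (const_int 8), as does
   (lo_sum (reg) (const (plus (symbol_ref) (const_int 8)))); a bare
   (reg) yields NULL_RTX.  */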
8199 static rtx
8200 address_offset (rtx op)
8202 if (GET_CODE (op) == PRE_INC
8203 || GET_CODE (op) == PRE_DEC)
8204 op = XEXP (op, 0);
8205 else if (GET_CODE (op) == PRE_MODIFY
8206 || GET_CODE (op) == LO_SUM)
8207 op = XEXP (op, 1);
8209 if (GET_CODE (op) == CONST)
8210 op = XEXP (op, 0);
8212 if (GET_CODE (op) == PLUS)
8213 op = XEXP (op, 1);
8215 if (CONST_INT_P (op))
8216 return op;
8218 return NULL_RTX;
8221 /* Return true if the MEM operand is a memory operand suitable for use
8222 with a (full width, possibly multiple) gpr load/store. On
8223 powerpc64 this means the offset must be divisible by 4.
8224 Implements 'Y' constraint.
8226 Accept direct, indexed, offset, lo_sum and tocref. Since this is
8227 a constraint function we know the operand has satisfied a suitable
8228 memory predicate. Also accept some odd rtl generated by reload
8229 (see rs6000_legitimize_reload_address for various forms). It is
8230 important that reload rtl be accepted by appropriate constraints
8231 but not by the operand predicate.
8233 Offsetting a lo_sum should not be allowed, except where we know by
8234 alignment that a 32k boundary is not crossed, but see the ???
8235 comment in rs6000_legitimize_reload_address. Note that by
8236 "offsetting" here we mean a further offset to access parts of the
8237 MEM. It's fine to have a lo_sum where the inner address is offset
8238 from a sym, since the same sym+offset will appear in the high part
8239 of the address calculation. */
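/* Worked example: for TImode (16 bytes) on powerpc64, EXTRA below
   becomes 16 - 8 = 8, so a D-form offset must satisfy
   -32768 <= offset < 32760 and be a multiple of 4, keeping the
   second doubleword of the access reachable by a 16-bit
   displacement too.  */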
8241 bool
8242 mem_operand_gpr (rtx op, machine_mode mode)
8244 unsigned HOST_WIDE_INT offset;
8245 int extra;
8246 rtx addr = XEXP (op, 0);
8248 op = address_offset (addr);
8249 if (op == NULL_RTX)
8250 return true;
8252 offset = INTVAL (op);
8253 if (TARGET_POWERPC64 && (offset & 3) != 0)
8254 return false;
8256 extra = GET_MODE_SIZE (mode) - UNITS_PER_WORD;
8257 if (extra < 0)
8258 extra = 0;
8260 if (GET_CODE (addr) == LO_SUM)
8261 /* For lo_sum addresses, we must allow any offset except one that
8262 causes a wrap, so test only the low 16 bits. */
8263 offset = ((offset & 0xffff) ^ 0x8000) - 0x8000;
8265 return offset + 0x8000 < 0x10000u - extra;
8268 /* As above, but for DS-FORM VSX insns. Unlike mem_operand_gpr,
8269 enforce an offset divisible by 4 even for 32-bit. */
8271 bool
8272 mem_operand_ds_form (rtx op, machine_mode mode)
8274 unsigned HOST_WIDE_INT offset;
8275 int extra;
8276 rtx addr = XEXP (op, 0);
8278 if (!offsettable_address_p (false, mode, addr))
8279 return false;
8281 op = address_offset (addr);
8282 if (op == NULL_RTX)
8283 return true;
8285 offset = INTVAL (op);
8286 if ((offset & 3) != 0)
8287 return false;
8289 extra = GET_MODE_SIZE (mode) - UNITS_PER_WORD;
8290 if (extra < 0)
8291 extra = 0;
8293 if (GET_CODE (addr) == LO_SUM)
8294 /* For lo_sum addresses, we must allow any offset except one that
8295 causes a wrap, so test only the low 16 bits. */
8296 offset = ((offset & 0xffff) ^ 0x8000) - 0x8000;
8298 return offset + 0x8000 < 0x10000u - extra;
8301 /* Subroutines of rs6000_legitimize_address and rs6000_legitimate_address_p. */
8303 static bool
8304 reg_offset_addressing_ok_p (machine_mode mode)
8306 switch (mode)
8308 case V16QImode:
8309 case V8HImode:
8310 case V4SFmode:
8311 case V4SImode:
8312 case V2DFmode:
8313 case V2DImode:
8314 case V1TImode:
8315 case TImode:
8316 case TFmode:
8317 case KFmode:
8318 /* AltiVec/VSX vector modes. Only reg+reg addressing was valid until the
8319 ISA 3.0 vector d-form addressing mode was added. While TImode is not
8320 a vector mode, if we want to use the VSX registers to move it around,
8321 we need to restrict ourselves to reg+reg addressing. Similarly for
8322 IEEE 128-bit floating point that is passed in a single vector
8323 register. */
8324 if (VECTOR_MEM_ALTIVEC_OR_VSX_P (mode))
8325 return mode_supports_vsx_dform_quad (mode);
8326 break;
8328 case V2SImode:
8329 case V2SFmode:
8330 /* Paired vector modes. Only reg+reg addressing is valid. */
8331 if (TARGET_PAIRED_FLOAT)
8332 return false;
8333 break;
8335 case SDmode:
8336 /* If we can do direct load/stores of SDmode, restrict it to reg+reg
8337 addressing for the LFIWZX and STFIWX instructions. */
8338 if (TARGET_NO_SDMODE_STACK)
8339 return false;
8340 break;
8342 default:
8343 break;
8346 return true;
8349 static bool
8350 virtual_stack_registers_memory_p (rtx op)
8352 int regnum;
8354 if (GET_CODE (op) == REG)
8355 regnum = REGNO (op);
8357 else if (GET_CODE (op) == PLUS
8358 && GET_CODE (XEXP (op, 0)) == REG
8359 && GET_CODE (XEXP (op, 1)) == CONST_INT)
8360 regnum = REGNO (XEXP (op, 0));
8362 else
8363 return false;
8365 return (regnum >= FIRST_VIRTUAL_REGISTER
8366 && regnum <= LAST_VIRTUAL_POINTER_REGISTER);
8369 /* Return true if a MODE sized memory accesses to OP plus OFFSET
8370 is known to not straddle a 32k boundary. This function is used
8371 to determine whether -mcmodel=medium code can use TOC pointer
8372 relative addressing for OP. This means the alignment of the TOC
8373 pointer must also be taken into account, and unfortunately that is
8374 only 8 bytes. */
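/* Sketch of the computation below: the usable alignment is the
   decl's alignment capped at the TOC pointer alignment and further
   limited by the low set bit of OFFSET.  E.g. an 8-byte-aligned decl
   accessed at offset 4 only guarantees 4-byte alignment, so only
   accesses of at most 4 bytes are known not to straddle a 32k
   boundary.  */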
8376 #ifndef POWERPC64_TOC_POINTER_ALIGNMENT
8377 #define POWERPC64_TOC_POINTER_ALIGNMENT 8
8378 #endif
8380 static bool
8381 offsettable_ok_by_alignment (rtx op, HOST_WIDE_INT offset,
8382 machine_mode mode)
8384 tree decl;
8385 unsigned HOST_WIDE_INT dsize, dalign, lsb, mask;
8387 if (GET_CODE (op) != SYMBOL_REF)
8388 return false;
8390 /* ISA 3.0 vector d-form addressing is restricted, don't allow
8391 SYMBOL_REF. */
8392 if (mode_supports_vsx_dform_quad (mode))
8393 return false;
8395 dsize = GET_MODE_SIZE (mode);
8396 decl = SYMBOL_REF_DECL (op);
8397 if (!decl)
8399 if (dsize == 0)
8400 return false;
8402 /* -fsection-anchors loses the original SYMBOL_REF_DECL when
8403 replacing memory addresses with an anchor plus offset. We
8404 could find the decl by rummaging around in the block->objects
8405 VEC for the given offset but that seems like too much work. */
8406 dalign = BITS_PER_UNIT;
8407 if (SYMBOL_REF_HAS_BLOCK_INFO_P (op)
8408 && SYMBOL_REF_ANCHOR_P (op)
8409 && SYMBOL_REF_BLOCK (op) != NULL)
8411 struct object_block *block = SYMBOL_REF_BLOCK (op);
8413 dalign = block->alignment;
8414 offset += SYMBOL_REF_BLOCK_OFFSET (op);
8416 else if (CONSTANT_POOL_ADDRESS_P (op))
8418 /* It would be nice to have get_pool_align ()...  */
8419 machine_mode cmode = get_pool_mode (op);
8421 dalign = GET_MODE_ALIGNMENT (cmode);
8424 else if (DECL_P (decl))
8426 dalign = DECL_ALIGN (decl);
8428 if (dsize == 0)
8430 /* Allow BLKmode when the entire object is known to not
8431 cross a 32k boundary. */
8432 if (!DECL_SIZE_UNIT (decl))
8433 return false;
8435 if (!tree_fits_uhwi_p (DECL_SIZE_UNIT (decl)))
8436 return false;
8438 dsize = tree_to_uhwi (DECL_SIZE_UNIT (decl));
8439 if (dsize > 32768)
8440 return false;
8442 dalign /= BITS_PER_UNIT;
8443 if (dalign > POWERPC64_TOC_POINTER_ALIGNMENT)
8444 dalign = POWERPC64_TOC_POINTER_ALIGNMENT;
8445 return dalign >= dsize;
8448 else
8449 gcc_unreachable ();
8451 /* Find how many bits of the alignment we know for this access. */
8452 dalign /= BITS_PER_UNIT;
8453 if (dalign > POWERPC64_TOC_POINTER_ALIGNMENT)
8454 dalign = POWERPC64_TOC_POINTER_ALIGNMENT;
8455 mask = dalign - 1;
8456 lsb = offset & -offset;
8457 mask &= lsb - 1;
8458 dalign = mask + 1;
8460 return dalign >= dsize;
8463 static bool
8464 constant_pool_expr_p (rtx op)
8466 rtx base, offset;
8468 split_const (op, &base, &offset);
8469 return (GET_CODE (base) == SYMBOL_REF
8470 && CONSTANT_POOL_ADDRESS_P (base)
8471 && ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (get_pool_constant (base), Pmode));
8474 /* These are only used to pass through from print_operand/print_operand_address
8475 to rs6000_output_addr_const_extra over the intervening function
8476 output_addr_const which is not target code. */
8477 static const_rtx tocrel_base_oac, tocrel_offset_oac;
8479 /* Return true if OP is a toc pointer relative address (the output
8480 of create_TOC_reference). If STRICT, do not match non-split
8481 -mcmodel=large/medium toc pointer relative addresses. If the pointers
8482 are non-NULL, place base and offset pieces in TOCREL_BASE_RET and
8483 TOCREL_OFFSET_RET respectively. */
8485 bool
8486 toc_relative_expr_p (const_rtx op, bool strict, const_rtx *tocrel_base_ret,
8487 const_rtx *tocrel_offset_ret)
8489 if (!TARGET_TOC)
8490 return false;
8492 if (TARGET_CMODEL != CMODEL_SMALL)
8494 /* When strict, ensure we have everything tidy. */
8495 if (strict
8496 && !(GET_CODE (op) == LO_SUM
8497 && REG_P (XEXP (op, 0))
8498 && INT_REG_OK_FOR_BASE_P (XEXP (op, 0), strict)))
8499 return false;
8501 /* When not strict, allow non-split TOC addresses and also allow
8502 (lo_sum (high ..)) TOC addresses created during reload. */
8503 if (GET_CODE (op) == LO_SUM)
8504 op = XEXP (op, 1);
8507 const_rtx tocrel_base = op;
8508 const_rtx tocrel_offset = const0_rtx;
8510 if (GET_CODE (op) == PLUS && add_cint_operand (XEXP (op, 1), GET_MODE (op)))
8512 tocrel_base = XEXP (op, 0);
8513 tocrel_offset = XEXP (op, 1);
8516 if (tocrel_base_ret)
8517 *tocrel_base_ret = tocrel_base;
8518 if (tocrel_offset_ret)
8519 *tocrel_offset_ret = tocrel_offset;
8521 return (GET_CODE (tocrel_base) == UNSPEC
8522 && XINT (tocrel_base, 1) == UNSPEC_TOCREL);
8525 /* Return true if X is a constant pool address, and also for cmodel=medium
8526 if X is a toc-relative address known to be offsettable within MODE. */
8528 bool
8529 legitimate_constant_pool_address_p (const_rtx x, machine_mode mode,
8530 bool strict)
8532 const_rtx tocrel_base, tocrel_offset;
8533 return (toc_relative_expr_p (x, strict, &tocrel_base, &tocrel_offset)
8534 && (TARGET_CMODEL != CMODEL_MEDIUM
8535 || constant_pool_expr_p (XVECEXP (tocrel_base, 0, 0))
8536 || mode == QImode
8537 || offsettable_ok_by_alignment (XVECEXP (tocrel_base, 0, 0),
8538 INTVAL (tocrel_offset), mode)));
8541 static bool
8542 legitimate_small_data_p (machine_mode mode, rtx x)
8544 return (DEFAULT_ABI == ABI_V4
8545 && !flag_pic && !TARGET_TOC
8546 && (GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == CONST)
8547 && small_data_operand (x, mode));
8550 bool
8551 rs6000_legitimate_offset_address_p (machine_mode mode, rtx x,
8552 bool strict, bool worst_case)
8554 unsigned HOST_WIDE_INT offset;
8555 unsigned int extra;
8557 if (GET_CODE (x) != PLUS)
8558 return false;
8559 if (!REG_P (XEXP (x, 0)))
8560 return false;
8561 if (!INT_REG_OK_FOR_BASE_P (XEXP (x, 0), strict))
8562 return false;
8563 if (mode_supports_vsx_dform_quad (mode))
8564 return quad_address_p (x, mode, strict);
8565 if (!reg_offset_addressing_ok_p (mode))
8566 return virtual_stack_registers_memory_p (x);
8567 if (legitimate_constant_pool_address_p (x, mode, strict || lra_in_progress))
8568 return true;
8569 if (GET_CODE (XEXP (x, 1)) != CONST_INT)
8570 return false;
8572 offset = INTVAL (XEXP (x, 1));
8573 extra = 0;
8574 switch (mode)
8576 case V2SImode:
8577 case V2SFmode:
8578 /* Paired single modes: offset addressing isn't valid. */
8579 return false;
8581 case DFmode:
8582 case DDmode:
8583 case DImode:
8584 /* If we are using VSX scalar loads, restrict ourselves to reg+reg
8585 addressing. */
8586 if (VECTOR_MEM_VSX_P (mode))
8587 return false;
8589 if (!worst_case)
8590 break;
8591 if (!TARGET_POWERPC64)
8592 extra = 4;
8593 else if (offset & 3)
8594 return false;
8595 break;
8597 case TFmode:
8598 case IFmode:
8599 case KFmode:
8600 case TDmode:
8601 case TImode:
8602 case PTImode:
8603 extra = 8;
8604 if (!worst_case)
8605 break;
8606 if (!TARGET_POWERPC64)
8607 extra = 12;
8608 else if (offset & 3)
8609 return false;
8610 break;
8612 default:
8613 break;
8616 offset += 0x8000;
8617 return offset < 0x10000 - extra;
8620 bool
8621 legitimate_indexed_address_p (rtx x, int strict)
8623 rtx op0, op1;
8625 if (GET_CODE (x) != PLUS)
8626 return false;
8628 op0 = XEXP (x, 0);
8629 op1 = XEXP (x, 1);
8631 /* Recognize the rtl generated by reload which we know will later be
8632 replaced with proper base and index regs. */
8633 if (!strict
8634 && reload_in_progress
8635 && (REG_P (op0) || GET_CODE (op0) == PLUS)
8636 && REG_P (op1))
8637 return true;
8639 return (REG_P (op0) && REG_P (op1)
8640 && ((INT_REG_OK_FOR_BASE_P (op0, strict)
8641 && INT_REG_OK_FOR_INDEX_P (op1, strict))
8642 || (INT_REG_OK_FOR_BASE_P (op1, strict)
8643 && INT_REG_OK_FOR_INDEX_P (op0, strict))));
8646 bool
8647 avoiding_indexed_address_p (machine_mode mode)
8649 /* Avoid indexed addressing for modes that have non-indexed
8650 load/store instruction forms. */
8651 return (TARGET_AVOID_XFORM && VECTOR_MEM_NONE_P (mode));
8654 bool
8655 legitimate_indirect_address_p (rtx x, int strict)
8657 return GET_CODE (x) == REG && INT_REG_OK_FOR_BASE_P (x, strict);
8660 bool
8661 macho_lo_sum_memory_operand (rtx x, machine_mode mode)
8663 if (!TARGET_MACHO || !flag_pic
8664 || mode != SImode || GET_CODE (x) != MEM)
8665 return false;
8666 x = XEXP (x, 0);
8668 if (GET_CODE (x) != LO_SUM)
8669 return false;
8670 if (GET_CODE (XEXP (x, 0)) != REG)
8671 return false;
8672 if (!INT_REG_OK_FOR_BASE_P (XEXP (x, 0), 0))
8673 return false;
8674 x = XEXP (x, 1);
8676 return CONSTANT_P (x);
8679 static bool
8680 legitimate_lo_sum_address_p (machine_mode mode, rtx x, int strict)
8682 if (GET_CODE (x) != LO_SUM)
8683 return false;
8684 if (GET_CODE (XEXP (x, 0)) != REG)
8685 return false;
8686 if (!INT_REG_OK_FOR_BASE_P (XEXP (x, 0), strict))
8687 return false;
8688 /* Quad-word addresses are restricted, and we can't use LO_SUM. */
8689 if (mode_supports_vsx_dform_quad (mode))
8690 return false;
8691 x = XEXP (x, 1);
8693 if (TARGET_ELF || TARGET_MACHO)
8695 bool large_toc_ok;
8697 if (DEFAULT_ABI == ABI_V4 && flag_pic)
8698 return false;
8699 /* LRA doesn't use LEGITIMIZE_RELOAD_ADDRESS as it usually calls
8700 push_reload from reload pass code.  LEGITIMIZE_RELOAD_ADDRESS
8701 recognizes some LO_SUM addresses as valid although this
8702 function says the opposite.  In most cases LRA can generate
8703 correct code for address reloads through its various
8704 transformations; only some LO_SUM cases defeat it.  So we need
8705 code here, analogous to that in rs6000_legitimize_reload_address
8706 for LO_SUM, saying that some addresses are still valid. */
8707 large_toc_ok = (lra_in_progress && TARGET_CMODEL != CMODEL_SMALL
8708 && small_toc_ref (x, VOIDmode));
8709 if (TARGET_TOC && ! large_toc_ok)
8710 return false;
8711 if (GET_MODE_NUNITS (mode) != 1)
8712 return false;
8713 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD
8714 && !(/* ??? Assume floating point reg based on mode? */
8715 TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT
8716 && (mode == DFmode || mode == DDmode)))
8717 return false;
8719 return CONSTANT_P (x) || large_toc_ok;
8722 return false;
8726 /* Try machine-dependent ways of modifying an illegitimate address
8727 to be legitimate. If we find one, return the new, valid address.
8728 This is used from only one place: `memory_address' in explow.c.
8730 OLDX is the address as it was before break_out_memory_refs was
8731 called. In some cases it is useful to look at this to decide what
8732 needs to be done.
8734 It is always safe for this function to do nothing. It exists to
8735 recognize opportunities to optimize the output.
8737 On RS/6000, first check for the sum of a register with a constant
8738 integer that is out of range. If so, generate code to add the
8739 constant with the low-order 16 bits masked to the register and force
8740 this result into another register (this can be done with `cau').
8741 Then generate an address of REG+(CONST&0xffff), allowing for the
8742 possibility of bit 16 being a one.
8744 Then check for the sum of a register and something not constant, try to
8745 load the other things into a register and return the sum. */
8747 static rtx
8748 rs6000_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
8749 machine_mode mode)
8751 unsigned int extra;
8753 if (!reg_offset_addressing_ok_p (mode)
8754 || mode_supports_vsx_dform_quad (mode))
8756 if (virtual_stack_registers_memory_p (x))
8757 return x;
8759 /* In theory we should not be seeing addresses of the form reg+0,
8760 but just in case it is generated, optimize it away. */
8761 if (GET_CODE (x) == PLUS && XEXP (x, 1) == const0_rtx)
8762 return force_reg (Pmode, XEXP (x, 0));
8764 /* For TImode with load/store quad, restrict addresses to just a single
8765 pointer, so it works with both GPRs and VSX registers. */
8766 /* Make sure both operands are registers. */
8767 else if (GET_CODE (x) == PLUS
8768 && (mode != TImode || !TARGET_VSX_TIMODE))
8769 return gen_rtx_PLUS (Pmode,
8770 force_reg (Pmode, XEXP (x, 0)),
8771 force_reg (Pmode, XEXP (x, 1)));
8772 else
8773 return force_reg (Pmode, x);
8775 if (GET_CODE (x) == SYMBOL_REF)
8777 enum tls_model model = SYMBOL_REF_TLS_MODEL (x);
8778 if (model != 0)
8779 return rs6000_legitimize_tls_address (x, model);
8782 extra = 0;
8783 switch (mode)
8785 case TFmode:
8786 case TDmode:
8787 case TImode:
8788 case PTImode:
8789 case IFmode:
8790 case KFmode:
8791 /* As in legitimate_offset_address_p we do not assume
8792 worst-case. The mode here is just a hint as to the registers
8793 used. A TImode is usually in gprs, but may actually be in
8794 fprs. Leave worst-case scenario for reload to handle via
8795 insn constraints. PTImode is only GPRs. */
8796 extra = 8;
8797 break;
8798 default:
8799 break;
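/* Worked example for the large-offset split below: (reg + 0x12345)
   splits into high_int = 0x10000 (one addis) and low_int = 0x2345;
   the ((x & 0xffff) ^ 0x8000) - 0x8000 trick sign-extends the low 16
   bits, so an offset like 0x18000 rounds the high part up to 0x20000
   with low_int = -0x8000.  */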
8802 if (GET_CODE (x) == PLUS
8803 && GET_CODE (XEXP (x, 0)) == REG
8804 && GET_CODE (XEXP (x, 1)) == CONST_INT
8805 && ((unsigned HOST_WIDE_INT) (INTVAL (XEXP (x, 1)) + 0x8000)
8806 >= 0x10000 - extra)
8807 && !PAIRED_VECTOR_MODE (mode))
8809 HOST_WIDE_INT high_int, low_int;
8810 rtx sum;
8811 low_int = ((INTVAL (XEXP (x, 1)) & 0xffff) ^ 0x8000) - 0x8000;
8812 if (low_int >= 0x8000 - extra)
8813 low_int = 0;
8814 high_int = INTVAL (XEXP (x, 1)) - low_int;
8815 sum = force_operand (gen_rtx_PLUS (Pmode, XEXP (x, 0),
8816 GEN_INT (high_int)), 0);
8817 return plus_constant (Pmode, sum, low_int);
8819 else if (GET_CODE (x) == PLUS
8820 && GET_CODE (XEXP (x, 0)) == REG
8821 && GET_CODE (XEXP (x, 1)) != CONST_INT
8822 && GET_MODE_NUNITS (mode) == 1
8823 && (GET_MODE_SIZE (mode) <= UNITS_PER_WORD
8824 || (/* ??? Assume floating point reg based on mode? */
8825 (TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT)
8826 && (mode == DFmode || mode == DDmode)))
8827 && !avoiding_indexed_address_p (mode))
8829 return gen_rtx_PLUS (Pmode, XEXP (x, 0),
8830 force_reg (Pmode, force_operand (XEXP (x, 1), 0)));
8832 else if (PAIRED_VECTOR_MODE (mode))
8834 if (mode == DImode)
8835 return x;
8836 /* We accept [reg + reg]. */
8838 if (GET_CODE (x) == PLUS)
8840 rtx op1 = XEXP (x, 0);
8841 rtx op2 = XEXP (x, 1);
8842 rtx y;
8844 op1 = force_reg (Pmode, op1);
8845 op2 = force_reg (Pmode, op2);
8847 /* We can't always do [reg + reg] for these, because [reg +
8848 reg + offset] is not a legitimate addressing mode. */
8849 y = gen_rtx_PLUS (Pmode, op1, op2);
8851 if ((GET_MODE_SIZE (mode) > 8 || mode == DDmode) && REG_P (op2))
8852 return force_reg (Pmode, y);
8853 else
8854 return y;
8857 return force_reg (Pmode, x);
8859 else if ((TARGET_ELF
8860 #if TARGET_MACHO
8861 || !MACHO_DYNAMIC_NO_PIC_P
8862 #endif
8864 && TARGET_32BIT
8865 && TARGET_NO_TOC
8866 && ! flag_pic
8867 && GET_CODE (x) != CONST_INT
8868 && GET_CODE (x) != CONST_WIDE_INT
8869 && GET_CODE (x) != CONST_DOUBLE
8870 && CONSTANT_P (x)
8871 && GET_MODE_NUNITS (mode) == 1
8872 && (GET_MODE_SIZE (mode) <= UNITS_PER_WORD
8873 || (/* ??? Assume floating point reg based on mode? */
8874 (TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT)
8875 && (mode == DFmode || mode == DDmode))))
8877 rtx reg = gen_reg_rtx (Pmode);
8878 if (TARGET_ELF)
8879 emit_insn (gen_elf_high (reg, x));
8880 else
8881 emit_insn (gen_macho_high (reg, x));
8882 return gen_rtx_LO_SUM (Pmode, reg, x);
8884 else if (TARGET_TOC
8885 && GET_CODE (x) == SYMBOL_REF
8886 && constant_pool_expr_p (x)
8887 && ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (get_pool_constant (x), Pmode))
8888 return create_TOC_reference (x, NULL_RTX);
8889 else
8890 return x;
8893 /* Debug version of rs6000_legitimize_address. */
8894 static rtx
8895 rs6000_debug_legitimize_address (rtx x, rtx oldx, machine_mode mode)
8897 rtx ret;
8898 rtx_insn *insns;
8900 start_sequence ();
8901 ret = rs6000_legitimize_address (x, oldx, mode);
8902 insns = get_insns ();
8903 end_sequence ();
8905 if (ret != x)
8907 fprintf (stderr,
8908 "\nrs6000_legitimize_address: mode %s, old code %s, "
8909 "new code %s, modified\n",
8910 GET_MODE_NAME (mode), GET_RTX_NAME (GET_CODE (x)),
8911 GET_RTX_NAME (GET_CODE (ret)));
8913 fprintf (stderr, "Original address:\n");
8914 debug_rtx (x);
8916 fprintf (stderr, "oldx:\n");
8917 debug_rtx (oldx);
8919 fprintf (stderr, "New address:\n");
8920 debug_rtx (ret);
8922 if (insns)
8924 fprintf (stderr, "Insns added:\n");
8925 debug_rtx_list (insns, 20);
8928 else
8930 fprintf (stderr,
8931 "\nrs6000_legitimize_address: mode %s, code %s, no change:\n",
8932 GET_MODE_NAME (mode), GET_RTX_NAME (GET_CODE (x)));
8934 debug_rtx (x);
8937 if (insns)
8938 emit_insn (insns);
8940 return ret;
8943 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
8944 We need to emit DTP-relative relocations. */
8946 static void rs6000_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
8947 static void
8948 rs6000_output_dwarf_dtprel (FILE *file, int size, rtx x)
8950 switch (size)
8952 case 4:
8953 fputs ("\t.long\t", file);
8954 break;
8955 case 8:
8956 fputs (DOUBLE_INT_ASM_OP, file);
8957 break;
8958 default:
8959 gcc_unreachable ();
8961 output_addr_const (file, x);
8962 if (TARGET_ELF)
8963 fputs ("@dtprel+0x8000", file);
8964 else if (TARGET_XCOFF && GET_CODE (x) == SYMBOL_REF)
8966 switch (SYMBOL_REF_TLS_MODEL (x))
8968 case 0:
8969 break;
8970 case TLS_MODEL_LOCAL_EXEC:
8971 fputs ("@le", file);
8972 break;
8973 case TLS_MODEL_INITIAL_EXEC:
8974 fputs ("@ie", file);
8975 break;
8976 case TLS_MODEL_GLOBAL_DYNAMIC:
8977 case TLS_MODEL_LOCAL_DYNAMIC:
8978 fputs ("@m", file);
8979 break;
8980 default:
8981 gcc_unreachable ();
8986 /* Return true if X is a symbol that refers to real (rather than emulated)
8987 TLS. */
8989 static bool
8990 rs6000_real_tls_symbol_ref_p (rtx x)
8992 return (GET_CODE (x) == SYMBOL_REF
8993 && SYMBOL_REF_TLS_MODEL (x) >= TLS_MODEL_REAL);
8996 /* In the name of slightly smaller debug output, and to cater to
8997 general assembler lossage, recognize various UNSPEC sequences
8998 and turn them back into a direct symbol reference. */
9000 static rtx
9001 rs6000_delegitimize_address (rtx orig_x)
9003 rtx x, y, offset;
9005 orig_x = delegitimize_mem_from_attrs (orig_x);
9006 x = orig_x;
9007 if (MEM_P (x))
9008 x = XEXP (x, 0);
9010 y = x;
9011 if (TARGET_CMODEL != CMODEL_SMALL
9012 && GET_CODE (y) == LO_SUM)
9013 y = XEXP (y, 1);
9015 offset = NULL_RTX;
9016 if (GET_CODE (y) == PLUS
9017 && GET_MODE (y) == Pmode
9018 && CONST_INT_P (XEXP (y, 1)))
9020 offset = XEXP (y, 1);
9021 y = XEXP (y, 0);
9024 if (GET_CODE (y) == UNSPEC
9025 && XINT (y, 1) == UNSPEC_TOCREL)
9027 y = XVECEXP (y, 0, 0);
9029 #ifdef HAVE_AS_TLS
9030 /* Do not associate thread-local symbols with the original
9031 constant pool symbol. */
9032 if (TARGET_XCOFF
9033 && GET_CODE (y) == SYMBOL_REF
9034 && CONSTANT_POOL_ADDRESS_P (y)
9035 && rs6000_real_tls_symbol_ref_p (get_pool_constant (y)))
9036 return orig_x;
9037 #endif
9039 if (offset != NULL_RTX)
9040 y = gen_rtx_PLUS (Pmode, y, offset);
9041 if (!MEM_P (orig_x))
9042 return y;
9043 else
9044 return replace_equiv_address_nv (orig_x, y);
9047 if (TARGET_MACHO
9048 && GET_CODE (orig_x) == LO_SUM
9049 && GET_CODE (XEXP (orig_x, 1)) == CONST)
9051 y = XEXP (XEXP (orig_x, 1), 0);
9052 if (GET_CODE (y) == UNSPEC
9053 && XINT (y, 1) == UNSPEC_MACHOPIC_OFFSET)
9054 return XVECEXP (y, 0, 0);
9057 return orig_x;
9060 /* Return true if X shouldn't be emitted into the debug info.
9061 The linker doesn't like .toc section references from
9062 .debug_* sections, so reject .toc section symbols. */
9064 static bool
9065 rs6000_const_not_ok_for_debug_p (rtx x)
9067 if (GET_CODE (x) == SYMBOL_REF
9068 && CONSTANT_POOL_ADDRESS_P (x))
9070 rtx c = get_pool_constant (x);
9071 machine_mode cmode = get_pool_mode (x);
9072 if (ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (c, cmode))
9073 return true;
9076 return false;
9080 /* Implement the TARGET_LEGITIMATE_COMBINED_INSN hook. */
9082 static bool
9083 rs6000_legitimate_combined_insn (rtx_insn *insn)
9085 int icode = INSN_CODE (insn);
9087 /* Reject creating doloop insns. Combine should not be allowed
9088 to create these for a number of reasons:
9089 1) In a nested loop, if combine creates one of these in an
9090 outer loop and the register allocator happens to allocate ctr
9091 to the outer loop insn, then the inner loop can't use ctr.
9092 Inner loops ought to be more highly optimized.
9093 2) Combine often wants to create one of these from what was
9094 originally a three insn sequence, first combining the three
9095 insns to two, then to ctrsi/ctrdi. When ctrsi/ctrdi is not
9096 allocated ctr, the splitter takes us back to the three insn
9097 sequence. It's better to stop combine at the two insn
9098 sequence.
9099 3) Faced with not being able to allocate ctr for ctrsi/ctrdi
9100 insns, the register allocator sometimes uses floating point
9101 or vector registers for the pseudo. Since ctrsi/ctrdi is a
9102 jump insn and output reloads are not implemented for jumps,
9103 the ctrsi/ctrdi splitters need to handle all possible cases.
9104 That's a pain, and it gets to be seriously difficult when a
9105 splitter that runs after reload needs memory to transfer from
9106 a gpr to fpr. See PR70098 and PR71763 which are not fixed
9107 for the difficult case. It's better to not create problems
9108 in the first place. */
9109 if (icode != CODE_FOR_nothing
9110 && (icode == CODE_FOR_ctrsi_internal1
9111 || icode == CODE_FOR_ctrdi_internal1
9112 || icode == CODE_FOR_ctrsi_internal2
9113 || icode == CODE_FOR_ctrdi_internal2
9114 || icode == CODE_FOR_ctrsi_internal3
9115 || icode == CODE_FOR_ctrdi_internal3
9116 || icode == CODE_FOR_ctrsi_internal4
9117 || icode == CODE_FOR_ctrdi_internal4))
9118 return false;
9120 return true;
9123 /* Construct the SYMBOL_REF for the tls_get_addr function. */
9125 static GTY(()) rtx rs6000_tls_symbol;
9126 static rtx
9127 rs6000_tls_get_addr (void)
9129 if (!rs6000_tls_symbol)
9130 rs6000_tls_symbol = init_one_libfunc ("__tls_get_addr");
9132 return rs6000_tls_symbol;
9135 /* Construct the SYMBOL_REF for TLS GOT references. */
9137 static GTY(()) rtx rs6000_got_symbol;
9138 static rtx
9139 rs6000_got_sym (void)
9141 if (!rs6000_got_symbol)
9143 rs6000_got_symbol = gen_rtx_SYMBOL_REF (Pmode, "_GLOBAL_OFFSET_TABLE_");
9144 SYMBOL_REF_FLAGS (rs6000_got_symbol) |= SYMBOL_FLAG_LOCAL;
9145 SYMBOL_REF_FLAGS (rs6000_got_symbol) |= SYMBOL_FLAG_EXTERNAL;
9148 return rs6000_got_symbol;
9151 /* AIX Thread-Local Address support. */
9153 static rtx
9154 rs6000_legitimize_tls_address_aix (rtx addr, enum tls_model model)
9156 rtx sym, mem, tocref, tlsreg, tmpreg, dest, tlsaddr;
9157 const char *name;
9158 char *tlsname;
9160 name = XSTR (addr, 0);
9161 /* Append TLS CSECT qualifier, unless the symbol already is qualified
9162 or the symbol will be in the TLS private data section. */
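/* For instance (illustrative; the name "foo" is invented): a public
   initialized TLS variable "foo" is rewritten to "foo[TL]" and a
   bss-initialized one to "foo[UL]", while a name already ending in
   ']' is left untouched.  */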
9163 if (name[strlen (name) - 1] != ']'
9164 && (TREE_PUBLIC (SYMBOL_REF_DECL (addr))
9165 || bss_initializer_p (SYMBOL_REF_DECL (addr))))
9167 tlsname = XALLOCAVEC (char, strlen (name) + 4);
9168 strcpy (tlsname, name);
9169 strcat (tlsname,
9170 bss_initializer_p (SYMBOL_REF_DECL (addr)) ? "[UL]" : "[TL]");
9171 tlsaddr = copy_rtx (addr);
9172 XSTR (tlsaddr, 0) = ggc_strdup (tlsname);
9174 else
9175 tlsaddr = addr;
9177 /* Place addr into TOC constant pool. */
9178 sym = force_const_mem (GET_MODE (tlsaddr), tlsaddr);
9180 /* Output the TOC entry and create the MEM referencing the value. */
9181 if (constant_pool_expr_p (XEXP (sym, 0))
9182 && ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (get_pool_constant (XEXP (sym, 0)), Pmode))
9184 tocref = create_TOC_reference (XEXP (sym, 0), NULL_RTX);
9185 mem = gen_const_mem (Pmode, tocref);
9186 set_mem_alias_set (mem, get_TOC_alias_set ());
9188 else
9189 return sym;
9191 /* Use global-dynamic for local-dynamic. */
9192 if (model == TLS_MODEL_GLOBAL_DYNAMIC
9193 || model == TLS_MODEL_LOCAL_DYNAMIC)
9195 /* Create new TOC reference for @m symbol. */
9196 name = XSTR (XVECEXP (XEXP (mem, 0), 0, 0), 0);
9197 tlsname = XALLOCAVEC (char, strlen (name) + 1);
9198 strcpy (tlsname, "*LCM");
9199 strcat (tlsname, name + 3);
9200 rtx modaddr = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (tlsname));
9201 SYMBOL_REF_FLAGS (modaddr) |= SYMBOL_FLAG_LOCAL;
9202 tocref = create_TOC_reference (modaddr, NULL_RTX);
9203 rtx modmem = gen_const_mem (Pmode, tocref);
9204 set_mem_alias_set (modmem, get_TOC_alias_set ());
9206 rtx modreg = gen_reg_rtx (Pmode);
9207 emit_insn (gen_rtx_SET (modreg, modmem));
9209 tmpreg = gen_reg_rtx (Pmode);
9210 emit_insn (gen_rtx_SET (tmpreg, mem));
9212 dest = gen_reg_rtx (Pmode);
9213 if (TARGET_32BIT)
9214 emit_insn (gen_tls_get_addrsi (dest, modreg, tmpreg));
9215 else
9216 emit_insn (gen_tls_get_addrdi (dest, modreg, tmpreg));
9217 return dest;
9219 /* Obtain TLS pointer: 32 bit call or 64 bit GPR 13. */
9220 else if (TARGET_32BIT)
9222 tlsreg = gen_reg_rtx (SImode);
9223 emit_insn (gen_tls_get_tpointer (tlsreg));
9225 else
9226 tlsreg = gen_rtx_REG (DImode, 13);
9228 /* Load the TOC value into temporary register. */
9229 tmpreg = gen_reg_rtx (Pmode);
9230 emit_insn (gen_rtx_SET (tmpreg, mem));
9231 set_unique_reg_note (get_last_insn (), REG_EQUAL,
9232 gen_rtx_MINUS (Pmode, addr, tlsreg));
9234 /* Add TOC symbol value to TLS pointer. */
9235 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, tmpreg, tlsreg));
9237 return dest;
9240 /* ADDR contains a thread-local SYMBOL_REF. Generate code to compute
9241 this (thread-local) address. */
9243 static rtx
9244 rs6000_legitimize_tls_address (rtx addr, enum tls_model model)
9246 rtx dest, insn;
9248 if (TARGET_XCOFF)
9249 return rs6000_legitimize_tls_address_aix (addr, model);
9251 dest = gen_reg_rtx (Pmode);
9252 if (model == TLS_MODEL_LOCAL_EXEC && rs6000_tls_size == 16)
9254 rtx tlsreg;
9256 if (TARGET_64BIT)
9258 tlsreg = gen_rtx_REG (Pmode, 13);
9259 insn = gen_tls_tprel_64 (dest, tlsreg, addr);
9261 else
9263 tlsreg = gen_rtx_REG (Pmode, 2);
9264 insn = gen_tls_tprel_32 (dest, tlsreg, addr);
9266 emit_insn (insn);
9268 else if (model == TLS_MODEL_LOCAL_EXEC && rs6000_tls_size == 32)
9270 rtx tlsreg, tmp;
9272 tmp = gen_reg_rtx (Pmode);
9273 if (TARGET_64BIT)
9275 tlsreg = gen_rtx_REG (Pmode, 13);
9276 insn = gen_tls_tprel_ha_64 (tmp, tlsreg, addr);
9278 else
9280 tlsreg = gen_rtx_REG (Pmode, 2);
9281 insn = gen_tls_tprel_ha_32 (tmp, tlsreg, addr);
9283 emit_insn (insn);
9284 if (TARGET_64BIT)
9285 insn = gen_tls_tprel_lo_64 (dest, tmp, addr);
9286 else
9287 insn = gen_tls_tprel_lo_32 (dest, tmp, addr);
9288 emit_insn (insn);
9290 else
9292 rtx r3, got, tga, tmp1, tmp2, call_insn;
9294 /* We currently use relocations like @got@tlsgd for TLS, which
9295 means the linker will handle allocation of tls entries, placing
9296 them in the .got section. So use a pointer to the .got section,
9297 not one to secondary TOC sections used by 64-bit -mminimal-toc,
9298 or to secondary GOT sections used by 32-bit -fPIC. */
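/* Illustrative sketch only (the exact relocations depend on the ABI
   and code model): with -mcmodel=small on 64-bit ELF, a global-dynamic
   access to a symbol "x" assembles roughly as
       addi 3,2,x@got@tlsgd
       bl __tls_get_addr(x@tlsgd)
       nop
   with the GOT slot resolved by the linker and dynamic linker.  */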
9299 if (TARGET_64BIT)
9300 got = gen_rtx_REG (Pmode, 2);
9301 else
9303 if (flag_pic == 1)
9304 got = gen_rtx_REG (Pmode, RS6000_PIC_OFFSET_TABLE_REGNUM);
9305 else
9307 rtx gsym = rs6000_got_sym ();
9308 got = gen_reg_rtx (Pmode);
9309 if (flag_pic == 0)
9310 rs6000_emit_move (got, gsym, Pmode);
9311 else
9313 rtx mem, lab;
9315 tmp1 = gen_reg_rtx (Pmode);
9316 tmp2 = gen_reg_rtx (Pmode);
9317 mem = gen_const_mem (Pmode, tmp1);
9318 lab = gen_label_rtx ();
9319 emit_insn (gen_load_toc_v4_PIC_1b (gsym, lab));
9320 emit_move_insn (tmp1, gen_rtx_REG (Pmode, LR_REGNO));
9321 if (TARGET_LINK_STACK)
9322 emit_insn (gen_addsi3 (tmp1, tmp1, GEN_INT (4)));
9323 emit_move_insn (tmp2, mem);
9324 rtx_insn *last = emit_insn (gen_addsi3 (got, tmp1, tmp2));
9325 set_unique_reg_note (last, REG_EQUAL, gsym);
9330 if (model == TLS_MODEL_GLOBAL_DYNAMIC)
9332 tga = rs6000_tls_get_addr ();
9333 emit_library_call_value (tga, dest, LCT_CONST, Pmode,
9334 1, const0_rtx, Pmode);
9336 r3 = gen_rtx_REG (Pmode, 3);
9337 if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
9339 if (TARGET_64BIT)
9340 insn = gen_tls_gd_aix64 (r3, got, addr, tga, const0_rtx);
9341 else
9342 insn = gen_tls_gd_aix32 (r3, got, addr, tga, const0_rtx);
9344 else if (DEFAULT_ABI == ABI_V4)
9345 insn = gen_tls_gd_sysvsi (r3, got, addr, tga, const0_rtx);
9346 else
9347 gcc_unreachable ();
9348 call_insn = last_call_insn ();
9349 PATTERN (call_insn) = insn;
9350 if (DEFAULT_ABI == ABI_V4 && TARGET_SECURE_PLT && flag_pic)
9351 use_reg (&CALL_INSN_FUNCTION_USAGE (call_insn),
9352 pic_offset_table_rtx);
9354 else if (model == TLS_MODEL_LOCAL_DYNAMIC)
9356 tga = rs6000_tls_get_addr ();
9357 tmp1 = gen_reg_rtx (Pmode);
9358 emit_library_call_value (tga, tmp1, LCT_CONST, Pmode,
9359 1, const0_rtx, Pmode);
9361 r3 = gen_rtx_REG (Pmode, 3);
9362 if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
9364 if (TARGET_64BIT)
9365 insn = gen_tls_ld_aix64 (r3, got, tga, const0_rtx);
9366 else
9367 insn = gen_tls_ld_aix32 (r3, got, tga, const0_rtx);
9369 else if (DEFAULT_ABI == ABI_V4)
9370 insn = gen_tls_ld_sysvsi (r3, got, tga, const0_rtx);
9371 else
9372 gcc_unreachable ();
9373 call_insn = last_call_insn ();
9374 PATTERN (call_insn) = insn;
9375 if (DEFAULT_ABI == ABI_V4 && TARGET_SECURE_PLT && flag_pic)
9376 use_reg (&CALL_INSN_FUNCTION_USAGE (call_insn),
9377 pic_offset_table_rtx);
9379 if (rs6000_tls_size == 16)
9381 if (TARGET_64BIT)
9382 insn = gen_tls_dtprel_64 (dest, tmp1, addr);
9383 else
9384 insn = gen_tls_dtprel_32 (dest, tmp1, addr);
9386 else if (rs6000_tls_size == 32)
9388 tmp2 = gen_reg_rtx (Pmode);
9389 if (TARGET_64BIT)
9390 insn = gen_tls_dtprel_ha_64 (tmp2, tmp1, addr);
9391 else
9392 insn = gen_tls_dtprel_ha_32 (tmp2, tmp1, addr);
9393 emit_insn (insn);
9394 if (TARGET_64BIT)
9395 insn = gen_tls_dtprel_lo_64 (dest, tmp2, addr);
9396 else
9397 insn = gen_tls_dtprel_lo_32 (dest, tmp2, addr);
9399 else
9401 tmp2 = gen_reg_rtx (Pmode);
9402 if (TARGET_64BIT)
9403 insn = gen_tls_got_dtprel_64 (tmp2, got, addr);
9404 else
9405 insn = gen_tls_got_dtprel_32 (tmp2, got, addr);
9406 emit_insn (insn);
9407 insn = gen_rtx_SET (dest, gen_rtx_PLUS (Pmode, tmp2, tmp1));
9409 emit_insn (insn);
9411 else
9413 /* IE, or 64-bit offset LE. */
9414 tmp2 = gen_reg_rtx (Pmode);
9415 if (TARGET_64BIT)
9416 insn = gen_tls_got_tprel_64 (tmp2, got, addr);
9417 else
9418 insn = gen_tls_got_tprel_32 (tmp2, got, addr);
9419 emit_insn (insn);
9420 if (TARGET_64BIT)
9421 insn = gen_tls_tls_64 (dest, tmp2, addr);
9422 else
9423 insn = gen_tls_tls_32 (dest, tmp2, addr);
9424 emit_insn (insn);
9428 return dest;
9431 /* Only create the global variable for the stack protect guard if we are using
9432 the global flavor of that guard. */
9433 static tree
9434 rs6000_init_stack_protect_guard (void)
9436 if (rs6000_stack_protector_guard == SSP_GLOBAL)
9437 return default_stack_protect_guard ();
9439 return NULL_TREE;
9442 /* Implement TARGET_CANNOT_FORCE_CONST_MEM. */
9444 static bool
9445 rs6000_cannot_force_const_mem (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
9447 if (GET_CODE (x) == HIGH
9448 && GET_CODE (XEXP (x, 0)) == UNSPEC)
9449 return true;
9451 /* A TLS symbol in the TOC cannot contain a sum. */
9452 if (GET_CODE (x) == CONST
9453 && GET_CODE (XEXP (x, 0)) == PLUS
9454 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
9455 && SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x, 0), 0)) != 0)
9456 return true;
9458 /* Do not place an ELF TLS symbol in the constant pool. */
9459 return TARGET_ELF && tls_referenced_p (x);
9462 /* Return true iff the given SYMBOL_REF refers to a constant pool entry
9463 that we have put in the TOC, or for cmodel=medium, if the SYMBOL_REF
9464 can be addressed relative to the toc pointer. */
9466 static bool
9467 use_toc_relative_ref (rtx sym, machine_mode mode)
9469 return ((constant_pool_expr_p (sym)
9470 && ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (get_pool_constant (sym),
9471 get_pool_mode (sym)))
9472 || (TARGET_CMODEL == CMODEL_MEDIUM
9473 && SYMBOL_REF_LOCAL_P (sym)
9474 && GET_MODE_SIZE (mode) <= POWERPC64_TOC_POINTER_ALIGNMENT));
9477 /* Our implementation of LEGITIMIZE_RELOAD_ADDRESS. Returns a value to
9478 replace the input X, or the original X if no replacement is called for.
9479 The output parameter *WIN is 1 if the calling macro should goto WIN,
9480 0 if it should not.
9482 For RS/6000, we wish to handle large displacements off a base
9483 register by splitting the addend across an addi/addis pair and the mem insn.
9484 This cuts the number of extra insns needed from 3 to 1.
9486 On Darwin, we use this to generate code for floating point constants.
9487 A movsf_low is generated so we wind up with 2 instructions rather than 3.
9488 The Darwin code is inside #if TARGET_MACHO because only then are the
9489 machopic_* functions defined. */
9490 static rtx
9491 rs6000_legitimize_reload_address (rtx x, machine_mode mode,
9492 int opnum, int type,
9493 int ind_levels ATTRIBUTE_UNUSED, int *win)
9495 bool reg_offset_p = reg_offset_addressing_ok_p (mode);
9496 bool quad_offset_p = mode_supports_vsx_dform_quad (mode);
9498 /* Nasty hack for vsx_splat_v2df/v2di load from mem, which takes a
9499 DFmode/DImode MEM. Ditto for ISA 3.0 vsx_splat_v4sf/v4si. */
9500 if (reg_offset_p
9501 && opnum == 1
9502 && ((mode == DFmode && recog_data.operand_mode[0] == V2DFmode)
9503 || (mode == DImode && recog_data.operand_mode[0] == V2DImode)
9504 || (mode == SFmode && recog_data.operand_mode[0] == V4SFmode
9505 && TARGET_P9_VECTOR)
9506 || (mode == SImode && recog_data.operand_mode[0] == V4SImode
9507 && TARGET_P9_VECTOR)))
9508 reg_offset_p = false;
9510 /* We must recognize output that we have already generated ourselves. */
9511 if (GET_CODE (x) == PLUS
9512 && GET_CODE (XEXP (x, 0)) == PLUS
9513 && GET_CODE (XEXP (XEXP (x, 0), 0)) == REG
9514 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
9515 && GET_CODE (XEXP (x, 1)) == CONST_INT)
9517 if (TARGET_DEBUG_ADDR)
9519 fprintf (stderr, "\nlegitimize_reload_address push_reload #1:\n");
9520 debug_rtx (x);
9522 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
9523 BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
9524 opnum, (enum reload_type) type);
9525 *win = 1;
9526 return x;
9529 /* Likewise for (lo_sum (high ...) ...) output we have generated. */
9530 if (GET_CODE (x) == LO_SUM
9531 && GET_CODE (XEXP (x, 0)) == HIGH)
9533 if (TARGET_DEBUG_ADDR)
9535 fprintf (stderr, "\nlegitimize_reload_address push_reload #2:\n");
9536 debug_rtx (x);
9538 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
9539 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0,
9540 opnum, (enum reload_type) type);
9541 *win = 1;
9542 return x;
9545 #if TARGET_MACHO
9546 if (DEFAULT_ABI == ABI_DARWIN && flag_pic
9547 && GET_CODE (x) == LO_SUM
9548 && GET_CODE (XEXP (x, 0)) == PLUS
9549 && XEXP (XEXP (x, 0), 0) == pic_offset_table_rtx
9550 && GET_CODE (XEXP (XEXP (x, 0), 1)) == HIGH
9551 && XEXP (XEXP (XEXP (x, 0), 1), 0) == XEXP (x, 1)
9552 && machopic_operand_p (XEXP (x, 1)))
9554 /* Result of previous invocation of this function on Darwin
9555 floating point constant. */
9556 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
9557 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0,
9558 opnum, (enum reload_type) type);
9559 *win = 1;
9560 return x;
9562 #endif
9564 if (TARGET_CMODEL != CMODEL_SMALL
9565 && reg_offset_p
9566 && !quad_offset_p
9567 && small_toc_ref (x, VOIDmode))
9569 rtx hi = gen_rtx_HIGH (Pmode, copy_rtx (x));
9570 x = gen_rtx_LO_SUM (Pmode, hi, x);
9571 if (TARGET_DEBUG_ADDR)
9573 fprintf (stderr, "\nlegitimize_reload_address push_reload #3:\n");
9574 debug_rtx (x);
9576 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
9577 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0,
9578 opnum, (enum reload_type) type);
9579 *win = 1;
9580 return x;
9583 if (GET_CODE (x) == PLUS
9584 && REG_P (XEXP (x, 0))
9585 && REGNO (XEXP (x, 0)) < FIRST_PSEUDO_REGISTER
9586 && INT_REG_OK_FOR_BASE_P (XEXP (x, 0), 1)
9587 && CONST_INT_P (XEXP (x, 1))
9588 && reg_offset_p
9589 && !PAIRED_VECTOR_MODE (mode)
9590 && (quad_offset_p || !VECTOR_MODE_P (mode) || VECTOR_MEM_NONE_P (mode)))
9592 HOST_WIDE_INT val = INTVAL (XEXP (x, 1));
9593 HOST_WIDE_INT low = ((val & 0xffff) ^ 0x8000) - 0x8000;
9594 HOST_WIDE_INT high
9595 = (((val - low) & 0xffffffff) ^ 0x80000000) - 0x80000000;
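      /* Worked example: for val = 0x12348678 the xor/subtract idiom
	 sign-extends the low 16 bits, giving low = -0x7988 and
	 high = 0x12350000, so that high + low == val and the low part
	 fits in a 16-bit signed displacement.  */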
9597 /* Check for 32-bit overflow or quad addresses with one of the
9598 four least significant bits set. */
9599 if (high + low != val
9600 || (quad_offset_p && (low & 0xf)))
9602 *win = 0;
9603 return x;
9606 /* Reload the high part into a base reg; leave the low part
9607 in the mem directly. */
9609 x = gen_rtx_PLUS (GET_MODE (x),
9610 gen_rtx_PLUS (GET_MODE (x), XEXP (x, 0),
9611 GEN_INT (high)),
9612 GEN_INT (low));
9614 if (TARGET_DEBUG_ADDR)
9616 fprintf (stderr, "\nlegitimize_reload_address push_reload #4:\n");
9617 debug_rtx (x);
9619 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
9620 BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
9621 opnum, (enum reload_type) type);
9622 *win = 1;
9623 return x;
9626 if (GET_CODE (x) == SYMBOL_REF
9627 && reg_offset_p
9628 && !quad_offset_p
9629 && (!VECTOR_MODE_P (mode) || VECTOR_MEM_NONE_P (mode))
9630 && !PAIRED_VECTOR_MODE (mode)
9631 #if TARGET_MACHO
9632 && DEFAULT_ABI == ABI_DARWIN
9633 && (flag_pic || MACHO_DYNAMIC_NO_PIC_P)
9634 && machopic_symbol_defined_p (x)
9635 #else
9636 && DEFAULT_ABI == ABI_V4
9637 && !flag_pic
9638 #endif
9639 /* Don't do this for TFmode or TDmode, since the result isn't offsettable.
9640 The same goes for DImode without 64-bit gprs and DFmode and DDmode
9641 without fprs.
9642 ??? Assume floating point reg based on mode? This assumption is
9643 violated by e.g. a powerpc-linux -m32 compile of gcc.dg/pr28796-2.c
9644 where reload ends up doing a DFmode load of a constant from
9645 mem using two gprs. Unfortunately, at this point reload
9646 hasn't yet selected regs so poking around in reload data
9647 won't help and even if we could figure out the regs reliably,
9648 we'd still want to allow this transformation when the mem is
9649 naturally aligned. Since we say the address is good here, we
9650 can't disable offsets from LO_SUMs in mem_operand_gpr.
9651 FIXME: Allow offset from lo_sum for other modes too, when
9652 mem is sufficiently aligned.
9654 Also disallow this if the type can go in VMX/Altivec registers, since
9655 those registers do not have d-form (reg+offset) address modes. */
9656 && !reg_addr[mode].scalar_in_vmx_p
9657 && mode != TFmode
9658 && mode != TDmode
9659 && mode != IFmode
9660 && mode != KFmode
9661 && (mode != TImode || !TARGET_VSX_TIMODE)
9662 && mode != PTImode
9663 && (mode != DImode || TARGET_POWERPC64)
9664 && ((mode != DFmode && mode != DDmode) || TARGET_POWERPC64
9665 || (TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT)))
9667 #if TARGET_MACHO
9668 if (flag_pic)
9670 rtx offset = machopic_gen_offset (x);
9671 x = gen_rtx_LO_SUM (GET_MODE (x),
9672 gen_rtx_PLUS (Pmode, pic_offset_table_rtx,
9673 gen_rtx_HIGH (Pmode, offset)), offset);
9675 else
9676 #endif
9677 x = gen_rtx_LO_SUM (GET_MODE (x),
9678 gen_rtx_HIGH (Pmode, x), x);
9680 if (TARGET_DEBUG_ADDR)
9682 fprintf (stderr, "\nlegitimize_reload_address push_reload #5:\n");
9683 debug_rtx (x);
9685 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
9686 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0,
9687 opnum, (enum reload_type) type);
9688 *win = 1;
9689 return x;
9692 /* Reload an offset address wrapped by an AND that represents the
9693 masking of the lower bits. Strip the outer AND and let reload
9694 convert the offset address into an indirect address. For VSX,
9695 force reload to create the address with an AND in a separate
9696 register, because we can't guarantee an altivec register will
9697 be used. */
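  /* E.g. an Altivec address such as
	(and (plus (reg) (const_int 32)) (const_int -16))
     is rewritten here to (plus (reg) (const_int 32)); reload then
     converts the offset address into an indirect one.  */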
9698 if (VECTOR_MEM_ALTIVEC_P (mode)
9699 && GET_CODE (x) == AND
9700 && GET_CODE (XEXP (x, 0)) == PLUS
9701 && GET_CODE (XEXP (XEXP (x, 0), 0)) == REG
9702 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
9703 && GET_CODE (XEXP (x, 1)) == CONST_INT
9704 && INTVAL (XEXP (x, 1)) == -16)
9706 x = XEXP (x, 0);
9707 *win = 1;
9708 return x;
9711 if (TARGET_TOC
9712 && reg_offset_p
9713 && !quad_offset_p
9714 && GET_CODE (x) == SYMBOL_REF
9715 && use_toc_relative_ref (x, mode))
9717 x = create_TOC_reference (x, NULL_RTX);
9718 if (TARGET_CMODEL != CMODEL_SMALL)
9720 if (TARGET_DEBUG_ADDR)
9722 fprintf (stderr, "\nlegitimize_reload_address push_reload #6:\n");
9723 debug_rtx (x);
9725 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
9726 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0,
9727 opnum, (enum reload_type) type);
9729 *win = 1;
9730 return x;
9732 *win = 0;
9733 return x;
9736 /* Debug version of rs6000_legitimize_reload_address. */
9737 static rtx
9738 rs6000_debug_legitimize_reload_address (rtx x, machine_mode mode,
9739 int opnum, int type,
9740 int ind_levels, int *win)
9742 rtx ret = rs6000_legitimize_reload_address (x, mode, opnum, type,
9743 ind_levels, win);
9744 fprintf (stderr,
9745 "\nrs6000_legitimize_reload_address: mode = %s, opnum = %d, "
9746 "type = %d, ind_levels = %d, win = %d, original addr:\n",
9747 GET_MODE_NAME (mode), opnum, type, ind_levels, *win);
9748 debug_rtx (x);
9750 if (x == ret)
9751 fprintf (stderr, "Same address returned\n");
9752 else if (!ret)
9753 fprintf (stderr, "NULL returned\n");
9754 else
9756 fprintf (stderr, "New address:\n");
9757 debug_rtx (ret);
9760 return ret;
9763 /* TARGET_LEGITIMATE_ADDRESS_P recognizes an RTL expression
9764 that is a valid memory address for an instruction.
9765 The MODE argument is the machine mode for the MEM expression
9766 that wants to use this address.
9768 On the RS/6000, there are four valid addresses: a SYMBOL_REF that
9769 refers to a constant pool entry of an address (or the sum of it
9770 plus a constant), a short (16-bit signed) constant plus a register,
9771 the sum of two registers, or a register indirect, possibly with an
9772 auto-increment. For DFmode, DDmode and DImode with a constant plus
9773 register, we must ensure that both words are addressable, or on
9774 PowerPC64 that the offset is word aligned.
9776 For modes spanning multiple registers (DFmode and DDmode in 32-bit GPRs,
9777 32-bit DImode, TImode, TFmode, TDmode), indexed addressing cannot be used
9778 because adjacent memory cells are accessed by adding word-sized offsets
9779 during assembly output. */
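/* Illustrative RTL for the four forms (registers and offsets invented):
     (mem:SI (symbol_ref "*.LC0"))            constant pool / TOC reference
     (mem:SI (plus (reg) (const_int 16)))     register + 16-bit offset
     (mem:SI (plus (reg) (reg)))              indexed (register + register)
     (mem:SI (reg))                           register indirect  */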
9780 static bool
9781 rs6000_legitimate_address_p (machine_mode mode, rtx x, bool reg_ok_strict)
9783 bool reg_offset_p = reg_offset_addressing_ok_p (mode);
9784 bool quad_offset_p = mode_supports_vsx_dform_quad (mode);
9786 /* If this is an unaligned stvx/ldvx type address, discard the outer AND. */
9787 if (VECTOR_MEM_ALTIVEC_P (mode)
9788 && GET_CODE (x) == AND
9789 && GET_CODE (XEXP (x, 1)) == CONST_INT
9790 && INTVAL (XEXP (x, 1)) == -16)
9791 x = XEXP (x, 0);
9793 if (TARGET_ELF && RS6000_SYMBOL_REF_TLS_P (x))
9794 return 0;
9795 if (legitimate_indirect_address_p (x, reg_ok_strict))
9796 return 1;
9797 if (TARGET_UPDATE
9798 && (GET_CODE (x) == PRE_INC || GET_CODE (x) == PRE_DEC)
9799 && mode_supports_pre_incdec_p (mode)
9800 && legitimate_indirect_address_p (XEXP (x, 0), reg_ok_strict))
9801 return 1;
9802 /* Handle restricted vector d-form offsets in ISA 3.0. */
9803 if (quad_offset_p)
9805 if (quad_address_p (x, mode, reg_ok_strict))
9806 return 1;
9808 else if (virtual_stack_registers_memory_p (x))
9809 return 1;
9811 else if (reg_offset_p)
9813 if (legitimate_small_data_p (mode, x))
9814 return 1;
9815 if (legitimate_constant_pool_address_p (x, mode,
9816 reg_ok_strict || lra_in_progress))
9817 return 1;
9818 if (reg_addr[mode].fused_toc && GET_CODE (x) == UNSPEC
9819 && XINT (x, 1) == UNSPEC_FUSION_ADDIS)
9820 return 1;
9823 /* For TImode, if we have TImode in VSX registers, only allow register
9824 indirect addresses. This will allow the values to go in either GPRs
9825 or VSX registers without reloading. The vector types would tend to
9826 go into VSX registers, so we allow REG+REG, while TImode seems
9827 somewhat split, in that some uses are GPR based, and some VSX based. */
9828 /* FIXME: We could loosen this by changing the following to
9829 if (mode == TImode && TARGET_QUAD_MEMORY && TARGET_VSX_TIMODE)
9830 but currently we cannot allow REG+REG addressing for TImode. See
9831 PR72827 for complete details on how this ends up hoodwinking DSE. */
9832 if (mode == TImode && TARGET_VSX_TIMODE)
9833 return 0;
9834 /* If not REG_OK_STRICT (i.e. before reload), accept any stack offset. */
9835 if (! reg_ok_strict
9836 && reg_offset_p
9837 && GET_CODE (x) == PLUS
9838 && GET_CODE (XEXP (x, 0)) == REG
9839 && (XEXP (x, 0) == virtual_stack_vars_rtx
9840 || XEXP (x, 0) == arg_pointer_rtx)
9841 && GET_CODE (XEXP (x, 1)) == CONST_INT)
9842 return 1;
9843 if (rs6000_legitimate_offset_address_p (mode, x, reg_ok_strict, false))
9844 return 1;
9845 if (!FLOAT128_2REG_P (mode)
9846 && ((TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT)
9847 || TARGET_POWERPC64
9848 || (mode != DFmode && mode != DDmode))
9849 && (TARGET_POWERPC64 || mode != DImode)
9850 && (mode != TImode || VECTOR_MEM_VSX_P (TImode))
9851 && mode != PTImode
9852 && !avoiding_indexed_address_p (mode)
9853 && legitimate_indexed_address_p (x, reg_ok_strict))
9854 return 1;
9855 if (TARGET_UPDATE && GET_CODE (x) == PRE_MODIFY
9856 && mode_supports_pre_modify_p (mode)
9857 && legitimate_indirect_address_p (XEXP (x, 0), reg_ok_strict)
9858 && (rs6000_legitimate_offset_address_p (mode, XEXP (x, 1),
9859 reg_ok_strict, false)
9860 || (!avoiding_indexed_address_p (mode)
9861 && legitimate_indexed_address_p (XEXP (x, 1), reg_ok_strict)))
9862 && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
9863 return 1;
9864 if (reg_offset_p && !quad_offset_p
9865 && legitimate_lo_sum_address_p (mode, x, reg_ok_strict))
9866 return 1;
9867 return 0;
9870 /* Debug version of rs6000_legitimate_address_p. */
9871 static bool
9872 rs6000_debug_legitimate_address_p (machine_mode mode, rtx x,
9873 bool reg_ok_strict)
9875 bool ret = rs6000_legitimate_address_p (mode, x, reg_ok_strict);
9876 fprintf (stderr,
9877 "\nrs6000_legitimate_address_p: return = %s, mode = %s, "
9878 "strict = %d, reload = %s, code = %s\n",
9879 ret ? "true" : "false",
9880 GET_MODE_NAME (mode),
9881 reg_ok_strict,
9882 (reload_completed
9883 ? "after"
9884 : (reload_in_progress ? "progress" : "before")),
9885 GET_RTX_NAME (GET_CODE (x)));
9886 debug_rtx (x);
9888 return ret;
9891 /* Implement TARGET_MODE_DEPENDENT_ADDRESS_P. */
9893 static bool
9894 rs6000_mode_dependent_address_p (const_rtx addr,
9895 addr_space_t as ATTRIBUTE_UNUSED)
9897 return rs6000_mode_dependent_address_ptr (addr);
9900 /* Return true if ADDR (a legitimate address expression)
9901 has an effect that depends on the machine mode it is used for.
9903 On the RS/6000 this is true of all integral offsets (since AltiVec
9904 and VSX modes don't allow them) and of any pre-increment or decrement.
9906 ??? Except that due to conceptual problems in offsettable_address_p
9907 we can't really report the problems of integral offsets. So leave
9908 this assuming that the adjustable offset must be valid for the
9909 sub-words of a TFmode operand, which is what we had before. */
9911 static bool
9912 rs6000_mode_dependent_address (const_rtx addr)
9914 switch (GET_CODE (addr))
9916 case PLUS:
9917 /* Any offset from virtual_stack_vars_rtx and arg_pointer_rtx
9918 is considered a legitimate address before reload, so there
9919 are no offset restrictions in that case. Note that this
9920 condition is safe in strict mode because any address involving
9921 virtual_stack_vars_rtx or arg_pointer_rtx would already have
9922 been rejected as illegitimate. */
9923 if (XEXP (addr, 0) != virtual_stack_vars_rtx
9924 && XEXP (addr, 0) != arg_pointer_rtx
9925 && GET_CODE (XEXP (addr, 1)) == CONST_INT)
9927 unsigned HOST_WIDE_INT val = INTVAL (XEXP (addr, 1));
9928 return val + 0x8000 >= 0x10000 - (TARGET_POWERPC64 ? 8 : 12);
9930 break;
9932 case LO_SUM:
9933 /* Anything in the constant pool is sufficiently aligned that
9934 all bytes have the same high part address. */
9935 return !legitimate_constant_pool_address_p (addr, QImode, false);
9937 /* Auto-increment cases are now treated generically in recog.c. */
9938 case PRE_MODIFY:
9939 return TARGET_UPDATE;
9941 /* AND is only allowed in Altivec loads. */
9942 case AND:
9943 return true;
9945 default:
9946 break;
9949 return false;
9952 /* Debug version of rs6000_mode_dependent_address. */
9953 static bool
9954 rs6000_debug_mode_dependent_address (const_rtx addr)
9956 bool ret = rs6000_mode_dependent_address (addr);
9958 fprintf (stderr, "\nrs6000_mode_dependent_address: ret = %s\n",
9959 ret ? "true" : "false");
9960 debug_rtx (addr);
9962 return ret;
9965 /* Implement FIND_BASE_TERM. */
9968 rs6000_find_base_term (rtx op)
9970 rtx base;
9972 base = op;
9973 if (GET_CODE (base) == CONST)
9974 base = XEXP (base, 0);
9975 if (GET_CODE (base) == PLUS)
9976 base = XEXP (base, 0);
9977 if (GET_CODE (base) == UNSPEC)
9978 switch (XINT (base, 1))
9980 case UNSPEC_TOCREL:
9981 case UNSPEC_MACHOPIC_OFFSET:
9982 /* OP represents SYM [+ OFFSET] - ANCHOR. SYM is the base term
9983 for aliasing purposes. */
9984 return XVECEXP (base, 0, 0);
9987 return op;
9990 /* More elaborate version of recog's offsettable_memref_p predicate
9991 that works around the ??? note of rs6000_mode_dependent_address.
9992 In particular it accepts
9994 (mem:DI (plus:SI (reg/f:SI 31 31) (const_int 32760 [0x7ff8])))
9996 in 32-bit mode, which the recog predicate rejects. */
9998 static bool
9999 rs6000_offsettable_memref_p (rtx op, machine_mode reg_mode)
10001 bool worst_case;
10003 if (!MEM_P (op))
10004 return false;
10006 /* First mimic offsettable_memref_p. */
10007 if (offsettable_address_p (true, GET_MODE (op), XEXP (op, 0)))
10008 return true;
10010 /* offsettable_address_p invokes rs6000_mode_dependent_address, but
10011 the latter predicate knows nothing about the mode of the memory
10012 reference and, therefore, assumes that it is the largest supported
10013 mode (TFmode). As a consequence, legitimate offsettable memory
10014 references are rejected. rs6000_legitimate_offset_address_p contains
10015 the correct logic for the PLUS case of rs6000_mode_dependent_address,
10016 at least with a little bit of help here given that we know the
10017 actual registers used. */
10018 worst_case = ((TARGET_POWERPC64 && GET_MODE_CLASS (reg_mode) == MODE_INT)
10019 || GET_MODE_SIZE (reg_mode) == 4);
10020 return rs6000_legitimate_offset_address_p (GET_MODE (op), XEXP (op, 0),
10021 true, worst_case);
10024 /* Determine the reassociation width to be used in reassociate_bb.
10025 This takes into account how many parallel operations we
10026 can actually do of a given type, and also the latency.
10028 int add/sub 6/cycle
10029 mul 2/cycle
10030 vect add/sub/mul 2/cycle
10031 fp add/sub/mul 2/cycle
10032 dfp 1/cycle
10035 static int
10036 rs6000_reassociation_width (unsigned int opc ATTRIBUTE_UNUSED,
10037 machine_mode mode)
10039 switch (rs6000_cpu)
10041 case PROCESSOR_POWER8:
10042 case PROCESSOR_POWER9:
10043 if (DECIMAL_FLOAT_MODE_P (mode))
10044 return 1;
10045 if (VECTOR_MODE_P (mode))
10046 return 4;
10047 if (INTEGRAL_MODE_P (mode))
10048 return opc == MULT_EXPR ? 4 : 6;
10049 if (FLOAT_MODE_P (mode))
10050 return 4;
10051 break;
10052 default:
10053 break;
10055 return 1;
10058 /* Change register usage conditional on target flags. */
10059 static void
10060 rs6000_conditional_register_usage (void)
10062 int i;
10064 if (TARGET_DEBUG_TARGET)
10065 fprintf (stderr, "rs6000_conditional_register_usage called\n");
10067 /* Set MQ register fixed (already call_used) so that it will not be
10068 allocated. */
10069 fixed_regs[64] = 1;
10071 /* 64-bit AIX and Linux reserve GPR13 for thread-private data. */
10072 if (TARGET_64BIT)
10073 fixed_regs[13] = call_used_regs[13]
10074 = call_really_used_regs[13] = 1;
10076 /* Conditionally disable FPRs. */
10077 if (TARGET_SOFT_FLOAT)
10078 for (i = 32; i < 64; i++)
10079 fixed_regs[i] = call_used_regs[i]
10080 = call_really_used_regs[i] = 1;
10082 /* The TOC register is not killed across calls in a way that is
10083 visible to the compiler. */
10084 if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
10085 call_really_used_regs[2] = 0;
10087 if (DEFAULT_ABI == ABI_V4 && flag_pic == 2)
10088 fixed_regs[RS6000_PIC_OFFSET_TABLE_REGNUM] = 1;
10090 if (DEFAULT_ABI == ABI_V4 && flag_pic == 1)
10091 fixed_regs[RS6000_PIC_OFFSET_TABLE_REGNUM]
10092 = call_used_regs[RS6000_PIC_OFFSET_TABLE_REGNUM]
10093 = call_really_used_regs[RS6000_PIC_OFFSET_TABLE_REGNUM] = 1;
10095 if (DEFAULT_ABI == ABI_DARWIN && flag_pic)
10096 fixed_regs[RS6000_PIC_OFFSET_TABLE_REGNUM]
10097 = call_used_regs[RS6000_PIC_OFFSET_TABLE_REGNUM]
10098 = call_really_used_regs[RS6000_PIC_OFFSET_TABLE_REGNUM] = 1;
10100 if (TARGET_TOC && TARGET_MINIMAL_TOC)
10101 fixed_regs[RS6000_PIC_OFFSET_TABLE_REGNUM]
10102 = call_used_regs[RS6000_PIC_OFFSET_TABLE_REGNUM] = 1;
10104 if (!TARGET_ALTIVEC && !TARGET_VSX)
10106 for (i = FIRST_ALTIVEC_REGNO; i <= LAST_ALTIVEC_REGNO; ++i)
10107 fixed_regs[i] = call_used_regs[i] = call_really_used_regs[i] = 1;
10108 call_really_used_regs[VRSAVE_REGNO] = 1;
10111 if (TARGET_ALTIVEC || TARGET_VSX)
10112 global_regs[VSCR_REGNO] = 1;
10114 if (TARGET_ALTIVEC_ABI)
10116 for (i = FIRST_ALTIVEC_REGNO; i < FIRST_ALTIVEC_REGNO + 20; ++i)
10117 call_used_regs[i] = call_really_used_regs[i] = 1;
10119 /* AIX reserves VR20:31 in non-extended ABI mode. */
10120 if (TARGET_XCOFF)
10121 for (i = FIRST_ALTIVEC_REGNO + 20; i < FIRST_ALTIVEC_REGNO + 32; ++i)
10122 fixed_regs[i] = call_used_regs[i] = call_really_used_regs[i] = 1;
10127 /* Output insns to set DEST equal to the constant SOURCE as a series of
10128 lis, ori and shl instructions and return TRUE. */
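/* For SImode, a constant such as 0x12345678 is built as (illustrative
   assembly, register names invented):
       lis rT,0x1234
       ori rD,rT,0x5678
   mirroring the two SETs emitted in the SImode case below.  */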
10130 bool
10131 rs6000_emit_set_const (rtx dest, rtx source)
10133 machine_mode mode = GET_MODE (dest);
10134 rtx temp, set;
10135 rtx_insn *insn;
10136 HOST_WIDE_INT c;
10138 gcc_checking_assert (CONST_INT_P (source));
10139 c = INTVAL (source);
10140 switch (mode)
10142 case QImode:
10143 case HImode:
10144 emit_insn (gen_rtx_SET (dest, source));
10145 return true;
10147 case SImode:
10148 temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (SImode);
10150 emit_insn (gen_rtx_SET (copy_rtx (temp),
10151 GEN_INT (c & ~(HOST_WIDE_INT) 0xffff)));
10152 emit_insn (gen_rtx_SET (dest,
10153 gen_rtx_IOR (SImode, copy_rtx (temp),
10154 GEN_INT (c & 0xffff))));
10155 break;
10157 case DImode:
10158 if (!TARGET_POWERPC64)
10160 rtx hi, lo;
10162 hi = operand_subword_force (copy_rtx (dest), WORDS_BIG_ENDIAN == 0,
10163 DImode);
10164 lo = operand_subword_force (dest, WORDS_BIG_ENDIAN != 0,
10165 DImode);
10166 emit_move_insn (hi, GEN_INT (c >> 32));
10167 c = ((c & 0xffffffff) ^ 0x80000000) - 0x80000000;
10168 emit_move_insn (lo, GEN_INT (c));
10170 else
10171 rs6000_emit_set_long_const (dest, c);
10172 break;
10174 default:
10175 gcc_unreachable ();
10178 insn = get_last_insn ();
10179 set = single_set (insn);
10180 if (! CONSTANT_P (SET_SRC (set)))
10181 set_unique_reg_note (insn, REG_EQUAL, GEN_INT (c));
10183 return true;
10186 /* Subroutine of rs6000_emit_set_const, handling PowerPC64 DImode.
10187 Output insns to set DEST equal to the constant C as a series of
10188 lis, ori and shl instructions. */
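/* For example (illustrative, register names invented), a worst-case
   constant such as c = 0x1234567876543210 becomes the five-insn sequence
       lis rT,0x1234
       ori rT,rT,0x5678
       sldi rT,rT,32
       oris rT,rT,0x7654
       ori rD,rT,0x3210
   matching the final else-branch below.  */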
10190 static void
10191 rs6000_emit_set_long_const (rtx dest, HOST_WIDE_INT c)
10193 rtx temp;
10194 HOST_WIDE_INT ud1, ud2, ud3, ud4;
10196 ud1 = c & 0xffff;
10197 c = c >> 16;
10198 ud2 = c & 0xffff;
10199 c = c >> 16;
10200 ud3 = c & 0xffff;
10201 c = c >> 16;
10202 ud4 = c & 0xffff;
10204 if ((ud4 == 0xffff && ud3 == 0xffff && ud2 == 0xffff && (ud1 & 0x8000))
10205 || (ud4 == 0 && ud3 == 0 && ud2 == 0 && ! (ud1 & 0x8000)))
10206 emit_move_insn (dest, GEN_INT ((ud1 ^ 0x8000) - 0x8000));
10208 else if ((ud4 == 0xffff && ud3 == 0xffff && (ud2 & 0x8000))
10209 || (ud4 == 0 && ud3 == 0 && ! (ud2 & 0x8000)))
10211 temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode);
10213 emit_move_insn (ud1 != 0 ? copy_rtx (temp) : dest,
10214 GEN_INT (((ud2 << 16) ^ 0x80000000) - 0x80000000));
10215 if (ud1 != 0)
10216 emit_move_insn (dest,
10217 gen_rtx_IOR (DImode, copy_rtx (temp),
10218 GEN_INT (ud1)));
10220 else if (ud3 == 0 && ud4 == 0)
10222 temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode);
10224 gcc_assert (ud2 & 0x8000);
10225 emit_move_insn (copy_rtx (temp),
10226 GEN_INT (((ud2 << 16) ^ 0x80000000) - 0x80000000));
10227 if (ud1 != 0)
10228 emit_move_insn (copy_rtx (temp),
10229 gen_rtx_IOR (DImode, copy_rtx (temp),
10230 GEN_INT (ud1)));
10231 emit_move_insn (dest,
10232 gen_rtx_ZERO_EXTEND (DImode,
10233 gen_lowpart (SImode,
10234 copy_rtx (temp))));
10236 else if ((ud4 == 0xffff && (ud3 & 0x8000))
10237 || (ud4 == 0 && ! (ud3 & 0x8000)))
10239 temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode);
10241 emit_move_insn (copy_rtx (temp),
10242 GEN_INT (((ud3 << 16) ^ 0x80000000) - 0x80000000));
10243 if (ud2 != 0)
10244 emit_move_insn (copy_rtx (temp),
10245 gen_rtx_IOR (DImode, copy_rtx (temp),
10246 GEN_INT (ud2)));
10247 emit_move_insn (ud1 != 0 ? copy_rtx (temp) : dest,
10248 gen_rtx_ASHIFT (DImode, copy_rtx (temp),
10249 GEN_INT (16)));
10250 if (ud1 != 0)
10251 emit_move_insn (dest,
10252 gen_rtx_IOR (DImode, copy_rtx (temp),
10253 GEN_INT (ud1)));
10255 else
10257 temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode);
10259 emit_move_insn (copy_rtx (temp),
10260 GEN_INT (((ud4 << 16) ^ 0x80000000) - 0x80000000));
10261 if (ud3 != 0)
10262 emit_move_insn (copy_rtx (temp),
10263 gen_rtx_IOR (DImode, copy_rtx (temp),
10264 GEN_INT (ud3)));
10266 emit_move_insn (ud2 != 0 || ud1 != 0 ? copy_rtx (temp) : dest,
10267 gen_rtx_ASHIFT (DImode, copy_rtx (temp),
10268 GEN_INT (32)));
10269 if (ud2 != 0)
10270 emit_move_insn (ud1 != 0 ? copy_rtx (temp) : dest,
10271 gen_rtx_IOR (DImode, copy_rtx (temp),
10272 GEN_INT (ud2 << 16)));
10273 if (ud1 != 0)
10274 emit_move_insn (dest,
10275 gen_rtx_IOR (DImode, copy_rtx (temp),
10276 GEN_INT (ud1)));
10280 /* Helper for the move expanders below. Get rid of [r+r] memory refs
10281 in cases where they won't work (TImode, TFmode, TDmode, PTImode). */
10283 static void
10284 rs6000_eliminate_indexed_memrefs (rtx operands[2])
10286 if (reload_in_progress)
10287 return;
10289 if (GET_CODE (operands[0]) == MEM
10290 && GET_CODE (XEXP (operands[0], 0)) != REG
10291 && ! legitimate_constant_pool_address_p (XEXP (operands[0], 0),
10292 GET_MODE (operands[0]), false))
10293 operands[0]
10294 = replace_equiv_address (operands[0],
10295 copy_addr_to_reg (XEXP (operands[0], 0)));
10297 if (GET_CODE (operands[1]) == MEM
10298 && GET_CODE (XEXP (operands[1], 0)) != REG
10299 && ! legitimate_constant_pool_address_p (XEXP (operands[1], 0),
10300 GET_MODE (operands[1]), false))
10301 operands[1]
10302 = replace_equiv_address (operands[1],
10303 copy_addr_to_reg (XEXP (operands[1], 0)));
10306 /* Generate a vector of constants to permute MODE for a little-endian
10307 storage operation by swapping the two halves of a vector. */
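/* For V4SImode, for example, the returned selector is { 2, 3, 0, 1 },
   i.e. the two doubleword halves of the vector swapped.  */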
10308 static rtvec
10309 rs6000_const_vec (machine_mode mode)
10311 int i, subparts;
10312 rtvec v;
10314 switch (mode)
10316 case V1TImode:
10317 subparts = 1;
10318 break;
10319 case V2DFmode:
10320 case V2DImode:
10321 subparts = 2;
10322 break;
10323 case V4SFmode:
10324 case V4SImode:
10325 subparts = 4;
10326 break;
10327 case V8HImode:
10328 subparts = 8;
10329 break;
10330 case V16QImode:
10331 subparts = 16;
10332 break;
10333 default:
10334 gcc_unreachable();
10337 v = rtvec_alloc (subparts);
10339 for (i = 0; i < subparts / 2; ++i)
10340 RTVEC_ELT (v, i) = gen_rtx_CONST_INT (DImode, i + subparts / 2);
10341 for (i = subparts / 2; i < subparts; ++i)
10342 RTVEC_ELT (v, i) = gen_rtx_CONST_INT (DImode, i - subparts / 2);
10344 return v;
10347 /* Emit an lxvd2x, stxvd2x, or xxpermdi instruction for a VSX load or
10348 store operation. */
10349 void
10350 rs6000_emit_le_vsx_permute (rtx dest, rtx source, machine_mode mode)
10352 /* Scalar permutations are easier to express in integer modes than in
10353 floating-point modes, so cast them here. We use V1TImode instead
10354 of TImode to ensure that the values don't go through GPRs. */
10355 if (FLOAT128_VECTOR_P (mode))
10357 dest = gen_lowpart (V1TImode, dest);
10358 source = gen_lowpart (V1TImode, source);
10359 mode = V1TImode;
10362 /* Use ROTATE instead of VEC_SELECT if the mode contains only a single
10363 scalar. */
10364 if (mode == TImode || mode == V1TImode)
10365 emit_insn (gen_rtx_SET (dest, gen_rtx_ROTATE (mode, source,
10366 GEN_INT (64))));
10367 else
10369 rtx par = gen_rtx_PARALLEL (VOIDmode, rs6000_const_vec (mode));
10370 emit_insn (gen_rtx_SET (dest, gen_rtx_VEC_SELECT (mode, source, par)));
10374 /* Emit a little-endian load from vector memory location SOURCE to VSX
10375 register DEST in mode MODE. The load is done with two permuting
10376 insns that represent an lxvd2x and an xxpermdi. */
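/* A sketch of the resulting machine code (register numbers invented):
       lxvd2x 0,0,9		load with the two doublewords swapped
       xxpermdi 34,0,0,2	swap them back into true LE element order  */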
10377 void
10378 rs6000_emit_le_vsx_load (rtx dest, rtx source, machine_mode mode)
10380 /* Use V2DImode to do swaps of types with 128-bit scalar parts (TImode,
10381 V1TImode). */
10382 if (mode == TImode || mode == V1TImode)
10384 mode = V2DImode;
10385 dest = gen_lowpart (V2DImode, dest);
10386 source = adjust_address (source, V2DImode, 0);
10389 rtx tmp = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (dest) : dest;
10390 rs6000_emit_le_vsx_permute (tmp, source, mode);
10391 rs6000_emit_le_vsx_permute (dest, tmp, mode);
10394 /* Emit a little-endian store to vector memory location DEST from VSX
10395 register SOURCE in mode MODE. The store is done with two permuting
10396 insns that represent an xxpermdi and an stxvd2x. */
10397 void
10398 rs6000_emit_le_vsx_store (rtx dest, rtx source, machine_mode mode)
10400 /* This should never be called during or after reload, because it does
10401 not re-permute the source register. It is intended only for use
10402 during expand. */
10403 gcc_assert (!reload_in_progress && !lra_in_progress && !reload_completed);
10405 /* Use V2DImode to do swaps of types with 128-bit scalar parts (TImode,
10406 V1TImode). */
10407 if (mode == TImode || mode == V1TImode)
10409 mode = V2DImode;
10410 dest = adjust_address (dest, V2DImode, 0);
10411 source = gen_lowpart (V2DImode, source);
10414 rtx tmp = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (source) : source;
10415 rs6000_emit_le_vsx_permute (tmp, source, mode);
10416 rs6000_emit_le_vsx_permute (dest, tmp, mode);
10419 /* Emit a sequence representing a little-endian VSX load or store,
10420 moving data from SOURCE to DEST in mode MODE. This is done
10421 separately from rs6000_emit_move to ensure it is called only
10422 during expand. LE VSX loads and stores introduced later are
10423 handled with a split. The expand-time RTL generation allows
10424 us to optimize away redundant pairs of register-permutes. */
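/* For instance, copying a vector through memory would naively cost two
   permutes on the load and two on the store; generating the permutes
   explicitly at expand time lets the later swap-optimization pass
   delete such redundant back-to-back pairs.  */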
10425 void
10426 rs6000_emit_le_vsx_move (rtx dest, rtx source, machine_mode mode)
10428 gcc_assert (!BYTES_BIG_ENDIAN
10429 && VECTOR_MEM_VSX_P (mode)
10430 && !TARGET_P9_VECTOR
10431 && !gpr_or_gpr_p (dest, source)
10432 && (MEM_P (source) ^ MEM_P (dest)));
10434 if (MEM_P (source))
10436 gcc_assert (REG_P (dest) || GET_CODE (dest) == SUBREG);
10437 rs6000_emit_le_vsx_load (dest, source, mode);
10439 else
10441 if (!REG_P (source))
10442 source = force_reg (mode, source);
10443 rs6000_emit_le_vsx_store (dest, source, mode);
10447 /* Return whether a SFmode or SImode move can be done without converting one
10448 mode to another. This arises when we have:
10450 (SUBREG:SF (REG:SI ...))
10451 (SUBREG:SI (REG:SF ...))
10453 and one of the values is in a floating point/vector register, where SFmode
10454 scalars are stored in DFmode format. */
10456 bool
10457 valid_sf_si_move (rtx dest, rtx src, machine_mode mode)
10459 if (TARGET_ALLOW_SF_SUBREG)
10460 return true;
10462 if (mode != SFmode && GET_MODE_CLASS (mode) != MODE_INT)
10463 return true;
10465 if (!SUBREG_P (src) || !sf_subreg_operand (src, mode))
10466 return true;
10468 /* Allow (set (SUBREG:SI (REG:SF)) (SUBREG:SI (REG:SF))). */
10469 if (SUBREG_P (dest))
10471 rtx dest_subreg = SUBREG_REG (dest);
10472 rtx src_subreg = SUBREG_REG (src);
10473 return GET_MODE (dest_subreg) == GET_MODE (src_subreg);
10476 return false;
10480 /* Helper function to change moves with:
10482 (SUBREG:SF (REG:SI)) and
10483 (SUBREG:SI (REG:SF))
10485 into separate UNSPEC insns. In the PowerPC architecture, scalar SFmode
10486 values are stored as DFmode values in the VSX registers. We need to convert
10487 the bits before we can use a direct move or operate on the bits in the
10488 vector register as an integer type.
10490 Skip things like (set (SUBREG:SI (...)) (SUBREG:SI (...))). */
10492 static bool
10493 rs6000_emit_move_si_sf_subreg (rtx dest, rtx source, machine_mode mode)
10495 if (TARGET_DIRECT_MOVE_64BIT && !reload_in_progress && !reload_completed
10496 && !lra_in_progress
10497 && (!SUBREG_P (dest) || !sf_subreg_operand (dest, mode))
10498 && SUBREG_P (source) && sf_subreg_operand (source, mode))
10500 rtx inner_source = SUBREG_REG (source);
10501 machine_mode inner_mode = GET_MODE (inner_source);
10503 if (mode == SImode && inner_mode == SFmode)
10505 emit_insn (gen_movsi_from_sf (dest, inner_source));
10506 return true;
10509 if (mode == SFmode && inner_mode == SImode)
10511 emit_insn (gen_movsf_from_si (dest, inner_source));
10512 return true;
10516 return false;
10519 /* Emit a move from SOURCE to DEST in mode MODE. */
10520 void
10521 rs6000_emit_move (rtx dest, rtx source, machine_mode mode)
10523 rtx operands[2];
10524 operands[0] = dest;
10525 operands[1] = source;
10527 if (TARGET_DEBUG_ADDR)
10529 fprintf (stderr,
10530 "\nrs6000_emit_move: mode = %s, reload_in_progress = %d, "
10531 "reload_completed = %d, can_create_pseudos = %d.\ndest:\n",
10532 GET_MODE_NAME (mode),
10533 reload_in_progress,
10534 reload_completed,
10535 can_create_pseudo_p ());
10536 debug_rtx (dest);
10537 fprintf (stderr, "source:\n");
10538 debug_rtx (source);
10541 /* Sanity check: CONST_WIDE_INT should appear only for values wider than a HOST_WIDE_INT. */
10542 if (CONST_WIDE_INT_P (operands[1])
10543 && GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT)
10545 /* This should be fixed with the introduction of CONST_WIDE_INT. */
10546 gcc_unreachable ();
10549 /* See if we need to special case SImode/SFmode SUBREG moves. */
10550 if ((mode == SImode || mode == SFmode) && SUBREG_P (source)
10551 && rs6000_emit_move_si_sf_subreg (dest, source, mode))
10552 return;
10554 /* Check if GCC is setting up a block move that will end up using FP
10555 registers as temporaries. We must make sure this is acceptable. */
10556 if (GET_CODE (operands[0]) == MEM
10557 && GET_CODE (operands[1]) == MEM
10558 && mode == DImode
10559 && (SLOW_UNALIGNED_ACCESS (DImode, MEM_ALIGN (operands[0]))
10560 || SLOW_UNALIGNED_ACCESS (DImode, MEM_ALIGN (operands[1])))
10561 && ! (SLOW_UNALIGNED_ACCESS (SImode, (MEM_ALIGN (operands[0]) > 32
10562 ? 32 : MEM_ALIGN (operands[0])))
10563 || SLOW_UNALIGNED_ACCESS (SImode, (MEM_ALIGN (operands[1]) > 32
10564 ? 32
10565 : MEM_ALIGN (operands[1]))))
10566 && ! MEM_VOLATILE_P (operands [0])
10567 && ! MEM_VOLATILE_P (operands [1]))
10569 emit_move_insn (adjust_address (operands[0], SImode, 0),
10570 adjust_address (operands[1], SImode, 0));
10571 emit_move_insn (adjust_address (copy_rtx (operands[0]), SImode, 4),
10572 adjust_address (copy_rtx (operands[1]), SImode, 4));
10573 return;
10576 if (can_create_pseudo_p () && GET_CODE (operands[0]) == MEM
10577 && !gpc_reg_operand (operands[1], mode))
10578 operands[1] = force_reg (mode, operands[1]);
10580 /* Recognize the case where operand[1] is a reference to thread-local
10581 data and load its address to a register. */
10582 if (tls_referenced_p (operands[1]))
10584 enum tls_model model;
10585 rtx tmp = operands[1];
10586 rtx addend = NULL;
10588 if (GET_CODE (tmp) == CONST && GET_CODE (XEXP (tmp, 0)) == PLUS)
10590 addend = XEXP (XEXP (tmp, 0), 1);
10591 tmp = XEXP (XEXP (tmp, 0), 0);
10594 gcc_assert (GET_CODE (tmp) == SYMBOL_REF);
10595 model = SYMBOL_REF_TLS_MODEL (tmp);
10596 gcc_assert (model != 0);
10598 tmp = rs6000_legitimize_tls_address (tmp, model);
10599 if (addend)
10601 tmp = gen_rtx_PLUS (mode, tmp, addend);
10602 tmp = force_operand (tmp, operands[0]);
10604 operands[1] = tmp;
10607 /* Handle the case where reload calls us with an invalid address. */
10608 if (reload_in_progress && mode == Pmode
10609 && (! general_operand (operands[1], mode)
10610 || ! nonimmediate_operand (operands[0], mode)))
10611 goto emit_set;
10613 /* 128-bit constant floating-point values on Darwin should really be loaded
10614 as two parts. However, this premature splitting is a problem when DFmode
10615 values can go into Altivec registers. */
10616 if (FLOAT128_IBM_P (mode) && !reg_addr[DFmode].scalar_in_vmx_p
10617 && GET_CODE (operands[1]) == CONST_DOUBLE)
10619 rs6000_emit_move (simplify_gen_subreg (DFmode, operands[0], mode, 0),
10620 simplify_gen_subreg (DFmode, operands[1], mode, 0),
10621 DFmode);
10622 rs6000_emit_move (simplify_gen_subreg (DFmode, operands[0], mode,
10623 GET_MODE_SIZE (DFmode)),
10624 simplify_gen_subreg (DFmode, operands[1], mode,
10625 GET_MODE_SIZE (DFmode)),
10626 DFmode);
10627 return;
10630 if (reload_in_progress && cfun->machine->sdmode_stack_slot != NULL_RTX)
10631 cfun->machine->sdmode_stack_slot =
10632 eliminate_regs (cfun->machine->sdmode_stack_slot, VOIDmode, NULL_RTX);
10635 /* Transform (p0:DD, (SUBREG:DD p1:SD)) to ((SUBREG:SD p0:DD),
10636 p1:SD) if p1 is not of floating point class and p0 is spilled,
10637 since we have no analogous movsd_store for this case. */
10638 if (lra_in_progress && mode == DDmode
10639 && REG_P (operands[0]) && REGNO (operands[0]) >= FIRST_PSEUDO_REGISTER
10640 && reg_preferred_class (REGNO (operands[0])) == NO_REGS
10641 && GET_CODE (operands[1]) == SUBREG && REG_P (SUBREG_REG (operands[1]))
10642 && GET_MODE (SUBREG_REG (operands[1])) == SDmode)
10644 enum reg_class cl;
10645 int regno = REGNO (SUBREG_REG (operands[1]));
10647 if (regno >= FIRST_PSEUDO_REGISTER)
10649 cl = reg_preferred_class (regno);
10650 regno = cl == NO_REGS ? -1 : ira_class_hard_regs[cl][1];
10652 if (regno >= 0 && ! FP_REGNO_P (regno))
10654 mode = SDmode;
10655 operands[0] = gen_lowpart_SUBREG (SDmode, operands[0]);
10656 operands[1] = SUBREG_REG (operands[1]);
10659 if (lra_in_progress
10660 && mode == SDmode
10661 && REG_P (operands[0]) && REGNO (operands[0]) >= FIRST_PSEUDO_REGISTER
10662 && reg_preferred_class (REGNO (operands[0])) == NO_REGS
10663 && (REG_P (operands[1])
10664 || (GET_CODE (operands[1]) == SUBREG
10665 && REG_P (SUBREG_REG (operands[1])))))
10667 int regno = REGNO (GET_CODE (operands[1]) == SUBREG
10668 ? SUBREG_REG (operands[1]) : operands[1]);
10669 enum reg_class cl;
10671 if (regno >= FIRST_PSEUDO_REGISTER)
10673 cl = reg_preferred_class (regno);
10674 gcc_assert (cl != NO_REGS);
10675 regno = ira_class_hard_regs[cl][0];
10677 if (FP_REGNO_P (regno))
10679 if (GET_MODE (operands[0]) != DDmode)
10680 operands[0] = gen_rtx_SUBREG (DDmode, operands[0], 0);
10681 emit_insn (gen_movsd_store (operands[0], operands[1]));
10683 else if (INT_REGNO_P (regno))
10684 emit_insn (gen_movsd_hardfloat (operands[0], operands[1]));
10685 else
10686 gcc_unreachable();
10687 return;
10689 /* Transform ((SUBREG:DD p0:SD), p1:DD) to (p0:SD, (SUBREG:SD
10690 p1:DD)) if p0 is not of floating point class and p1 is spilled,
10691 since we have no analogous movsd_load for this case. */
10692 if (lra_in_progress && mode == DDmode
10693 && GET_CODE (operands[0]) == SUBREG && REG_P (SUBREG_REG (operands[0]))
10694 && GET_MODE (SUBREG_REG (operands[0])) == SDmode
10695 && REG_P (operands[1]) && REGNO (operands[1]) >= FIRST_PSEUDO_REGISTER
10696 && reg_preferred_class (REGNO (operands[1])) == NO_REGS)
10698 enum reg_class cl;
10699 int regno = REGNO (SUBREG_REG (operands[0]));
10701 if (regno >= FIRST_PSEUDO_REGISTER)
10703 cl = reg_preferred_class (regno);
10704 regno = cl == NO_REGS ? -1 : ira_class_hard_regs[cl][0];
10706 if (regno >= 0 && ! FP_REGNO_P (regno))
10708 mode = SDmode;
10709 operands[0] = SUBREG_REG (operands[0]);
10710 operands[1] = gen_lowpart_SUBREG (SDmode, operands[1]);
10713 if (lra_in_progress
10714 && mode == SDmode
10715 && (REG_P (operands[0])
10716 || (GET_CODE (operands[0]) == SUBREG
10717 && REG_P (SUBREG_REG (operands[0]))))
10718 && REG_P (operands[1]) && REGNO (operands[1]) >= FIRST_PSEUDO_REGISTER
10719 && reg_preferred_class (REGNO (operands[1])) == NO_REGS)
10721 int regno = REGNO (GET_CODE (operands[0]) == SUBREG
10722 ? SUBREG_REG (operands[0]) : operands[0]);
10723 enum reg_class cl;
10725 if (regno >= FIRST_PSEUDO_REGISTER)
10727 cl = reg_preferred_class (regno);
10728 gcc_assert (cl != NO_REGS);
10729 regno = ira_class_hard_regs[cl][0];
10731 if (FP_REGNO_P (regno))
10733 if (GET_MODE (operands[1]) != DDmode)
10734 operands[1] = gen_rtx_SUBREG (DDmode, operands[1], 0);
10735 emit_insn (gen_movsd_load (operands[0], operands[1]));
10737 else if (INT_REGNO_P (regno))
10738 emit_insn (gen_movsd_hardfloat (operands[0], operands[1]));
10739 else
10740 gcc_unreachable();
10741 return;
10744 if (reload_in_progress
10745 && mode == SDmode
10746 && cfun->machine->sdmode_stack_slot != NULL_RTX
10747 && MEM_P (operands[0])
10748 && rtx_equal_p (operands[0], cfun->machine->sdmode_stack_slot)
10749 && REG_P (operands[1]))
10751 if (FP_REGNO_P (REGNO (operands[1])))
10753 rtx mem = adjust_address_nv (operands[0], DDmode, 0);
10754 mem = eliminate_regs (mem, VOIDmode, NULL_RTX);
10755 emit_insn (gen_movsd_store (mem, operands[1]));
10757 else if (INT_REGNO_P (REGNO (operands[1])))
10759 rtx mem = operands[0];
10760 if (BYTES_BIG_ENDIAN)
10761 mem = adjust_address_nv (mem, mode, 4);
10762 mem = eliminate_regs (mem, VOIDmode, NULL_RTX);
10763 emit_insn (gen_movsd_hardfloat (mem, operands[1]));
10765 else
10766 gcc_unreachable();
10767 return;
10769 if (reload_in_progress
10770 && mode == SDmode
10771 && REG_P (operands[0])
10772 && MEM_P (operands[1])
10773 && cfun->machine->sdmode_stack_slot != NULL_RTX
10774 && rtx_equal_p (operands[1], cfun->machine->sdmode_stack_slot))
10776 if (FP_REGNO_P (REGNO (operands[0])))
10778 rtx mem = adjust_address_nv (operands[1], DDmode, 0);
10779 mem = eliminate_regs (mem, VOIDmode, NULL_RTX);
10780 emit_insn (gen_movsd_load (operands[0], mem));
10782 else if (INT_REGNO_P (REGNO (operands[0])))
10784 rtx mem = operands[1];
10785 if (BYTES_BIG_ENDIAN)
10786 mem = adjust_address_nv (mem, mode, 4);
10787 mem = eliminate_regs (mem, VOIDmode, NULL_RTX);
10788 emit_insn (gen_movsd_hardfloat (operands[0], mem));
10790 else
10791 gcc_unreachable();
10792 return;
10795 /* FIXME: In the long term, this switch statement should go away
10796 and be replaced by a sequence of tests based on things like
10797 mode == Pmode. */
10798 switch (mode)
10800 case HImode:
10801 case QImode:
10802 if (CONSTANT_P (operands[1])
10803 && GET_CODE (operands[1]) != CONST_INT)
10804 operands[1] = force_const_mem (mode, operands[1]);
10805 break;
10807 case TFmode:
10808 case TDmode:
10809 case IFmode:
10810 case KFmode:
10811 if (FLOAT128_2REG_P (mode))
10812 rs6000_eliminate_indexed_memrefs (operands);
10813 /* fall through */
10815 case DFmode:
10816 case DDmode:
10817 case SFmode:
10818 case SDmode:
10819 if (CONSTANT_P (operands[1])
10820 && ! easy_fp_constant (operands[1], mode))
10821 operands[1] = force_const_mem (mode, operands[1]);
10822 break;
10824 case V16QImode:
10825 case V8HImode:
10826 case V4SFmode:
10827 case V4SImode:
10828 case V2SFmode:
10829 case V2SImode:
10830 case V2DFmode:
10831 case V2DImode:
10832 case V1TImode:
10833 if (CONSTANT_P (operands[1])
10834 && !easy_vector_constant (operands[1], mode))
10835 operands[1] = force_const_mem (mode, operands[1]);
10836 break;
10838 case SImode:
10839 case DImode:
10840 /* Use the default pattern for the address of ELF small data. */
10841 if (TARGET_ELF
10842 && mode == Pmode
10843 && DEFAULT_ABI == ABI_V4
10844 && (GET_CODE (operands[1]) == SYMBOL_REF
10845 || GET_CODE (operands[1]) == CONST)
10846 && small_data_operand (operands[1], mode))
10848 emit_insn (gen_rtx_SET (operands[0], operands[1]));
10849 return;
10852 if (DEFAULT_ABI == ABI_V4
10853 && mode == Pmode && mode == SImode
10854 && flag_pic == 1 && got_operand (operands[1], mode))
10856 emit_insn (gen_movsi_got (operands[0], operands[1]));
10857 return;
10860 if ((TARGET_ELF || DEFAULT_ABI == ABI_DARWIN)
10861 && TARGET_NO_TOC
10862 && ! flag_pic
10863 && mode == Pmode
10864 && CONSTANT_P (operands[1])
10865 && GET_CODE (operands[1]) != HIGH
10866 && GET_CODE (operands[1]) != CONST_INT)
10868 rtx target = (!can_create_pseudo_p ()
10869 ? operands[0]
10870 : gen_reg_rtx (mode));
10872 /* If this is a function address on -mcall-aixdesc,
10873 convert it to the address of the descriptor. */
10874 if (DEFAULT_ABI == ABI_AIX
10875 && GET_CODE (operands[1]) == SYMBOL_REF
10876 && XSTR (operands[1], 0)[0] == '.')
10878 const char *name = XSTR (operands[1], 0);
10879 rtx new_ref;
10880 while (*name == '.')
10881 name++;
10882 new_ref = gen_rtx_SYMBOL_REF (Pmode, name);
10883 CONSTANT_POOL_ADDRESS_P (new_ref)
10884 = CONSTANT_POOL_ADDRESS_P (operands[1]);
10885 SYMBOL_REF_FLAGS (new_ref) = SYMBOL_REF_FLAGS (operands[1]);
10886 SYMBOL_REF_USED (new_ref) = SYMBOL_REF_USED (operands[1]);
10887 SYMBOL_REF_DATA (new_ref) = SYMBOL_REF_DATA (operands[1]);
10888 operands[1] = new_ref;
10891 if (DEFAULT_ABI == ABI_DARWIN)
10893 #if TARGET_MACHO
10894 if (MACHO_DYNAMIC_NO_PIC_P)
10896 /* Take care of any required data indirection. */
10897 operands[1] = rs6000_machopic_legitimize_pic_address (
10898 operands[1], mode, operands[0]);
10899 if (operands[0] != operands[1])
10900 emit_insn (gen_rtx_SET (operands[0], operands[1]));
10901 return;
10903 #endif
10904 emit_insn (gen_macho_high (target, operands[1]));
10905 emit_insn (gen_macho_low (operands[0], target, operands[1]));
10906 return;
10909 emit_insn (gen_elf_high (target, operands[1]));
10910 emit_insn (gen_elf_low (operands[0], target, operands[1]));
10911 return;
10914 /* If this is a SYMBOL_REF that refers to a constant pool entry,
10915 and we have put it in the TOC, we just need to make a TOC-relative
10916 reference to it. */
10917 if (TARGET_TOC
10918 && GET_CODE (operands[1]) == SYMBOL_REF
10919 && use_toc_relative_ref (operands[1], mode))
10920 operands[1] = create_TOC_reference (operands[1], operands[0]);
10921 else if (mode == Pmode
10922 && CONSTANT_P (operands[1])
10923 && GET_CODE (operands[1]) != HIGH
10924 && ((GET_CODE (operands[1]) != CONST_INT
10925 && ! easy_fp_constant (operands[1], mode))
10926 || (GET_CODE (operands[1]) == CONST_INT
10927 && (num_insns_constant (operands[1], mode)
10928 > (TARGET_CMODEL != CMODEL_SMALL ? 3 : 2)))
10929 || (GET_CODE (operands[0]) == REG
10930 && FP_REGNO_P (REGNO (operands[0]))))
10931 && !toc_relative_expr_p (operands[1], false, NULL, NULL)
10932 && (TARGET_CMODEL == CMODEL_SMALL
10933 || can_create_pseudo_p ()
10934 || (REG_P (operands[0])
10935 && INT_REG_OK_FOR_BASE_P (operands[0], true))))
10938 #if TARGET_MACHO
10939 /* Darwin uses a special PIC legitimizer. */
10940 if (DEFAULT_ABI == ABI_DARWIN && MACHOPIC_INDIRECT)
10942 operands[1] =
10943 rs6000_machopic_legitimize_pic_address (operands[1], mode,
10944 operands[0]);
10945 if (operands[0] != operands[1])
10946 emit_insn (gen_rtx_SET (operands[0], operands[1]));
10947 return;
10949 #endif
10951 /* If we are to limit the number of things we put in the TOC and
10952 this is a symbol plus a constant we can add in one insn,
10953 just put the symbol in the TOC and add the constant. Don't do
10954 this if reload is in progress. */
10955 if (GET_CODE (operands[1]) == CONST
10956 && TARGET_NO_SUM_IN_TOC && ! reload_in_progress
10957 && GET_CODE (XEXP (operands[1], 0)) == PLUS
10958 && add_operand (XEXP (XEXP (operands[1], 0), 1), mode)
10959 && (GET_CODE (XEXP (XEXP (operands[1], 0), 0)) == LABEL_REF
10960 || GET_CODE (XEXP (XEXP (operands[1], 0), 0)) == SYMBOL_REF)
10961 && ! side_effects_p (operands[0]))
10963 rtx sym =
10964 force_const_mem (mode, XEXP (XEXP (operands[1], 0), 0));
10965 rtx other = XEXP (XEXP (operands[1], 0), 1);
10967 sym = force_reg (mode, sym);
10968 emit_insn (gen_add3_insn (operands[0], sym, other));
10969 return;
10972 operands[1] = force_const_mem (mode, operands[1]);
10974 if (TARGET_TOC
10975 && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
10976 && use_toc_relative_ref (XEXP (operands[1], 0), mode))
10978 rtx tocref = create_TOC_reference (XEXP (operands[1], 0),
10979 operands[0]);
10980 operands[1] = gen_const_mem (mode, tocref);
10981 set_mem_alias_set (operands[1], get_TOC_alias_set ());
10984 break;
10986 case TImode:
10987 if (!VECTOR_MEM_VSX_P (TImode))
10988 rs6000_eliminate_indexed_memrefs (operands);
10989 break;
10991 case PTImode:
10992 rs6000_eliminate_indexed_memrefs (operands);
10993 break;
10995 default:
10996 fatal_insn ("bad move", gen_rtx_SET (dest, source));
10999 /* Above, we may have called force_const_mem which may have returned
11000 an invalid address. If we can, fix this up; otherwise, reload will
11001 have to deal with it. */
11002 if (GET_CODE (operands[1]) == MEM && ! reload_in_progress)
11003 operands[1] = validize_mem (operands[1]);
11005 emit_set:
11006 emit_insn (gen_rtx_SET (operands[0], operands[1]));
11009 /* Nonzero if we can use a floating-point register to pass this arg. */
11010 #define USE_FP_FOR_ARG_P(CUM,MODE) \
11011 (SCALAR_FLOAT_MODE_NOT_VECTOR_P (MODE) \
11012 && (CUM)->fregno <= FP_ARG_MAX_REG \
11013 && TARGET_HARD_FLOAT)
11015 /* Nonzero if we can use an AltiVec register to pass this arg. */
11016 #define USE_ALTIVEC_FOR_ARG_P(CUM,MODE,NAMED) \
11017 (ALTIVEC_OR_VSX_VECTOR_MODE (MODE) \
11018 && (CUM)->vregno <= ALTIVEC_ARG_MAX_REG \
11019 && TARGET_ALTIVEC_ABI \
11020 && (NAMED))
11022 /* Walk down the type tree of TYPE counting consecutive base elements.
11023 If *MODEP is VOIDmode, then set it to the first valid floating point
11024 or vector type. If a non-floating point or vector type is found, or
11025 if a floating point or vector type that doesn't match a non-VOIDmode
11026 *MODEP is found, return -1; otherwise return the count in the
11027 sub-tree. */
11029 static int
11030 rs6000_aggregate_candidate (const_tree type, machine_mode *modep)
11032 machine_mode mode;
11033 HOST_WIDE_INT size;
11035 switch (TREE_CODE (type))
11037 case REAL_TYPE:
11038 mode = TYPE_MODE (type);
11039 if (!SCALAR_FLOAT_MODE_P (mode))
11040 return -1;
11042 if (*modep == VOIDmode)
11043 *modep = mode;
11045 if (*modep == mode)
11046 return 1;
11048 break;
11050 case COMPLEX_TYPE:
11051 mode = TYPE_MODE (TREE_TYPE (type));
11052 if (!SCALAR_FLOAT_MODE_P (mode))
11053 return -1;
11055 if (*modep == VOIDmode)
11056 *modep = mode;
11058 if (*modep == mode)
11059 return 2;
11061 break;
11063 case VECTOR_TYPE:
11064 if (!TARGET_ALTIVEC_ABI || !TARGET_ALTIVEC)
11065 return -1;
11067 /* Use V4SImode as representative of all 128-bit vector types. */
11068 size = int_size_in_bytes (type);
11069 switch (size)
11071 case 16:
11072 mode = V4SImode;
11073 break;
11074 default:
11075 return -1;
11078 if (*modep == VOIDmode)
11079 *modep = mode;
11081 /* Vector modes are considered to be opaque: two vectors are
11082 equivalent for the purposes of being homogeneous aggregates
11083 if they are the same size. */
11084 if (*modep == mode)
11085 return 1;
11087 break;
11089 case ARRAY_TYPE:
11091 int count;
11092 tree index = TYPE_DOMAIN (type);
11094 /* Can't handle incomplete types or sizes that are not
11095 fixed. */
11096 if (!COMPLETE_TYPE_P (type)
11097 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
11098 return -1;
11100 count = rs6000_aggregate_candidate (TREE_TYPE (type), modep);
11101 if (count == -1
11102 || !index
11103 || !TYPE_MAX_VALUE (index)
11104 || !tree_fits_uhwi_p (TYPE_MAX_VALUE (index))
11105 || !TYPE_MIN_VALUE (index)
11106 || !tree_fits_uhwi_p (TYPE_MIN_VALUE (index))
11107 || count < 0)
11108 return -1;
11110 count *= (1 + tree_to_uhwi (TYPE_MAX_VALUE (index))
11111 - tree_to_uhwi (TYPE_MIN_VALUE (index)));
11113 /* There must be no padding. */
11114 if (wi::ne_p (TYPE_SIZE (type), count * GET_MODE_BITSIZE (*modep)))
11115 return -1;
11117 return count;
11120 case RECORD_TYPE:
11122 int count = 0;
11123 int sub_count;
11124 tree field;
11126 /* Can't handle incomplete types or sizes that are not
11127 fixed. */
11128 if (!COMPLETE_TYPE_P (type)
11129 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
11130 return -1;
11132 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
11134 if (TREE_CODE (field) != FIELD_DECL)
11135 continue;
11137 sub_count = rs6000_aggregate_candidate (TREE_TYPE (field), modep);
11138 if (sub_count < 0)
11139 return -1;
11140 count += sub_count;
11143 /* There must be no padding. */
11144 if (wi::ne_p (TYPE_SIZE (type), count * GET_MODE_BITSIZE (*modep)))
11145 return -1;
11147 return count;
11150 case UNION_TYPE:
11151 case QUAL_UNION_TYPE:
11153 /* These aren't very interesting except in a degenerate case. */
11154 int count = 0;
11155 int sub_count;
11156 tree field;
11158 /* Can't handle incomplete types or sizes that are not
11159 fixed. */
11160 if (!COMPLETE_TYPE_P (type)
11161 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
11162 return -1;
11164 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
11166 if (TREE_CODE (field) != FIELD_DECL)
11167 continue;
11169 sub_count = rs6000_aggregate_candidate (TREE_TYPE (field), modep);
11170 if (sub_count < 0)
11171 return -1;
11172 count = count > sub_count ? count : sub_count;
11175 /* There must be no padding. */
11176 if (wi::ne_p (TYPE_SIZE (type), count * GET_MODE_BITSIZE (*modep)))
11177 return -1;
11179 return count;
11182 default:
11183 break;
11186 return -1;
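/* A sketch of how the walk above classifies a few sample types,
   assuming 8-byte doubles and 16-byte AltiVec vectors; the type
   names are hypothetical, not taken from this file.

     struct hfa2  { double x, y; };        two REAL_TYPE fields -> 2
     struct chfa  { _Complex double z; };  COMPLEX_TYPE field   -> 2
     struct mixed { double d; int i; };    int field            -> -1
     typedef double darr4[4];              ARRAY_TYPE           -> 4
     struct hva2  { __vector int a, b; };  two 16-byte vectors  -> 2  */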
11189 /* If an argument, whose type is described by TYPE and MODE, is a homogeneous
11190 float or vector aggregate that shall be passed in FP/vector registers
11191 according to the ELFv2 ABI, return the homogeneous element mode in
11192 *ELT_MODE and the number of elements in *N_ELTS, and return TRUE.
11194 Otherwise, set *ELT_MODE to MODE and *N_ELTS to 1, and return FALSE. */
11196 static bool
11197 rs6000_discover_homogeneous_aggregate (machine_mode mode, const_tree type,
11198 machine_mode *elt_mode,
11199 int *n_elts)
11201 /* Note that we do not accept complex types at the top level as
11202 homogeneous aggregates; these types are handled via the
11203 targetm.calls.split_complex_arg mechanism. Complex types
11204 can be elements of homogeneous aggregates, however. */
11205 if (DEFAULT_ABI == ABI_ELFv2 && type && AGGREGATE_TYPE_P (type))
11207 machine_mode field_mode = VOIDmode;
11208 int field_count = rs6000_aggregate_candidate (type, &field_mode);
11210 if (field_count > 0)
11212 int n_regs = (SCALAR_FLOAT_MODE_P (field_mode) ?
11213 (GET_MODE_SIZE (field_mode) + 7) >> 3 : 1);
11215 /* The ELFv2 ABI allows homogeneous aggregates to occupy
11216 up to AGGR_ARG_NUM_REG registers. */
11217 if (field_count * n_regs <= AGGR_ARG_NUM_REG)
11219 if (elt_mode)
11220 *elt_mode = field_mode;
11221 if (n_elts)
11222 *n_elts = field_count;
11223 return true;
11228 if (elt_mode)
11229 *elt_mode = mode;
11230 if (n_elts)
11231 *n_elts = 1;
11232 return false;
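/* Worked example, assuming AGGR_ARG_NUM_REG is 8 (the ELFv2 limit on
   homogeneous aggregates): each double needs (8 + 7) >> 3 == 1 register,
   so with hypothetical types

     struct ok  { double d[8]; };   8 elements * 1 reg == 8 -> true
     struct big { double d[9]; };   9 elements * 1 reg == 9 -> false

   and a vector element always counts as a single register.  */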
11235 /* Return a nonzero value to say to return the function value in
11236 memory, just as large structures are always returned. TYPE will be
11237 the data type of the value, and FNTYPE will be the type of the
11238 function doing the returning, or @code{NULL} for libcalls.
11240 The AIX ABI for the RS/6000 specifies that all structures are
11241 returned in memory. The Darwin ABI does the same.
11243 For the Darwin 64 Bit ABI, a function result can be returned in
11244 registers or in memory, depending on the size of the return data
11245 type. If it is returned in registers, the value occupies the same
11246 registers as it would if it were the first and only function
11247 argument. Otherwise, the function places its result in memory at
11248 the location pointed to by GPR3.
11250 The SVR4 ABI specifies that structures <= 8 bytes are returned in r3/r4,
11251 but a draft put them in memory, and GCC used to implement the draft
11252 instead of the final standard. Therefore, aix_struct_return
11253 controls this instead of DEFAULT_ABI; V.4 targets needing backward
11254 compatibility can change DRAFT_V4_STRUCT_RET to override the
11255 default, and -m switches get the final word. See
11256 rs6000_option_override_internal for more details.
11258 The PPC32 SVR4 ABI uses IEEE double extended for long double, if 128-bit
11259 long double support is enabled. These values are returned in memory.
11261 int_size_in_bytes returns -1 for variable size objects, which always
11262 go in memory. The cast to unsigned makes -1 > 8. */
11264 static bool
11265 rs6000_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
11267 /* For the Darwin64 ABI, test if we can fit the return value in regs. */
11268 if (TARGET_MACHO
11269 && rs6000_darwin64_abi
11270 && TREE_CODE (type) == RECORD_TYPE
11271 && int_size_in_bytes (type) > 0)
11273 CUMULATIVE_ARGS valcum;
11274 rtx valret;
11276 valcum.words = 0;
11277 valcum.fregno = FP_ARG_MIN_REG;
11278 valcum.vregno = ALTIVEC_ARG_MIN_REG;
11279 /* Do a trial code generation as if this were going to be passed
11280 as an argument; if any part goes in memory, we return NULL. */
11281 valret = rs6000_darwin64_record_arg (&valcum, type, true, true);
11282 if (valret)
11283 return false;
11284 /* Otherwise fall through to more conventional ABI rules. */
11287 /* The ELFv2 ABI returns homogeneous float/vector aggregates in registers. */
11288 if (rs6000_discover_homogeneous_aggregate (TYPE_MODE (type), type,
11289 NULL, NULL))
11290 return false;
11292 /* The ELFv2 ABI returns aggregates of up to 16 bytes in registers. */
11293 if (DEFAULT_ABI == ABI_ELFv2 && AGGREGATE_TYPE_P (type)
11294 && (unsigned HOST_WIDE_INT) int_size_in_bytes (type) <= 16)
11295 return false;
11297 if (AGGREGATE_TYPE_P (type)
11298 && (aix_struct_return
11299 || (unsigned HOST_WIDE_INT) int_size_in_bytes (type) > 8))
11300 return true;
11302 /* Allow -maltivec -mabi=no-altivec without warning. Altivec vector
11303 modes only exist for GCC vector types if -maltivec. */
11304 if (TARGET_32BIT && !TARGET_ALTIVEC_ABI
11305 && ALTIVEC_VECTOR_MODE (TYPE_MODE (type)))
11306 return false;
11308 /* Return synthetic vectors in memory. */
11309 if (TREE_CODE (type) == VECTOR_TYPE
11310 && int_size_in_bytes (type) > (TARGET_ALTIVEC_ABI ? 16 : 8))
11312 static bool warned_for_return_big_vectors = false;
11313 if (!warned_for_return_big_vectors)
11315 warning (OPT_Wpsabi, "GCC vector returned by reference: "
11316 "non-standard ABI extension with no compatibility guarantee");
11317 warned_for_return_big_vectors = true;
11319 return true;
11322 if (DEFAULT_ABI == ABI_V4 && TARGET_IEEEQUAD
11323 && FLOAT128_IEEE_P (TYPE_MODE (type)))
11324 return true;
11326 return false;
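/* A sketch of typical outcomes under ELFv2 (types are hypothetical):

     struct s16 { long a, b; };      16 bytes                -> registers
     struct s24 { long a, b, c; };   24 bytes, not homogeneous -> memory
     struct hfa { double x, y; };    homogeneous             -> FP registers

   Under plain AIX rules (aix_struct_return set), all three would be
   returned in memory.  */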
11329 /* Specify whether values returned in registers should be at the most
11330 significant end of a register. We want aggregates returned by
11331 value to match the way aggregates are passed to functions. */
11333 static bool
11334 rs6000_return_in_msb (const_tree valtype)
11336 return (DEFAULT_ABI == ABI_ELFv2
11337 && BYTES_BIG_ENDIAN
11338 && AGGREGATE_TYPE_P (valtype)
11339 && FUNCTION_ARG_PADDING (TYPE_MODE (valtype), valtype) == upward);
11342 #ifdef HAVE_AS_GNU_ATTRIBUTE
11343 /* Return TRUE if a call to function FNDECL may affect the function
11344 calling ABI recorded in the object file. */
11346 static bool
11347 call_ABI_of_interest (tree fndecl)
11349 if (rs6000_gnu_attr && symtab->state == EXPANSION)
11351 struct cgraph_node *c_node;
11353 /* Libcalls are always interesting. */
11354 if (fndecl == NULL_TREE)
11355 return true;
11357 /* Any call to an external function is interesting. */
11358 if (DECL_EXTERNAL (fndecl))
11359 return true;
11361 /* Interesting functions that we are emitting in this object file. */
11362 c_node = cgraph_node::get (fndecl);
11363 c_node = c_node->ultimate_alias_target ();
11364 return !c_node->only_called_directly_p ();
11366 return false;
11368 #endif
11370 /* Initialize a variable CUM of type CUMULATIVE_ARGS
11371 for a call to a function whose data type is FNTYPE.
11372 For a library call, FNTYPE is 0 and RETURN_MODE is the return value mode.
11374 For incoming args we set the number of arguments in the prototype high
11375 so that we never return a PARALLEL. */
11377 void
11378 init_cumulative_args (CUMULATIVE_ARGS *cum, tree fntype,
11379 rtx libname ATTRIBUTE_UNUSED, int incoming,
11380 int libcall, int n_named_args,
11381 tree fndecl ATTRIBUTE_UNUSED,
11382 machine_mode return_mode ATTRIBUTE_UNUSED)
11384 static CUMULATIVE_ARGS zero_cumulative;
11386 *cum = zero_cumulative;
11387 cum->words = 0;
11388 cum->fregno = FP_ARG_MIN_REG;
11389 cum->vregno = ALTIVEC_ARG_MIN_REG;
11390 cum->prototype = (fntype && prototype_p (fntype));
11391 cum->call_cookie = ((DEFAULT_ABI == ABI_V4 && libcall)
11392 ? CALL_LIBCALL : CALL_NORMAL);
11393 cum->sysv_gregno = GP_ARG_MIN_REG;
11394 cum->stdarg = stdarg_p (fntype);
11395 cum->libcall = libcall;
11397 cum->nargs_prototype = 0;
11398 if (incoming || cum->prototype)
11399 cum->nargs_prototype = n_named_args;
11401 /* Check for a longcall attribute. */
11402 if ((!fntype && rs6000_default_long_calls)
11403 || (fntype
11404 && lookup_attribute ("longcall", TYPE_ATTRIBUTES (fntype))
11405 && !lookup_attribute ("shortcall", TYPE_ATTRIBUTES (fntype))))
11406 cum->call_cookie |= CALL_LONG;
11408 if (TARGET_DEBUG_ARG)
11410 fprintf (stderr, "\ninit_cumulative_args:");
11411 if (fntype)
11413 tree ret_type = TREE_TYPE (fntype);
11414 fprintf (stderr, " ret code = %s,",
11415 get_tree_code_name (TREE_CODE (ret_type)));
11418 if (cum->call_cookie & CALL_LONG)
11419 fprintf (stderr, " longcall,");
11421 fprintf (stderr, " proto = %d, nargs = %d\n",
11422 cum->prototype, cum->nargs_prototype);
11425 #ifdef HAVE_AS_GNU_ATTRIBUTE
11426 if (TARGET_ELF && (TARGET_64BIT || DEFAULT_ABI == ABI_V4))
11428 cum->escapes = call_ABI_of_interest (fndecl);
11429 if (cum->escapes)
11431 tree return_type;
11433 if (fntype)
11435 return_type = TREE_TYPE (fntype);
11436 return_mode = TYPE_MODE (return_type);
11438 else
11439 return_type = lang_hooks.types.type_for_mode (return_mode, 0);
11441 if (return_type != NULL)
11443 if (TREE_CODE (return_type) == RECORD_TYPE
11444 && TYPE_TRANSPARENT_AGGR (return_type))
11446 return_type = TREE_TYPE (first_field (return_type));
11447 return_mode = TYPE_MODE (return_type);
11449 if (AGGREGATE_TYPE_P (return_type)
11450 && ((unsigned HOST_WIDE_INT) int_size_in_bytes (return_type)
11451 <= 8))
11452 rs6000_returns_struct = true;
11454 if (SCALAR_FLOAT_MODE_P (return_mode))
11456 rs6000_passes_float = true;
11457 if ((HAVE_LD_PPC_GNU_ATTR_LONG_DOUBLE || TARGET_64BIT)
11458 && (FLOAT128_IBM_P (return_mode)
11459 || FLOAT128_IEEE_P (return_mode)
11460 || (return_type != NULL
11461 && (TYPE_MAIN_VARIANT (return_type)
11462 == long_double_type_node))))
11463 rs6000_passes_long_double = true;
11465 if (ALTIVEC_OR_VSX_VECTOR_MODE (return_mode)
11466 || PAIRED_VECTOR_MODE (return_mode))
11467 rs6000_passes_vector = true;
11470 #endif
11472 if (fntype
11473 && !TARGET_ALTIVEC
11474 && TARGET_ALTIVEC_ABI
11475 && ALTIVEC_VECTOR_MODE (TYPE_MODE (TREE_TYPE (fntype))))
11477 error ("cannot return value in vector register because"
11478 " altivec instructions are disabled, use -maltivec"
11479 " to enable them");
11483 /* The mode the ABI uses for a word. This is not the same as word_mode
11484 for -m32 -mpowerpc64. This is used to implement various target hooks. */
11486 static machine_mode
11487 rs6000_abi_word_mode (void)
11489 return TARGET_32BIT ? SImode : DImode;
11492 /* Implement the TARGET_OFFLOAD_OPTIONS hook. */
11493 static char *
11494 rs6000_offload_options (void)
11496 if (TARGET_64BIT)
11497 return xstrdup ("-foffload-abi=lp64");
11498 else
11499 return xstrdup ("-foffload-abi=ilp32");
11502 /* On rs6000, function arguments are promoted, as are function return
11503 values. */
11505 static machine_mode
11506 rs6000_promote_function_mode (const_tree type ATTRIBUTE_UNUSED,
11507 machine_mode mode,
11508 int *punsignedp ATTRIBUTE_UNUSED,
11509 const_tree, int)
11511 PROMOTE_MODE (mode, *punsignedp, type);
11513 return mode;
11516 /* Return true if TYPE must be passed on the stack and not in registers. */
11518 static bool
11519 rs6000_must_pass_in_stack (machine_mode mode, const_tree type)
11521 if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2 || TARGET_64BIT)
11522 return must_pass_in_stack_var_size (mode, type);
11523 else
11524 return must_pass_in_stack_var_size_or_pad (mode, type);
11527 static inline bool
11528 is_complex_IBM_long_double (machine_mode mode)
11530 return mode == ICmode || (!TARGET_IEEEQUAD && mode == TCmode);
11533 /* Whether ABI_V4 passes MODE args to a function in floating point
11534 registers. */
11536 static bool
11537 abi_v4_pass_in_fpr (machine_mode mode)
11539 if (!TARGET_HARD_FLOAT)
11540 return false;
11541 if (TARGET_SINGLE_FLOAT && mode == SFmode)
11542 return true;
11543 if (TARGET_DOUBLE_FLOAT && mode == DFmode)
11544 return true;
11545 /* ABI_V4 passes complex IBM long double in 8 gprs.
11546 Stupid, but we can't change the ABI now. */
11547 if (is_complex_IBM_long_double (mode))
11548 return false;
11549 if (FLOAT128_2REG_P (mode))
11550 return true;
11551 if (DECIMAL_FLOAT_MODE_P (mode))
11552 return true;
11553 return false;
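/* A sketch of where V.4 scalar args land under the tests above,
   assuming -mhard-float with single- and double-float enabled:

     float                   SFmode           -> FPR
     double                  DFmode           -> FPR
     _Decimal64/_Decimal128  DDmode/TDmode    -> FPR(s)
     IBM long double         FLOAT128_2REG_P  -> FPR pair
     _Complex long double    TCmode           -> GPRs (eight of them)  */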
11556 /* If defined, a C expression which determines whether, and in which
11557 direction, to pad out an argument with extra space. The value
11558 should be of type `enum direction': either `upward' to pad above
11559 the argument, `downward' to pad below, or `none' to inhibit
11560 padding.
11562 For the AIX ABI, structs are always stored left-shifted in their
11563 argument slot. */
11565 enum direction
11566 function_arg_padding (machine_mode mode, const_tree type)
11568 #ifndef AGGREGATE_PADDING_FIXED
11569 #define AGGREGATE_PADDING_FIXED 0
11570 #endif
11571 #ifndef AGGREGATES_PAD_UPWARD_ALWAYS
11572 #define AGGREGATES_PAD_UPWARD_ALWAYS 0
11573 #endif
11575 if (!AGGREGATE_PADDING_FIXED)
11577 /* GCC used to pass structures of the same size as integer types as
11578 if they were in fact integers, ignoring FUNCTION_ARG_PADDING.
11579 i.e., structures of size 1 or 2 (or 4 when TARGET_64BIT) were
11580 passed padded downward, except that -mstrict-align further
11581 muddied the water in that multi-component structures of 2 and 4
11582 bytes in size were passed padded upward.
11584 The following arranges for best compatibility with previous
11585 versions of gcc, but removes the -mstrict-align dependency. */
11586 if (BYTES_BIG_ENDIAN)
11588 HOST_WIDE_INT size = 0;
11590 if (mode == BLKmode)
11592 if (type && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST)
11593 size = int_size_in_bytes (type);
11595 else
11596 size = GET_MODE_SIZE (mode);
11598 if (size == 1 || size == 2 || size == 4)
11599 return downward;
11601 return upward;
11604 if (AGGREGATES_PAD_UPWARD_ALWAYS)
11606 if (type != 0 && AGGREGATE_TYPE_P (type))
11607 return upward;
11610 /* Fall back to the default. */
11611 return DEFAULT_FUNCTION_ARG_PADDING (mode, type);
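/* Big-endian examples of the size test above (hypothetical structs,
   each with a constant size):

     struct { char c; }           size 1          -> downward
     struct { char c[2]; }        size 2          -> downward
     struct { char c[3]; }        size 3          -> upward
     struct { short s; char c; }  size 4 (padded) -> downward
     struct { char c[5]; }        size 5          -> upward  */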
11614 /* If defined, a C expression that gives the alignment boundary, in bits,
11615 of an argument with the specified mode and type. If it is not defined,
11616 PARM_BOUNDARY is used for all arguments.
11618 V.4 wants long longs and doubles to be double word aligned. Just
11619 testing the mode size is a boneheaded way to do this as it means
11620 that other types such as complex int are also double word aligned.
11621 However, we're stuck with this because changing the ABI might break
11622 existing library interfaces.
11624 Quadword align Altivec/VSX vectors.
11625 Quadword align large synthetic vector types. */
11627 static unsigned int
11628 rs6000_function_arg_boundary (machine_mode mode, const_tree type)
11630 machine_mode elt_mode;
11631 int n_elts;
11633 rs6000_discover_homogeneous_aggregate (mode, type, &elt_mode, &n_elts);
11635 if (DEFAULT_ABI == ABI_V4
11636 && (GET_MODE_SIZE (mode) == 8
11637 || (TARGET_HARD_FLOAT
11638 && !is_complex_IBM_long_double (mode)
11639 && FLOAT128_2REG_P (mode))))
11640 return 64;
11641 else if (FLOAT128_VECTOR_P (mode))
11642 return 128;
11643 else if (PAIRED_VECTOR_MODE (mode)
11644 || (type && TREE_CODE (type) == VECTOR_TYPE
11645 && int_size_in_bytes (type) >= 8
11646 && int_size_in_bytes (type) < 16))
11647 return 64;
11648 else if (ALTIVEC_OR_VSX_VECTOR_MODE (elt_mode)
11649 || (type && TREE_CODE (type) == VECTOR_TYPE
11650 && int_size_in_bytes (type) >= 16))
11651 return 128;
11653 /* Aggregate types that need > 8 byte alignment are quadword-aligned
11654 in the parameter area in the ELFv2 ABI, and in the AIX ABI unless
11655 -mcompat-align-parm is used. */
11656 if (((DEFAULT_ABI == ABI_AIX && !rs6000_compat_align_parm)
11657 || DEFAULT_ABI == ABI_ELFv2)
11658 && type && TYPE_ALIGN (type) > 64)
11660 /* "Aggregate" means any AGGREGATE_TYPE except for single-element
11661 or homogeneous float/vector aggregates here. We already handled
11662 vector aggregates above, but still need to check for float here. */
11663 bool aggregate_p = (AGGREGATE_TYPE_P (type)
11664 && !SCALAR_FLOAT_MODE_P (elt_mode));
11666 /* We used to check for BLKmode instead of the above aggregate type
11667 check. Warn when this results in any difference to the ABI. */
11668 if (aggregate_p != (mode == BLKmode))
11670 static bool warned;
11671 if (!warned && warn_psabi)
11673 warned = true;
11674 inform (input_location,
11675 "the ABI of passing aggregates with %d-byte alignment"
11676 " has changed in GCC 5",
11677 (int) TYPE_ALIGN (type) / BITS_PER_UNIT);
11681 if (aggregate_p)
11682 return 128;
11685 /* Similar for the Darwin64 ABI. Note that for historical reasons we
11686 implement the "aggregate type" check as a BLKmode check here; this
11687 means certain aggregate types are in fact not aligned. */
11688 if (TARGET_MACHO && rs6000_darwin64_abi
11689 && mode == BLKmode
11690 && type && TYPE_ALIGN (type) > 64)
11691 return 128;
11693 return PARM_BOUNDARY;
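/* Alignment examples for the cases above (values in bits; the
   attribute example is hypothetical):

     long long or double under V.4                    -> 64
     AltiVec/VSX vector (e.g. vector int)             -> 128
     IEEE 128-bit float (FLOAT128_VECTOR_P)           -> 128
     struct __attribute__ ((aligned (16)))
       s { long a, b, c; } under ELFv2                -> 128
     everything else                                  -> PARM_BOUNDARY  */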
11696 /* The offset in words to the start of the parameter save area. */
11698 static unsigned int
11699 rs6000_parm_offset (void)
11701 return (DEFAULT_ABI == ABI_V4 ? 2
11702 : DEFAULT_ABI == ABI_ELFv2 ? 4
11703 : 6);
11706 /* For a function parm of MODE and TYPE, return the starting word in
11707 the parameter area. NWORDS of the parameter area are already used. */
11709 static unsigned int
11710 rs6000_parm_start (machine_mode mode, const_tree type,
11711 unsigned int nwords)
11713 unsigned int align;
11715 align = rs6000_function_arg_boundary (mode, type) / PARM_BOUNDARY - 1;
11716 return nwords + (-(rs6000_parm_offset () + nwords) & align);
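/* Worked example, assuming 64-bit words (PARM_BOUNDARY == 64) and the
   ELFv2 offset of 4 words: a 128-bit-aligned arg has
   align == 128 / 64 - 1 == 1, so with nwords == 3 already used,

     3 + (-(4 + 3) & 1) == 3 + 1 == 4

   and the arg starts at word 4, an even doubleword in the save area.  */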
11719 /* Compute the size (in words) of a function argument. */
11721 static unsigned long
11722 rs6000_arg_size (machine_mode mode, const_tree type)
11724 unsigned long size;
11726 if (mode != BLKmode)
11727 size = GET_MODE_SIZE (mode);
11728 else
11729 size = int_size_in_bytes (type);
11731 if (TARGET_32BIT)
11732 return (size + 3) >> 2;
11733 else
11734 return (size + 7) >> 3;
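/* E.g. a 20-byte BLKmode struct needs (20 + 3) >> 2 == 5 words on
   32-bit targets and (20 + 7) >> 3 == 3 doublewords on 64-bit ones.  */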
11737 /* Use this to flush pending int fields. */
11739 static void
11740 rs6000_darwin64_record_arg_advance_flush (CUMULATIVE_ARGS *cum,
11741 HOST_WIDE_INT bitpos, int final)
11743 unsigned int startbit, endbit;
11744 int intregs, intoffset;
11745 machine_mode mode;
11747 /* Handle the situations where a float is taking up the first half
11748 of the GPR, and the other half is empty (typically due to
11749 alignment restrictions). We can detect this by an 8-byte-aligned
11750 int field, or by seeing that this is the final flush for this
11751 argument. Count the word and continue on. */
11752 if (cum->floats_in_gpr == 1
11753 && (cum->intoffset % 64 == 0
11754 || (cum->intoffset == -1 && final)))
11756 cum->words++;
11757 cum->floats_in_gpr = 0;
11760 if (cum->intoffset == -1)
11761 return;
11763 intoffset = cum->intoffset;
11764 cum->intoffset = -1;
11765 cum->floats_in_gpr = 0;
11767 if (intoffset % BITS_PER_WORD != 0)
11769 mode = mode_for_size (BITS_PER_WORD - intoffset % BITS_PER_WORD,
11770 MODE_INT, 0);
11771 if (mode == BLKmode)
11773 /* We couldn't find an appropriate mode, which happens,
11774 e.g., in packed structs when there are 3 bytes to load.
11775 Move intoffset back to the beginning of the word in this
11776 case. */
11777 intoffset = ROUND_DOWN (intoffset, BITS_PER_WORD);
11781 startbit = ROUND_DOWN (intoffset, BITS_PER_WORD);
11782 endbit = ROUND_UP (bitpos, BITS_PER_WORD);
11783 intregs = (endbit - startbit) / BITS_PER_WORD;
11784 cum->words += intregs;
11785 /* words should be unsigned. */
11786 if ((unsigned)cum->words < (endbit/BITS_PER_WORD))
11788 int pad = (endbit/BITS_PER_WORD) - cum->words;
11789 cum->words += pad;
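/* Worked example of the flush arithmetic, assuming BITS_PER_WORD is 64:
   with intoffset == 32 (int fields starting mid-word) and bitpos == 128,
   startbit rounds down to 0 and endbit up to 128, so intregs == 2 and
   cum->words advances by two GPRs.  */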
11793 /* The darwin64 ABI calls for us to recurse down through structs,
11794 looking for elements passed in registers. Unfortunately, we have
11795 to track int register count here also because of misalignments
11796 in powerpc alignment mode. */
11798 static void
11799 rs6000_darwin64_record_arg_advance_recurse (CUMULATIVE_ARGS *cum,
11800 const_tree type,
11801 HOST_WIDE_INT startbitpos)
11803 tree f;
11805 for (f = TYPE_FIELDS (type); f ; f = DECL_CHAIN (f))
11806 if (TREE_CODE (f) == FIELD_DECL)
11808 HOST_WIDE_INT bitpos = startbitpos;
11809 tree ftype = TREE_TYPE (f);
11810 machine_mode mode;
11811 if (ftype == error_mark_node)
11812 continue;
11813 mode = TYPE_MODE (ftype);
11815 if (DECL_SIZE (f) != 0
11816 && tree_fits_uhwi_p (bit_position (f)))
11817 bitpos += int_bit_position (f);
11819 /* ??? FIXME: else assume zero offset. */
11821 if (TREE_CODE (ftype) == RECORD_TYPE)
11822 rs6000_darwin64_record_arg_advance_recurse (cum, ftype, bitpos);
11823 else if (USE_FP_FOR_ARG_P (cum, mode))
11825 unsigned n_fpregs = (GET_MODE_SIZE (mode) + 7) >> 3;
11826 rs6000_darwin64_record_arg_advance_flush (cum, bitpos, 0);
11827 cum->fregno += n_fpregs;
11828 /* Single-precision floats present a special problem for
11829 us, because they are smaller than an 8-byte GPR, and so
11830 the structure-packing rules combined with the standard
11831 varargs behavior mean that we want to pack float/float
11832 and float/int combinations into a single register's
11833 space. This is complicated by the arg advance flushing,
11834 which works on arbitrarily large groups of int-type
11835 fields. */
11836 if (mode == SFmode)
11838 if (cum->floats_in_gpr == 1)
11840 /* Two floats in a word; count the word and reset
11841 the float count. */
11842 cum->words++;
11843 cum->floats_in_gpr = 0;
11845 else if (bitpos % 64 == 0)
11847 /* A float at the beginning of an 8-byte word;
11848 count it and put off adjusting cum->words until
11849 we see if an arg advance flush is going to do it
11850 for us. */
11851 cum->floats_in_gpr++;
11853 else
11855 /* The float is at the end of a word, preceded
11856 by integer fields, so the arg advance flush
11857 just above has already set cum->words and
11858 everything is taken care of. */
11861 else
11862 cum->words += n_fpregs;
11864 else if (USE_ALTIVEC_FOR_ARG_P (cum, mode, 1))
11866 rs6000_darwin64_record_arg_advance_flush (cum, bitpos, 0);
11867 cum->vregno++;
11868 cum->words += 2;
11870 else if (cum->intoffset == -1)
11871 cum->intoffset = bitpos;
11875 /* Check for an item that needs to be considered specially under the Darwin
11876 64-bit ABI. These are record types where the mode is BLK or the structure is
11877 8 bytes in size. */
11878 static int
11879 rs6000_darwin64_struct_check_p (machine_mode mode, const_tree type)
11881 return rs6000_darwin64_abi
11882 && ((mode == BLKmode
11883 && TREE_CODE (type) == RECORD_TYPE
11884 && int_size_in_bytes (type) > 0)
11885 || (type && TREE_CODE (type) == RECORD_TYPE
11886 && int_size_in_bytes (type) == 8)) ? 1 : 0;
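/* E.g. under the Darwin64 ABI (modes here are the usual ones GCC
   assigns to such records, shown as an illustration):

     struct { char c[24]; }   BLKmode record, size 24 -> 1
     struct { long l; }       8-byte record (DImode)  -> 1
     struct { int i; }        4-byte record (SImode)  -> 0  */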
11889 /* Update the data in CUM to advance over an argument
11890 of mode MODE and data type TYPE.
11891 (TYPE is null for libcalls where that information may not be available.)
11893 Note that for args passed by reference, function_arg will be called
11894 with MODE and TYPE set to that of the pointer to the arg, not the arg
11895 itself. */
11897 static void
11898 rs6000_function_arg_advance_1 (CUMULATIVE_ARGS *cum, machine_mode mode,
11899 const_tree type, bool named, int depth)
11901 machine_mode elt_mode;
11902 int n_elts;
11904 rs6000_discover_homogeneous_aggregate (mode, type, &elt_mode, &n_elts);
11906 /* Only tick off an argument if we're not recursing. */
11907 if (depth == 0)
11908 cum->nargs_prototype--;
11910 #ifdef HAVE_AS_GNU_ATTRIBUTE
11911 if (TARGET_ELF && (TARGET_64BIT || DEFAULT_ABI == ABI_V4)
11912 && cum->escapes)
11914 if (SCALAR_FLOAT_MODE_P (mode))
11916 rs6000_passes_float = true;
11917 if ((HAVE_LD_PPC_GNU_ATTR_LONG_DOUBLE || TARGET_64BIT)
11918 && (FLOAT128_IBM_P (mode)
11919 || FLOAT128_IEEE_P (mode)
11920 || (type != NULL
11921 && TYPE_MAIN_VARIANT (type) == long_double_type_node)))
11922 rs6000_passes_long_double = true;
11924 if ((named && ALTIVEC_OR_VSX_VECTOR_MODE (mode))
11925 || (PAIRED_VECTOR_MODE (mode)
11926 && !cum->stdarg
11927 && cum->sysv_gregno <= GP_ARG_MAX_REG))
11928 rs6000_passes_vector = true;
11930 #endif
11932 if (TARGET_ALTIVEC_ABI
11933 && (ALTIVEC_OR_VSX_VECTOR_MODE (elt_mode)
11934 || (type && TREE_CODE (type) == VECTOR_TYPE
11935 && int_size_in_bytes (type) == 16)))
11937 bool stack = false;
11939 if (USE_ALTIVEC_FOR_ARG_P (cum, elt_mode, named))
11941 cum->vregno += n_elts;
11943 if (!TARGET_ALTIVEC)
11944 error ("cannot pass argument in vector register because"
11945 " altivec instructions are disabled, use -maltivec"
11946 " to enable them");
11948 /* PowerPC64 Linux and AIX allocate GPRs for a vector argument
11949 even if it is going to be passed in a vector register.
11950 Darwin does the same for variable-argument functions. */
11951 if (((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
11952 && TARGET_64BIT)
11953 || (cum->stdarg && DEFAULT_ABI != ABI_V4))
11954 stack = true;
11956 else
11957 stack = true;
11959 if (stack)
11961 int align;
11963 /* Vector parameters must be 16-byte aligned. In 32-bit
11964 mode this means we need to take into account the offset
11965 to the parameter save area. In 64-bit mode, they just
11966 have to start on an even word, since the parameter save
11967 area is 16-byte aligned. */
11968 if (TARGET_32BIT)
11969 align = -(rs6000_parm_offset () + cum->words) & 3;
11970 else
11971 align = cum->words & 1;
11972 cum->words += align + rs6000_arg_size (mode, type);
11974 if (TARGET_DEBUG_ARG)
11976 fprintf (stderr, "function_adv: words = %2d, align=%d, ",
11977 cum->words, align);
11978 fprintf (stderr, "nargs = %4d, proto = %d, mode = %4s\n",
11979 cum->nargs_prototype, cum->prototype,
11980 GET_MODE_NAME (mode));
11984 else if (TARGET_MACHO && rs6000_darwin64_struct_check_p (mode, type))
11986 int size = int_size_in_bytes (type);
11987 /* Variable sized types have size == -1 and are
11988 treated as if consisting entirely of ints.
11989 Pad to 16 byte boundary if needed. */
11990 if (TYPE_ALIGN (type) >= 2 * BITS_PER_WORD
11991 && (cum->words % 2) != 0)
11992 cum->words++;
11993 /* For varargs, we can just go up by the size of the struct. */
11994 if (!named)
11995 cum->words += (size + 7) / 8;
11996 else
11998 /* It is tempting to say int register count just goes up by
11999 sizeof(type)/8, but this is wrong in a case such as
12000 { int; double; int; } [powerpc alignment]. We have to
12001 grovel through the fields for these too. */
12002 cum->intoffset = 0;
12003 cum->floats_in_gpr = 0;
12004 rs6000_darwin64_record_arg_advance_recurse (cum, type, 0);
12005 rs6000_darwin64_record_arg_advance_flush (cum,
12006 size * BITS_PER_UNIT, 1);
12008 if (TARGET_DEBUG_ARG)
12010 fprintf (stderr, "function_adv: words = %2d, align=%d, size=%d",
12011 cum->words, TYPE_ALIGN (type), size);
12012 fprintf (stderr,
12013 "nargs = %4d, proto = %d, mode = %4s (darwin64 abi)\n",
12014 cum->nargs_prototype, cum->prototype,
12015 GET_MODE_NAME (mode));
12018 else if (DEFAULT_ABI == ABI_V4)
12020 if (abi_v4_pass_in_fpr (mode))
12022 /* _Decimal128 must use an even/odd register pair. This assumes
12023 that the register number is odd when fregno is odd. */
12024 if (mode == TDmode && (cum->fregno % 2) == 1)
12025 cum->fregno++;
12027 if (cum->fregno + (FLOAT128_2REG_P (mode) ? 1 : 0)
12028 <= FP_ARG_V4_MAX_REG)
12029 cum->fregno += (GET_MODE_SIZE (mode) + 7) >> 3;
12030 else
12032 cum->fregno = FP_ARG_V4_MAX_REG + 1;
12033 if (mode == DFmode || FLOAT128_IBM_P (mode)
12034 || mode == DDmode || mode == TDmode)
12035 cum->words += cum->words & 1;
12036 cum->words += rs6000_arg_size (mode, type);
12039 else
12041 int n_words = rs6000_arg_size (mode, type);
12042 int gregno = cum->sysv_gregno;
12044 /* Long long is put in (r3,r4), (r5,r6), (r7,r8) or (r9,r10).
12045 So is any other 2-word item such as complex int, due to a
12046 historical mistake. */
12047 if (n_words == 2)
12048 gregno += (1 - gregno) & 1;
12050 /* Multi-reg args are not split between registers and stack. */
12051 if (gregno + n_words - 1 > GP_ARG_MAX_REG)
12053 /* Long long is aligned on the stack. So are other 2-word
12054 items such as complex int due to a historical mistake. */
12055 if (n_words == 2)
12056 cum->words += cum->words & 1;
12057 cum->words += n_words;
12060 /* Note: we continue to accumulate gregno even after we have started
12061 spilling to the stack; this tells expand_builtin_saveregs that
12062 spilling has begun. */
12063 cum->sysv_gregno = gregno + n_words;
12066 if (TARGET_DEBUG_ARG)
12068 fprintf (stderr, "function_adv: words = %2d, fregno = %2d, ",
12069 cum->words, cum->fregno);
12070 fprintf (stderr, "gregno = %2d, nargs = %4d, proto = %d, ",
12071 cum->sysv_gregno, cum->nargs_prototype, cum->prototype);
12072 fprintf (stderr, "mode = %4s, named = %d\n",
12073 GET_MODE_NAME (mode), named);
12076 else
12078 int n_words = rs6000_arg_size (mode, type);
12079 int start_words = cum->words;
12080 int align_words = rs6000_parm_start (mode, type, start_words);
12082 cum->words = align_words + n_words;
12084 if (SCALAR_FLOAT_MODE_P (elt_mode) && TARGET_HARD_FLOAT)
12086 /* _Decimal128 must be passed in an even/odd float register pair.
12087 This assumes that the register number is odd when fregno is
12088 odd. */
12089 if (elt_mode == TDmode && (cum->fregno % 2) == 1)
12090 cum->fregno++;
12091 cum->fregno += n_elts * ((GET_MODE_SIZE (elt_mode) + 7) >> 3);
12094 if (TARGET_DEBUG_ARG)
12096 fprintf (stderr, "function_adv: words = %2d, fregno = %2d, ",
12097 cum->words, cum->fregno);
12098 fprintf (stderr, "nargs = %4d, proto = %d, mode = %4s, ",
12099 cum->nargs_prototype, cum->prototype, GET_MODE_NAME (mode));
12100 fprintf (stderr, "named = %d, align = %d, depth = %d\n",
12101 named, align_words - start_words, depth);
12106 static void
12107 rs6000_function_arg_advance (cumulative_args_t cum, machine_mode mode,
12108 const_tree type, bool named)
12110 rs6000_function_arg_advance_1 (get_cumulative_args (cum), mode, type, named,
12114 /* A subroutine of rs6000_darwin64_record_arg. Assign the bits of the
12115 structure between cum->intoffset and bitpos to integer registers. */
12117 static void
12118 rs6000_darwin64_record_arg_flush (CUMULATIVE_ARGS *cum,
12119 HOST_WIDE_INT bitpos, rtx rvec[], int *k)
12121 machine_mode mode;
12122 unsigned int regno;
12123 unsigned int startbit, endbit;
12124 int this_regno, intregs, intoffset;
12125 rtx reg;
12127 if (cum->intoffset == -1)
12128 return;
12130 intoffset = cum->intoffset;
12131 cum->intoffset = -1;
12133 /* If this is the trailing part of a word, try to only load that
12134 much into the register. Otherwise load the whole register. Note
12135 that in the latter case we may pick up unwanted bits. It's not a
12136 problem at the moment, but we may wish to revisit this. */
12138 if (intoffset % BITS_PER_WORD != 0)
12140 mode = mode_for_size (BITS_PER_WORD - intoffset % BITS_PER_WORD,
12141 MODE_INT, 0);
12142 if (mode == BLKmode)
12144 /* We couldn't find an appropriate mode, which happens,
12145 e.g., in packed structs when there are 3 bytes to load.
12146 Move intoffset back to the beginning of the word in this
12147 case. */
12148 intoffset = ROUND_DOWN (intoffset, BITS_PER_WORD);
12149 mode = word_mode;
12152 else
12153 mode = word_mode;
12155 startbit = ROUND_DOWN (intoffset, BITS_PER_WORD);
12156 endbit = ROUND_UP (bitpos, BITS_PER_WORD);
12157 intregs = (endbit - startbit) / BITS_PER_WORD;
12158 this_regno = cum->words + intoffset / BITS_PER_WORD;
12160 if (intregs > 0 && intregs > GP_ARG_NUM_REG - this_regno)
12161 cum->use_stack = 1;
12163 intregs = MIN (intregs, GP_ARG_NUM_REG - this_regno);
12164 if (intregs <= 0)
12165 return;
12167 intoffset /= BITS_PER_UNIT;
12170 regno = GP_ARG_MIN_REG + this_regno;
12171 reg = gen_rtx_REG (mode, regno);
12172 rvec[(*k)++] =
12173 gen_rtx_EXPR_LIST (VOIDmode, reg, GEN_INT (intoffset));
12175 this_regno += 1;
12176 intoffset = (intoffset | (UNITS_PER_WORD-1)) + 1;
12177 mode = word_mode;
12178 intregs -= 1;
12180 while (intregs > 0);
12183 /* Recursive workhorse for the following. */
12185 static void
12186 rs6000_darwin64_record_arg_recurse (CUMULATIVE_ARGS *cum, const_tree type,
12187 HOST_WIDE_INT startbitpos, rtx rvec[],
12188 int *k)
12190 tree f;
12192 for (f = TYPE_FIELDS (type); f ; f = DECL_CHAIN (f))
12193 if (TREE_CODE (f) == FIELD_DECL)
12195 HOST_WIDE_INT bitpos = startbitpos;
12196 tree ftype = TREE_TYPE (f);
12197 machine_mode mode;
12198 if (ftype == error_mark_node)
12199 continue;
12200 mode = TYPE_MODE (ftype);
12202 if (DECL_SIZE (f) != 0
12203 && tree_fits_uhwi_p (bit_position (f)))
12204 bitpos += int_bit_position (f);
12206 /* ??? FIXME: else assume zero offset. */
12208 if (TREE_CODE (ftype) == RECORD_TYPE)
12209 rs6000_darwin64_record_arg_recurse (cum, ftype, bitpos, rvec, k);
12210 else if (cum->named && USE_FP_FOR_ARG_P (cum, mode))
12212 unsigned n_fpreg = (GET_MODE_SIZE (mode) + 7) >> 3;
12213 #if 0
12214 switch (mode)
12216 case SCmode: mode = SFmode; break;
12217 case DCmode: mode = DFmode; break;
12218 case TCmode: mode = TFmode; break;
12219 default: break;
12221 #endif
12222 rs6000_darwin64_record_arg_flush (cum, bitpos, rvec, k);
12223 if (cum->fregno + n_fpreg > FP_ARG_MAX_REG + 1)
12225 gcc_assert (cum->fregno == FP_ARG_MAX_REG
12226 && (mode == TFmode || mode == TDmode));
12227 /* Long double or _Decimal128 split over regs and memory. */
12228 mode = DECIMAL_FLOAT_MODE_P (mode) ? DDmode : DFmode;
12229 cum->use_stack = 1;
12231 rvec[(*k)++]
12232 = gen_rtx_EXPR_LIST (VOIDmode,
12233 gen_rtx_REG (mode, cum->fregno++),
12234 GEN_INT (bitpos / BITS_PER_UNIT));
12235 if (FLOAT128_2REG_P (mode))
12236 cum->fregno++;
12238 else if (cum->named && USE_ALTIVEC_FOR_ARG_P (cum, mode, 1))
12240 rs6000_darwin64_record_arg_flush (cum, bitpos, rvec, k);
12241 rvec[(*k)++]
12242 = gen_rtx_EXPR_LIST (VOIDmode,
12243 gen_rtx_REG (mode, cum->vregno++),
12244 GEN_INT (bitpos / BITS_PER_UNIT));
12246 else if (cum->intoffset == -1)
12247 cum->intoffset = bitpos;
12251 /* For the darwin64 ABI, we want to construct a PARALLEL consisting of
12252 the register(s) to be used for each field and subfield of a struct
12253 being passed by value, along with the offset of where the
12254 register's value may be found in the block. FP fields go in FP
12255 register, vector fields go in vector registers, and everything
12256 else goes in int registers, packed as in memory.
12258 This code is also used for function return values. RETVAL indicates
12259 whether this is the case.
12261 Much of this is taken from the SPARC V9 port, which has a similar
12262 calling convention. */
12264 static rtx
12265 rs6000_darwin64_record_arg (CUMULATIVE_ARGS *orig_cum, const_tree type,
12266 bool named, bool retval)
12268 rtx rvec[FIRST_PSEUDO_REGISTER];
12269 int k = 1, kbase = 1;
12270 HOST_WIDE_INT typesize = int_size_in_bytes (type);
12271 /* This is a copy; modifications are not visible to our caller. */
12272 CUMULATIVE_ARGS copy_cum = *orig_cum;
12273 CUMULATIVE_ARGS *cum = &copy_cum;
12275 /* Pad to 16 byte boundary if needed. */
12276 if (!retval && TYPE_ALIGN (type) >= 2 * BITS_PER_WORD
12277 && (cum->words % 2) != 0)
12278 cum->words++;
12280 cum->intoffset = 0;
12281 cum->use_stack = 0;
12282 cum->named = named;
12284 /* Put entries into rvec[] for individual FP and vector fields, and
12285 for the chunks of memory that go in int regs. Note we start at
12286 element 1; 0 is reserved for an indication of using memory, and
12287 may or may not be filled in below. */
12288 rs6000_darwin64_record_arg_recurse (cum, type, /* startbit pos= */ 0, rvec, &k);
12289 rs6000_darwin64_record_arg_flush (cum, typesize * BITS_PER_UNIT, rvec, &k);
12291 /* If any part of the struct went on the stack put all of it there.
12292 This hack is because the generic code for
12293 FUNCTION_ARG_PARTIAL_NREGS cannot handle cases where the register
12294 parts of the struct are not at the beginning. */
12295 if (cum->use_stack)
12297 if (retval)
12298 return NULL_RTX; /* doesn't go in registers at all */
12299 kbase = 0;
12300 rvec[0] = gen_rtx_EXPR_LIST (VOIDmode, NULL_RTX, const0_rtx);
12302 if (k > 1 || cum->use_stack)
12303 return gen_rtx_PARALLEL (BLKmode, gen_rtvec_v (k - kbase, &rvec[kbase]));
12304 else
12305 return NULL_RTX;
12308 /* Determine where to place an argument in 64-bit mode with 32-bit ABI. */
12310 static rtx
12311 rs6000_mixed_function_arg (machine_mode mode, const_tree type,
12312 int align_words)
12314 int n_units;
12315 int i, k;
12316 rtx rvec[GP_ARG_NUM_REG + 1];
12318 if (align_words >= GP_ARG_NUM_REG)
12319 return NULL_RTX;
12321 n_units = rs6000_arg_size (mode, type);
12323 /* Optimize the simple case where the arg fits in one gpr, except in
12324 the case of BLKmode due to assign_parms assuming that registers are
12325 BITS_PER_WORD wide. */
12326 if (n_units == 0
12327 || (n_units == 1 && mode != BLKmode))
12328 return gen_rtx_REG (mode, GP_ARG_MIN_REG + align_words);
12330 k = 0;
12331 if (align_words + n_units > GP_ARG_NUM_REG)
12332 /* Not all of the arg fits in gprs. Say that it goes in memory too,
12333 using a magic NULL_RTX component.
12334 This is not strictly correct. Only some of the arg belongs in
12335 memory, not all of it. However, the normal scheme using
12336 function_arg_partial_nregs can result in unusual subregs, eg.
12337 (subreg:SI (reg:DF) 4), which are not handled well. The code to
12338 store the whole arg to memory is often more efficient than code
12339 to store pieces, and we know that space is available in the right
12340 place for the whole arg. */
12341 rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, NULL_RTX, const0_rtx);
12343 i = 0;
12346 rtx r = gen_rtx_REG (SImode, GP_ARG_MIN_REG + align_words);
12347 rtx off = GEN_INT (i++ * 4);
12348 rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, r, off);
12350 while (++align_words < GP_ARG_NUM_REG && --n_units != 0);
12352 return gen_rtx_PARALLEL (mode, gen_rtvec_v (k, rvec));
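/* For instance, a DFmode arg with align_words == 7 (only r10 left,
   assuming 8 argument GPRs) has n_units == 2 SImode pieces, giving
   roughly

     (parallel:DF [(expr_list (nil) (const_int 0))
                   (expr_list (reg:SI r10) (const_int 0))])

   i.e. the first word goes in r10 and the rest spills to memory.
   This sketches the shape, not verbatim RTL dump output.  */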
12355 /* We have an argument of MODE and TYPE that goes into FPRs or VRs,
12356 but must also be copied into the parameter save area starting at
12357 offset ALIGN_WORDS. Fill in RVEC with the elements corresponding
12358 to the GPRs and/or memory. Return the number of elements used. */
12360 static int
12361 rs6000_psave_function_arg (machine_mode mode, const_tree type,
12362 int align_words, rtx *rvec)
12364 int k = 0;
12366 if (align_words < GP_ARG_NUM_REG)
12368 int n_words = rs6000_arg_size (mode, type);
12370 if (align_words + n_words > GP_ARG_NUM_REG
12371 || mode == BLKmode
12372 || (TARGET_32BIT && TARGET_POWERPC64))
12374 /* If this is partially on the stack, then we only
12375 include the portion actually in registers here. */
12376 machine_mode rmode = TARGET_32BIT ? SImode : DImode;
12377 int i = 0;
12379 if (align_words + n_words > GP_ARG_NUM_REG)
12381 /* Not all of the arg fits in gprs. Say that it goes in memory
12382 too, using a magic NULL_RTX component. Also see comment in
12383 rs6000_mixed_function_arg for why the normal
12384 function_arg_partial_nregs scheme doesn't work in this case. */
12385 rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, NULL_RTX, const0_rtx);
12390 rtx r = gen_rtx_REG (rmode, GP_ARG_MIN_REG + align_words);
12391 rtx off = GEN_INT (i++ * GET_MODE_SIZE (rmode));
12392 rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, r, off);
12394 while (++align_words < GP_ARG_NUM_REG && --n_words != 0);
12396 else
12398 /* The whole arg fits in gprs. */
12399 rtx r = gen_rtx_REG (mode, GP_ARG_MIN_REG + align_words);
12400 rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, r, const0_rtx);
12403 else
12405 /* It's entirely in memory. */
12406 rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, NULL_RTX, const0_rtx);
12409 return k;
12412 /* RVEC is a vector of K components of an argument of mode MODE.
12413 Construct the final function_arg return value from it. */
12415 static rtx
12416 rs6000_finish_function_arg (machine_mode mode, rtx *rvec, int k)
12418 gcc_assert (k >= 1);
12420 /* Avoid returning a PARALLEL in the trivial cases. */
12421 if (k == 1)
12423 if (XEXP (rvec[0], 0) == NULL_RTX)
12424 return NULL_RTX;
12426 if (GET_MODE (XEXP (rvec[0], 0)) == mode)
12427 return XEXP (rvec[0], 0);
12430 return gen_rtx_PARALLEL (mode, gen_rtvec_v (k, rvec));
12433 /* Determine where to put an argument to a function.
12434 Value is zero to push the argument on the stack,
12435 or a hard register in which to store the argument.
12437 MODE is the argument's machine mode.
12438 TYPE is the data type of the argument (as a tree).
12439 This is null for libcalls where that information may
12440 not be available.
12441 CUM is a variable of type CUMULATIVE_ARGS which gives info about
12442 the preceding args and about the function being called. It is
12443 not modified in this routine.
12444 NAMED is nonzero if this argument is a named parameter
12445 (otherwise it is an extra parameter matching an ellipsis).
12447 On RS/6000 the first eight words of non-FP are normally in registers
12448 and the rest are pushed. Under AIX, the first 13 FP args are in registers.
12449 Under V.4, the first 8 FP args are in registers.
12451 If this is floating-point and no prototype is specified, we use
12452 both an FP and integer register (or possibly FP reg and stack). Library
12453 functions (when CALL_LIBCALL is set) always have the proper types for args,
12454 so we can pass the FP value just in one register. emit_library_function
12455 doesn't support PARALLEL anyway.
12457 Note that for args passed by reference, function_arg will be called
12458 with MODE and TYPE set to that of the pointer to the arg, not the arg
12459 itself. */
12461 static rtx
12462 rs6000_function_arg (cumulative_args_t cum_v, machine_mode mode,
12463 const_tree type, bool named)
12465 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
12466 enum rs6000_abi abi = DEFAULT_ABI;
12467 machine_mode elt_mode;
12468 int n_elts;
12470 /* Return a marker to indicate whether we need to set or clear in CR1 the
12471 bit that V.4 uses to say fp args were passed in registers.
12472 Assume that we don't need the marker for software floating point,
12473 or compiler generated library calls. */
12474 if (mode == VOIDmode)
12476 if (abi == ABI_V4
12477 && (cum->call_cookie & CALL_LIBCALL) == 0
12478 && (cum->stdarg
12479 || (cum->nargs_prototype < 0
12480 && (cum->prototype || TARGET_NO_PROTOTYPE)))
12481 && TARGET_HARD_FLOAT)
12482 return GEN_INT (cum->call_cookie
12483 | ((cum->fregno == FP_ARG_MIN_REG)
12484 ? CALL_V4_SET_FP_ARGS
12485 : CALL_V4_CLEAR_FP_ARGS));
12487 return GEN_INT (cum->call_cookie & ~CALL_LIBCALL);
12490 rs6000_discover_homogeneous_aggregate (mode, type, &elt_mode, &n_elts);
12492 if (TARGET_MACHO && rs6000_darwin64_struct_check_p (mode, type))
12494 rtx rslt = rs6000_darwin64_record_arg (cum, type, named, /*retval= */false);
12495 if (rslt != NULL_RTX)
12496 return rslt;
12497 /* Else fall through to usual handling. */
12500 if (USE_ALTIVEC_FOR_ARG_P (cum, elt_mode, named))
12502 rtx rvec[GP_ARG_NUM_REG + AGGR_ARG_NUM_REG + 1];
12503 rtx r, off;
12504 int i, k = 0;
12506 /* Do we also need to pass this argument in the parameter save area?
12507 Library support functions for IEEE 128-bit are assumed to not need the
12508 value passed both in GPRs and in vector registers. */
12509 if (TARGET_64BIT && !cum->prototype
12510 && (!cum->libcall || !FLOAT128_VECTOR_P (elt_mode)))
12512 int align_words = ROUND_UP (cum->words, 2);
12513 k = rs6000_psave_function_arg (mode, type, align_words, rvec);
12516 /* Describe where this argument goes in the vector registers. */
12517 for (i = 0; i < n_elts && cum->vregno + i <= ALTIVEC_ARG_MAX_REG; i++)
12519 r = gen_rtx_REG (elt_mode, cum->vregno + i);
12520 off = GEN_INT (i * GET_MODE_SIZE (elt_mode));
12521 rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, r, off);
12524 return rs6000_finish_function_arg (mode, rvec, k);
12526 else if (TARGET_ALTIVEC_ABI
12527 && (ALTIVEC_OR_VSX_VECTOR_MODE (mode)
12528 || (type && TREE_CODE (type) == VECTOR_TYPE
12529 && int_size_in_bytes (type) == 16)))
12531 if (named || abi == ABI_V4)
12532 return NULL_RTX;
12533 else
12535 /* Vector parameters to varargs functions under AIX or Darwin
12536 get passed in memory and possibly also in GPRs. */
12537 int align, align_words, n_words;
12538 machine_mode part_mode;
12540 /* Vector parameters must be 16-byte aligned. In 32-bit
12541 mode this means we need to take into account the offset
12542 to the parameter save area. In 64-bit mode, they just
12543 have to start on an even word, since the parameter save
12544 area is 16-byte aligned. */
12545 if (TARGET_32BIT)
12546 align = -(rs6000_parm_offset () + cum->words) & 3;
12547 else
12548 align = cum->words & 1;
12549 align_words = cum->words + align;
12551 /* Out of registers? Memory, then. */
12552 if (align_words >= GP_ARG_NUM_REG)
12553 return NULL_RTX;
12555 if (TARGET_32BIT && TARGET_POWERPC64)
12556 return rs6000_mixed_function_arg (mode, type, align_words);
12558 /* The vector value goes in GPRs. Only the part of the
12559 value in GPRs is reported here. */
12560 part_mode = mode;
12561 n_words = rs6000_arg_size (mode, type);
12562 if (align_words + n_words > GP_ARG_NUM_REG)
12563 /* Fortunately, there are only two possibilities: the value
12564 is either wholly in GPRs or half in GPRs and half not. */
12565 part_mode = DImode;
12567 return gen_rtx_REG (part_mode, GP_ARG_MIN_REG + align_words);
12571 else if (abi == ABI_V4)
12573 if (abi_v4_pass_in_fpr (mode))
12575 /* _Decimal128 must use an even/odd register pair. This assumes
12576 that the register number is odd when fregno is odd. */
12577 if (mode == TDmode && (cum->fregno % 2) == 1)
12578 cum->fregno++;
12580 if (cum->fregno + (FLOAT128_2REG_P (mode) ? 1 : 0)
12581 <= FP_ARG_V4_MAX_REG)
12582 return gen_rtx_REG (mode, cum->fregno);
12583 else
12584 return NULL_RTX;
12586 else
12588 int n_words = rs6000_arg_size (mode, type);
12589 int gregno = cum->sysv_gregno;
12591 /* Long long is put in (r3,r4), (r5,r6), (r7,r8) or (r9,r10).
12592 So is any other 2-word item such as complex int, due to a
12593 historical mistake. */
12594 if (n_words == 2)
12595 gregno += (1 - gregno) & 1;
12597 /* Multi-reg args are not split between registers and stack. */
12598 if (gregno + n_words - 1 > GP_ARG_MAX_REG)
12599 return NULL_RTX;
12601 if (TARGET_32BIT && TARGET_POWERPC64)
12602 return rs6000_mixed_function_arg (mode, type,
12603 gregno - GP_ARG_MIN_REG);
12604 return gen_rtx_REG (mode, gregno);
12607 else
12609 int align_words = rs6000_parm_start (mode, type, cum->words);
12611 /* _Decimal128 must be passed in an even/odd float register pair.
12612 This assumes that the register number is odd when fregno is odd. */
12613 if (elt_mode == TDmode && (cum->fregno % 2) == 1)
12614 cum->fregno++;
12616 if (USE_FP_FOR_ARG_P (cum, elt_mode))
12618 rtx rvec[GP_ARG_NUM_REG + AGGR_ARG_NUM_REG + 1];
12619 rtx r, off;
12620 int i, k = 0;
12621 unsigned long n_fpreg = (GET_MODE_SIZE (elt_mode) + 7) >> 3;
12622 int fpr_words;
12624 /* Do we also need to pass this argument in the parameter
12625 save area? */
12626 if (type && (cum->nargs_prototype <= 0
12627 || ((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
12628 && TARGET_XL_COMPAT
12629 && align_words >= GP_ARG_NUM_REG)))
12630 k = rs6000_psave_function_arg (mode, type, align_words, rvec);
12632 /* Describe where this argument goes in the fprs. */
12633 for (i = 0; i < n_elts
12634 && cum->fregno + i * n_fpreg <= FP_ARG_MAX_REG; i++)
12636 /* Check if the argument is split over registers and memory.
12637 This can only ever happen for long double or _Decimal128;
12638 complex types are handled via split_complex_arg. */
12639 machine_mode fmode = elt_mode;
12640 if (cum->fregno + (i + 1) * n_fpreg > FP_ARG_MAX_REG + 1)
12642 gcc_assert (FLOAT128_2REG_P (fmode));
12643 fmode = DECIMAL_FLOAT_MODE_P (fmode) ? DDmode : DFmode;
12646 r = gen_rtx_REG (fmode, cum->fregno + i * n_fpreg);
12647 off = GEN_INT (i * GET_MODE_SIZE (elt_mode));
12648 rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, r, off);
12651 /* If there were not enough FPRs to hold the argument, the rest
12652 usually goes into memory. However, if the current position
12653 is still within the register parameter area, a portion may
12654 actually have to go into GPRs.
12656 Note that it may happen that the portion of the argument
12657 passed in the first "half" of the first GPR was already
12658 passed in the last FPR as well.
12660 For unnamed arguments, we already set up GPRs to cover the
12661 whole argument in rs6000_psave_function_arg, so there is
12662 nothing further to do at this point. */
12663 fpr_words = (i * GET_MODE_SIZE (elt_mode)) / (TARGET_32BIT ? 4 : 8);
12664 if (i < n_elts && align_words + fpr_words < GP_ARG_NUM_REG
12665 && cum->nargs_prototype > 0)
12667 static bool warned;
12669 machine_mode rmode = TARGET_32BIT ? SImode : DImode;
12670 int n_words = rs6000_arg_size (mode, type);
12672 align_words += fpr_words;
12673 n_words -= fpr_words;
12677 r = gen_rtx_REG (rmode, GP_ARG_MIN_REG + align_words);
12678 off = GEN_INT (fpr_words++ * GET_MODE_SIZE (rmode));
12679 rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, r, off);
12681 while (++align_words < GP_ARG_NUM_REG && --n_words != 0);
12683 if (!warned && warn_psabi)
12685 warned = true;
12686 inform (input_location,
12687 "the ABI of passing homogeneous float aggregates"
12688 " has changed in GCC 5");
12692 return rs6000_finish_function_arg (mode, rvec, k);
12694 else if (align_words < GP_ARG_NUM_REG)
12696 if (TARGET_32BIT && TARGET_POWERPC64)
12697 return rs6000_mixed_function_arg (mode, type, align_words);
12699 return gen_rtx_REG (mode, GP_ARG_MIN_REG + align_words);
12701 else
12702 return NULL_RTX;
12706 /* For an arg passed partly in registers and partly in memory, this is
12707 the number of bytes passed in registers. For args passed entirely in
12708 registers or entirely in memory, zero. When an arg is described by a
12709 PARALLEL, perhaps using more than one register type, this function
12710 returns the number of bytes used by the first element of the PARALLEL. */
12712 static int
12713 rs6000_arg_partial_bytes (cumulative_args_t cum_v, machine_mode mode,
12714 tree type, bool named)
12716 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
12717 bool passed_in_gprs = true;
12718 int ret = 0;
12719 int align_words;
12720 machine_mode elt_mode;
12721 int n_elts;
12723 rs6000_discover_homogeneous_aggregate (mode, type, &elt_mode, &n_elts);
12725 if (DEFAULT_ABI == ABI_V4)
12726 return 0;
12728 if (USE_ALTIVEC_FOR_ARG_P (cum, elt_mode, named))
12730 /* If we are passing this arg in the fixed parameter save area (gprs or
12731 memory) as well as VRs, we do not use the partial bytes mechanism;
12732 instead, rs6000_function_arg will return a PARALLEL including a memory
12733 element as necessary. Library support functions for IEEE 128-bit are
12734 assumed to not need the value passed both in GPRs and in vector
12735 registers. */
12736 if (TARGET_64BIT && !cum->prototype
12737 && (!cum->libcall || !FLOAT128_VECTOR_P (elt_mode)))
12738 return 0;
12740 /* Otherwise, we pass in VRs only. Check for partial copies. */
12741 passed_in_gprs = false;
12742 if (cum->vregno + n_elts > ALTIVEC_ARG_MAX_REG + 1)
12743 ret = (ALTIVEC_ARG_MAX_REG + 1 - cum->vregno) * 16;
12746 /* In this complicated case we just disable the partial_nregs code. */
12747 if (TARGET_MACHO && rs6000_darwin64_struct_check_p (mode, type))
12748 return 0;
12750 align_words = rs6000_parm_start (mode, type, cum->words);
12752 if (USE_FP_FOR_ARG_P (cum, elt_mode))
12754 unsigned long n_fpreg = (GET_MODE_SIZE (elt_mode) + 7) >> 3;
12756 /* If we are passing this arg in the fixed parameter save area
12757 (gprs or memory) as well as FPRs, we do not use the partial
12758 bytes mechanism; instead, rs6000_function_arg will return a
12759 PARALLEL including a memory element as necessary. */
12760 if (type
12761 && (cum->nargs_prototype <= 0
12762 || ((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
12763 && TARGET_XL_COMPAT
12764 && align_words >= GP_ARG_NUM_REG)))
12765 return 0;
12767 /* Otherwise, we pass in FPRs only. Check for partial copies. */
12768 passed_in_gprs = false;
12769 if (cum->fregno + n_elts * n_fpreg > FP_ARG_MAX_REG + 1)
12771 /* Compute number of bytes / words passed in FPRs. If there
12772 is still space available in the register parameter area
12773 *after* that amount, a part of the argument will be passed
12774 in GPRs. In that case, the total amount passed in any
12775 registers is equal to the amount that would have been passed
12776 in GPRs if everything were passed there, so we fall back to
12777 the GPR code below to compute the appropriate value. */
12778 int fpr = ((FP_ARG_MAX_REG + 1 - cum->fregno)
12779 * MIN (8, GET_MODE_SIZE (elt_mode)));
12780 int fpr_words = fpr / (TARGET_32BIT ? 4 : 8);
12782 if (align_words + fpr_words < GP_ARG_NUM_REG)
12783 passed_in_gprs = true;
12784 else
12785 ret = fpr;
12789 if (passed_in_gprs
12790 && align_words < GP_ARG_NUM_REG
12791 && GP_ARG_NUM_REG < align_words + rs6000_arg_size (mode, type))
12792 ret = (GP_ARG_NUM_REG - align_words) * (TARGET_32BIT ? 4 : 8);
12794 if (ret != 0 && TARGET_DEBUG_ARG)
12795 fprintf (stderr, "rs6000_arg_partial_bytes: %d\n", ret);
12797 return ret;
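/* For example (a sketch, assuming ELFv2 64-bit with a prototype in
   scope): a 24-byte integer struct that arrives at align_words == 6
   has only r9 and r10 left, so ret = (8 - 6) * 8 = 16 bytes pass in
   registers and the final 8 bytes go to the stack.  */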
12800 /* A C expression that indicates when an argument must be passed by
12801 reference. If nonzero for an argument, a copy of that argument is
12802 made in memory and a pointer to the argument is passed instead of
12803 the argument itself. The pointer is passed in whatever way is
12804 appropriate for passing a pointer to that type.
12806 Under V.4, aggregates and long double are passed by reference.
12808 As an extension to all 32-bit ABIs, AltiVec vectors are passed by
12809 reference unless the AltiVec vector extension ABI is in force.
12811 As an extension to all ABIs, variable sized types are passed by
12812 reference. */
12814 static bool
12815 rs6000_pass_by_reference (cumulative_args_t cum ATTRIBUTE_UNUSED,
12816 machine_mode mode, const_tree type,
12817 bool named ATTRIBUTE_UNUSED)
12819 if (!type)
12820 return 0;
12822 if (DEFAULT_ABI == ABI_V4 && TARGET_IEEEQUAD
12823 && FLOAT128_IEEE_P (TYPE_MODE (type)))
12825 if (TARGET_DEBUG_ARG)
12826 fprintf (stderr, "function_arg_pass_by_reference: V4 IEEE 128-bit\n");
12827 return 1;
12830 if (DEFAULT_ABI == ABI_V4 && AGGREGATE_TYPE_P (type))
12832 if (TARGET_DEBUG_ARG)
12833 fprintf (stderr, "function_arg_pass_by_reference: V4 aggregate\n");
12834 return 1;
12837 if (int_size_in_bytes (type) < 0)
12839 if (TARGET_DEBUG_ARG)
12840 fprintf (stderr, "function_arg_pass_by_reference: variable size\n");
12841 return 1;
12844 /* Allow -maltivec -mabi=no-altivec without warning. Altivec vector
12845 modes only exist for GCC vector types if -maltivec. */
12846 if (TARGET_32BIT && !TARGET_ALTIVEC_ABI && ALTIVEC_VECTOR_MODE (mode))
12848 if (TARGET_DEBUG_ARG)
12849 fprintf (stderr, "function_arg_pass_by_reference: AltiVec\n");
12850 return 1;
12853 /* Pass synthetic vectors in memory. */
12854 if (TREE_CODE (type) == VECTOR_TYPE
12855 && int_size_in_bytes (type) > (TARGET_ALTIVEC_ABI ? 16 : 8))
12857 static bool warned_for_pass_big_vectors = false;
12858 if (TARGET_DEBUG_ARG)
12859 fprintf (stderr, "function_arg_pass_by_reference: synthetic vector\n");
12860 if (!warned_for_pass_big_vectors)
12862 warning (OPT_Wpsabi, "GCC vector passed by reference: "
12863 "non-standard ABI extension with no compatibility guarantee");
12864 warned_for_pass_big_vectors = true;
12866 return 1;
12869 return 0;
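/* Illustration (hypothetical declarations):

     struct s { int a, b; };
     typedef int v8si __attribute__ ((vector_size (32)));
     void f (struct s, v8si);

   Under V.4, 's' is passed by reference; the 32-byte synthetic
   vector is passed by reference under every ABI, with the one-shot
   -Wpsabi warning above the first time such a type is seen.  */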
12872 /* Process parameter of type TYPE after ARGS_SO_FAR parameters were
12873 already processed. Return true if the parameter must be passed
12874 (fully or partially) on the stack. */
12876 static bool
12877 rs6000_parm_needs_stack (cumulative_args_t args_so_far, tree type)
12879 machine_mode mode;
12880 int unsignedp;
12881 rtx entry_parm;
12883 /* Catch errors. */
12884 if (type == NULL || type == error_mark_node)
12885 return true;
12887 /* Handle types with no storage requirement. */
12888 if (TYPE_MODE (type) == VOIDmode)
12889 return false;
12891 /* Handle complex types. Each recursive call, when it does not
return true, also advances ARGS_SO_FAR past one component, so the
second, identical-looking call really checks the second component. */
12892 if (TREE_CODE (type) == COMPLEX_TYPE)
12893 return (rs6000_parm_needs_stack (args_so_far, TREE_TYPE (type))
12894 || rs6000_parm_needs_stack (args_so_far, TREE_TYPE (type)));
12896 /* Handle transparent aggregates. */
12897 if ((TREE_CODE (type) == UNION_TYPE || TREE_CODE (type) == RECORD_TYPE)
12898 && TYPE_TRANSPARENT_AGGR (type))
12899 type = TREE_TYPE (first_field (type));
12901 /* See if this arg was passed by invisible reference. */
12902 if (pass_by_reference (get_cumulative_args (args_so_far),
12903 TYPE_MODE (type), type, true))
12904 type = build_pointer_type (type);
12906 /* Find mode as it is passed by the ABI. */
12907 unsignedp = TYPE_UNSIGNED (type);
12908 mode = promote_mode (type, TYPE_MODE (type), &unsignedp);
12910 /* If we must pass in stack, we need a stack. */
12911 if (rs6000_must_pass_in_stack (mode, type))
12912 return true;
12914 /* If there is no incoming register, we need a stack. */
12915 entry_parm = rs6000_function_arg (args_so_far, mode, type, true);
12916 if (entry_parm == NULL)
12917 return true;
12919 /* Likewise if we need to pass both in registers and on the stack. */
12920 if (GET_CODE (entry_parm) == PARALLEL
12921 && XEXP (XVECEXP (entry_parm, 0, 0), 0) == NULL_RTX)
12922 return true;
12924 /* Also true if we're partially in registers and partially not. */
12925 if (rs6000_arg_partial_bytes (args_so_far, mode, type, true) != 0)
12926 return true;
12928 /* Update info on where next arg arrives in registers. */
12929 rs6000_function_arg_advance (args_so_far, mode, type, true);
12930 return false;
12933 /* Return true if FUN has no prototype, has a variable argument
12934 list, or passes any parameter in memory. */
12936 static bool
12937 rs6000_function_parms_need_stack (tree fun, bool incoming)
12939 tree fntype, result;
12940 CUMULATIVE_ARGS args_so_far_v;
12941 cumulative_args_t args_so_far;
12943 if (!fun)
12944 /* Must be a libcall, all of which only use reg parms. */
12945 return false;
12947 fntype = fun;
12948 if (!TYPE_P (fun))
12949 fntype = TREE_TYPE (fun);
12951 /* Varargs functions need the parameter save area. */
12952 if ((!incoming && !prototype_p (fntype)) || stdarg_p (fntype))
12953 return true;
12955 INIT_CUMULATIVE_INCOMING_ARGS (args_so_far_v, fntype, NULL_RTX);
12956 args_so_far = pack_cumulative_args (&args_so_far_v);
12958 /* When incoming, we will have been passed the function decl.
12959 It is necessary to use the decl to handle K&R style functions,
12960 where TYPE_ARG_TYPES may not be available. */
12961 if (incoming)
12963 gcc_assert (DECL_P (fun));
12964 result = DECL_RESULT (fun);
12966 else
12967 result = TREE_TYPE (fntype);
12969 if (result && aggregate_value_p (result, fntype))
12971 if (!TYPE_P (result))
12972 result = TREE_TYPE (result);
12973 result = build_pointer_type (result);
12974 rs6000_parm_needs_stack (args_so_far, result);
12977 if (incoming)
12979 tree parm;
12981 for (parm = DECL_ARGUMENTS (fun);
12982 parm && parm != void_list_node;
12983 parm = TREE_CHAIN (parm))
12984 if (rs6000_parm_needs_stack (args_so_far, TREE_TYPE (parm)))
12985 return true;
12987 else
12989 function_args_iterator args_iter;
12990 tree arg_type;
12992 FOREACH_FUNCTION_ARGS (fntype, arg_type, args_iter)
12993 if (rs6000_parm_needs_stack (args_so_far, arg_type))
12994 return true;
12997 return false;
13000 /* Return the size of the REG_PARM_STACK_SPACE area for FUN. This is
13001 usually a constant depending on the ABI. However, in the ELFv2 ABI
13002 the register parameter area is optional when calling a function that
13003 has a prototype in scope, has no variable argument list, and passes
13004 all parameters in registers. */
13006 static int
13007 rs6000_reg_parm_stack_space (tree fun, bool incoming)
13009 int reg_parm_stack_space;
13011 switch (DEFAULT_ABI)
13013 default:
13014 reg_parm_stack_space = 0;
13015 break;
13017 case ABI_AIX:
13018 case ABI_DARWIN:
13019 reg_parm_stack_space = TARGET_64BIT ? 64 : 32;
13020 break;
13022 case ABI_ELFv2:
13023 /* ??? Recomputing this every time is a bit expensive. Is there
13024 a place to cache this information? */
13025 if (rs6000_function_parms_need_stack (fun, incoming))
13026 reg_parm_stack_space = TARGET_64BIT ? 64 : 32;
13027 else
13028 reg_parm_stack_space = 0;
13029 break;
13032 return reg_parm_stack_space;
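/* For instance (hypothetical declaration): given

     extern int f (int, int);

   an ELFv2 caller of f (1, 2) passes everything in r3 and r4 and may
   omit the parameter save area entirely, whereas an AIX or Darwin
   caller always reserves the full 64 (64-bit) or 32 (32-bit) bytes.  */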
13035 static void
13036 rs6000_move_block_from_reg (int regno, rtx x, int nregs)
13038 int i;
13039 machine_mode reg_mode = TARGET_32BIT ? SImode : DImode;
13041 if (nregs == 0)
13042 return;
13044 for (i = 0; i < nregs; i++)
13046 rtx tem = adjust_address_nv (x, reg_mode, i * GET_MODE_SIZE (reg_mode));
13047 if (reload_completed)
13049 if (! strict_memory_address_p (reg_mode, XEXP (tem, 0)))
13050 tem = NULL_RTX;
13051 else
13052 tem = simplify_gen_subreg (reg_mode, x, BLKmode,
13053 i * GET_MODE_SIZE (reg_mode));
13055 else
13056 tem = replace_equiv_address (tem, XEXP (tem, 0));
13058 gcc_assert (tem);
13060 emit_move_insn (tem, gen_rtx_REG (reg_mode, regno + i));
13064 /* Perform any actions needed for a function that is receiving a
13065 variable number of arguments.
13067 CUM is as above.
13069 MODE and TYPE are the mode and type of the current parameter.
13071 PRETEND_SIZE is a variable that should be set to the amount of stack
13072 that must be pushed by the prolog to pretend that our caller pushed
13073 it.
13075 Normally, this macro will push all remaining incoming registers on the
13076 stack and set PRETEND_SIZE to the length of the registers pushed. */
13078 static void
13079 setup_incoming_varargs (cumulative_args_t cum, machine_mode mode,
13080 tree type, int *pretend_size ATTRIBUTE_UNUSED,
13081 int no_rtl)
13083 CUMULATIVE_ARGS next_cum;
13084 int reg_size = TARGET_32BIT ? 4 : 8;
13085 rtx save_area = NULL_RTX, mem;
13086 int first_reg_offset;
13087 alias_set_type set;
13089 /* Skip the last named argument. */
13090 next_cum = *get_cumulative_args (cum);
13091 rs6000_function_arg_advance_1 (&next_cum, mode, type, true, 0);
13093 if (DEFAULT_ABI == ABI_V4)
13095 first_reg_offset = next_cum.sysv_gregno - GP_ARG_MIN_REG;
13097 if (! no_rtl)
13099 int gpr_reg_num = 0, gpr_size = 0, fpr_size = 0;
13100 HOST_WIDE_INT offset = 0;
13102 /* Try to optimize the size of the varargs save area.
13103 The ABI requires that ap.reg_save_area is doubleword
13104 aligned, but we don't need to allocate space for all
13105 the bytes, only those to which we actually will save
13106 anything. */
13107 if (cfun->va_list_gpr_size && first_reg_offset < GP_ARG_NUM_REG)
13108 gpr_reg_num = GP_ARG_NUM_REG - first_reg_offset;
13109 if (TARGET_HARD_FLOAT
13110 && next_cum.fregno <= FP_ARG_V4_MAX_REG
13111 && cfun->va_list_fpr_size)
13113 if (gpr_reg_num)
13114 fpr_size = (next_cum.fregno - FP_ARG_MIN_REG)
13115 * UNITS_PER_FP_WORD;
13116 if (cfun->va_list_fpr_size
13117 < FP_ARG_V4_MAX_REG + 1 - next_cum.fregno)
13118 fpr_size += cfun->va_list_fpr_size * UNITS_PER_FP_WORD;
13119 else
13120 fpr_size += (FP_ARG_V4_MAX_REG + 1 - next_cum.fregno)
13121 * UNITS_PER_FP_WORD;
13123 if (gpr_reg_num)
13125 offset = -((first_reg_offset * reg_size) & ~7);
13126 if (!fpr_size && gpr_reg_num > cfun->va_list_gpr_size)
13128 gpr_reg_num = cfun->va_list_gpr_size;
13129 if (reg_size == 4 && (first_reg_offset & 1))
13130 gpr_reg_num++;
13132 gpr_size = (gpr_reg_num * reg_size + 7) & ~7;
13134 else if (fpr_size)
13135 offset = - (int) (next_cum.fregno - FP_ARG_MIN_REG)
13136 * UNITS_PER_FP_WORD
13137 - (int) (GP_ARG_NUM_REG * reg_size);
13139 if (gpr_size + fpr_size)
13141 rtx reg_save_area
13142 = assign_stack_local (BLKmode, gpr_size + fpr_size, 64);
13143 gcc_assert (GET_CODE (reg_save_area) == MEM);
13144 reg_save_area = XEXP (reg_save_area, 0);
13145 if (GET_CODE (reg_save_area) == PLUS)
13147 gcc_assert (XEXP (reg_save_area, 0)
13148 == virtual_stack_vars_rtx);
13149 gcc_assert (GET_CODE (XEXP (reg_save_area, 1)) == CONST_INT);
13150 offset += INTVAL (XEXP (reg_save_area, 1));
13152 else
13153 gcc_assert (reg_save_area == virtual_stack_vars_rtx);
13156 cfun->machine->varargs_save_offset = offset;
13157 save_area = plus_constant (Pmode, virtual_stack_vars_rtx, offset);
13160 else
13162 first_reg_offset = next_cum.words;
13163 save_area = crtl->args.internal_arg_pointer;
13165 if (targetm.calls.must_pass_in_stack (mode, type))
13166 first_reg_offset += rs6000_arg_size (TYPE_MODE (type), type);
13169 set = get_varargs_alias_set ();
13170 if (! no_rtl && first_reg_offset < GP_ARG_NUM_REG
13171 && cfun->va_list_gpr_size)
13173 int n_gpr, nregs = GP_ARG_NUM_REG - first_reg_offset;
13175 if (va_list_gpr_counter_field)
13176 /* V4 va_list_gpr_size counts number of registers needed. */
13177 n_gpr = cfun->va_list_gpr_size;
13178 else
13179 /* char * va_list instead counts number of bytes needed. */
13180 n_gpr = (cfun->va_list_gpr_size + reg_size - 1) / reg_size;
13182 if (nregs > n_gpr)
13183 nregs = n_gpr;
13185 mem = gen_rtx_MEM (BLKmode,
13186 plus_constant (Pmode, save_area,
13187 first_reg_offset * reg_size));
13188 MEM_NOTRAP_P (mem) = 1;
13189 set_mem_alias_set (mem, set);
13190 set_mem_align (mem, BITS_PER_WORD);
13192 rs6000_move_block_from_reg (GP_ARG_MIN_REG + first_reg_offset, mem,
13193 nregs);
13196 /* Save FP registers if needed. */
13197 if (DEFAULT_ABI == ABI_V4
13198 && TARGET_HARD_FLOAT
13199 && ! no_rtl
13200 && next_cum.fregno <= FP_ARG_V4_MAX_REG
13201 && cfun->va_list_fpr_size)
13203 int fregno = next_cum.fregno, nregs;
13204 rtx cr1 = gen_rtx_REG (CCmode, CR1_REGNO);
13205 rtx lab = gen_label_rtx ();
13206 int off = (GP_ARG_NUM_REG * reg_size) + ((fregno - FP_ARG_MIN_REG)
13207 * UNITS_PER_FP_WORD);
13209 emit_jump_insn
13210 (gen_rtx_SET (pc_rtx,
13211 gen_rtx_IF_THEN_ELSE (VOIDmode,
13212 gen_rtx_NE (VOIDmode, cr1,
13213 const0_rtx),
13214 gen_rtx_LABEL_REF (VOIDmode, lab),
13215 pc_rtx)));
13217 for (nregs = 0;
13218 fregno <= FP_ARG_V4_MAX_REG && nregs < cfun->va_list_fpr_size;
13219 fregno++, off += UNITS_PER_FP_WORD, nregs++)
13221 mem = gen_rtx_MEM ((TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT)
13222 ? DFmode : SFmode,
13223 plus_constant (Pmode, save_area, off));
13224 MEM_NOTRAP_P (mem) = 1;
13225 set_mem_alias_set (mem, set);
13226 set_mem_align (mem, GET_MODE_ALIGNMENT (
13227 (TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT)
13228 ? DFmode : SFmode));
13229 emit_move_insn (mem, gen_rtx_REG (
13230 (TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT)
13231 ? DFmode : SFmode, fregno));
13234 emit_label (lab);
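/* Layout sketch of the V.4 register save area set up above: at most
   GP_ARG_NUM_REG * reg_size = 8 * 4 = 32 bytes for r3-r10, followed
   by at most 8 * UNITS_PER_FP_WORD = 64 bytes for f1-f8, i.e. 96
   bytes in all; the va_list_gpr_size/va_list_fpr_size counters let
   us shrink either part when fewer registers can ever be read.  */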
13238 /* Create the va_list data type. */
13240 static tree
13241 rs6000_build_builtin_va_list (void)
13243 tree f_gpr, f_fpr, f_res, f_ovf, f_sav, record, type_decl;
13245 /* For AIX, prefer 'char *' because that's what the system
13246 header files like. */
13247 if (DEFAULT_ABI != ABI_V4)
13248 return build_pointer_type (char_type_node);
13250 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
13251 type_decl = build_decl (BUILTINS_LOCATION, TYPE_DECL,
13252 get_identifier ("__va_list_tag"), record);
13254 f_gpr = build_decl (BUILTINS_LOCATION, FIELD_DECL, get_identifier ("gpr"),
13255 unsigned_char_type_node);
13256 f_fpr = build_decl (BUILTINS_LOCATION, FIELD_DECL, get_identifier ("fpr"),
13257 unsigned_char_type_node);
13258 /* Give the two bytes of padding a name, so that -Wpadded won't warn on
13259 every user file. */
13260 f_res = build_decl (BUILTINS_LOCATION, FIELD_DECL,
13261 get_identifier ("reserved"), short_unsigned_type_node);
13262 f_ovf = build_decl (BUILTINS_LOCATION, FIELD_DECL,
13263 get_identifier ("overflow_arg_area"),
13264 ptr_type_node);
13265 f_sav = build_decl (BUILTINS_LOCATION, FIELD_DECL,
13266 get_identifier ("reg_save_area"),
13267 ptr_type_node);
13269 va_list_gpr_counter_field = f_gpr;
13270 va_list_fpr_counter_field = f_fpr;
13272 DECL_FIELD_CONTEXT (f_gpr) = record;
13273 DECL_FIELD_CONTEXT (f_fpr) = record;
13274 DECL_FIELD_CONTEXT (f_res) = record;
13275 DECL_FIELD_CONTEXT (f_ovf) = record;
13276 DECL_FIELD_CONTEXT (f_sav) = record;
13278 TYPE_STUB_DECL (record) = type_decl;
13279 TYPE_NAME (record) = type_decl;
13280 TYPE_FIELDS (record) = f_gpr;
13281 DECL_CHAIN (f_gpr) = f_fpr;
13282 DECL_CHAIN (f_fpr) = f_res;
13283 DECL_CHAIN (f_res) = f_ovf;
13284 DECL_CHAIN (f_ovf) = f_sav;
13286 layout_type (record);
13288 /* The correct type is an array type of one element. */
13289 return build_array_type (record, build_index_type (size_zero_node));
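/* In C terms, the V.4 type built above is roughly (a sketch):

     typedef struct __va_list_tag {
       unsigned char gpr;          // index of next saved GPR, 0..8
       unsigned char fpr;          // index of next saved FPR, 0..8
       unsigned short reserved;    // the named padding
       void *overflow_arg_area;    // next argument on the stack
       void *reg_save_area;        // base of saved r3-r10 / f1-f8
     } __builtin_va_list[1];       // the one-element array returned
*/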
13292 /* Implement va_start. */
13294 static void
13295 rs6000_va_start (tree valist, rtx nextarg)
13297 HOST_WIDE_INT words, n_gpr, n_fpr;
13298 tree f_gpr, f_fpr, f_res, f_ovf, f_sav;
13299 tree gpr, fpr, ovf, sav, t;
13301 /* Only SVR4 needs something special. */
13302 if (DEFAULT_ABI != ABI_V4)
13304 std_expand_builtin_va_start (valist, nextarg);
13305 return;
13308 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
13309 f_fpr = DECL_CHAIN (f_gpr);
13310 f_res = DECL_CHAIN (f_fpr);
13311 f_ovf = DECL_CHAIN (f_res);
13312 f_sav = DECL_CHAIN (f_ovf);
13314 valist = build_simple_mem_ref (valist);
13315 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
13316 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), unshare_expr (valist),
13317 f_fpr, NULL_TREE);
13318 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), unshare_expr (valist),
13319 f_ovf, NULL_TREE);
13320 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), unshare_expr (valist),
13321 f_sav, NULL_TREE);
13323 /* Count number of gp and fp argument registers used. */
13324 words = crtl->args.info.words;
13325 n_gpr = MIN (crtl->args.info.sysv_gregno - GP_ARG_MIN_REG,
13326 GP_ARG_NUM_REG);
13327 n_fpr = MIN (crtl->args.info.fregno - FP_ARG_MIN_REG,
13328 FP_ARG_NUM_REG);
13330 if (TARGET_DEBUG_ARG)
13331 fprintf (stderr, "va_start: words = " HOST_WIDE_INT_PRINT_DEC", n_gpr = "
13332 HOST_WIDE_INT_PRINT_DEC", n_fpr = " HOST_WIDE_INT_PRINT_DEC"\n",
13333 words, n_gpr, n_fpr);
13335 if (cfun->va_list_gpr_size)
13337 t = build2 (MODIFY_EXPR, TREE_TYPE (gpr), gpr,
13338 build_int_cst (NULL_TREE, n_gpr));
13339 TREE_SIDE_EFFECTS (t) = 1;
13340 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
13343 if (cfun->va_list_fpr_size)
13345 t = build2 (MODIFY_EXPR, TREE_TYPE (fpr), fpr,
13346 build_int_cst (NULL_TREE, n_fpr));
13347 TREE_SIDE_EFFECTS (t) = 1;
13348 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
13350 #ifdef HAVE_AS_GNU_ATTRIBUTE
13351 if (call_ABI_of_interest (cfun->decl))
13352 rs6000_passes_float = true;
13353 #endif
13356 /* Find the overflow area. */
13357 t = make_tree (TREE_TYPE (ovf), crtl->args.internal_arg_pointer);
13358 if (words != 0)
13359 t = fold_build_pointer_plus_hwi (t, words * MIN_UNITS_PER_WORD);
13360 t = build2 (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
13361 TREE_SIDE_EFFECTS (t) = 1;
13362 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
13364 /* If there were no va_arg invocations, don't set up the register
13365 save area. */
13366 if (!cfun->va_list_gpr_size
13367 && !cfun->va_list_fpr_size
13368 && n_gpr < GP_ARG_NUM_REG
13369 && n_fpr < FP_ARG_V4_MAX_REG)
13370 return;
13372 /* Find the register save area. */
13373 t = make_tree (TREE_TYPE (sav), virtual_stack_vars_rtx);
13374 if (cfun->machine->varargs_save_offset)
13375 t = fold_build_pointer_plus_hwi (t, cfun->machine->varargs_save_offset);
13376 t = build2 (MODIFY_EXPR, TREE_TYPE (sav), sav, t);
13377 TREE_SIDE_EFFECTS (t) = 1;
13378 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
13381 /* Implement va_arg. */
13383 static tree
13384 rs6000_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p,
13385 gimple_seq *post_p)
13387 tree f_gpr, f_fpr, f_res, f_ovf, f_sav;
13388 tree gpr, fpr, ovf, sav, reg, t, u;
13389 int size, rsize, n_reg, sav_ofs, sav_scale;
13390 tree lab_false, lab_over, addr;
13391 int align;
13392 tree ptrtype = build_pointer_type_for_mode (type, ptr_mode, true);
13393 int regalign = 0;
13394 gimple *stmt;
13396 if (pass_by_reference (NULL, TYPE_MODE (type), type, false))
13398 t = rs6000_gimplify_va_arg (valist, ptrtype, pre_p, post_p);
13399 return build_va_arg_indirect_ref (t);
13402 /* We need to deal with the fact that the darwin ppc64 ABI is defined by an
13403 earlier version of gcc, with the property that it always applied alignment
13404 adjustments to the va-args (even for zero-sized types). The cheapest way
13405 to deal with this is to replicate the effect of the part of
13406 std_gimplify_va_arg_expr that carries out the align adjust, for the case
13407 of relevance.
13408 We don't need to check for pass-by-reference because of the test above.
13409 We can return a simplified answer, since we know there's no offset to add. */
13411 if (((TARGET_MACHO
13412 && rs6000_darwin64_abi)
13413 || DEFAULT_ABI == ABI_ELFv2
13414 || (DEFAULT_ABI == ABI_AIX && !rs6000_compat_align_parm))
13415 && integer_zerop (TYPE_SIZE (type)))
13417 unsigned HOST_WIDE_INT align, boundary;
13418 tree valist_tmp = get_initialized_tmp_var (valist, pre_p, NULL);
13419 align = PARM_BOUNDARY / BITS_PER_UNIT;
13420 boundary = rs6000_function_arg_boundary (TYPE_MODE (type), type);
13421 if (boundary > MAX_SUPPORTED_STACK_ALIGNMENT)
13422 boundary = MAX_SUPPORTED_STACK_ALIGNMENT;
13423 boundary /= BITS_PER_UNIT;
13424 if (boundary > align)
13426 tree t;
13427 /* This updates arg ptr by the amount that would be necessary
13428 to align the zero-sized (but not zero-alignment) item. */
13429 t = build2 (MODIFY_EXPR, TREE_TYPE (valist), valist_tmp,
13430 fold_build_pointer_plus_hwi (valist_tmp, boundary - 1));
13431 gimplify_and_add (t, pre_p);
13433 t = fold_convert (sizetype, valist_tmp);
13434 t = build2 (MODIFY_EXPR, TREE_TYPE (valist), valist_tmp,
13435 fold_convert (TREE_TYPE (valist),
13436 fold_build2 (BIT_AND_EXPR, sizetype, t,
13437 size_int (-boundary))));
13438 t = build2 (MODIFY_EXPR, TREE_TYPE (valist), valist, t);
13439 gimplify_and_add (t, pre_p);
13441 /* Since it is zero-sized there's no increment for the item itself. */
13442 valist_tmp = fold_convert (build_pointer_type (type), valist_tmp);
13443 return build_va_arg_indirect_ref (valist_tmp);
13446 if (DEFAULT_ABI != ABI_V4)
13448 if (targetm.calls.split_complex_arg && TREE_CODE (type) == COMPLEX_TYPE)
13450 tree elem_type = TREE_TYPE (type);
13451 machine_mode elem_mode = TYPE_MODE (elem_type);
13452 int elem_size = GET_MODE_SIZE (elem_mode);
13454 if (elem_size < UNITS_PER_WORD)
13456 tree real_part, imag_part;
13457 gimple_seq post = NULL;
13459 real_part = rs6000_gimplify_va_arg (valist, elem_type, pre_p,
13460 &post);
13461 /* Copy the value into a temporary, lest the formal temporary
13462 be reused out from under us. */
13463 real_part = get_initialized_tmp_var (real_part, pre_p, &post);
13464 gimple_seq_add_seq (pre_p, post);
13466 imag_part = rs6000_gimplify_va_arg (valist, elem_type, pre_p,
13467 post_p);
13469 return build2 (COMPLEX_EXPR, type, real_part, imag_part);
13473 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
13476 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
13477 f_fpr = DECL_CHAIN (f_gpr);
13478 f_res = DECL_CHAIN (f_fpr);
13479 f_ovf = DECL_CHAIN (f_res);
13480 f_sav = DECL_CHAIN (f_ovf);
13482 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
13483 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), unshare_expr (valist),
13484 f_fpr, NULL_TREE);
13485 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), unshare_expr (valist),
13486 f_ovf, NULL_TREE);
13487 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), unshare_expr (valist),
13488 f_sav, NULL_TREE);
13490 size = int_size_in_bytes (type);
13491 rsize = (size + 3) / 4;
13492 int pad = 4 * rsize - size;
13493 align = 1;
13495 machine_mode mode = TYPE_MODE (type);
13496 if (abi_v4_pass_in_fpr (mode))
13498 /* FP args go in FP registers, if present. */
13499 reg = fpr;
13500 n_reg = (size + 7) / 8;
13501 sav_ofs = ((TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT) ? 8 : 4) * 4;
13502 sav_scale = ((TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT) ? 8 : 4);
13503 if (mode != SFmode && mode != SDmode)
13504 align = 8;
13506 else
13508 /* Otherwise into GP registers. */
13509 reg = gpr;
13510 n_reg = rsize;
13511 sav_ofs = 0;
13512 sav_scale = 4;
13513 if (n_reg == 2)
13514 align = 8;
13517 /* Pull the value out of the saved registers.... */
13519 lab_over = NULL;
13520 addr = create_tmp_var (ptr_type_node, "addr");
13522 /* AltiVec vectors never go in registers when -mabi=altivec. */
13523 if (TARGET_ALTIVEC_ABI && ALTIVEC_VECTOR_MODE (mode))
13524 align = 16;
13525 else
13527 lab_false = create_artificial_label (input_location);
13528 lab_over = create_artificial_label (input_location);
13530 /* Long long is aligned in the registers. As is any other 2-gpr
13531 item, such as complex int, due to a historical mistake. */
13532 u = reg;
13533 if (n_reg == 2 && reg == gpr)
13535 regalign = 1;
13536 u = build2 (BIT_AND_EXPR, TREE_TYPE (reg), unshare_expr (reg),
13537 build_int_cst (TREE_TYPE (reg), n_reg - 1));
13538 u = build2 (POSTINCREMENT_EXPR, TREE_TYPE (reg),
13539 unshare_expr (reg), u);
13541 /* _Decimal128 is passed in even/odd fpr pairs; the stored
13542 reg number is 0 for f1, so we want to make it odd. */
13543 else if (reg == fpr && mode == TDmode)
13545 t = build2 (BIT_IOR_EXPR, TREE_TYPE (reg), unshare_expr (reg),
13546 build_int_cst (TREE_TYPE (reg), 1));
13547 u = build2 (MODIFY_EXPR, void_type_node, unshare_expr (reg), t);
13550 t = fold_convert (TREE_TYPE (reg), size_int (8 - n_reg + 1));
13551 t = build2 (GE_EXPR, boolean_type_node, u, t);
13552 u = build1 (GOTO_EXPR, void_type_node, lab_false);
13553 t = build3 (COND_EXPR, void_type_node, t, u, NULL_TREE);
13554 gimplify_and_add (t, pre_p);
13556 t = sav;
13557 if (sav_ofs)
13558 t = fold_build_pointer_plus_hwi (sav, sav_ofs);
13560 u = build2 (POSTINCREMENT_EXPR, TREE_TYPE (reg), unshare_expr (reg),
13561 build_int_cst (TREE_TYPE (reg), n_reg));
13562 u = fold_convert (sizetype, u);
13563 u = build2 (MULT_EXPR, sizetype, u, size_int (sav_scale));
13564 t = fold_build_pointer_plus (t, u);
13566 /* _Decimal32 varargs are located in the second word of the 64-bit
13567 FP register for 32-bit binaries. */
13568 if (TARGET_32BIT && TARGET_HARD_FLOAT && mode == SDmode)
13569 t = fold_build_pointer_plus_hwi (t, size);
13571 /* Args are passed right-aligned. */
13572 if (BYTES_BIG_ENDIAN)
13573 t = fold_build_pointer_plus_hwi (t, pad);
13575 gimplify_assign (addr, t, pre_p);
13577 gimple_seq_add_stmt (pre_p, gimple_build_goto (lab_over));
13579 stmt = gimple_build_label (lab_false);
13580 gimple_seq_add_stmt (pre_p, stmt);
13582 if ((n_reg == 2 && !regalign) || n_reg > 2)
13584 /* Ensure that we don't find any more args in regs.
13585 Alignment has already been taken care of for the special cases. */
13586 gimplify_assign (reg, build_int_cst (TREE_TYPE (reg), 8), pre_p);
13590 /* ... otherwise out of the overflow area. */
13592 /* Care for on-stack alignment if needed. */
13593 t = ovf;
13594 if (align != 1)
13596 t = fold_build_pointer_plus_hwi (t, align - 1);
13597 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
13598 build_int_cst (TREE_TYPE (t), -align));
13601 /* Args are passed right-aligned. */
13602 if (BYTES_BIG_ENDIAN)
13603 t = fold_build_pointer_plus_hwi (t, pad);
13605 gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue);
13607 gimplify_assign (unshare_expr (addr), t, pre_p);
13609 t = fold_build_pointer_plus_hwi (t, size);
13610 gimplify_assign (unshare_expr (ovf), t, pre_p);
13612 if (lab_over)
13614 stmt = gimple_build_label (lab_over);
13615 gimple_seq_add_stmt (pre_p, stmt);
13618 if (STRICT_ALIGNMENT
13619 && (TYPE_ALIGN (type)
13620 > (unsigned) BITS_PER_UNIT * (align < 4 ? 4 : align)))
13622 /* The value (of type complex double, for example) may not be
13623 aligned in memory in the saved registers, so copy via a
13624 temporary. (This is the same code as used for SPARC.) */
13625 tree tmp = create_tmp_var (type, "va_arg_tmp");
13626 tree dest_addr = build_fold_addr_expr (tmp);
13628 tree copy = build_call_expr (builtin_decl_implicit (BUILT_IN_MEMCPY),
13629 3, dest_addr, addr, size_int (rsize * 4));
13631 gimplify_and_add (copy, pre_p);
13632 addr = dest_addr;
13635 addr = fold_convert (ptrtype, addr);
13636 return build_va_arg_indirect_ref (addr);
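/* For va_arg (ap, double) the V.4 gimplification above amounts to
   (pseudo-code, a sketch):

     if (ap->fpr >= 8) goto lab_false;
     addr = ap->reg_save_area + 32 + ap->fpr * 8;   // sav_ofs 32, sav_scale 8
     ap->fpr += 1;                                  // n_reg == 1
     goto lab_over;
   lab_false:
     addr = (ap->overflow_arg_area + 7) & -8;       // stack align == 8
     ap->overflow_arg_area = addr + 8;
   lab_over:
     result = *(double *) addr;  */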
13639 /* Builtins. */
13641 static void
13642 def_builtin (const char *name, tree type, enum rs6000_builtins code)
13644 tree t;
13645 unsigned classify = rs6000_builtin_info[(int)code].attr;
13646 const char *attr_string = "";
13648 gcc_assert (name != NULL);
13649 gcc_assert (IN_RANGE ((int)code, 0, (int)RS6000_BUILTIN_COUNT));
13651 if (rs6000_builtin_decls[(int)code])
13652 fatal_error (input_location,
13653 "internal error: builtin function %s already processed", name);
13655 rs6000_builtin_decls[(int)code] = t =
13656 add_builtin_function (name, type, (int)code, BUILT_IN_MD, NULL, NULL_TREE);
13658 /* Set any special attributes. */
13659 if ((classify & RS6000_BTC_CONST) != 0)
13661 /* const function, function only depends on the inputs. */
13662 TREE_READONLY (t) = 1;
13663 TREE_NOTHROW (t) = 1;
13664 attr_string = ", const";
13666 else if ((classify & RS6000_BTC_PURE) != 0)
13668 /* pure function, function can read global memory, but does not set any
13669 external state. */
13670 DECL_PURE_P (t) = 1;
13671 TREE_NOTHROW (t) = 1;
13672 attr_string = ", pure";
13674 else if ((classify & RS6000_BTC_FP) != 0)
13676 /* Function is a math function. If rounding mode is on, then treat the
13677 function as not reading global memory, but it can have arbitrary side
13678 effects. If it is off, then assume the function is a const function.
13679 This mimics the ATTR_MATHFN_FPROUNDING attribute in
13680 builtin-attribute.def that is used for the math functions. */
13681 TREE_NOTHROW (t) = 1;
13682 if (flag_rounding_math)
13684 DECL_PURE_P (t) = 1;
13685 DECL_IS_NOVOPS (t) = 1;
13686 attr_string = ", fp, pure";
13688 else
13690 TREE_READONLY (t) = 1;
13691 attr_string = ", fp, const";
13694 else if ((classify & RS6000_BTC_ATTR_MASK) != 0)
13695 gcc_unreachable ();
13697 if (TARGET_DEBUG_BUILTIN)
13698 fprintf (stderr, "rs6000_builtin, code = %4d, %s%s\n",
13699 (int)code, name, attr_string);
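/* Callers register a builtin with a call shaped like (names
   illustrative only):

     def_builtin ("__builtin_altivec_mtvscr", void_ftype_v4si,
                  ALTIVEC_BUILTIN_MTVSCR);

   and the RS6000_BTC_* bits in the builtin's attr field select which
   of the attribute branches above apply to the new decl.  */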
13702 /* Simple ternary operations: VECd = foo (VECa, VECb, VECc). */
13704 #undef RS6000_BUILTIN_0
13705 #undef RS6000_BUILTIN_1
13706 #undef RS6000_BUILTIN_2
13707 #undef RS6000_BUILTIN_3
13708 #undef RS6000_BUILTIN_A
13709 #undef RS6000_BUILTIN_D
13710 #undef RS6000_BUILTIN_H
13711 #undef RS6000_BUILTIN_P
13712 #undef RS6000_BUILTIN_Q
13713 #undef RS6000_BUILTIN_X
13715 #define RS6000_BUILTIN_0(ENUM, NAME, MASK, ATTR, ICODE)
13716 #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE)
13717 #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE)
13718 #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE) \
13719 { MASK, ICODE, NAME, ENUM },
13721 #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE)
13722 #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE)
13723 #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE)
13724 #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE)
13725 #define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE)
13726 #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)
13728 static const struct builtin_description bdesc_3arg[] =
13730 #include "rs6000-builtin.def"
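/* The table above uses the "X-macro" technique: every entry in
   rs6000-builtin.def is written as RS6000_BUILTIN_<kind> (...), and
   each bdesc_* table redefines exactly one of the ten macros to emit
   an initializer while the other nine expand to nothing.  Including
   the same .def file once per table therefore extracts one class of
   builtins each time.  */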
13733 /* DST operations: void foo (void *, const int, const char). */
13735 #undef RS6000_BUILTIN_0
13736 #undef RS6000_BUILTIN_1
13737 #undef RS6000_BUILTIN_2
13738 #undef RS6000_BUILTIN_3
13739 #undef RS6000_BUILTIN_A
13740 #undef RS6000_BUILTIN_D
13741 #undef RS6000_BUILTIN_H
13742 #undef RS6000_BUILTIN_P
13743 #undef RS6000_BUILTIN_Q
13744 #undef RS6000_BUILTIN_X
13746 #define RS6000_BUILTIN_0(ENUM, NAME, MASK, ATTR, ICODE)
13747 #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE)
13748 #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE)
13749 #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE)
13750 #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE)
13751 #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE) \
13752 { MASK, ICODE, NAME, ENUM },
13754 #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE)
13755 #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE)
13756 #define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE)
13757 #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)
13759 static const struct builtin_description bdesc_dst[] =
13761 #include "rs6000-builtin.def"
13764 /* Simple binary operations: VECc = foo (VECa, VECb). */
13766 #undef RS6000_BUILTIN_0
13767 #undef RS6000_BUILTIN_1
13768 #undef RS6000_BUILTIN_2
13769 #undef RS6000_BUILTIN_3
13770 #undef RS6000_BUILTIN_A
13771 #undef RS6000_BUILTIN_D
13772 #undef RS6000_BUILTIN_H
13773 #undef RS6000_BUILTIN_P
13774 #undef RS6000_BUILTIN_Q
13775 #undef RS6000_BUILTIN_X
13777 #define RS6000_BUILTIN_0(ENUM, NAME, MASK, ATTR, ICODE)
13778 #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE)
13779 #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE) \
13780 { MASK, ICODE, NAME, ENUM },
13782 #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE)
13783 #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE)
13784 #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE)
13785 #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE)
13786 #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE)
13787 #define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE)
13788 #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)
13790 static const struct builtin_description bdesc_2arg[] =
13792 #include "rs6000-builtin.def"
13795 #undef RS6000_BUILTIN_0
13796 #undef RS6000_BUILTIN_1
13797 #undef RS6000_BUILTIN_2
13798 #undef RS6000_BUILTIN_3
13799 #undef RS6000_BUILTIN_A
13800 #undef RS6000_BUILTIN_D
13801 #undef RS6000_BUILTIN_H
13802 #undef RS6000_BUILTIN_P
13803 #undef RS6000_BUILTIN_Q
13804 #undef RS6000_BUILTIN_X
13806 #define RS6000_BUILTIN_0(ENUM, NAME, MASK, ATTR, ICODE)
13807 #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE)
13808 #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE)
13809 #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE)
13810 #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE)
13811 #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE)
13812 #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE)
13813 #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE) \
13814 { MASK, ICODE, NAME, ENUM },
13816 #define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE)
13817 #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)
13819 /* AltiVec predicates. */
13821 static const struct builtin_description bdesc_altivec_preds[] =
13823 #include "rs6000-builtin.def"
13826 /* PAIRED predicates. */
13827 #undef RS6000_BUILTIN_0
13828 #undef RS6000_BUILTIN_1
13829 #undef RS6000_BUILTIN_2
13830 #undef RS6000_BUILTIN_3
13831 #undef RS6000_BUILTIN_A
13832 #undef RS6000_BUILTIN_D
13833 #undef RS6000_BUILTIN_H
13834 #undef RS6000_BUILTIN_P
13835 #undef RS6000_BUILTIN_Q
13836 #undef RS6000_BUILTIN_X
13838 #define RS6000_BUILTIN_0(ENUM, NAME, MASK, ATTR, ICODE)
13839 #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE)
13840 #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE)
13841 #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE)
13842 #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE)
13843 #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE)
13844 #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE)
13845 #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE)
13846 #define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE) \
13847 { MASK, ICODE, NAME, ENUM },
13849 #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)
13851 static const struct builtin_description bdesc_paired_preds[] =
13853 #include "rs6000-builtin.def"
13856 /* ABS* operations. */
13858 #undef RS6000_BUILTIN_0
13859 #undef RS6000_BUILTIN_1
13860 #undef RS6000_BUILTIN_2
13861 #undef RS6000_BUILTIN_3
13862 #undef RS6000_BUILTIN_A
13863 #undef RS6000_BUILTIN_D
13864 #undef RS6000_BUILTIN_H
13865 #undef RS6000_BUILTIN_P
13866 #undef RS6000_BUILTIN_Q
13867 #undef RS6000_BUILTIN_X
13869 #define RS6000_BUILTIN_0(ENUM, NAME, MASK, ATTR, ICODE)
13870 #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE)
13871 #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE)
13872 #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE)
13873 #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE) \
13874 { MASK, ICODE, NAME, ENUM },
13876 #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE)
13877 #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE)
13878 #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE)
13879 #define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE)
13880 #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)
13882 static const struct builtin_description bdesc_abs[] =
13884 #include "rs6000-builtin.def"
13887 /* Simple unary operations: VECb = foo (unsigned literal) or VECb =
13888 foo (VECa). */
13890 #undef RS6000_BUILTIN_0
13891 #undef RS6000_BUILTIN_1
13892 #undef RS6000_BUILTIN_2
13893 #undef RS6000_BUILTIN_3
13894 #undef RS6000_BUILTIN_A
13895 #undef RS6000_BUILTIN_D
13896 #undef RS6000_BUILTIN_H
13897 #undef RS6000_BUILTIN_P
13898 #undef RS6000_BUILTIN_Q
13899 #undef RS6000_BUILTIN_X
13901 #define RS6000_BUILTIN_0(ENUM, NAME, MASK, ATTR, ICODE)
13902 #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE) \
13903 { MASK, ICODE, NAME, ENUM },
13905 #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE)
13906 #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE)
13907 #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE)
13908 #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE)
13909 #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE)
13910 #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE)
13911 #define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE)
13912 #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)
13914 static const struct builtin_description bdesc_1arg[] =
13916 #include "rs6000-builtin.def"
13919 /* Simple no-argument operations: result = __builtin_darn_32 () */
13921 #undef RS6000_BUILTIN_0
13922 #undef RS6000_BUILTIN_1
13923 #undef RS6000_BUILTIN_2
13924 #undef RS6000_BUILTIN_3
13925 #undef RS6000_BUILTIN_A
13926 #undef RS6000_BUILTIN_D
13927 #undef RS6000_BUILTIN_H
13928 #undef RS6000_BUILTIN_P
13929 #undef RS6000_BUILTIN_Q
13930 #undef RS6000_BUILTIN_X
13932 #define RS6000_BUILTIN_0(ENUM, NAME, MASK, ATTR, ICODE) \
13933 { MASK, ICODE, NAME, ENUM },
13935 #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE)
13936 #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE)
13937 #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE)
13938 #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE)
13939 #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE)
13940 #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE)
13941 #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE)
13942 #define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE)
13943 #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)
13945 static const struct builtin_description bdesc_0arg[] =
13947 #include "rs6000-builtin.def"
13950 /* HTM builtins. */
13951 #undef RS6000_BUILTIN_0
13952 #undef RS6000_BUILTIN_1
13953 #undef RS6000_BUILTIN_2
13954 #undef RS6000_BUILTIN_3
13955 #undef RS6000_BUILTIN_A
13956 #undef RS6000_BUILTIN_D
13957 #undef RS6000_BUILTIN_H
13958 #undef RS6000_BUILTIN_P
13959 #undef RS6000_BUILTIN_Q
13960 #undef RS6000_BUILTIN_X
13962 #define RS6000_BUILTIN_0(ENUM, NAME, MASK, ATTR, ICODE)
13963 #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE)
13964 #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE)
13965 #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE)
13966 #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE)
13967 #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE)
13968 #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE) \
13969 { MASK, ICODE, NAME, ENUM },
13971 #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE)
13972 #define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE)
13973 #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)
13975 static const struct builtin_description bdesc_htm[] =
13977 #include "rs6000-builtin.def"
13980 #undef RS6000_BUILTIN_0
13981 #undef RS6000_BUILTIN_1
13982 #undef RS6000_BUILTIN_2
13983 #undef RS6000_BUILTIN_3
13984 #undef RS6000_BUILTIN_A
13985 #undef RS6000_BUILTIN_D
13986 #undef RS6000_BUILTIN_H
13987 #undef RS6000_BUILTIN_P
13988 #undef RS6000_BUILTIN_Q
13990 /* Return true if a builtin function is overloaded. */
13991 bool
13992 rs6000_overloaded_builtin_p (enum rs6000_builtins fncode)
13994 return (rs6000_builtin_info[(int)fncode].attr & RS6000_BTC_OVERLOADED) != 0;
13997 const char *
13998 rs6000_overloaded_builtin_name (enum rs6000_builtins fncode)
14000 return rs6000_builtin_info[(int)fncode].name;
14003 /* Expand an expression EXP that calls a builtin without arguments. */
14004 static rtx
14005 rs6000_expand_zeroop_builtin (enum insn_code icode, rtx target)
14007 rtx pat;
14008 machine_mode tmode = insn_data[icode].operand[0].mode;
14010 if (icode == CODE_FOR_nothing)
14011 /* Builtin not supported on this processor. */
14012 return 0;
14014 if (target == 0
14015 || GET_MODE (target) != tmode
14016 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
14017 target = gen_reg_rtx (tmode);
14019 pat = GEN_FCN (icode) (target);
14020 if (! pat)
14021 return 0;
14022 emit_insn (pat);
14024 return target;
14028 static rtx
14029 rs6000_expand_mtfsf_builtin (enum insn_code icode, tree exp)
14031 rtx pat;
14032 tree arg0 = CALL_EXPR_ARG (exp, 0);
14033 tree arg1 = CALL_EXPR_ARG (exp, 1);
14034 rtx op0 = expand_normal (arg0);
14035 rtx op1 = expand_normal (arg1);
14036 machine_mode mode0 = insn_data[icode].operand[0].mode;
14037 machine_mode mode1 = insn_data[icode].operand[1].mode;
14039 if (icode == CODE_FOR_nothing)
14040 /* Builtin not supported on this processor. */
14041 return 0;
14043 /* If we got invalid arguments bail out before generating bad rtl. */
14044 if (arg0 == error_mark_node || arg1 == error_mark_node)
14045 return const0_rtx;
14047 if (GET_CODE (op0) != CONST_INT
14048 || INTVAL (op0) > 255
14049 || INTVAL (op0) < 0)
14051 error ("argument 1 must be an 8-bit field value");
14052 return const0_rtx;
14055 if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
14056 op0 = copy_to_mode_reg (mode0, op0);
14058 if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
14059 op1 = copy_to_mode_reg (mode1, op1);
14061 pat = GEN_FCN (icode) (op0, op1);
14062 if (! pat)
14063 return const0_rtx;
14064 emit_insn (pat);
14066 return NULL_RTX;
14069 static rtx
14070 rs6000_expand_unop_builtin (enum insn_code icode, tree exp, rtx target)
14072 rtx pat;
14073 tree arg0 = CALL_EXPR_ARG (exp, 0);
14074 rtx op0 = expand_normal (arg0);
14075 machine_mode tmode = insn_data[icode].operand[0].mode;
14076 machine_mode mode0 = insn_data[icode].operand[1].mode;
14078 if (icode == CODE_FOR_nothing)
14079 /* Builtin not supported on this processor. */
14080 return 0;
14082 /* If we got invalid arguments bail out before generating bad rtl. */
14083 if (arg0 == error_mark_node)
14084 return const0_rtx;
14086 if (icode == CODE_FOR_altivec_vspltisb
14087 || icode == CODE_FOR_altivec_vspltish
14088 || icode == CODE_FOR_altivec_vspltisw)
14090 /* Only allow 5-bit *signed* literals. */
14091 if (GET_CODE (op0) != CONST_INT
14092 || INTVAL (op0) > 15
14093 || INTVAL (op0) < -16)
14095 error ("argument 1 must be a 5-bit signed literal");
14096 return CONST0_RTX (tmode);
14100 if (target == 0
14101 || GET_MODE (target) != tmode
14102 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
14103 target = gen_reg_rtx (tmode);
14105 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
14106 op0 = copy_to_mode_reg (mode0, op0);
14108 pat = GEN_FCN (icode) (target, op0);
14109 if (! pat)
14110 return 0;
14111 emit_insn (pat);
14113 return target;
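/* So, for example, __builtin_altivec_vspltisw (15) and
   __builtin_altivec_vspltisw (-16) are accepted, while
   __builtin_altivec_vspltisw (16) draws the error above, since the
   vspltis[bhw] immediate field is five bits, sign-extended.  */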
14116 static rtx
14117 altivec_expand_abs_builtin (enum insn_code icode, tree exp, rtx target)
14119 rtx pat, scratch1, scratch2;
14120 tree arg0 = CALL_EXPR_ARG (exp, 0);
14121 rtx op0 = expand_normal (arg0);
14122 machine_mode tmode = insn_data[icode].operand[0].mode;
14123 machine_mode mode0 = insn_data[icode].operand[1].mode;
14125 /* If we have invalid arguments, bail out before generating bad rtl. */
14126 if (arg0 == error_mark_node)
14127 return const0_rtx;
14129 if (target == 0
14130 || GET_MODE (target) != tmode
14131 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
14132 target = gen_reg_rtx (tmode);
14134 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
14135 op0 = copy_to_mode_reg (mode0, op0);
14137 scratch1 = gen_reg_rtx (mode0);
14138 scratch2 = gen_reg_rtx (mode0);
14140 pat = GEN_FCN (icode) (target, op0, scratch1, scratch2);
14141 if (! pat)
14142 return 0;
14143 emit_insn (pat);
14145 return target;
14148 static rtx
14149 rs6000_expand_binop_builtin (enum insn_code icode, tree exp, rtx target)
14151 rtx pat;
14152 tree arg0 = CALL_EXPR_ARG (exp, 0);
14153 tree arg1 = CALL_EXPR_ARG (exp, 1);
14154 rtx op0 = expand_normal (arg0);
14155 rtx op1 = expand_normal (arg1);
14156 machine_mode tmode = insn_data[icode].operand[0].mode;
14157 machine_mode mode0 = insn_data[icode].operand[1].mode;
14158 machine_mode mode1 = insn_data[icode].operand[2].mode;
14160 if (icode == CODE_FOR_nothing)
14161 /* Builtin not supported on this processor. */
14162 return 0;
14164 /* If we got invalid arguments bail out before generating bad rtl. */
14165 if (arg0 == error_mark_node || arg1 == error_mark_node)
14166 return const0_rtx;
14168 if (icode == CODE_FOR_altivec_vcfux
14169 || icode == CODE_FOR_altivec_vcfsx
14170 || icode == CODE_FOR_altivec_vctsxs
14171 || icode == CODE_FOR_altivec_vctuxs
14172 || icode == CODE_FOR_altivec_vspltb
14173 || icode == CODE_FOR_altivec_vsplth
14174 || icode == CODE_FOR_altivec_vspltw)
14176 /* Only allow 5-bit unsigned literals. */
14177 STRIP_NOPS (arg1);
14178 if (TREE_CODE (arg1) != INTEGER_CST
14179 || TREE_INT_CST_LOW (arg1) & ~0x1f)
14181 error ("argument 2 must be a 5-bit unsigned literal");
14182 return CONST0_RTX (tmode);
14185 else if (icode == CODE_FOR_dfptstsfi_eq_dd
14186 || icode == CODE_FOR_dfptstsfi_lt_dd
14187 || icode == CODE_FOR_dfptstsfi_gt_dd
14188 || icode == CODE_FOR_dfptstsfi_unordered_dd
14189 || icode == CODE_FOR_dfptstsfi_eq_td
14190 || icode == CODE_FOR_dfptstsfi_lt_td
14191 || icode == CODE_FOR_dfptstsfi_gt_td
14192 || icode == CODE_FOR_dfptstsfi_unordered_td)
14194 /* Only allow 6-bit unsigned literals. */
14195 STRIP_NOPS (arg0);
14196 if (TREE_CODE (arg0) != INTEGER_CST
14197 || !IN_RANGE (TREE_INT_CST_LOW (arg0), 0, 63))
14199 error ("argument 1 must be a 6-bit unsigned literal");
14200 return CONST0_RTX (tmode);
14203 else if (icode == CODE_FOR_xststdcqp
14204 || icode == CODE_FOR_xststdcdp
14205 || icode == CODE_FOR_xststdcsp
14206 || icode == CODE_FOR_xvtstdcdp
14207 || icode == CODE_FOR_xvtstdcsp)
14209 /* Only allow 7-bit unsigned literals. */
14210 STRIP_NOPS (arg1);
14211 if (TREE_CODE (arg1) != INTEGER_CST
14212 || !IN_RANGE (TREE_INT_CST_LOW (arg1), 0, 127))
14214 error ("argument 2 must be a 7-bit unsigned literal");
14215 return CONST0_RTX (tmode);
14219 if (target == 0
14220 || GET_MODE (target) != tmode
14221 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
14222 target = gen_reg_rtx (tmode);
14224 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
14225 op0 = copy_to_mode_reg (mode0, op0);
14226 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
14227 op1 = copy_to_mode_reg (mode1, op1);
14229 pat = GEN_FCN (icode) (target, op0, op1);
14230 if (! pat)
14231 return 0;
14232 emit_insn (pat);
14234 return target;
14237 static rtx
14238 altivec_expand_predicate_builtin (enum insn_code icode, tree exp, rtx target)
14240 rtx pat, scratch;
14241 tree cr6_form = CALL_EXPR_ARG (exp, 0);
14242 tree arg0 = CALL_EXPR_ARG (exp, 1);
14243 tree arg1 = CALL_EXPR_ARG (exp, 2);
14244 rtx op0 = expand_normal (arg0);
14245 rtx op1 = expand_normal (arg1);
14246 machine_mode tmode = SImode;
14247 machine_mode mode0 = insn_data[icode].operand[1].mode;
14248 machine_mode mode1 = insn_data[icode].operand[2].mode;
14249 int cr6_form_int;
14251 if (TREE_CODE (cr6_form) != INTEGER_CST)
14253 error ("argument 1 of __builtin_altivec_predicate must be a constant");
14254 return const0_rtx;
14256 else
14257 cr6_form_int = TREE_INT_CST_LOW (cr6_form);
14259 gcc_assert (mode0 == mode1);
14261 /* If we have invalid arguments, bail out before generating bad rtl. */
14262 if (arg0 == error_mark_node || arg1 == error_mark_node)
14263 return const0_rtx;
14265 if (target == 0
14266 || GET_MODE (target) != tmode
14267 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
14268 target = gen_reg_rtx (tmode);
14270 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
14271 op0 = copy_to_mode_reg (mode0, op0);
14272 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
14273 op1 = copy_to_mode_reg (mode1, op1);
14275 /* Note that for many of the relevant operations (e.g. cmpne or
14276 cmpeq) with float or double operands, it makes more sense for the
14277 mode of the allocated scratch register to select a vector of
14278 integers. But the choice to copy the mode of operand 0 was made
14279 long ago and there are no plans to change it. */
14280 scratch = gen_reg_rtx (mode0);
14282 pat = GEN_FCN (icode) (scratch, op0, op1);
14283 if (! pat)
14284 return 0;
14285 emit_insn (pat);
14287 /* The vec_any* and vec_all* predicates use the same opcodes for two
14288 different operations, but the bits in CR6 will be different
14289 depending on what information we want. So we have to play tricks
14290 with CR6 to get the right bits out.
14292 If you think this is disgusting, look at the specs for the
14293 AltiVec predicates. */
14295 switch (cr6_form_int)
14297 case 0:
14298 emit_insn (gen_cr6_test_for_zero (target));
14299 break;
14300 case 1:
14301 emit_insn (gen_cr6_test_for_zero_reverse (target));
14302 break;
14303 case 2:
14304 emit_insn (gen_cr6_test_for_lt (target));
14305 break;
14306 case 3:
14307 emit_insn (gen_cr6_test_for_lt_reverse (target));
14308 break;
14309 default:
14310 error ("argument 1 of __builtin_altivec_predicate is out of range");
14311 break;
14314 return target;
14317 static rtx
14318 paired_expand_lv_builtin (enum insn_code icode, tree exp, rtx target)
14320 rtx pat, addr;
14321 tree arg0 = CALL_EXPR_ARG (exp, 0);
14322 tree arg1 = CALL_EXPR_ARG (exp, 1);
14323 machine_mode tmode = insn_data[icode].operand[0].mode;
14324 machine_mode mode0 = Pmode;
14325 machine_mode mode1 = Pmode;
14326 rtx op0 = expand_normal (arg0);
14327 rtx op1 = expand_normal (arg1);
14329 if (icode == CODE_FOR_nothing)
14330 /* Builtin not supported on this processor. */
14331 return 0;
14333 /* If we got invalid arguments bail out before generating bad rtl. */
14334 if (arg0 == error_mark_node || arg1 == error_mark_node)
14335 return const0_rtx;
14337 if (target == 0
14338 || GET_MODE (target) != tmode
14339 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
14340 target = gen_reg_rtx (tmode);
14342 op1 = copy_to_mode_reg (mode1, op1);
14344 if (op0 == const0_rtx)
14346 addr = gen_rtx_MEM (tmode, op1);
14348 else
14350 op0 = copy_to_mode_reg (mode0, op0);
14351 addr = gen_rtx_MEM (tmode, gen_rtx_PLUS (Pmode, op0, op1));
14354 pat = GEN_FCN (icode) (target, addr);
14356 if (! pat)
14357 return 0;
14358 emit_insn (pat);
14360 return target;
14363 /* Return a constant vector for use as a little-endian permute control vector
14364 to reverse the order of elements of the given vector mode. */
14365 static rtx
14366 swap_selector_for_mode (machine_mode mode)
14368 /* These are little endian vectors, so their elements are reversed
14369 from what you would normally expect for a permute control vector. */
14370 unsigned int swap2[16] = {7,6,5,4,3,2,1,0,15,14,13,12,11,10,9,8};
14371 unsigned int swap4[16] = {3,2,1,0,7,6,5,4,11,10,9,8,15,14,13,12};
14372 unsigned int swap8[16] = {1,0,3,2,5,4,7,6,9,8,11,10,13,12,15,14};
14373 unsigned int swap16[16] = {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15};
14374 unsigned int *swaparray, i;
14375 rtx perm[16];
14377 switch (mode)
14379 case V2DFmode:
14380 case V2DImode:
14381 swaparray = swap2;
14382 break;
14383 case V4SFmode:
14384 case V4SImode:
14385 swaparray = swap4;
14386 break;
14387 case V8HImode:
14388 swaparray = swap8;
14389 break;
14390 case V16QImode:
14391 swaparray = swap16;
14392 break;
14393 default:
14394 gcc_unreachable ();
14397 for (i = 0; i < 16; ++i)
14398 perm[i] = GEN_INT (swaparray[i]);
14400 return force_reg (V16QImode, gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, perm)));
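/* Read back in vperm's big-endian lane order (lane 0 is element 15
   of the little-endian constant), swap4 for instance becomes
   {12,13,14,15, 8,9,10,11, 4,5,6,7, 0,1,2,3}, the familiar byte
   pattern that reverses the four words of a V4SI/V4SF vector; the
   arrays above are just that pattern pre-swapped into little-endian
   element order.  */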
14403 /* Generate code for an "lvxl", or "lve*x" built-in for a little endian target
14404 with -maltivec=be specified. Issue the load followed by an element-
14405 reversing permute. */
14406 void
14407 altivec_expand_lvx_be (rtx op0, rtx op1, machine_mode mode, unsigned unspec)
14409 rtx tmp = gen_reg_rtx (mode);
14410 rtx load = gen_rtx_SET (tmp, op1);
14411 rtx lvx = gen_rtx_UNSPEC (mode, gen_rtvec (1, const0_rtx), unspec);
14412 rtx par = gen_rtx_PARALLEL (mode, gen_rtvec (2, load, lvx));
14413 rtx sel = swap_selector_for_mode (mode);
14414 rtx vperm = gen_rtx_UNSPEC (mode, gen_rtvec (3, tmp, tmp, sel), UNSPEC_VPERM);
14416 gcc_assert (REG_P (op0));
14417 emit_insn (par);
14418 emit_insn (gen_rtx_SET (op0, vperm));
14421 /* Generate code for a "stvxl" built-in for a little endian target with
14422 -maltivec=be specified. Issue the store preceded by an element-reversing
14423 permute. */
14424 void
14425 altivec_expand_stvx_be (rtx op0, rtx op1, machine_mode mode, unsigned unspec)
14427 rtx tmp = gen_reg_rtx (mode);
14428 rtx store = gen_rtx_SET (op0, tmp);
14429 rtx stvx = gen_rtx_UNSPEC (mode, gen_rtvec (1, const0_rtx), unspec);
14430 rtx par = gen_rtx_PARALLEL (mode, gen_rtvec (2, store, stvx));
14431 rtx sel = swap_selector_for_mode (mode);
14432 rtx vperm;
14434 gcc_assert (REG_P (op1));
14435 vperm = gen_rtx_UNSPEC (mode, gen_rtvec (3, op1, op1, sel), UNSPEC_VPERM);
14436 emit_insn (gen_rtx_SET (tmp, vperm));
14437 emit_insn (par);
14440 /* Generate code for a "stve*x" built-in for a little endian target with -maltivec=be
14441 specified. Issue the store preceded by an element-reversing permute. */
14442 void
14443 altivec_expand_stvex_be (rtx op0, rtx op1, machine_mode mode, unsigned unspec)
14445 machine_mode inner_mode = GET_MODE_INNER (mode);
14446 rtx tmp = gen_reg_rtx (mode);
14447 rtx stvx = gen_rtx_UNSPEC (inner_mode, gen_rtvec (1, tmp), unspec);
14448 rtx sel = swap_selector_for_mode (mode);
14449 rtx vperm;
14451 gcc_assert (REG_P (op1));
14452 vperm = gen_rtx_UNSPEC (mode, gen_rtvec (3, op1, op1, sel), UNSPEC_VPERM);
14453 emit_insn (gen_rtx_SET (tmp, vperm));
14454 emit_insn (gen_rtx_SET (op0, stvx));
14457 static rtx
14458 altivec_expand_lv_builtin (enum insn_code icode, tree exp, rtx target, bool blk)
14460 rtx pat, addr;
14461 tree arg0 = CALL_EXPR_ARG (exp, 0);
14462 tree arg1 = CALL_EXPR_ARG (exp, 1);
14463 machine_mode tmode = insn_data[icode].operand[0].mode;
14464 machine_mode mode0 = Pmode;
14465 machine_mode mode1 = Pmode;
14466 rtx op0 = expand_normal (arg0);
14467 rtx op1 = expand_normal (arg1);
14469 if (icode == CODE_FOR_nothing)
14470 /* Builtin not supported on this processor. */
14471 return 0;
14473 /* If we got invalid arguments bail out before generating bad rtl. */
14474 if (arg0 == error_mark_node || arg1 == error_mark_node)
14475 return const0_rtx;
14477 if (target == 0
14478 || GET_MODE (target) != tmode
14479 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
14480 target = gen_reg_rtx (tmode);
14482 op1 = copy_to_mode_reg (mode1, op1);
14484 /* For LVX, express the RTL accurately by ANDing the address with -16.
14485 LVXL and LVE*X expand to use UNSPECs to hide their special behavior,
14486 so the raw address is fine. */
14487 if (icode == CODE_FOR_altivec_lvx_v2df_2op
14488 || icode == CODE_FOR_altivec_lvx_v2di_2op
14489 || icode == CODE_FOR_altivec_lvx_v4sf_2op
14490 || icode == CODE_FOR_altivec_lvx_v4si_2op
14491 || icode == CODE_FOR_altivec_lvx_v8hi_2op
14492 || icode == CODE_FOR_altivec_lvx_v16qi_2op)
14494 rtx rawaddr;
14495 if (op0 == const0_rtx)
14496 rawaddr = op1;
14497 else
14499 op0 = copy_to_mode_reg (mode0, op0);
14500 rawaddr = gen_rtx_PLUS (Pmode, op1, op0);
14502 addr = gen_rtx_AND (Pmode, rawaddr, gen_rtx_CONST_INT (Pmode, -16));
14503 addr = gen_rtx_MEM (blk ? BLKmode : tmode, addr);
14505 /* For -maltivec=be, emit the load and follow it up with a
14506 permute to swap the elements. */
14507 if (!BYTES_BIG_ENDIAN && VECTOR_ELT_ORDER_BIG)
14509 rtx temp = gen_reg_rtx (tmode);
14510 emit_insn (gen_rtx_SET (temp, addr));
14512 rtx sel = swap_selector_for_mode (tmode);
14513 rtx vperm = gen_rtx_UNSPEC (tmode, gen_rtvec (3, temp, temp, sel),
14514 UNSPEC_VPERM);
14515 emit_insn (gen_rtx_SET (target, vperm));
14517 else
14518 emit_insn (gen_rtx_SET (target, addr));
14520 else
14522 if (op0 == const0_rtx)
14523 addr = gen_rtx_MEM (blk ? BLKmode : tmode, op1);
14524 else
14526 op0 = copy_to_mode_reg (mode0, op0);
14527 addr = gen_rtx_MEM (blk ? BLKmode : tmode,
14528 gen_rtx_PLUS (Pmode, op1, op0));
14531 pat = GEN_FCN (icode) (target, addr);
14532 if (! pat)
14533 return 0;
14534 emit_insn (pat);
14537 return target;
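/* Expand a paired-single store builtin described by ICODE for call
   EXP.  The first argument is the value to store; the second and
   third are the offset and base address.  Stores produce no result,
   so NULL_RTX is always returned.  */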
14540 static rtx
14541 paired_expand_stv_builtin (enum insn_code icode, tree exp)
14543 tree arg0 = CALL_EXPR_ARG (exp, 0);
14544 tree arg1 = CALL_EXPR_ARG (exp, 1);
14545 tree arg2 = CALL_EXPR_ARG (exp, 2);
14546 rtx op0 = expand_normal (arg0);
14547 rtx op1 = expand_normal (arg1);
14548 rtx op2 = expand_normal (arg2);
14549 rtx pat, addr;
14550 machine_mode tmode = insn_data[icode].operand[0].mode;
14551 machine_mode mode1 = Pmode;
14552 machine_mode mode2 = Pmode;
14554 /* Invalid arguments; bail out before generating bad rtl. */
14555 if (arg0 == error_mark_node
14556 || arg1 == error_mark_node
14557 || arg2 == error_mark_node)
14558 return const0_rtx;
14560 if (! (*insn_data[icode].operand[1].predicate) (op0, tmode))
14561 op0 = copy_to_mode_reg (tmode, op0);
14563 op2 = copy_to_mode_reg (mode2, op2);
14565 if (op1 == const0_rtx)
14567 addr = gen_rtx_MEM (tmode, op2);
14569 else
14571 op1 = copy_to_mode_reg (mode1, op1);
14572 addr = gen_rtx_MEM (tmode, gen_rtx_PLUS (Pmode, op1, op2));
14575 pat = GEN_FCN (icode) (addr, op0);
14576 if (pat)
14577 emit_insn (pat);
14578 return NULL_RTX;
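/* Expand the ISA 3.0 stxvl builtin described by ICODE for call EXP.
   The three arguments are the vector to store, the target address,
   and the length of the store in bytes.  Produces no result.  */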
14581 static rtx
14582 altivec_expand_stxvl_builtin (enum insn_code icode, tree exp)
14584 rtx pat;
14585 tree arg0 = CALL_EXPR_ARG (exp, 0);
14586 tree arg1 = CALL_EXPR_ARG (exp, 1);
14587 tree arg2 = CALL_EXPR_ARG (exp, 2);
14588 rtx op0 = expand_normal (arg0);
14589 rtx op1 = expand_normal (arg1);
14590 rtx op2 = expand_normal (arg2);
14591 machine_mode mode0 = insn_data[icode].operand[0].mode;
14592 machine_mode mode1 = insn_data[icode].operand[1].mode;
14593 machine_mode mode2 = insn_data[icode].operand[2].mode;
14595 if (icode == CODE_FOR_nothing)
14596 /* Builtin not supported on this processor. */
14597 return NULL_RTX;
14599 /* If we got invalid arguments, bail out before generating bad rtl. */
14600 if (arg0 == error_mark_node
14601 || arg1 == error_mark_node
14602 || arg2 == error_mark_node)
14603 return NULL_RTX;
14605 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
14606 op0 = copy_to_mode_reg (mode0, op0);
14607 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
14608 op1 = copy_to_mode_reg (mode1, op1);
14609 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
14610 op2 = copy_to_mode_reg (mode2, op2);
14612 pat = GEN_FCN (icode) (op0, op1, op2);
14613 if (pat)
14614 emit_insn (pat);
14616 return NULL_RTX;
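/* Expand an AltiVec/VSX vector store builtin described by ICODE for
   call EXP.  The first argument is the value to store; the second and
   third are the offset and base address.  Produces no result.  */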
14619 static rtx
14620 altivec_expand_stv_builtin (enum insn_code icode, tree exp)
14622 tree arg0 = CALL_EXPR_ARG (exp, 0);
14623 tree arg1 = CALL_EXPR_ARG (exp, 1);
14624 tree arg2 = CALL_EXPR_ARG (exp, 2);
14625 rtx op0 = expand_normal (arg0);
14626 rtx op1 = expand_normal (arg1);
14627 rtx op2 = expand_normal (arg2);
14628 rtx pat, addr, rawaddr;
14629 machine_mode tmode = insn_data[icode].operand[0].mode;
14630 machine_mode smode = insn_data[icode].operand[1].mode;
14631 machine_mode mode1 = Pmode;
14632 machine_mode mode2 = Pmode;
14634 /* Invalid arguments; bail out before generating bad rtl. */
14635 if (arg0 == error_mark_node
14636 || arg1 == error_mark_node
14637 || arg2 == error_mark_node)
14638 return const0_rtx;
14640 op2 = copy_to_mode_reg (mode2, op2);
14642 /* For STVX, express the RTL accurately by ANDing the address with -16.
14643 STVXL and STVE*X expand to use UNSPECs to hide their special behavior,
14644 so the raw address is fine. */
14645 if (icode == CODE_FOR_altivec_stvx_v2df_2op
14646 || icode == CODE_FOR_altivec_stvx_v2di_2op
14647 || icode == CODE_FOR_altivec_stvx_v4sf_2op
14648 || icode == CODE_FOR_altivec_stvx_v4si_2op
14649 || icode == CODE_FOR_altivec_stvx_v8hi_2op
14650 || icode == CODE_FOR_altivec_stvx_v16qi_2op)
14652 if (op1 == const0_rtx)
14653 rawaddr = op2;
14654 else
14656 op1 = copy_to_mode_reg (mode1, op1);
14657 rawaddr = gen_rtx_PLUS (Pmode, op2, op1);
14660 addr = gen_rtx_AND (Pmode, rawaddr, gen_rtx_CONST_INT (Pmode, -16));
14661 addr = gen_rtx_MEM (tmode, addr);
14663 op0 = copy_to_mode_reg (tmode, op0);
14665 /* For -maltivec=be, emit a permute to swap the elements, followed
14666 by the store. */
14667 if (!BYTES_BIG_ENDIAN && VECTOR_ELT_ORDER_BIG)
14669 rtx temp = gen_reg_rtx (tmode);
14670 rtx sel = swap_selector_for_mode (tmode);
14671 rtx vperm = gen_rtx_UNSPEC (tmode, gen_rtvec (3, op0, op0, sel),
14672 UNSPEC_VPERM);
14673 emit_insn (gen_rtx_SET (temp, vperm));
14674 emit_insn (gen_rtx_SET (addr, temp));
14676 else
14677 emit_insn (gen_rtx_SET (addr, op0));
14679 else
14681 if (! (*insn_data[icode].operand[1].predicate) (op0, smode))
14682 op0 = copy_to_mode_reg (smode, op0);
14684 if (op1 == const0_rtx)
14685 addr = gen_rtx_MEM (tmode, op2);
14686 else
14688 op1 = copy_to_mode_reg (mode1, op1);
14689 addr = gen_rtx_MEM (tmode, gen_rtx_PLUS (Pmode, op2, op1));
14692 pat = GEN_FCN (icode) (addr, op0);
14693 if (pat)
14694 emit_insn (pat);
14697 return NULL_RTX;
14700 /* Return the appropriate SPR number associated with the given builtin. */
14701 static inline HOST_WIDE_INT
14702 htm_spr_num (enum rs6000_builtins code)
14704 if (code == HTM_BUILTIN_GET_TFHAR
14705 || code == HTM_BUILTIN_SET_TFHAR)
14706 return TFHAR_SPR;
14707 else if (code == HTM_BUILTIN_GET_TFIAR
14708 || code == HTM_BUILTIN_SET_TFIAR)
14709 return TFIAR_SPR;
14710 else if (code == HTM_BUILTIN_GET_TEXASR
14711 || code == HTM_BUILTIN_SET_TEXASR)
14712 return TEXASR_SPR;
14713 gcc_assert (code == HTM_BUILTIN_GET_TEXASRU
14714 || code == HTM_BUILTIN_SET_TEXASRU);
14715 return TEXASRU_SPR;
14718 /* Return the appropriate SPR regno associated with the given builtin. */
14719 static inline HOST_WIDE_INT
14720 htm_spr_regno (enum rs6000_builtins code)
14722 if (code == HTM_BUILTIN_GET_TFHAR
14723 || code == HTM_BUILTIN_SET_TFHAR)
14724 return TFHAR_REGNO;
14725 else if (code == HTM_BUILTIN_GET_TFIAR
14726 || code == HTM_BUILTIN_SET_TFIAR)
14727 return TFIAR_REGNO;
14728 gcc_assert (code == HTM_BUILTIN_GET_TEXASR
14729 || code == HTM_BUILTIN_SET_TEXASR
14730 || code == HTM_BUILTIN_GET_TEXASRU
14731 || code == HTM_BUILTIN_SET_TEXASRU);
14732 return TEXASR_REGNO;
14735 /* Return the correct ICODE value depending on whether we are
14736 setting or reading the HTM SPRs. */
14737 static inline enum insn_code
14738 rs6000_htm_spr_icode (bool nonvoid)
14740 if (nonvoid)
14741 return (TARGET_POWERPC64) ? CODE_FOR_htm_mfspr_di : CODE_FOR_htm_mfspr_si;
14742 else
14743 return (TARGET_POWERPC64) ? CODE_FOR_htm_mtspr_di : CODE_FOR_htm_mtspr_si;
14746 /* Expand the HTM builtin in EXP and store the result in TARGET.
14747 Store true in *EXPANDEDP if we found a builtin to expand. */
14748 static rtx
14749 htm_expand_builtin (tree exp, rtx target, bool * expandedp)
14751 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
14752 bool nonvoid = TREE_TYPE (TREE_TYPE (fndecl)) != void_type_node;
14753 enum rs6000_builtins fcode = (enum rs6000_builtins) DECL_FUNCTION_CODE (fndecl);
14754 const struct builtin_description *d;
14755 size_t i;
14757 *expandedp = true;
14759 if (!TARGET_POWERPC64
14760 && (fcode == HTM_BUILTIN_TABORTDC
14761 || fcode == HTM_BUILTIN_TABORTDCI))
14763 size_t uns_fcode = (size_t)fcode;
14764 const char *name = rs6000_builtin_info[uns_fcode].name;
14765 error ("builtin %s is only valid in 64-bit mode", name);
14766 return const0_rtx;
14769 /* Expand the HTM builtins. */
14770 d = bdesc_htm;
14771 for (i = 0; i < ARRAY_SIZE (bdesc_htm); i++, d++)
14772 if (d->code == fcode)
14774 rtx op[MAX_HTM_OPERANDS], pat;
14775 int nopnds = 0;
14776 tree arg;
14777 call_expr_arg_iterator iter;
14778 unsigned attr = rs6000_builtin_info[fcode].attr;
14779 enum insn_code icode = d->icode;
14780 const struct insn_operand_data *insn_op;
14781 bool uses_spr = (attr & RS6000_BTC_SPR);
14782 rtx cr = NULL_RTX;
14784 if (uses_spr)
14785 icode = rs6000_htm_spr_icode (nonvoid);
14786 insn_op = &insn_data[icode].operand[0];
14788 if (nonvoid)
14790 machine_mode tmode = (uses_spr) ? insn_op->mode : SImode;
14791 if (!target
14792 || GET_MODE (target) != tmode
14793 || (uses_spr && !(*insn_op->predicate) (target, tmode)))
14794 target = gen_reg_rtx (tmode);
14795 if (uses_spr)
14796 op[nopnds++] = target;
14799 FOR_EACH_CALL_EXPR_ARG (arg, iter, exp)
14801 if (arg == error_mark_node || nopnds >= MAX_HTM_OPERANDS)
14802 return const0_rtx;
14804 insn_op = &insn_data[icode].operand[nopnds];
14806 op[nopnds] = expand_normal (arg);
14808 if (!(*insn_op->predicate) (op[nopnds], insn_op->mode))
14810 if (!strcmp (insn_op->constraint, "n"))
14812 int arg_num = (nonvoid) ? nopnds : nopnds + 1;
14813 if (!CONST_INT_P (op[nopnds]))
14814 error ("argument %d must be an unsigned literal", arg_num);
14815 else
14816 error ("argument %d is an unsigned literal that is "
14817 "out of range", arg_num);
14818 return const0_rtx;
14820 op[nopnds] = copy_to_mode_reg (insn_op->mode, op[nopnds]);
14823 nopnds++;
14826 /* Handle the builtins for extended mnemonics. These accept
14827 no arguments, but map to builtins that take arguments. */
14828 switch (fcode)
14830 case HTM_BUILTIN_TENDALL: /* Alias for: tend. 1 */
14831 case HTM_BUILTIN_TRESUME: /* Alias for: tsr. 1 */
14832 op[nopnds++] = GEN_INT (1);
14833 if (flag_checking)
14834 attr |= RS6000_BTC_UNARY;
14835 break;
14836 case HTM_BUILTIN_TSUSPEND: /* Alias for: tsr. 0 */
14837 op[nopnds++] = GEN_INT (0);
14838 if (flag_checking)
14839 attr |= RS6000_BTC_UNARY;
14840 break;
14841 default:
14842 break;
14845 /* If this builtin accesses SPRs, then pass in the appropriate
14846 SPR number and SPR regno as the last two operands. */
14847 if (uses_spr)
14849 machine_mode mode = (TARGET_POWERPC64) ? DImode : SImode;
14850 op[nopnds++] = gen_rtx_CONST_INT (mode, htm_spr_num (fcode));
14851 op[nopnds++] = gen_rtx_REG (mode, htm_spr_regno (fcode));
14853 /* If this builtin accesses a CR, then pass in a scratch
14854 CR as the last operand. */
14855 else if (attr & RS6000_BTC_CR)
14856 { cr = gen_reg_rtx (CCmode);
14857 op[nopnds++] = cr;
14860 if (flag_checking)
14862 int expected_nopnds = 0;
14863 if ((attr & RS6000_BTC_TYPE_MASK) == RS6000_BTC_UNARY)
14864 expected_nopnds = 1;
14865 else if ((attr & RS6000_BTC_TYPE_MASK) == RS6000_BTC_BINARY)
14866 expected_nopnds = 2;
14867 else if ((attr & RS6000_BTC_TYPE_MASK) == RS6000_BTC_TERNARY)
14868 expected_nopnds = 3;
14869 if (!(attr & RS6000_BTC_VOID))
14870 expected_nopnds += 1;
14871 if (uses_spr)
14872 expected_nopnds += 2;
14874 gcc_assert (nopnds == expected_nopnds
14875 && nopnds <= MAX_HTM_OPERANDS);
14878 switch (nopnds)
14880 case 1:
14881 pat = GEN_FCN (icode) (op[0]);
14882 break;
14883 case 2:
14884 pat = GEN_FCN (icode) (op[0], op[1]);
14885 break;
14886 case 3:
14887 pat = GEN_FCN (icode) (op[0], op[1], op[2]);
14888 break;
14889 case 4:
14890 pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3]);
14891 break;
14892 default:
14893 gcc_unreachable ();
14895 if (!pat)
14896 return NULL_RTX;
14897 emit_insn (pat);
14899 if (attr & RS6000_BTC_CR)
14901 if (fcode == HTM_BUILTIN_TBEGIN)
14903 /* Emit code to set TARGET to true or false depending on whether
14904 the tbegin. instruction succeeded or failed to start a
14905 transaction. We do this by placing the one's complement of
14906 CR's EQ bit into TARGET. */
14907 rtx scratch = gen_reg_rtx (SImode);
14908 emit_insn (gen_rtx_SET (scratch,
14909 gen_rtx_EQ (SImode, cr,
14910 const0_rtx)));
14911 emit_insn (gen_rtx_SET (target,
14912 gen_rtx_XOR (SImode, scratch,
14913 GEN_INT (1))));
14915 else
14917 /* Emit code to copy the 4-bit condition register field
14918 CR into the least significant end of register TARGET. */
14919 rtx scratch1 = gen_reg_rtx (SImode);
14920 rtx scratch2 = gen_reg_rtx (SImode);
14921 rtx subreg = simplify_gen_subreg (CCmode, scratch1, SImode, 0);
14922 emit_insn (gen_movcc (subreg, cr));
14923 emit_insn (gen_lshrsi3 (scratch2, scratch1, GEN_INT (28)));
14924 emit_insn (gen_andsi3 (target, scratch2, GEN_INT (0xf)));
14928 if (nonvoid)
14929 return target;
14930 return const0_rtx;
14933 *expandedp = false;
14934 return NULL_RTX;
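/* For example (a sketch, assuming -mhtm), a use such as

     if (__builtin_tbegin (0))
       ...

   goes through the RS6000_BTC_CR path above: the tbegin. pattern sets
   a scratch CR field, and TARGET receives the complement of its EQ
   bit, so the builtin's result is nonzero exactly when a transaction
   was started.  */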
14937 /* Expand the CPU builtin in FCODE and store the result in TARGET. */
14939 static rtx
14940 cpu_expand_builtin (enum rs6000_builtins fcode, tree exp ATTRIBUTE_UNUSED,
14941 rtx target)
14943 /* __builtin_cpu_init () is a nop, so expand to nothing. */
14944 if (fcode == RS6000_BUILTIN_CPU_INIT)
14945 return const0_rtx;
14947 if (target == 0 || GET_MODE (target) != SImode)
14948 target = gen_reg_rtx (SImode);
14950 #ifdef TARGET_LIBC_PROVIDES_HWCAP_IN_TCB
14951 tree arg = TREE_OPERAND (CALL_EXPR_ARG (exp, 0), 0);
14952 /* The target_clones attribute creates an ARRAY_REF instead of a
14953 STRING_CST; convert it back to a STRING_CST. */
14954 if (TREE_CODE (arg) == ARRAY_REF
14955 && TREE_CODE (TREE_OPERAND (arg, 0)) == STRING_CST
14956 && TREE_CODE (TREE_OPERAND (arg, 1)) == INTEGER_CST
14957 && compare_tree_int (TREE_OPERAND (arg, 1), 0) == 0)
14958 arg = TREE_OPERAND (arg, 0);
14960 if (TREE_CODE (arg) != STRING_CST)
14962 error ("builtin %s only accepts a string argument",
14963 rs6000_builtin_info[(size_t) fcode].name);
14964 return const0_rtx;
14967 if (fcode == RS6000_BUILTIN_CPU_IS)
14969 const char *cpu = TREE_STRING_POINTER (arg);
14970 rtx cpuid = NULL_RTX;
14971 for (size_t i = 0; i < ARRAY_SIZE (cpu_is_info); i++)
14972 if (strcmp (cpu, cpu_is_info[i].cpu) == 0)
14974 /* The CPUID value in the TCB is offset by _DL_FIRST_PLATFORM. */
14975 cpuid = GEN_INT (cpu_is_info[i].cpuid + _DL_FIRST_PLATFORM);
14976 break;
14978 if (cpuid == NULL_RTX)
14980 /* Invalid CPU argument. */
14981 error ("cpu %s is an invalid argument to builtin %s",
14982 cpu, rs6000_builtin_info[(size_t) fcode].name);
14983 return const0_rtx;
14986 rtx platform = gen_reg_rtx (SImode);
14987 rtx tcbmem = gen_const_mem (SImode,
14988 gen_rtx_PLUS (Pmode,
14989 gen_rtx_REG (Pmode, TLS_REGNUM),
14990 GEN_INT (TCB_PLATFORM_OFFSET)));
14991 emit_move_insn (platform, tcbmem);
14992 emit_insn (gen_eqsi3 (target, platform, cpuid));
14994 else if (fcode == RS6000_BUILTIN_CPU_SUPPORTS)
14996 const char *hwcap = TREE_STRING_POINTER (arg);
14997 rtx mask = NULL_RTX;
14998 int hwcap_offset;
14999 for (size_t i = 0; i < ARRAY_SIZE (cpu_supports_info); i++)
15000 if (strcmp (hwcap, cpu_supports_info[i].hwcap) == 0)
15002 mask = GEN_INT (cpu_supports_info[i].mask);
15003 hwcap_offset = TCB_HWCAP_OFFSET (cpu_supports_info[i].id);
15004 break;
15006 if (mask == NULL_RTX)
15008 /* Invalid HWCAP argument. */
15009 error ("hwcap %s is an invalid argument to builtin %s",
15010 hwcap, rs6000_builtin_info[(size_t) fcode].name);
15011 return const0_rtx;
15014 rtx tcb_hwcap = gen_reg_rtx (SImode);
15015 rtx tcbmem = gen_const_mem (SImode,
15016 gen_rtx_PLUS (Pmode,
15017 gen_rtx_REG (Pmode, TLS_REGNUM),
15018 GEN_INT (hwcap_offset)));
15019 emit_move_insn (tcb_hwcap, tcbmem);
15020 rtx scratch1 = gen_reg_rtx (SImode);
15021 emit_insn (gen_rtx_SET (scratch1, gen_rtx_AND (SImode, tcb_hwcap, mask)));
15022 rtx scratch2 = gen_reg_rtx (SImode);
15023 emit_insn (gen_eqsi3 (scratch2, scratch1, const0_rtx));
15024 emit_insn (gen_rtx_SET (target, gen_rtx_XOR (SImode, scratch2, const1_rtx)));
15026 else
15027 gcc_unreachable ();
15029 /* Record that we have expanded a CPU builtin, so that we can later
15030 emit a reference to the special symbol exported by LIBC to ensure we
15031 do not link against an old LIBC that doesn't support this feature. */
15032 cpu_builtin_p = true;
15034 #else
15035 warning (0, "%s needs GLIBC (2.23 or newer) that exports hardware "
15036 "capability bits", rs6000_builtin_info[(size_t) fcode].name);
15038 /* For old LIBCs, always return FALSE. */
15039 emit_move_insn (target, GEN_INT (0));
15040 #endif /* TARGET_LIBC_PROVIDES_HWCAP_IN_TCB */
15042 return target;
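/* For example (assuming glibc 2.23 or newer), __builtin_cpu_is ("power9")
   expands to a load of the platform word from the TCB and a comparison
   against the power9 CPUID, while __builtin_cpu_supports ("vsx") loads
   the appropriate HWCAP word from the TCB, masks the VSX bit, and tests
   the result.  */

/* Expand a ternary (three-argument) builtin described by ICODE for
   call EXP, storing the result in TARGET.  Instructions that require
   a constant literal operand have that literal validated here before
   any RTL is generated.  */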
15045 static rtx
15046 rs6000_expand_ternop_builtin (enum insn_code icode, tree exp, rtx target)
15048 rtx pat;
15049 tree arg0 = CALL_EXPR_ARG (exp, 0);
15050 tree arg1 = CALL_EXPR_ARG (exp, 1);
15051 tree arg2 = CALL_EXPR_ARG (exp, 2);
15052 rtx op0 = expand_normal (arg0);
15053 rtx op1 = expand_normal (arg1);
15054 rtx op2 = expand_normal (arg2);
15055 machine_mode tmode = insn_data[icode].operand[0].mode;
15056 machine_mode mode0 = insn_data[icode].operand[1].mode;
15057 machine_mode mode1 = insn_data[icode].operand[2].mode;
15058 machine_mode mode2 = insn_data[icode].operand[3].mode;
15060 if (icode == CODE_FOR_nothing)
15061 /* Builtin not supported on this processor. */
15062 return 0;
15064 /* If we got invalid arguments, bail out before generating bad rtl. */
15065 if (arg0 == error_mark_node
15066 || arg1 == error_mark_node
15067 || arg2 == error_mark_node)
15068 return const0_rtx;
15070 /* Check and prepare the argument depending on the instruction code.
15072 Note that a switch statement instead of this sequence of tests
15073 would be incorrect: many of the CODE_FOR values could be
15074 CODE_FOR_nothing, and that would yield multiple case labels
15075 with identical values. We would never reach here at runtime
15076 in that situation anyway. */
15077 if (icode == CODE_FOR_altivec_vsldoi_v4sf
15078 || icode == CODE_FOR_altivec_vsldoi_v2df
15079 || icode == CODE_FOR_altivec_vsldoi_v4si
15080 || icode == CODE_FOR_altivec_vsldoi_v8hi
15081 || icode == CODE_FOR_altivec_vsldoi_v16qi)
15083 /* Only allow 4-bit unsigned literals. */
15084 STRIP_NOPS (arg2);
15085 if (TREE_CODE (arg2) != INTEGER_CST
15086 || TREE_INT_CST_LOW (arg2) & ~0xf)
15088 error ("argument 3 must be a 4-bit unsigned literal");
15089 return CONST0_RTX (tmode);
15092 else if (icode == CODE_FOR_vsx_xxpermdi_v2df
15093 || icode == CODE_FOR_vsx_xxpermdi_v2di
15094 || icode == CODE_FOR_vsx_xxpermdi_v2df_be
15095 || icode == CODE_FOR_vsx_xxpermdi_v2di_be
15096 || icode == CODE_FOR_vsx_xxpermdi_v1ti
15097 || icode == CODE_FOR_vsx_xxpermdi_v4sf
15098 || icode == CODE_FOR_vsx_xxpermdi_v4si
15099 || icode == CODE_FOR_vsx_xxpermdi_v8hi
15100 || icode == CODE_FOR_vsx_xxpermdi_v16qi
15101 || icode == CODE_FOR_vsx_xxsldwi_v16qi
15102 || icode == CODE_FOR_vsx_xxsldwi_v8hi
15103 || icode == CODE_FOR_vsx_xxsldwi_v4si
15104 || icode == CODE_FOR_vsx_xxsldwi_v4sf
15105 || icode == CODE_FOR_vsx_xxsldwi_v2di
15106 || icode == CODE_FOR_vsx_xxsldwi_v2df)
15108 /* Only allow 2-bit unsigned literals. */
15109 STRIP_NOPS (arg2);
15110 if (TREE_CODE (arg2) != INTEGER_CST
15111 || TREE_INT_CST_LOW (arg2) & ~0x3)
15113 error ("argument 3 must be a 2-bit unsigned literal");
15114 return CONST0_RTX (tmode);
15117 else if (icode == CODE_FOR_vsx_set_v2df
15118 || icode == CODE_FOR_vsx_set_v2di
15119 || icode == CODE_FOR_bcdadd
15120 || icode == CODE_FOR_bcdadd_lt
15121 || icode == CODE_FOR_bcdadd_eq
15122 || icode == CODE_FOR_bcdadd_gt
15123 || icode == CODE_FOR_bcdsub
15124 || icode == CODE_FOR_bcdsub_lt
15125 || icode == CODE_FOR_bcdsub_eq
15126 || icode == CODE_FOR_bcdsub_gt)
15128 /* Only allow 1-bit unsigned literals. */
15129 STRIP_NOPS (arg2);
15130 if (TREE_CODE (arg2) != INTEGER_CST
15131 || TREE_INT_CST_LOW (arg2) & ~0x1)
15133 error ("argument 3 must be a 1-bit unsigned literal");
15134 return CONST0_RTX (tmode);
15137 else if (icode == CODE_FOR_dfp_ddedpd_dd
15138 || icode == CODE_FOR_dfp_ddedpd_td)
15140 /* Only allow 2-bit unsigned literals where the value is 0 or 2. */
15141 STRIP_NOPS (arg0);
15142 if (TREE_CODE (arg0) != INTEGER_CST
15143 || (TREE_INT_CST_LOW (arg0) != 0 && TREE_INT_CST_LOW (arg0) != 2))
15145 error ("argument 1 must be 0 or 2");
15146 return CONST0_RTX (tmode);
15149 else if (icode == CODE_FOR_dfp_denbcd_dd
15150 || icode == CODE_FOR_dfp_denbcd_td)
15152 /* Only allow 1-bit unsigned literals. */
15153 STRIP_NOPS (arg0);
15154 if (TREE_CODE (arg0) != INTEGER_CST
15155 || TREE_INT_CST_LOW (arg0) & ~0x1)
15157 error ("argument 1 must be a 1-bit unsigned literal");
15158 return CONST0_RTX (tmode);
15161 else if (icode == CODE_FOR_dfp_dscli_dd
15162 || icode == CODE_FOR_dfp_dscli_td
15163 || icode == CODE_FOR_dfp_dscri_dd
15164 || icode == CODE_FOR_dfp_dscri_td)
15166 /* Only allow 6-bit unsigned literals. */
15167 STRIP_NOPS (arg1);
15168 if (TREE_CODE (arg1) != INTEGER_CST
15169 || TREE_INT_CST_LOW (arg1) & ~0x3f)
15171 error ("argument 2 must be a 6-bit unsigned literal");
15172 return CONST0_RTX (tmode);
15175 else if (icode == CODE_FOR_crypto_vshasigmaw
15176 || icode == CODE_FOR_crypto_vshasigmad)
15178 /* Check that the 2nd and 3rd arguments are integer constants in
15179 range, and prepare the arguments. */
15180 STRIP_NOPS (arg1);
15181 if (TREE_CODE (arg1) != INTEGER_CST || wi::geu_p (arg1, 2))
15183 error ("argument 2 must be 0 or 1");
15184 return CONST0_RTX (tmode);
15187 STRIP_NOPS (arg2);
15188 if (TREE_CODE (arg2) != INTEGER_CST || wi::geu_p (arg2, 16))
15190 error ("argument 3 must be in the range 0..15");
15191 return CONST0_RTX (tmode);
15195 if (target == 0
15196 || GET_MODE (target) != tmode
15197 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
15198 target = gen_reg_rtx (tmode);
15200 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
15201 op0 = copy_to_mode_reg (mode0, op0);
15202 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
15203 op1 = copy_to_mode_reg (mode1, op1);
15204 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
15205 op2 = copy_to_mode_reg (mode2, op2);
15207 if (TARGET_PAIRED_FLOAT && icode == CODE_FOR_selv2sf4)
15208 pat = GEN_FCN (icode) (target, op0, op1, op2, CONST0_RTX (SFmode));
15209 else
15210 pat = GEN_FCN (icode) (target, op0, op1, op2);
15211 if (! pat)
15212 return 0;
15213 emit_insn (pat);
15215 return target;
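/* For example (a sketch, assuming -maltivec), vec_sld (a, b, 3) on
   vector int reaches this function as CODE_FOR_altivec_vsldoi_v4si,
   and the shift count 3 is checked against the 4-bit literal range
   above before the insn is emitted.  */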
15218 /* Expand the lvx builtins. */
15219 static rtx
15220 altivec_expand_ld_builtin (tree exp, rtx target, bool *expandedp)
15222 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
15223 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
15224 tree arg0;
15225 machine_mode tmode, mode0;
15226 rtx pat, op0;
15227 enum insn_code icode;
15229 switch (fcode)
15231 case ALTIVEC_BUILTIN_LD_INTERNAL_16qi:
15232 icode = CODE_FOR_vector_altivec_load_v16qi;
15233 break;
15234 case ALTIVEC_BUILTIN_LD_INTERNAL_8hi:
15235 icode = CODE_FOR_vector_altivec_load_v8hi;
15236 break;
15237 case ALTIVEC_BUILTIN_LD_INTERNAL_4si:
15238 icode = CODE_FOR_vector_altivec_load_v4si;
15239 break;
15240 case ALTIVEC_BUILTIN_LD_INTERNAL_4sf:
15241 icode = CODE_FOR_vector_altivec_load_v4sf;
15242 break;
15243 case ALTIVEC_BUILTIN_LD_INTERNAL_2df:
15244 icode = CODE_FOR_vector_altivec_load_v2df;
15245 break;
15246 case ALTIVEC_BUILTIN_LD_INTERNAL_2di:
15247 icode = CODE_FOR_vector_altivec_load_v2di;
15248 break;
15249 case ALTIVEC_BUILTIN_LD_INTERNAL_1ti:
15250 icode = CODE_FOR_vector_altivec_load_v1ti;
15251 break;
15252 default:
15253 *expandedp = false;
15254 return NULL_RTX;
15257 *expandedp = true;
15259 arg0 = CALL_EXPR_ARG (exp, 0);
15260 op0 = expand_normal (arg0);
15261 tmode = insn_data[icode].operand[0].mode;
15262 mode0 = insn_data[icode].operand[1].mode;
15264 if (target == 0
15265 || GET_MODE (target) != tmode
15266 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
15267 target = gen_reg_rtx (tmode);
15269 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
15270 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
15272 pat = GEN_FCN (icode) (target, op0);
15273 if (! pat)
15274 return 0;
15275 emit_insn (pat);
15276 return target;
15279 /* Expand the stvx builtins. */
15280 static rtx
15281 altivec_expand_st_builtin (tree exp, rtx target ATTRIBUTE_UNUSED,
15282 bool *expandedp)
15284 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
15285 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
15286 tree arg0, arg1;
15287 machine_mode mode0, mode1;
15288 rtx pat, op0, op1;
15289 enum insn_code icode;
15291 switch (fcode)
15293 case ALTIVEC_BUILTIN_ST_INTERNAL_16qi:
15294 icode = CODE_FOR_vector_altivec_store_v16qi;
15295 break;
15296 case ALTIVEC_BUILTIN_ST_INTERNAL_8hi:
15297 icode = CODE_FOR_vector_altivec_store_v8hi;
15298 break;
15299 case ALTIVEC_BUILTIN_ST_INTERNAL_4si:
15300 icode = CODE_FOR_vector_altivec_store_v4si;
15301 break;
15302 case ALTIVEC_BUILTIN_ST_INTERNAL_4sf:
15303 icode = CODE_FOR_vector_altivec_store_v4sf;
15304 break;
15305 case ALTIVEC_BUILTIN_ST_INTERNAL_2df:
15306 icode = CODE_FOR_vector_altivec_store_v2df;
15307 break;
15308 case ALTIVEC_BUILTIN_ST_INTERNAL_2di:
15309 icode = CODE_FOR_vector_altivec_store_v2di;
15310 break;
15311 case ALTIVEC_BUILTIN_ST_INTERNAL_1ti:
15312 icode = CODE_FOR_vector_altivec_store_v1ti;
15313 break;
15314 default:
15315 *expandedp = false;
15316 return NULL_RTX;
15319 arg0 = CALL_EXPR_ARG (exp, 0);
15320 arg1 = CALL_EXPR_ARG (exp, 1);
15321 op0 = expand_normal (arg0);
15322 op1 = expand_normal (arg1);
15323 mode0 = insn_data[icode].operand[0].mode;
15324 mode1 = insn_data[icode].operand[1].mode;
15326 if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
15327 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
15328 if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
15329 op1 = copy_to_mode_reg (mode1, op1);
15331 pat = GEN_FCN (icode) (op0, op1);
15332 if (pat)
15333 emit_insn (pat);
15335 *expandedp = true;
15336 return NULL_RTX;
15339 /* Expand the dst builtins. */
15340 static rtx
15341 altivec_expand_dst_builtin (tree exp, rtx target ATTRIBUTE_UNUSED,
15342 bool *expandedp)
15344 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
15345 enum rs6000_builtins fcode = (enum rs6000_builtins) DECL_FUNCTION_CODE (fndecl);
15346 tree arg0, arg1, arg2;
15347 machine_mode mode0, mode1;
15348 rtx pat, op0, op1, op2;
15349 const struct builtin_description *d;
15350 size_t i;
15352 *expandedp = false;
15354 /* Handle DST variants. */
15355 d = bdesc_dst;
15356 for (i = 0; i < ARRAY_SIZE (bdesc_dst); i++, d++)
15357 if (d->code == fcode)
15359 arg0 = CALL_EXPR_ARG (exp, 0);
15360 arg1 = CALL_EXPR_ARG (exp, 1);
15361 arg2 = CALL_EXPR_ARG (exp, 2);
15362 op0 = expand_normal (arg0);
15363 op1 = expand_normal (arg1);
15364 op2 = expand_normal (arg2);
15365 mode0 = insn_data[d->icode].operand[0].mode;
15366 mode1 = insn_data[d->icode].operand[1].mode;
15368 /* Invalid arguments, bail out before generating bad rtl. */
15369 if (arg0 == error_mark_node
15370 || arg1 == error_mark_node
15371 || arg2 == error_mark_node)
15372 return const0_rtx;
15374 *expandedp = true;
15375 STRIP_NOPS (arg2);
15376 if (TREE_CODE (arg2) != INTEGER_CST
15377 || TREE_INT_CST_LOW (arg2) & ~0x3)
15379 error ("argument to %qs must be a 2-bit unsigned literal", d->name);
15380 return const0_rtx;
15383 if (! (*insn_data[d->icode].operand[0].predicate) (op0, mode0))
15384 op0 = copy_to_mode_reg (Pmode, op0);
15385 if (! (*insn_data[d->icode].operand[1].predicate) (op1, mode1))
15386 op1 = copy_to_mode_reg (mode1, op1);
15388 pat = GEN_FCN (d->icode) (op0, op1, op2);
15389 if (pat != 0)
15390 emit_insn (pat);
15392 return NULL_RTX;
15395 return NULL_RTX;
15398 /* Expand vec_init builtin. */
15399 static rtx
15400 altivec_expand_vec_init_builtin (tree type, tree exp, rtx target)
15402 machine_mode tmode = TYPE_MODE (type);
15403 machine_mode inner_mode = GET_MODE_INNER (tmode);
15404 int i, n_elt = GET_MODE_NUNITS (tmode);
15406 gcc_assert (VECTOR_MODE_P (tmode));
15407 gcc_assert (n_elt == call_expr_nargs (exp));
15409 if (!target || !register_operand (target, tmode))
15410 target = gen_reg_rtx (tmode);
15412 /* If we have a vector composed of a single element, such as V1TImode, do
15413 the initialization directly. */
15414 if (n_elt == 1 && GET_MODE_SIZE (tmode) == GET_MODE_SIZE (inner_mode))
15416 rtx x = expand_normal (CALL_EXPR_ARG (exp, 0));
15417 emit_move_insn (target, gen_lowpart (tmode, x));
15419 else
15421 rtvec v = rtvec_alloc (n_elt);
15423 for (i = 0; i < n_elt; ++i)
15425 rtx x = expand_normal (CALL_EXPR_ARG (exp, i));
15426 RTVEC_ELT (v, i) = gen_lowpart (inner_mode, x);
15429 rs6000_expand_vector_init (target, gen_rtx_PARALLEL (tmode, v));
15432 return target;
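/* For example, initializing a V4SI vector through the VEC_INIT_V4SI
   builtin with four scalar arguments expands each argument and hands
   the four values to rs6000_expand_vector_init as a PARALLEL, while a
   V1TI initializer takes the single-element shortcut above.  */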
15435 /* Return the integer constant in ARG. Constrain it to be in the range
15436 of the subparts of VEC_TYPE; issue an error if not. */
15438 static int
15439 get_element_number (tree vec_type, tree arg)
15441 unsigned HOST_WIDE_INT elt, max = TYPE_VECTOR_SUBPARTS (vec_type) - 1;
15443 if (!tree_fits_uhwi_p (arg)
15444 || (elt = tree_to_uhwi (arg), elt > max))
15446 error ("selector must be an integer constant in the range 0..%wi", max);
15447 return 0;
15450 return elt;
15453 /* Expand vec_set builtin. */
15454 static rtx
15455 altivec_expand_vec_set_builtin (tree exp)
15457 machine_mode tmode, mode1;
15458 tree arg0, arg1, arg2;
15459 int elt;
15460 rtx op0, op1;
15462 arg0 = CALL_EXPR_ARG (exp, 0);
15463 arg1 = CALL_EXPR_ARG (exp, 1);
15464 arg2 = CALL_EXPR_ARG (exp, 2);
15466 tmode = TYPE_MODE (TREE_TYPE (arg0));
15467 mode1 = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
15468 gcc_assert (VECTOR_MODE_P (tmode));
15470 op0 = expand_expr (arg0, NULL_RTX, tmode, EXPAND_NORMAL);
15471 op1 = expand_expr (arg1, NULL_RTX, mode1, EXPAND_NORMAL);
15472 elt = get_element_number (TREE_TYPE (arg0), arg2);
15474 if (GET_MODE (op1) != mode1 && GET_MODE (op1) != VOIDmode)
15475 op1 = convert_modes (mode1, GET_MODE (op1), op1, true);
15477 op0 = force_reg (tmode, op0);
15478 op1 = force_reg (mode1, op1);
15480 rs6000_expand_vector_set (op0, op1, elt);
15482 return op0;
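/* For example, a call that reaches here as ALTIVEC_BUILTIN_VEC_SET_V4SI
   carries the vector, the new scalar value, and the element number; the
   element number is validated by get_element_number, and the insertion
   itself is done by rs6000_expand_vector_set.  */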
15485 /* Expand vec_ext builtin. */
15486 static rtx
15487 altivec_expand_vec_ext_builtin (tree exp, rtx target)
15489 machine_mode tmode, mode0;
15490 tree arg0, arg1;
15491 rtx op0;
15492 rtx op1;
15494 arg0 = CALL_EXPR_ARG (exp, 0);
15495 arg1 = CALL_EXPR_ARG (exp, 1);
15497 op0 = expand_normal (arg0);
15498 op1 = expand_normal (arg1);
15500 /* Call get_element_number to validate arg1 if it is a constant. */
15501 if (TREE_CODE (arg1) == INTEGER_CST)
15502 (void) get_element_number (TREE_TYPE (arg0), arg1);
15504 tmode = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
15505 mode0 = TYPE_MODE (TREE_TYPE (arg0));
15506 gcc_assert (VECTOR_MODE_P (mode0));
15508 op0 = force_reg (mode0, op0);
15510 if (optimize || !target || !register_operand (target, tmode))
15511 target = gen_reg_rtx (tmode);
15513 rs6000_expand_vector_extract (target, op0, op1);
15515 return target;
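/* For example (a sketch, assuming -maltivec), vec_extract (v, 2) on
   vector int reaches here as ALTIVEC_BUILTIN_VEC_EXT_V4SI; a constant
   element number is range-checked, and rs6000_expand_vector_extract
   emits the extraction whether or not the selector is constant.  */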
15518 /* Expand the builtin in EXP and store the result in TARGET. Store
15519 true in *EXPANDEDP if we found a builtin to expand. */
15520 static rtx
15521 altivec_expand_builtin (tree exp, rtx target, bool *expandedp)
15523 const struct builtin_description *d;
15524 size_t i;
15525 enum insn_code icode;
15526 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
15527 tree arg0, arg1, arg2;
15528 rtx op0, pat;
15529 machine_mode tmode, mode0;
15530 enum rs6000_builtins fcode
15531 = (enum rs6000_builtins) DECL_FUNCTION_CODE (fndecl);
15533 if (rs6000_overloaded_builtin_p (fcode))
15535 *expandedp = true;
15536 error ("unresolved overload for Altivec builtin %qF", fndecl);
15538 /* Given it is invalid, just generate a normal call. */
15539 return expand_call (exp, target, false);
15542 target = altivec_expand_ld_builtin (exp, target, expandedp);
15543 if (*expandedp)
15544 return target;
15546 target = altivec_expand_st_builtin (exp, target, expandedp);
15547 if (*expandedp)
15548 return target;
15550 target = altivec_expand_dst_builtin (exp, target, expandedp);
15551 if (*expandedp)
15552 return target;
15554 *expandedp = true;
15556 switch (fcode)
15558 case ALTIVEC_BUILTIN_STVX_V2DF:
15559 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvx_v2df_2op, exp);
15560 case ALTIVEC_BUILTIN_STVX_V2DI:
15561 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvx_v2di_2op, exp);
15562 case ALTIVEC_BUILTIN_STVX_V4SF:
15563 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvx_v4sf_2op, exp);
15564 case ALTIVEC_BUILTIN_STVX:
15565 case ALTIVEC_BUILTIN_STVX_V4SI:
15566 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvx_v4si_2op, exp);
15567 case ALTIVEC_BUILTIN_STVX_V8HI:
15568 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvx_v8hi_2op, exp);
15569 case ALTIVEC_BUILTIN_STVX_V16QI:
15570 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvx_v16qi_2op, exp);
15571 case ALTIVEC_BUILTIN_STVEBX:
15572 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvebx, exp);
15573 case ALTIVEC_BUILTIN_STVEHX:
15574 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvehx, exp);
15575 case ALTIVEC_BUILTIN_STVEWX:
15576 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvewx, exp);
15577 case ALTIVEC_BUILTIN_STVXL_V2DF:
15578 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvxl_v2df, exp);
15579 case ALTIVEC_BUILTIN_STVXL_V2DI:
15580 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvxl_v2di, exp);
15581 case ALTIVEC_BUILTIN_STVXL_V4SF:
15582 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvxl_v4sf, exp);
15583 case ALTIVEC_BUILTIN_STVXL:
15584 case ALTIVEC_BUILTIN_STVXL_V4SI:
15585 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvxl_v4si, exp);
15586 case ALTIVEC_BUILTIN_STVXL_V8HI:
15587 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvxl_v8hi, exp);
15588 case ALTIVEC_BUILTIN_STVXL_V16QI:
15589 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvxl_v16qi, exp);
15591 case ALTIVEC_BUILTIN_STVLX:
15592 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvlx, exp);
15593 case ALTIVEC_BUILTIN_STVLXL:
15594 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvlxl, exp);
15595 case ALTIVEC_BUILTIN_STVRX:
15596 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvrx, exp);
15597 case ALTIVEC_BUILTIN_STVRXL:
15598 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvrxl, exp);
15600 case P9V_BUILTIN_STXVL:
15601 return altivec_expand_stxvl_builtin (CODE_FOR_stxvl, exp);
15603 case VSX_BUILTIN_STXVD2X_V1TI:
15604 return altivec_expand_stv_builtin (CODE_FOR_vsx_store_v1ti, exp);
15605 case VSX_BUILTIN_STXVD2X_V2DF:
15606 return altivec_expand_stv_builtin (CODE_FOR_vsx_store_v2df, exp);
15607 case VSX_BUILTIN_STXVD2X_V2DI:
15608 return altivec_expand_stv_builtin (CODE_FOR_vsx_store_v2di, exp);
15609 case VSX_BUILTIN_STXVW4X_V4SF:
15610 return altivec_expand_stv_builtin (CODE_FOR_vsx_store_v4sf, exp);
15611 case VSX_BUILTIN_STXVW4X_V4SI:
15612 return altivec_expand_stv_builtin (CODE_FOR_vsx_store_v4si, exp);
15613 case VSX_BUILTIN_STXVW4X_V8HI:
15614 return altivec_expand_stv_builtin (CODE_FOR_vsx_store_v8hi, exp);
15615 case VSX_BUILTIN_STXVW4X_V16QI:
15616 return altivec_expand_stv_builtin (CODE_FOR_vsx_store_v16qi, exp);
15618 /* For the following on big endian, it's ok to use any appropriate
15619 unaligned-supporting store, so use a generic expander. For
15620 little-endian, the exact element-reversing instruction must
15621 be used. */
15622 case VSX_BUILTIN_ST_ELEMREV_V2DF:
15624 enum insn_code code = (BYTES_BIG_ENDIAN ? CODE_FOR_vsx_store_v2df
15625 : CODE_FOR_vsx_st_elemrev_v2df);
15626 return altivec_expand_stv_builtin (code, exp);
15628 case VSX_BUILTIN_ST_ELEMREV_V2DI:
15630 enum insn_code code = (BYTES_BIG_ENDIAN ? CODE_FOR_vsx_store_v2di
15631 : CODE_FOR_vsx_st_elemrev_v2di);
15632 return altivec_expand_stv_builtin (code, exp);
15634 case VSX_BUILTIN_ST_ELEMREV_V4SF:
15636 enum insn_code code = (BYTES_BIG_ENDIAN ? CODE_FOR_vsx_store_v4sf
15637 : CODE_FOR_vsx_st_elemrev_v4sf);
15638 return altivec_expand_stv_builtin (code, exp);
15640 case VSX_BUILTIN_ST_ELEMREV_V4SI:
15642 enum insn_code code = (BYTES_BIG_ENDIAN ? CODE_FOR_vsx_store_v4si
15643 : CODE_FOR_vsx_st_elemrev_v4si);
15644 return altivec_expand_stv_builtin (code, exp);
15646 case VSX_BUILTIN_ST_ELEMREV_V8HI:
15648 enum insn_code code = (BYTES_BIG_ENDIAN ? CODE_FOR_vsx_store_v8hi
15649 : CODE_FOR_vsx_st_elemrev_v8hi);
15650 return altivec_expand_stv_builtin (code, exp);
15652 case VSX_BUILTIN_ST_ELEMREV_V16QI:
15654 enum insn_code code = (BYTES_BIG_ENDIAN ? CODE_FOR_vsx_store_v16qi
15655 : CODE_FOR_vsx_st_elemrev_v16qi);
15656 return altivec_expand_stv_builtin (code, exp);
15659 case ALTIVEC_BUILTIN_MFVSCR:
15660 icode = CODE_FOR_altivec_mfvscr;
15661 tmode = insn_data[icode].operand[0].mode;
15663 if (target == 0
15664 || GET_MODE (target) != tmode
15665 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
15666 target = gen_reg_rtx (tmode);
15668 pat = GEN_FCN (icode) (target);
15669 if (! pat)
15670 return 0;
15671 emit_insn (pat);
15672 return target;
15674 case ALTIVEC_BUILTIN_MTVSCR:
15675 icode = CODE_FOR_altivec_mtvscr;
15676 arg0 = CALL_EXPR_ARG (exp, 0);
15677 op0 = expand_normal (arg0);
15678 mode0 = insn_data[icode].operand[0].mode;
15680 /* If we got invalid arguments, bail out before generating bad rtl. */
15681 if (arg0 == error_mark_node)
15682 return const0_rtx;
15684 if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
15685 op0 = copy_to_mode_reg (mode0, op0);
15687 pat = GEN_FCN (icode) (op0);
15688 if (pat)
15689 emit_insn (pat);
15690 return NULL_RTX;
15692 case ALTIVEC_BUILTIN_DSSALL:
15693 emit_insn (gen_altivec_dssall ());
15694 return NULL_RTX;
15696 case ALTIVEC_BUILTIN_DSS:
15697 icode = CODE_FOR_altivec_dss;
15698 arg0 = CALL_EXPR_ARG (exp, 0);
15699 STRIP_NOPS (arg0);
15700 op0 = expand_normal (arg0);
15701 mode0 = insn_data[icode].operand[0].mode;
15703 /* If we got invalid arguments, bail out before generating bad rtl. */
15704 if (arg0 == error_mark_node)
15705 return const0_rtx;
15707 if (TREE_CODE (arg0) != INTEGER_CST
15708 || TREE_INT_CST_LOW (arg0) & ~0x3)
15710 error ("argument to dss must be a 2-bit unsigned literal");
15711 return const0_rtx;
15714 if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
15715 op0 = copy_to_mode_reg (mode0, op0);
15717 emit_insn (gen_altivec_dss (op0));
15718 return NULL_RTX;
15720 case ALTIVEC_BUILTIN_VEC_INIT_V4SI:
15721 case ALTIVEC_BUILTIN_VEC_INIT_V8HI:
15722 case ALTIVEC_BUILTIN_VEC_INIT_V16QI:
15723 case ALTIVEC_BUILTIN_VEC_INIT_V4SF:
15724 case VSX_BUILTIN_VEC_INIT_V2DF:
15725 case VSX_BUILTIN_VEC_INIT_V2DI:
15726 case VSX_BUILTIN_VEC_INIT_V1TI:
15727 return altivec_expand_vec_init_builtin (TREE_TYPE (exp), exp, target);
15729 case ALTIVEC_BUILTIN_VEC_SET_V4SI:
15730 case ALTIVEC_BUILTIN_VEC_SET_V8HI:
15731 case ALTIVEC_BUILTIN_VEC_SET_V16QI:
15732 case ALTIVEC_BUILTIN_VEC_SET_V4SF:
15733 case VSX_BUILTIN_VEC_SET_V2DF:
15734 case VSX_BUILTIN_VEC_SET_V2DI:
15735 case VSX_BUILTIN_VEC_SET_V1TI:
15736 return altivec_expand_vec_set_builtin (exp);
15738 case ALTIVEC_BUILTIN_VEC_EXT_V4SI:
15739 case ALTIVEC_BUILTIN_VEC_EXT_V8HI:
15740 case ALTIVEC_BUILTIN_VEC_EXT_V16QI:
15741 case ALTIVEC_BUILTIN_VEC_EXT_V4SF:
15742 case VSX_BUILTIN_VEC_EXT_V2DF:
15743 case VSX_BUILTIN_VEC_EXT_V2DI:
15744 case VSX_BUILTIN_VEC_EXT_V1TI:
15745 return altivec_expand_vec_ext_builtin (exp, target);
15747 case P9V_BUILTIN_VEXTRACT4B:
15748 case P9V_BUILTIN_VEC_VEXTRACT4B:
15749 arg1 = CALL_EXPR_ARG (exp, 1);
15750 STRIP_NOPS (arg1);
15752 /* Generate a normal call if it is invalid. */
15753 if (arg1 == error_mark_node)
15754 return expand_call (exp, target, false);
15756 if (TREE_CODE (arg1) != INTEGER_CST || TREE_INT_CST_LOW (arg1) > 12)
15758 error ("second argument to vec_vextract4b must be 0..12");
15759 return expand_call (exp, target, false);
15761 break;
15763 case P9V_BUILTIN_VINSERT4B:
15764 case P9V_BUILTIN_VINSERT4B_DI:
15765 case P9V_BUILTIN_VEC_VINSERT4B:
15766 arg2 = CALL_EXPR_ARG (exp, 2);
15767 STRIP_NOPS (arg2);
15769 /* Generate a normal call if it is invalid. */
15770 if (arg2 == error_mark_node)
15771 return expand_call (exp, target, false);
15773 if (TREE_CODE (arg2) != INTEGER_CST || TREE_INT_CST_LOW (arg2) > 12)
15775 error ("third argument to vec_vinsert4b must be 0..12");
15776 return expand_call (exp, target, false);
15778 break;
15780 default:
15781 break;
15782 /* Fall through. */
15785 /* Expand abs* operations. */
15786 d = bdesc_abs;
15787 for (i = 0; i < ARRAY_SIZE (bdesc_abs); i++, d++)
15788 if (d->code == fcode)
15789 return altivec_expand_abs_builtin (d->icode, exp, target);
15791 /* Expand the AltiVec predicates. */
15792 d = bdesc_altivec_preds;
15793 for (i = 0; i < ARRAY_SIZE (bdesc_altivec_preds); i++, d++)
15794 if (d->code == fcode)
15795 return altivec_expand_predicate_builtin (d->icode, exp, target);
15797 /* The LV* builtins are funky; they were initialized differently, so handle them here. */
15798 switch (fcode)
15800 case ALTIVEC_BUILTIN_LVSL:
15801 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvsl,
15802 exp, target, false);
15803 case ALTIVEC_BUILTIN_LVSR:
15804 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvsr,
15805 exp, target, false);
15806 case ALTIVEC_BUILTIN_LVEBX:
15807 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvebx,
15808 exp, target, false);
15809 case ALTIVEC_BUILTIN_LVEHX:
15810 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvehx,
15811 exp, target, false);
15812 case ALTIVEC_BUILTIN_LVEWX:
15813 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvewx,
15814 exp, target, false);
15815 case ALTIVEC_BUILTIN_LVXL_V2DF:
15816 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvxl_v2df,
15817 exp, target, false);
15818 case ALTIVEC_BUILTIN_LVXL_V2DI:
15819 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvxl_v2di,
15820 exp, target, false);
15821 case ALTIVEC_BUILTIN_LVXL_V4SF:
15822 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvxl_v4sf,
15823 exp, target, false);
15824 case ALTIVEC_BUILTIN_LVXL:
15825 case ALTIVEC_BUILTIN_LVXL_V4SI:
15826 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvxl_v4si,
15827 exp, target, false);
15828 case ALTIVEC_BUILTIN_LVXL_V8HI:
15829 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvxl_v8hi,
15830 exp, target, false);
15831 case ALTIVEC_BUILTIN_LVXL_V16QI:
15832 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvxl_v16qi,
15833 exp, target, false);
15834 case ALTIVEC_BUILTIN_LVX_V2DF:
15835 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvx_v2df_2op,
15836 exp, target, false);
15837 case ALTIVEC_BUILTIN_LVX_V2DI:
15838 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvx_v2di_2op,
15839 exp, target, false);
15840 case ALTIVEC_BUILTIN_LVX_V4SF:
15841 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvx_v4sf_2op,
15842 exp, target, false);
15843 case ALTIVEC_BUILTIN_LVX:
15844 case ALTIVEC_BUILTIN_LVX_V4SI:
15845 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvx_v4si_2op,
15846 exp, target, false);
15847 case ALTIVEC_BUILTIN_LVX_V8HI:
15848 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvx_v8hi_2op,
15849 exp, target, false);
15850 case ALTIVEC_BUILTIN_LVX_V16QI:
15851 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvx_v16qi_2op,
15852 exp, target, false);
15853 case ALTIVEC_BUILTIN_LVLX:
15854 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvlx,
15855 exp, target, true);
15856 case ALTIVEC_BUILTIN_LVLXL:
15857 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvlxl,
15858 exp, target, true);
15859 case ALTIVEC_BUILTIN_LVRX:
15860 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvrx,
15861 exp, target, true);
15862 case ALTIVEC_BUILTIN_LVRXL:
15863 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvrxl,
15864 exp, target, true);
15865 case VSX_BUILTIN_LXVD2X_V1TI:
15866 return altivec_expand_lv_builtin (CODE_FOR_vsx_load_v1ti,
15867 exp, target, false);
15868 case VSX_BUILTIN_LXVD2X_V2DF:
15869 return altivec_expand_lv_builtin (CODE_FOR_vsx_load_v2df,
15870 exp, target, false);
15871 case VSX_BUILTIN_LXVD2X_V2DI:
15872 return altivec_expand_lv_builtin (CODE_FOR_vsx_load_v2di,
15873 exp, target, false);
15874 case VSX_BUILTIN_LXVW4X_V4SF:
15875 return altivec_expand_lv_builtin (CODE_FOR_vsx_load_v4sf,
15876 exp, target, false);
15877 case VSX_BUILTIN_LXVW4X_V4SI:
15878 return altivec_expand_lv_builtin (CODE_FOR_vsx_load_v4si,
15879 exp, target, false);
15880 case VSX_BUILTIN_LXVW4X_V8HI:
15881 return altivec_expand_lv_builtin (CODE_FOR_vsx_load_v8hi,
15882 exp, target, false);
15883 case VSX_BUILTIN_LXVW4X_V16QI:
15884 return altivec_expand_lv_builtin (CODE_FOR_vsx_load_v16qi,
15885 exp, target, false);
15886 /* For the following on big endian, it's ok to use any appropriate
15887 unaligned-supporting load, so use a generic expander. For
15888 little-endian, the exact element-reversing instruction must
15889 be used. */
15890 case VSX_BUILTIN_LD_ELEMREV_V2DF:
15892 enum insn_code code = (BYTES_BIG_ENDIAN ? CODE_FOR_vsx_load_v2df
15893 : CODE_FOR_vsx_ld_elemrev_v2df);
15894 return altivec_expand_lv_builtin (code, exp, target, false);
15896 case VSX_BUILTIN_LD_ELEMREV_V2DI:
15898 enum insn_code code = (BYTES_BIG_ENDIAN ? CODE_FOR_vsx_load_v2di
15899 : CODE_FOR_vsx_ld_elemrev_v2di);
15900 return altivec_expand_lv_builtin (code, exp, target, false);
15902 case VSX_BUILTIN_LD_ELEMREV_V4SF:
15904 enum insn_code code = (BYTES_BIG_ENDIAN ? CODE_FOR_vsx_load_v4sf
15905 : CODE_FOR_vsx_ld_elemrev_v4sf);
15906 return altivec_expand_lv_builtin (code, exp, target, false);
15908 case VSX_BUILTIN_LD_ELEMREV_V4SI:
15910 enum insn_code code = (BYTES_BIG_ENDIAN ? CODE_FOR_vsx_load_v4si
15911 : CODE_FOR_vsx_ld_elemrev_v4si);
15912 return altivec_expand_lv_builtin (code, exp, target, false);
15914 case VSX_BUILTIN_LD_ELEMREV_V8HI:
15916 enum insn_code code = (BYTES_BIG_ENDIAN ? CODE_FOR_vsx_load_v8hi
15917 : CODE_FOR_vsx_ld_elemrev_v8hi);
15918 return altivec_expand_lv_builtin (code, exp, target, false);
15920 case VSX_BUILTIN_LD_ELEMREV_V16QI:
15922 enum insn_code code = (BYTES_BIG_ENDIAN ? CODE_FOR_vsx_load_v16qi
15923 : CODE_FOR_vsx_ld_elemrev_v16qi);
15924 return altivec_expand_lv_builtin (code, exp, target, false);
15926 break;
15927 default:
15928 break;
15929 /* Fall through. */
15932 *expandedp = false;
15933 return NULL_RTX;
15936 /* Expand the builtin in EXP and store the result in TARGET. Store
15937 true in *EXPANDEDP if we found a builtin to expand. */
15938 static rtx
15939 paired_expand_builtin (tree exp, rtx target, bool * expandedp)
15941 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
15942 enum rs6000_builtins fcode = (enum rs6000_builtins) DECL_FUNCTION_CODE (fndecl);
15943 const struct builtin_description *d;
15944 size_t i;
15946 *expandedp = true;
15948 switch (fcode)
15950 case PAIRED_BUILTIN_STX:
15951 return paired_expand_stv_builtin (CODE_FOR_paired_stx, exp);
15952 case PAIRED_BUILTIN_LX:
15953 return paired_expand_lv_builtin (CODE_FOR_paired_lx, exp, target);
15954 default:
15955 break;
15956 /* Fall through. */
15959 /* Expand the paired predicates. */
15960 d = bdesc_paired_preds;
15961 for (i = 0; i < ARRAY_SIZE (bdesc_paired_preds); i++, d++)
15962 if (d->code == fcode)
15963 return paired_expand_predicate_builtin (d->icode, exp, target);
15965 *expandedp = false;
15966 return NULL_RTX;
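/* Expand a paired-single predicate builtin described by ICODE for call
   EXP.  The first argument selects which bit of the comparison result
   (LT, GT, EQ, or UN) is copied into TARGET.  */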
15969 static rtx
15970 paired_expand_predicate_builtin (enum insn_code icode, tree exp, rtx target)
15972 rtx pat, scratch, tmp;
15973 tree form = CALL_EXPR_ARG (exp, 0);
15974 tree arg0 = CALL_EXPR_ARG (exp, 1);
15975 tree arg1 = CALL_EXPR_ARG (exp, 2);
15976 rtx op0 = expand_normal (arg0);
15977 rtx op1 = expand_normal (arg1);
15978 machine_mode mode0 = insn_data[icode].operand[1].mode;
15979 machine_mode mode1 = insn_data[icode].operand[2].mode;
15980 int form_int;
15981 enum rtx_code code;
15983 if (TREE_CODE (form) != INTEGER_CST)
15985 error ("argument 1 of __builtin_paired_predicate must be a constant");
15986 return const0_rtx;
15988 else
15989 form_int = TREE_INT_CST_LOW (form);
15991 gcc_assert (mode0 == mode1);
15993 if (arg0 == error_mark_node || arg1 == error_mark_node)
15994 return const0_rtx;
15996 if (target == 0
15997 || GET_MODE (target) != SImode
15998 || !(*insn_data[icode].operand[0].predicate) (target, SImode))
15999 target = gen_reg_rtx (SImode);
16000 if (!(*insn_data[icode].operand[1].predicate) (op0, mode0))
16001 op0 = copy_to_mode_reg (mode0, op0);
16002 if (!(*insn_data[icode].operand[2].predicate) (op1, mode1))
16003 op1 = copy_to_mode_reg (mode1, op1);
16005 scratch = gen_reg_rtx (CCFPmode);
16007 pat = GEN_FCN (icode) (scratch, op0, op1);
16008 if (!pat)
16009 return const0_rtx;
16011 emit_insn (pat);
16013 switch (form_int)
16015 /* LT bit. */
16016 case 0:
16017 code = LT;
16018 break;
16019 /* GT bit. */
16020 case 1:
16021 code = GT;
16022 break;
16023 /* EQ bit. */
16024 case 2:
16025 code = EQ;
16026 break;
16027 /* UN bit. */
16028 case 3:
16029 emit_insn (gen_move_from_CR_ov_bit (target, scratch));
16030 return target;
16031 default:
16032 error ("argument 1 of __builtin_paired_predicate is out of range");
16033 return const0_rtx;
16036 tmp = gen_rtx_fmt_ee (code, SImode, scratch, const0_rtx);
16037 emit_move_insn (target, tmp);
16038 return target;
16041 /* Raise an error message for a builtin function that is called without the
16042 appropriate target options being set. */
16044 static void
16045 rs6000_invalid_builtin (enum rs6000_builtins fncode)
16047 size_t uns_fncode = (size_t)fncode;
16048 const char *name = rs6000_builtin_info[uns_fncode].name;
16049 HOST_WIDE_INT fnmask = rs6000_builtin_info[uns_fncode].mask;
16051 gcc_assert (name != NULL);
16052 if ((fnmask & RS6000_BTM_CELL) != 0)
16053 error ("Builtin function %s is only valid for the cell processor", name);
16054 else if ((fnmask & RS6000_BTM_VSX) != 0)
16055 error ("Builtin function %s requires the -mvsx option", name);
16056 else if ((fnmask & RS6000_BTM_HTM) != 0)
16057 error ("Builtin function %s requires the -mhtm option", name);
16058 else if ((fnmask & RS6000_BTM_ALTIVEC) != 0)
16059 error ("Builtin function %s requires the -maltivec option", name);
16060 else if ((fnmask & RS6000_BTM_PAIRED) != 0)
16061 error ("Builtin function %s requires the -mpaired option", name);
16062 else if ((fnmask & (RS6000_BTM_DFP | RS6000_BTM_P8_VECTOR))
16063 == (RS6000_BTM_DFP | RS6000_BTM_P8_VECTOR))
16064 error ("Builtin function %s requires the -mhard-dfp and"
16065 " -mpower8-vector options", name);
16066 else if ((fnmask & RS6000_BTM_DFP) != 0)
16067 error ("Builtin function %s requires the -mhard-dfp option", name);
16068 else if ((fnmask & RS6000_BTM_P8_VECTOR) != 0)
16069 error ("Builtin function %s requires the -mpower8-vector option", name);
16070 else if ((fnmask & (RS6000_BTM_P9_VECTOR | RS6000_BTM_64BIT))
16071 == (RS6000_BTM_P9_VECTOR | RS6000_BTM_64BIT))
16072 error ("Builtin function %s requires the -mcpu=power9 and"
16073 " -m64 options", name);
16074 else if ((fnmask & RS6000_BTM_P9_VECTOR) != 0)
16075 error ("Builtin function %s requires the -mcpu=power9 option", name);
16076 else if ((fnmask & (RS6000_BTM_P9_MISC | RS6000_BTM_64BIT))
16077 == (RS6000_BTM_P9_MISC | RS6000_BTM_64BIT))
16078 error ("Builtin function %s requires the -mcpu=power9 and"
16079 " -m64 options", name);
16080 else if ((fnmask & RS6000_BTM_P9_MISC) == RS6000_BTM_P9_MISC)
16081 error ("Builtin function %s requires the -mcpu=power9 option", name);
16082 else if ((fnmask & (RS6000_BTM_HARD_FLOAT | RS6000_BTM_LDBL128))
16083 == (RS6000_BTM_HARD_FLOAT | RS6000_BTM_LDBL128))
16084 error ("Builtin function %s requires the -mhard-float and"
16085 " -mlong-double-128 options", name);
16086 else if ((fnmask & RS6000_BTM_HARD_FLOAT) != 0)
16087 error ("Builtin function %s requires the -mhard-float option", name);
16088 else if ((fnmask & RS6000_BTM_FLOAT128) != 0)
16089 error ("Builtin function %s requires the -mfloat128 option", name);
16090 else
16091 error ("Builtin function %s is not supported with the current options",
16092 name);
16095 /* Target hook for early folding of built-ins, shamelessly stolen
16096 from ia64.c. */
16098 static tree
16099 rs6000_fold_builtin (tree fndecl, int n_args ATTRIBUTE_UNUSED,
16100 tree *args, bool ignore ATTRIBUTE_UNUSED)
16102 if (DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_MD)
16104 enum rs6000_builtins fn_code
16105 = (enum rs6000_builtins) DECL_FUNCTION_CODE (fndecl);
16106 switch (fn_code)
16108 case RS6000_BUILTIN_NANQ:
16109 case RS6000_BUILTIN_NANSQ:
16111 tree type = TREE_TYPE (TREE_TYPE (fndecl));
16112 const char *str = c_getstr (*args);
16113 int quiet = fn_code == RS6000_BUILTIN_NANQ;
16114 REAL_VALUE_TYPE real;
16116 if (str && real_nan (&real, str, quiet, TYPE_MODE (type)))
16117 return build_real (type, real);
16118 return NULL_TREE;
16120 case RS6000_BUILTIN_INFQ:
16121 case RS6000_BUILTIN_HUGE_VALQ:
16123 tree type = TREE_TYPE (TREE_TYPE (fndecl));
16124 REAL_VALUE_TYPE inf;
16125 real_inf (&inf);
16126 return build_real (type, inf);
16128 default:
16129 break;
16132 #ifdef SUBTARGET_FOLD_BUILTIN
16133 return SUBTARGET_FOLD_BUILTIN (fndecl, n_args, args, ignore);
16134 #else
16135 return NULL_TREE;
16136 #endif
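/* For example, __builtin_nanq ("") folds here to a quiet __float128
   NaN built by real_nan, and __builtin_infq folds to a __float128
   infinity, so both become compile-time constants.  */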
16139 /* Fold a machine-dependent built-in in GIMPLE. (For folding into
16140 a constant, use rs6000_fold_builtin.) */
16142 bool
16143 rs6000_gimple_fold_builtin (gimple_stmt_iterator *gsi)
16145 gimple *stmt = gsi_stmt (*gsi);
16146 tree fndecl = gimple_call_fndecl (stmt);
16147 gcc_checking_assert (fndecl && DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_MD);
16148 enum rs6000_builtins fn_code
16149 = (enum rs6000_builtins) DECL_FUNCTION_CODE (fndecl);
16150 tree arg0, arg1, lhs;
16152 /* Generic solution to prevent gimple folding of code without an LHS. */
16153 if (!gimple_call_lhs (stmt))
16154 return false;
16156 switch (fn_code)
16158 /* Flavors of vec_add. We deliberately don't expand
16159 P8V_BUILTIN_VADDUQM as it gets lowered from V1TImode to
16160 TImode, resulting in much poorer code generation. */
16161 case ALTIVEC_BUILTIN_VADDUBM:
16162 case ALTIVEC_BUILTIN_VADDUHM:
16163 case ALTIVEC_BUILTIN_VADDUWM:
16164 case P8V_BUILTIN_VADDUDM:
16165 case ALTIVEC_BUILTIN_VADDFP:
16166 case VSX_BUILTIN_XVADDDP:
16168 arg0 = gimple_call_arg (stmt, 0);
16169 arg1 = gimple_call_arg (stmt, 1);
16170 lhs = gimple_call_lhs (stmt);
16171 gimple *g = gimple_build_assign (lhs, PLUS_EXPR, arg0, arg1);
16172 gimple_set_location (g, gimple_location (stmt));
16173 gsi_replace (gsi, g, true);
16174 return true;
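/* For example, vec_add (a, b) on vector int arrives here as
   ALTIVEC_BUILTIN_VADDUWM and is replaced by a plain PLUS_EXPR
   assignment to the LHS, which later passes can optimize like any
   other vector addition.  */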
16176 /* Flavors of vec_sub. We deliberately don't expand
16177 P8V_BUILTIN_VSUBUQM. */
16178 case ALTIVEC_BUILTIN_VSUBUBM:
16179 case ALTIVEC_BUILTIN_VSUBUHM:
16180 case ALTIVEC_BUILTIN_VSUBUWM:
16181 case P8V_BUILTIN_VSUBUDM:
16182 case ALTIVEC_BUILTIN_VSUBFP:
16183 case VSX_BUILTIN_XVSUBDP:
16185 arg0 = gimple_call_arg (stmt, 0);
16186 arg1 = gimple_call_arg (stmt, 1);
16187 lhs = gimple_call_lhs (stmt);
16188 gimple *g = gimple_build_assign (lhs, MINUS_EXPR, arg0, arg1);
16189 gimple_set_location (g, gimple_location (stmt));
16190 gsi_replace (gsi, g, true);
16191 return true;
16193 case VSX_BUILTIN_XVMULSP:
16194 case VSX_BUILTIN_XVMULDP:
16196 arg0 = gimple_call_arg (stmt, 0);
16197 arg1 = gimple_call_arg (stmt, 1);
16198 lhs = gimple_call_lhs (stmt);
16199 gimple *g = gimple_build_assign (lhs, MULT_EXPR, arg0, arg1);
16200 gimple_set_location (g, gimple_location (stmt));
16201 gsi_replace (gsi, g, true);
16202 return true;
16204 /* Even element flavors of vec_mul (signed). */
16205 case ALTIVEC_BUILTIN_VMULESB:
16206 case ALTIVEC_BUILTIN_VMULESH:
16207 /* Even element flavors of vec_mul (unsigned). */
16208 case ALTIVEC_BUILTIN_VMULEUB:
16209 case ALTIVEC_BUILTIN_VMULEUH:
16211 arg0 = gimple_call_arg (stmt, 0);
16212 arg1 = gimple_call_arg (stmt, 1);
16213 lhs = gimple_call_lhs (stmt);
16214 gimple *g = gimple_build_assign (lhs, VEC_WIDEN_MULT_EVEN_EXPR, arg0, arg1);
16215 gimple_set_location (g, gimple_location (stmt));
16216 gsi_replace (gsi, g, true);
16217 return true;
16219 /* Odd element flavors of vec_mul (signed). */
16220 case ALTIVEC_BUILTIN_VMULOSB:
16221 case ALTIVEC_BUILTIN_VMULOSH:
16222 /* Odd element flavors of vec_mul (unsigned). */
16223 case ALTIVEC_BUILTIN_VMULOUB:
16224 case ALTIVEC_BUILTIN_VMULOUH:
16226 arg0 = gimple_call_arg (stmt, 0);
16227 arg1 = gimple_call_arg (stmt, 1);
16228 lhs = gimple_call_lhs (stmt);
16229 gimple *g = gimple_build_assign (lhs, VEC_WIDEN_MULT_ODD_EXPR, arg0, arg1);
16230 gimple_set_location (g, gimple_location (stmt));
16231 gsi_replace (gsi, g, true);
16232 return true;
16234 /* Flavors of vec_div (Integer). */
16235 case VSX_BUILTIN_DIV_V2DI:
16236 case VSX_BUILTIN_UDIV_V2DI:
16238 arg0 = gimple_call_arg (stmt, 0);
16239 arg1 = gimple_call_arg (stmt, 1);
16240 lhs = gimple_call_lhs (stmt);
16241 gimple *g = gimple_build_assign (lhs, TRUNC_DIV_EXPR, arg0, arg1);
16242 gimple_set_location (g, gimple_location (stmt));
16243 gsi_replace (gsi, g, true);
16244 return true;
16246 /* Flavors of vec_div (Float). */
16247 case VSX_BUILTIN_XVDIVSP:
16248 case VSX_BUILTIN_XVDIVDP:
16250 arg0 = gimple_call_arg (stmt, 0);
16251 arg1 = gimple_call_arg (stmt, 1);
16252 lhs = gimple_call_lhs (stmt);
16253 gimple *g = gimple_build_assign (lhs, RDIV_EXPR, arg0, arg1);
16254 gimple_set_location (g, gimple_location (stmt));
16255 gsi_replace (gsi, g, true);
16256 return true;
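/* Note the distinction between the two vec_div folds: the integer
   variants use TRUNC_DIV_EXPR (truncating division), while the
   floating-point variants use RDIV_EXPR, GIMPLE's real division.  */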
16258 /* Flavors of vec_and. */
16259 case ALTIVEC_BUILTIN_VAND:
16261 arg0 = gimple_call_arg (stmt, 0);
16262 arg1 = gimple_call_arg (stmt, 1);
16263 lhs = gimple_call_lhs (stmt);
16264 gimple *g = gimple_build_assign (lhs, BIT_AND_EXPR, arg0, arg1);
16265 gimple_set_location (g, gimple_location (stmt));
16266 gsi_replace (gsi, g, true);
16267 return true;
16269 /* Flavors of vec_andc. */
16270 case ALTIVEC_BUILTIN_VANDC:
16272 arg0 = gimple_call_arg (stmt, 0);
16273 arg1 = gimple_call_arg (stmt, 1);
16274 lhs = gimple_call_lhs (stmt);
16275 tree temp = create_tmp_reg_or_ssa_name (TREE_TYPE (arg1));
16276 gimple *g = gimple_build_assign (temp, BIT_NOT_EXPR, arg1);
16277 gimple_set_location (g, gimple_location (stmt));
16278 gsi_insert_before (gsi, g, GSI_SAME_STMT);
16279 g = gimple_build_assign (lhs, BIT_AND_EXPR, arg0, temp);
16280 gimple_set_location (g, gimple_location (stmt));
16281 gsi_replace (gsi, g, true);
16282 return true;
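/* Sketch of the two-statement expansion above (names hypothetical):
   vec_andc (a, b) becomes

     tmp = ~b;
     lhs = a & tmp;

   where the BIT_NOT_EXPR is inserted before the call statement and the
   final BIT_AND_EXPR then replaces the call itself.  The nand, orc,
   nor and eqv folds below follow the same insert-then-replace
   pattern.  */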
16284 /* Flavors of vec_nand. */
16285 case P8V_BUILTIN_VEC_NAND:
16286 case P8V_BUILTIN_NAND_V16QI:
16287 case P8V_BUILTIN_NAND_V8HI:
16288 case P8V_BUILTIN_NAND_V4SI:
16289 case P8V_BUILTIN_NAND_V4SF:
16290 case P8V_BUILTIN_NAND_V2DF:
16291 case P8V_BUILTIN_NAND_V2DI:
16293 arg0 = gimple_call_arg (stmt, 0);
16294 arg1 = gimple_call_arg (stmt, 1);
16295 lhs = gimple_call_lhs (stmt);
16296 tree temp = create_tmp_reg_or_ssa_name (TREE_TYPE (arg1));
16297 gimple *g = gimple_build_assign (temp, BIT_AND_EXPR, arg0, arg1);
16298 gimple_set_location (g, gimple_location (stmt));
16299 gsi_insert_before (gsi, g, GSI_SAME_STMT);
16300 g = gimple_build_assign (lhs, BIT_NOT_EXPR, temp);
16301 gimple_set_location (g, gimple_location (stmt));
16302 gsi_replace (gsi, g, true);
16303 return true;
16305 /* Flavors of vec_or. */
16306 case ALTIVEC_BUILTIN_VOR:
16308 arg0 = gimple_call_arg (stmt, 0);
16309 arg1 = gimple_call_arg (stmt, 1);
16310 lhs = gimple_call_lhs (stmt);
16311 gimple *g = gimple_build_assign (lhs, BIT_IOR_EXPR, arg0, arg1);
16312 gimple_set_location (g, gimple_location (stmt));
16313 gsi_replace (gsi, g, true);
16314 return true;
16316 /* Flavors of vec_orc. */
16317 case P8V_BUILTIN_ORC_V16QI:
16318 case P8V_BUILTIN_ORC_V8HI:
16319 case P8V_BUILTIN_ORC_V4SI:
16320 case P8V_BUILTIN_ORC_V4SF:
16321 case P8V_BUILTIN_ORC_V2DF:
16322 case P8V_BUILTIN_ORC_V2DI:
16324 arg0 = gimple_call_arg (stmt, 0);
16325 arg1 = gimple_call_arg (stmt, 1);
16326 lhs = gimple_call_lhs (stmt);
16327 tree temp = create_tmp_reg_or_ssa_name (TREE_TYPE (arg1));
16328 gimple *g = gimple_build_assign (temp, BIT_NOT_EXPR, arg1);
16329 gimple_set_location (g, gimple_location (stmt));
16330 gsi_insert_before (gsi, g, GSI_SAME_STMT);
16331 g = gimple_build_assign (lhs, BIT_IOR_EXPR, arg0, temp);
16332 gimple_set_location (g, gimple_location (stmt));
16333 gsi_replace (gsi, g, true);
16334 return true;
16336 /* Flavors of vec_xor. */
16337 case ALTIVEC_BUILTIN_VXOR:
16339 arg0 = gimple_call_arg (stmt, 0);
16340 arg1 = gimple_call_arg (stmt, 1);
16341 lhs = gimple_call_lhs (stmt);
16342 gimple *g = gimple_build_assign (lhs, BIT_XOR_EXPR, arg0, arg1);
16343 gimple_set_location (g, gimple_location (stmt));
16344 gsi_replace (gsi, g, true);
16345 return true;
16347 /* Flavors of vec_nor. */
16348 case ALTIVEC_BUILTIN_VNOR:
16350 arg0 = gimple_call_arg (stmt, 0);
16351 arg1 = gimple_call_arg (stmt, 1);
16352 lhs = gimple_call_lhs (stmt);
16353 tree temp = create_tmp_reg_or_ssa_name (TREE_TYPE (arg1));
16354 gimple *g = gimple_build_assign (temp, BIT_IOR_EXPR, arg0, arg1);
16355 gimple_set_location (g, gimple_location (stmt));
16356 gsi_insert_before (gsi, g, GSI_SAME_STMT);
16357 g = gimple_build_assign (lhs, BIT_NOT_EXPR, temp);
16358 gimple_set_location (g, gimple_location (stmt));
16359 gsi_replace (gsi, g, true);
16360 return true;
16362 /* Flavors of vec_abs. */
16363 case ALTIVEC_BUILTIN_ABS_V16QI:
16364 case ALTIVEC_BUILTIN_ABS_V8HI:
16365 case ALTIVEC_BUILTIN_ABS_V4SI:
16366 case ALTIVEC_BUILTIN_ABS_V4SF:
16367 case P8V_BUILTIN_ABS_V2DI:
16368 case VSX_BUILTIN_XVABSDP:
16370 arg0 = gimple_call_arg (stmt, 0);
16371 if (INTEGRAL_TYPE_P (TREE_TYPE (TREE_TYPE (arg0)))
16372 && !TYPE_OVERFLOW_WRAPS (TREE_TYPE (TREE_TYPE (arg0))))
16373 return false;
16374 lhs = gimple_call_lhs (stmt);
16375 gimple *g = gimple_build_assign (lhs, ABS_EXPR, arg0);
16376 gimple_set_location (g, gimple_location (stmt));
16377 gsi_replace (gsi, g, true);
16378 return true;
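/* A hedged explanation of the early return above: the fold is skipped
   for signed integer vectors whose element type does not wrap on
   overflow, presumably because an ABS_EXPR of the most negative
   element would then be undefined behavior, whereas the hardware
   vector-abs instructions have well-defined modulo semantics.  */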
16380 /* Flavors of vec_min. */
16381 case VSX_BUILTIN_XVMINDP:
16382 case P8V_BUILTIN_VMINSD:
16383 case P8V_BUILTIN_VMINUD:
16384 case ALTIVEC_BUILTIN_VMINSB:
16385 case ALTIVEC_BUILTIN_VMINSH:
16386 case ALTIVEC_BUILTIN_VMINSW:
16387 case ALTIVEC_BUILTIN_VMINUB:
16388 case ALTIVEC_BUILTIN_VMINUH:
16389 case ALTIVEC_BUILTIN_VMINUW:
16390 case ALTIVEC_BUILTIN_VMINFP:
16392 arg0 = gimple_call_arg (stmt, 0);
16393 arg1 = gimple_call_arg (stmt, 1);
16394 lhs = gimple_call_lhs (stmt);
16395 gimple *g = gimple_build_assign (lhs, MIN_EXPR, arg0, arg1);
16396 gimple_set_location (g, gimple_location (stmt));
16397 gsi_replace (gsi, g, true);
16398 return true;
16400 /* Flavors of vec_max. */
16401 case VSX_BUILTIN_XVMAXDP:
16402 case P8V_BUILTIN_VMAXSD:
16403 case P8V_BUILTIN_VMAXUD:
16404 case ALTIVEC_BUILTIN_VMAXSB:
16405 case ALTIVEC_BUILTIN_VMAXSH:
16406 case ALTIVEC_BUILTIN_VMAXSW:
16407 case ALTIVEC_BUILTIN_VMAXUB:
16408 case ALTIVEC_BUILTIN_VMAXUH:
16409 case ALTIVEC_BUILTIN_VMAXUW:
16410 case ALTIVEC_BUILTIN_VMAXFP:
16412 arg0 = gimple_call_arg (stmt, 0);
16413 arg1 = gimple_call_arg (stmt, 1);
16414 lhs = gimple_call_lhs (stmt);
16415 gimple *g = gimple_build_assign (lhs, MAX_EXPR, arg0, arg1);
16416 gimple_set_location (g, gimple_location (stmt));
16417 gsi_replace (gsi, g, true);
16418 return true;
16420 /* Flavors of vec_eqv. */
16421 case P8V_BUILTIN_EQV_V16QI:
16422 case P8V_BUILTIN_EQV_V8HI:
16423 case P8V_BUILTIN_EQV_V4SI:
16424 case P8V_BUILTIN_EQV_V4SF:
16425 case P8V_BUILTIN_EQV_V2DF:
16426 case P8V_BUILTIN_EQV_V2DI:
16428 arg0 = gimple_call_arg (stmt, 0);
16429 arg1 = gimple_call_arg (stmt, 1);
16430 lhs = gimple_call_lhs (stmt);
16431 tree temp = create_tmp_reg_or_ssa_name (TREE_TYPE (arg1));
16432 gimple *g = gimple_build_assign (temp, BIT_XOR_EXPR, arg0, arg1);
16433 gimple_set_location (g, gimple_location (stmt));
16434 gsi_insert_before (gsi, g, GSI_SAME_STMT);
16435 g = gimple_build_assign (lhs, BIT_NOT_EXPR, temp);
16436 gimple_set_location (g, gimple_location (stmt));
16437 gsi_replace (gsi, g, true);
16438 return true;
16440 /* Flavors of vec_rotate_left. */
16441 case ALTIVEC_BUILTIN_VRLB:
16442 case ALTIVEC_BUILTIN_VRLH:
16443 case ALTIVEC_BUILTIN_VRLW:
16444 case P8V_BUILTIN_VRLD:
16446 arg0 = gimple_call_arg (stmt, 0);
16447 arg1 = gimple_call_arg (stmt, 1);
16448 lhs = gimple_call_lhs (stmt);
16449 gimple *g = gimple_build_assign (lhs, LROTATE_EXPR, arg0, arg1);
16450 gimple_set_location (g, gimple_location (stmt));
16451 gsi_replace (gsi, g, true);
16452 return true;
16454 /* Flavors of vector shift right algebraic.
16455 vec_sra{b,h,w} -> vsra{b,h,w}. */
16456 case ALTIVEC_BUILTIN_VSRAB:
16457 case ALTIVEC_BUILTIN_VSRAH:
16458 case ALTIVEC_BUILTIN_VSRAW:
16459 case P8V_BUILTIN_VSRAD:
16461 arg0 = gimple_call_arg (stmt, 0);
16462 arg1 = gimple_call_arg (stmt, 1);
16463 lhs = gimple_call_lhs (stmt);
16464 gimple *g = gimple_build_assign (lhs, RSHIFT_EXPR, arg0, arg1);
16465 gimple_set_location (g, gimple_location (stmt));
16466 gsi_replace (gsi, g, true);
16467 return true;
16469 /* Flavors of vector shift left.
16470 builtin_altivec_vsl{b,h,w} -> vsl{b,h,w}. */
16471 case ALTIVEC_BUILTIN_VSLB:
16472 case ALTIVEC_BUILTIN_VSLH:
16473 case ALTIVEC_BUILTIN_VSLW:
16474 case P8V_BUILTIN_VSLD:
16476 arg0 = gimple_call_arg (stmt, 0);
16477 if (INTEGRAL_TYPE_P (TREE_TYPE (TREE_TYPE (arg0)))
16478 && !TYPE_OVERFLOW_WRAPS (TREE_TYPE (TREE_TYPE (arg0))))
16479 return false;
16480 arg1 = gimple_call_arg (stmt, 1);
16481 lhs = gimple_call_lhs (stmt);
16482 gimple *g = gimple_build_assign (lhs, LSHIFT_EXPR, arg0, arg1);
16483 gimple_set_location (g, gimple_location (stmt));
16484 gsi_replace (gsi, g, true);
16485 return true;
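/* Hedged note: the check above mirrors the vec_abs case; the
   shift-left fold is declined unless the element type wraps, since a
   GIMPLE LSHIFT_EXPR that shifts into the sign bit of a non-wrapping
   signed element would be undefined where the vsl* instructions are
   not.  */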
16487 /* Flavors of vector shift right. */
16488 case ALTIVEC_BUILTIN_VSRB:
16489 case ALTIVEC_BUILTIN_VSRH:
16490 case ALTIVEC_BUILTIN_VSRW:
16491 case P8V_BUILTIN_VSRD:
16493 arg0 = gimple_call_arg (stmt, 0);
16494 arg1 = gimple_call_arg (stmt, 1);
16495 lhs = gimple_call_lhs (stmt);
16496 gimple_seq stmts = NULL;
16497 /* Convert arg0 to unsigned. */
16498 tree arg0_unsigned
16499 = gimple_build (&stmts, VIEW_CONVERT_EXPR,
16500 unsigned_type_for (TREE_TYPE (arg0)), arg0);
16501 tree res
16502 = gimple_build (&stmts, RSHIFT_EXPR,
16503 TREE_TYPE (arg0_unsigned), arg0_unsigned, arg1);
16504 /* Convert result back to the lhs type. */
16505 res = gimple_build (&stmts, VIEW_CONVERT_EXPR, TREE_TYPE (lhs), res);
16506 gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
16507 update_call_from_tree (gsi, res);
16508 return true;
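/* Sketch of why the view-conversions above are needed (types
   hypothetical): RSHIFT_EXPR on a signed vector is an arithmetic
   shift, but vsr{b,h,w,d} are logical shifts.  Folding

     res = vec_sr (a, b);   where a is vector signed int

   therefore goes through an unsigned view:

     ua  = VIEW_CONVERT_EXPR<vector unsigned int>(a);
     ur  = ua >> b;
     res = VIEW_CONVERT_EXPR<vector signed int>(ur);  */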
16510 default:
16511 break;
16514 return false;
16517 /* Expand an expression EXP that calls a built-in function,
16518 with result going to TARGET if that's convenient
16519 (and in mode MODE if that's convenient).
16520 SUBTARGET may be used as the target for computing one of EXP's operands.
16521 IGNORE is nonzero if the value is to be ignored. */
16523 static rtx
16524 rs6000_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
16525 machine_mode mode ATTRIBUTE_UNUSED,
16526 int ignore ATTRIBUTE_UNUSED)
16528 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
16529 enum rs6000_builtins fcode
16530 = (enum rs6000_builtins)DECL_FUNCTION_CODE (fndecl);
16531 size_t uns_fcode = (size_t)fcode;
16532 const struct builtin_description *d;
16533 size_t i;
16534 rtx ret;
16535 bool success;
16536 HOST_WIDE_INT mask = rs6000_builtin_info[uns_fcode].mask;
16537 bool func_valid_p = ((rs6000_builtin_mask & mask) == mask);
16539 if (TARGET_DEBUG_BUILTIN)
16541 enum insn_code icode = rs6000_builtin_info[uns_fcode].icode;
16542 const char *name1 = rs6000_builtin_info[uns_fcode].name;
16543 const char *name2 = ((icode != CODE_FOR_nothing)
16544 ? get_insn_name ((int)icode)
16545 : "nothing");
16546 const char *name3;
16548 switch (rs6000_builtin_info[uns_fcode].attr & RS6000_BTC_TYPE_MASK)
16550 default: name3 = "unknown"; break;
16551 case RS6000_BTC_SPECIAL: name3 = "special"; break;
16552 case RS6000_BTC_UNARY: name3 = "unary"; break;
16553 case RS6000_BTC_BINARY: name3 = "binary"; break;
16554 case RS6000_BTC_TERNARY: name3 = "ternary"; break;
16555 case RS6000_BTC_PREDICATE: name3 = "predicate"; break;
16556 case RS6000_BTC_ABS: name3 = "abs"; break;
16557 case RS6000_BTC_DST: name3 = "dst"; break;
16561 fprintf (stderr,
16562 "rs6000_expand_builtin, %s (%d), insn = %s (%d), type=%s%s\n",
16563 (name1) ? name1 : "---", fcode,
16564 (name2) ? name2 : "---", (int)icode,
16565 name3,
16566 func_valid_p ? "" : ", not valid");
16569 if (!func_valid_p)
16571 rs6000_invalid_builtin (fcode);
16573 /* Given it is invalid, just generate a normal call. */
16574 return expand_call (exp, target, ignore);
16577 switch (fcode)
16579 case RS6000_BUILTIN_RECIP:
16580 return rs6000_expand_binop_builtin (CODE_FOR_recipdf3, exp, target);
16582 case RS6000_BUILTIN_RECIPF:
16583 return rs6000_expand_binop_builtin (CODE_FOR_recipsf3, exp, target);
16585 case RS6000_BUILTIN_RSQRTF:
16586 return rs6000_expand_unop_builtin (CODE_FOR_rsqrtsf2, exp, target);
16588 case RS6000_BUILTIN_RSQRT:
16589 return rs6000_expand_unop_builtin (CODE_FOR_rsqrtdf2, exp, target);
16591 case POWER7_BUILTIN_BPERMD:
16592 return rs6000_expand_binop_builtin (((TARGET_64BIT)
16593 ? CODE_FOR_bpermd_di
16594 : CODE_FOR_bpermd_si), exp, target);
16596 case RS6000_BUILTIN_GET_TB:
16597 return rs6000_expand_zeroop_builtin (CODE_FOR_rs6000_get_timebase,
16598 target);
16600 case RS6000_BUILTIN_MFTB:
16601 return rs6000_expand_zeroop_builtin (((TARGET_64BIT)
16602 ? CODE_FOR_rs6000_mftb_di
16603 : CODE_FOR_rs6000_mftb_si),
16604 target);
16606 case RS6000_BUILTIN_MFFS:
16607 return rs6000_expand_zeroop_builtin (CODE_FOR_rs6000_mffs, target);
16609 case RS6000_BUILTIN_MTFSF:
16610 return rs6000_expand_mtfsf_builtin (CODE_FOR_rs6000_mtfsf, exp);
16612 case RS6000_BUILTIN_CPU_INIT:
16613 case RS6000_BUILTIN_CPU_IS:
16614 case RS6000_BUILTIN_CPU_SUPPORTS:
16615 return cpu_expand_builtin (fcode, exp, target);
16617 case ALTIVEC_BUILTIN_MASK_FOR_LOAD:
16618 case ALTIVEC_BUILTIN_MASK_FOR_STORE:
16620 int icode = (BYTES_BIG_ENDIAN ? (int) CODE_FOR_altivec_lvsr_direct
16621 : (int) CODE_FOR_altivec_lvsl_direct);
16622 machine_mode tmode = insn_data[icode].operand[0].mode;
16623 machine_mode mode = insn_data[icode].operand[1].mode;
16624 tree arg;
16625 rtx op, addr, pat;
16627 gcc_assert (TARGET_ALTIVEC);
16629 arg = CALL_EXPR_ARG (exp, 0);
16630 gcc_assert (POINTER_TYPE_P (TREE_TYPE (arg)));
16631 op = expand_expr (arg, NULL_RTX, Pmode, EXPAND_NORMAL);
16632 addr = memory_address (mode, op);
16633 if (fcode == ALTIVEC_BUILTIN_MASK_FOR_STORE)
16634 op = addr;
16635 else
16637 /* For the load case we need to negate the address. */
16638 op = gen_reg_rtx (GET_MODE (addr));
16639 emit_insn (gen_rtx_SET (op, gen_rtx_NEG (GET_MODE (addr), addr)));
16641 op = gen_rtx_MEM (mode, op);
16643 if (target == 0
16644 || GET_MODE (target) != tmode
16645 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
16646 target = gen_reg_rtx (tmode);
16648 pat = GEN_FCN (icode) (target, op);
16649 if (!pat)
16650 return 0;
16651 emit_insn (pat);
16653 return target;
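/* Hedged note on the negation above: lvsl/lvsr derive a 16-byte
   permute control vector from the low-order address bits.  For the
   mask-for-load case the address is negated first so that the
   resulting mask, when fed to the vectorizer's realignment scheme,
   selects the bytes that merge two aligned loads into one misaligned
   value.  */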
16656 case ALTIVEC_BUILTIN_VCFUX:
16657 case ALTIVEC_BUILTIN_VCFSX:
16658 case ALTIVEC_BUILTIN_VCTUXS:
16659 case ALTIVEC_BUILTIN_VCTSXS:
16660 /* FIXME: There's got to be a nicer way to handle this case than
16661 constructing a new CALL_EXPR. */
16662 if (call_expr_nargs (exp) == 1)
16664 exp = build_call_nary (TREE_TYPE (exp), CALL_EXPR_FN (exp),
16665 2, CALL_EXPR_ARG (exp, 0), integer_zero_node);
16667 break;
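/* Illustrative example of the rewrite above: a one-argument call such
   as __builtin_altivec_vcfsx (v) is treated as having an implicit
   scale factor of zero, so the CALL_EXPR is rebuilt as
   __builtin_altivec_vcfsx (v, 0) before normal expansion.  */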
16669 default:
16670 break;
16673 if (TARGET_ALTIVEC)
16675 ret = altivec_expand_builtin (exp, target, &success);
16677 if (success)
16678 return ret;
16680 if (TARGET_PAIRED_FLOAT)
16682 ret = paired_expand_builtin (exp, target, &success);
16684 if (success)
16685 return ret;
16687 if (TARGET_HTM)
16689 ret = htm_expand_builtin (exp, target, &success);
16691 if (success)
16692 return ret;
16695 unsigned attr = rs6000_builtin_info[uns_fcode].attr & RS6000_BTC_TYPE_MASK;
16696 /* RS6000_BTC_SPECIAL represents no-operand operators. */
16697 gcc_assert (attr == RS6000_BTC_UNARY
16698 || attr == RS6000_BTC_BINARY
16699 || attr == RS6000_BTC_TERNARY
16700 || attr == RS6000_BTC_SPECIAL);
16702 /* Handle simple unary operations. */
16703 d = bdesc_1arg;
16704 for (i = 0; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
16705 if (d->code == fcode)
16706 return rs6000_expand_unop_builtin (d->icode, exp, target);
16708 /* Handle simple binary operations. */
16709 d = bdesc_2arg;
16710 for (i = 0; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
16711 if (d->code == fcode)
16712 return rs6000_expand_binop_builtin (d->icode, exp, target);
16714 /* Handle simple ternary operations. */
16715 d = bdesc_3arg;
16716 for (i = 0; i < ARRAY_SIZE (bdesc_3arg); i++, d++)
16717 if (d->code == fcode)
16718 return rs6000_expand_ternop_builtin (d->icode, exp, target);
16720 /* Handle simple no-argument operations. */
16721 d = bdesc_0arg;
16722 for (i = 0; i < ARRAY_SIZE (bdesc_0arg); i++, d++)
16723 if (d->code == fcode)
16724 return rs6000_expand_zeroop_builtin (d->icode, target);
16726 gcc_unreachable ();
16729 /* Create a builtin vector type with a name, taking care not to give
16730 the canonical type a name. */
16732 static tree
16733 rs6000_vector_type (const char *name, tree elt_type, unsigned num_elts)
16735 tree result = build_vector_type (elt_type, num_elts);
16737 /* Copy so we don't give the canonical type a name. */
16738 result = build_variant_type_copy (result);
16740 add_builtin_type (name, result);
16742 return result;
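/* Hypothetical usage sketch:

     tree t = rs6000_vector_type ("__vector signed int",
                                  intSI_type_node, 4);

   names only the variant copy; TYPE_CANONICAL (t) still refers to the
   anonymous build_vector_type node, so type-identity checks continue
   to unify all V4SI-shaped vector types.  */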
16745 static void
16746 rs6000_init_builtins (void)
16748 tree tdecl;
16749 tree ftype;
16750 machine_mode mode;
16752 if (TARGET_DEBUG_BUILTIN)
16753 fprintf (stderr, "rs6000_init_builtins%s%s%s\n",
16754 (TARGET_PAIRED_FLOAT) ? ", paired" : "",
16755 (TARGET_ALTIVEC) ? ", altivec" : "",
16756 (TARGET_VSX) ? ", vsx" : "");
16758 V2SI_type_node = build_vector_type (intSI_type_node, 2);
16759 V2SF_type_node = build_vector_type (float_type_node, 2);
16760 V2DI_type_node = rs6000_vector_type (TARGET_POWERPC64 ? "__vector long"
16761 : "__vector long long",
16762 intDI_type_node, 2);
16763 V2DF_type_node = rs6000_vector_type ("__vector double", double_type_node, 2);
16764 V4SI_type_node = rs6000_vector_type ("__vector signed int",
16765 intSI_type_node, 4);
16766 V4SF_type_node = rs6000_vector_type ("__vector float", float_type_node, 4);
16767 V8HI_type_node = rs6000_vector_type ("__vector signed short",
16768 intHI_type_node, 8);
16769 V16QI_type_node = rs6000_vector_type ("__vector signed char",
16770 intQI_type_node, 16);
16772 unsigned_V16QI_type_node = rs6000_vector_type ("__vector unsigned char",
16773 unsigned_intQI_type_node, 16);
16774 unsigned_V8HI_type_node = rs6000_vector_type ("__vector unsigned short",
16775 unsigned_intHI_type_node, 8);
16776 unsigned_V4SI_type_node = rs6000_vector_type ("__vector unsigned int",
16777 unsigned_intSI_type_node, 4);
16778 unsigned_V2DI_type_node = rs6000_vector_type (TARGET_POWERPC64
16779 ? "__vector unsigned long"
16780 : "__vector unsigned long long",
16781 unsigned_intDI_type_node, 2);
16783 opaque_V2SF_type_node = build_opaque_vector_type (float_type_node, 2);
16784 opaque_V2SI_type_node = build_opaque_vector_type (intSI_type_node, 2);
16785 opaque_p_V2SI_type_node = build_pointer_type (opaque_V2SI_type_node);
16786 opaque_V4SI_type_node = build_opaque_vector_type (intSI_type_node, 4);
16788 const_str_type_node
16789 = build_pointer_type (build_qualified_type (char_type_node,
16790 TYPE_QUAL_CONST));
16792 /* We use V1TI mode as a special container to hold __int128_t items that
16793 must live in VSX registers. */
16794 if (intTI_type_node)
16796 V1TI_type_node = rs6000_vector_type ("__vector __int128",
16797 intTI_type_node, 1);
16798 unsigned_V1TI_type_node
16799 = rs6000_vector_type ("__vector unsigned __int128",
16800 unsigned_intTI_type_node, 1);
16803 /* The 'vector bool ...' types must be kept distinct from 'vector unsigned ...'
16804 types, especially in C++ land. Similarly, 'vector pixel' is distinct from
16805 'vector unsigned short'. */
16807 bool_char_type_node = build_distinct_type_copy (unsigned_intQI_type_node);
16808 bool_short_type_node = build_distinct_type_copy (unsigned_intHI_type_node);
16809 bool_int_type_node = build_distinct_type_copy (unsigned_intSI_type_node);
16810 bool_long_type_node = build_distinct_type_copy (unsigned_intDI_type_node);
16811 pixel_type_node = build_distinct_type_copy (unsigned_intHI_type_node);
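/* Hedged note: build_distinct_type_copy makes, e.g., the __bool char
   element type a distinct copy of unsigned char, so the 'vector __bool
   char' type built from it below is a different type from 'vector
   unsigned char' as far as C++ overloading and mangling are concerned,
   even though the two share the same layout.  */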
16813 long_integer_type_internal_node = long_integer_type_node;
16814 long_unsigned_type_internal_node = long_unsigned_type_node;
16815 long_long_integer_type_internal_node = long_long_integer_type_node;
16816 long_long_unsigned_type_internal_node = long_long_unsigned_type_node;
16817 intQI_type_internal_node = intQI_type_node;
16818 uintQI_type_internal_node = unsigned_intQI_type_node;
16819 intHI_type_internal_node = intHI_type_node;
16820 uintHI_type_internal_node = unsigned_intHI_type_node;
16821 intSI_type_internal_node = intSI_type_node;
16822 uintSI_type_internal_node = unsigned_intSI_type_node;
16823 intDI_type_internal_node = intDI_type_node;
16824 uintDI_type_internal_node = unsigned_intDI_type_node;
16825 intTI_type_internal_node = intTI_type_node;
16826 uintTI_type_internal_node = unsigned_intTI_type_node;
16827 float_type_internal_node = float_type_node;
16828 double_type_internal_node = double_type_node;
16829 long_double_type_internal_node = long_double_type_node;
16830 dfloat64_type_internal_node = dfloat64_type_node;
16831 dfloat128_type_internal_node = dfloat128_type_node;
16832 void_type_internal_node = void_type_node;
16834 /* 128-bit floating point support. KFmode is IEEE 128-bit floating point.
16835 IFmode is the IBM extended 128-bit format that is a pair of doubles.
16836 TFmode will be either IEEE 128-bit floating point or the IBM double-double
16837 format that uses a pair of doubles, depending on the switches and
16838 defaults.
16840 We do not enable the actual __float128 keyword unless the user explicitly
16841 asks for it, because the library support is not yet complete.
16843 If we don't have support for either 128-bit IBM double double or IEEE
16844 128-bit floating point, we need to make sure the type is non-zero or
16845 else the self-test fails during bootstrap.
16847 We don't register a built-in type for __ibm128 if the type is the same as
16848 long double. Instead, rs6000_cpu_cpp_builtins adds a #define mapping
16849 __ibm128 to long double. */
16850 if (TARGET_LONG_DOUBLE_128 && FLOAT128_IEEE_P (TFmode))
16852 ibm128_float_type_node = make_node (REAL_TYPE);
16853 TYPE_PRECISION (ibm128_float_type_node) = 128;
16854 SET_TYPE_MODE (ibm128_float_type_node, IFmode);
16855 layout_type (ibm128_float_type_node);
16857 lang_hooks.types.register_builtin_type (ibm128_float_type_node,
16858 "__ibm128");
16860 else
16861 ibm128_float_type_node = long_double_type_node;
16863 if (TARGET_FLOAT128_KEYWORD)
16865 ieee128_float_type_node = float128_type_node;
16866 lang_hooks.types.register_builtin_type (ieee128_float_type_node,
16867 "__float128");
16870 else if (TARGET_FLOAT128_TYPE)
16872 ieee128_float_type_node = make_node (REAL_TYPE);
16873 TYPE_PRECISION (ieee128_float_type_node) = 128;
16874 SET_TYPE_MODE (ieee128_float_type_node, KFmode);
16875 layout_type (ieee128_float_type_node);
16877 /* If we are not exporting the __float128/_Float128 keywords, we need a
16878 keyword to get the types created. Use __ieee128 as the dummy
16879 keyword. */
16880 lang_hooks.types.register_builtin_type (ieee128_float_type_node,
16881 "__ieee128");
16884 else
16885 ieee128_float_type_node = long_double_type_node;
16887 /* Initialize the modes for builtin_function_type, mapping a machine mode to
16888 its tree type node. */
16889 builtin_mode_to_type[QImode][0] = integer_type_node;
16890 builtin_mode_to_type[HImode][0] = integer_type_node;
16891 builtin_mode_to_type[SImode][0] = intSI_type_node;
16892 builtin_mode_to_type[SImode][1] = unsigned_intSI_type_node;
16893 builtin_mode_to_type[DImode][0] = intDI_type_node;
16894 builtin_mode_to_type[DImode][1] = unsigned_intDI_type_node;
16895 builtin_mode_to_type[TImode][0] = intTI_type_node;
16896 builtin_mode_to_type[TImode][1] = unsigned_intTI_type_node;
16897 builtin_mode_to_type[SFmode][0] = float_type_node;
16898 builtin_mode_to_type[DFmode][0] = double_type_node;
16899 builtin_mode_to_type[IFmode][0] = ibm128_float_type_node;
16900 builtin_mode_to_type[KFmode][0] = ieee128_float_type_node;
16901 builtin_mode_to_type[TFmode][0] = long_double_type_node;
16902 builtin_mode_to_type[DDmode][0] = dfloat64_type_node;
16903 builtin_mode_to_type[TDmode][0] = dfloat128_type_node;
16904 builtin_mode_to_type[V1TImode][0] = V1TI_type_node;
16905 builtin_mode_to_type[V1TImode][1] = unsigned_V1TI_type_node;
16906 builtin_mode_to_type[V2SImode][0] = V2SI_type_node;
16907 builtin_mode_to_type[V2SFmode][0] = V2SF_type_node;
16908 builtin_mode_to_type[V2DImode][0] = V2DI_type_node;
16909 builtin_mode_to_type[V2DImode][1] = unsigned_V2DI_type_node;
16910 builtin_mode_to_type[V2DFmode][0] = V2DF_type_node;
16911 builtin_mode_to_type[V4SImode][0] = V4SI_type_node;
16912 builtin_mode_to_type[V4SImode][1] = unsigned_V4SI_type_node;
16913 builtin_mode_to_type[V4SFmode][0] = V4SF_type_node;
16914 builtin_mode_to_type[V8HImode][0] = V8HI_type_node;
16915 builtin_mode_to_type[V8HImode][1] = unsigned_V8HI_type_node;
16916 builtin_mode_to_type[V16QImode][0] = V16QI_type_node;
16917 builtin_mode_to_type[V16QImode][1] = unsigned_V16QI_type_node;
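/* Illustrative lookup (a sketch of how builtin_function_type consumes
   this table): indexing is builtin_mode_to_type[mode][unsigned_p], so
   builtin_mode_to_type[V4SImode][1] yields the type behind
   '__vector unsigned int'.  */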
16919 tdecl = add_builtin_type ("__bool char", bool_char_type_node);
16920 TYPE_NAME (bool_char_type_node) = tdecl;
16922 tdecl = add_builtin_type ("__bool short", bool_short_type_node);
16923 TYPE_NAME (bool_short_type_node) = tdecl;
16925 tdecl = add_builtin_type ("__bool int", bool_int_type_node);
16926 TYPE_NAME (bool_int_type_node) = tdecl;
16928 tdecl = add_builtin_type ("__pixel", pixel_type_node);
16929 TYPE_NAME (pixel_type_node) = tdecl;
16931 bool_V16QI_type_node = rs6000_vector_type ("__vector __bool char",
16932 bool_char_type_node, 16);
16933 bool_V8HI_type_node = rs6000_vector_type ("__vector __bool short",
16934 bool_short_type_node, 8);
16935 bool_V4SI_type_node = rs6000_vector_type ("__vector __bool int",
16936 bool_int_type_node, 4);
16937 bool_V2DI_type_node = rs6000_vector_type (TARGET_POWERPC64
16938 ? "__vector __bool long"
16939 : "__vector __bool long long",
16940 bool_long_type_node, 2);
16941 pixel_V8HI_type_node = rs6000_vector_type ("__vector __pixel",
16942 pixel_type_node, 8);
16944 /* Paired builtins are only available if the compiler is built with the
16945 appropriate options, so only create those builtins when the corresponding
16946 compiler option is enabled. Create Altivec and VSX builtins on machines with at
16947 least the general purpose extensions (970 and newer) to allow the use of
16948 the target attribute. */
16949 if (TARGET_PAIRED_FLOAT)
16950 paired_init_builtins ();
16951 if (TARGET_EXTRA_BUILTINS)
16952 altivec_init_builtins ();
16953 if (TARGET_HTM)
16954 htm_init_builtins ();
16956 if (TARGET_EXTRA_BUILTINS || TARGET_PAIRED_FLOAT)
16957 rs6000_common_init_builtins ();
16959 ftype = build_function_type_list (ieee128_float_type_node,
16960 const_str_type_node, NULL_TREE);
16961 def_builtin ("__builtin_nanq", ftype, RS6000_BUILTIN_NANQ);
16962 def_builtin ("__builtin_nansq", ftype, RS6000_BUILTIN_NANSQ);
16964 ftype = build_function_type_list (ieee128_float_type_node, NULL_TREE);
16965 def_builtin ("__builtin_infq", ftype, RS6000_BUILTIN_INFQ);
16966 def_builtin ("__builtin_huge_valq", ftype, RS6000_BUILTIN_HUGE_VALQ);
16968 ftype = builtin_function_type (DFmode, DFmode, DFmode, VOIDmode,
16969 RS6000_BUILTIN_RECIP, "__builtin_recipdiv");
16970 def_builtin ("__builtin_recipdiv", ftype, RS6000_BUILTIN_RECIP);
16972 ftype = builtin_function_type (SFmode, SFmode, SFmode, VOIDmode,
16973 RS6000_BUILTIN_RECIPF, "__builtin_recipdivf");
16974 def_builtin ("__builtin_recipdivf", ftype, RS6000_BUILTIN_RECIPF);
16976 ftype = builtin_function_type (DFmode, DFmode, VOIDmode, VOIDmode,
16977 RS6000_BUILTIN_RSQRT, "__builtin_rsqrt");
16978 def_builtin ("__builtin_rsqrt", ftype, RS6000_BUILTIN_RSQRT);
16980 ftype = builtin_function_type (SFmode, SFmode, VOIDmode, VOIDmode,
16981 RS6000_BUILTIN_RSQRTF, "__builtin_rsqrtf");
16982 def_builtin ("__builtin_rsqrtf", ftype, RS6000_BUILTIN_RSQRTF);
16984 mode = (TARGET_64BIT) ? DImode : SImode;
16985 ftype = builtin_function_type (mode, mode, mode, VOIDmode,
16986 POWER7_BUILTIN_BPERMD, "__builtin_bpermd");
16987 def_builtin ("__builtin_bpermd", ftype, POWER7_BUILTIN_BPERMD);
16989 ftype = build_function_type_list (unsigned_intDI_type_node,
16990 NULL_TREE);
16991 def_builtin ("__builtin_ppc_get_timebase", ftype, RS6000_BUILTIN_GET_TB);
16993 if (TARGET_64BIT)
16994 ftype = build_function_type_list (unsigned_intDI_type_node,
16995 NULL_TREE);
16996 else
16997 ftype = build_function_type_list (unsigned_intSI_type_node,
16998 NULL_TREE);
16999 def_builtin ("__builtin_ppc_mftb", ftype, RS6000_BUILTIN_MFTB);
17001 ftype = build_function_type_list (double_type_node, NULL_TREE);
17002 def_builtin ("__builtin_mffs", ftype, RS6000_BUILTIN_MFFS);
17004 ftype = build_function_type_list (void_type_node,
17005 intSI_type_node, double_type_node,
17006 NULL_TREE);
17007 def_builtin ("__builtin_mtfsf", ftype, RS6000_BUILTIN_MTFSF);
17009 ftype = build_function_type_list (void_type_node, NULL_TREE);
17010 def_builtin ("__builtin_cpu_init", ftype, RS6000_BUILTIN_CPU_INIT);
17012 ftype = build_function_type_list (bool_int_type_node, const_ptr_type_node,
17013 NULL_TREE);
17014 def_builtin ("__builtin_cpu_is", ftype, RS6000_BUILTIN_CPU_IS);
17015 def_builtin ("__builtin_cpu_supports", ftype, RS6000_BUILTIN_CPU_SUPPORTS);
17017 /* AIX libm provides clog as __clog. */
17018 if (TARGET_XCOFF
17019 && (tdecl = builtin_decl_explicit (BUILT_IN_CLOG)) != NULL_TREE)
17020 set_user_assembler_name (tdecl, "__clog");
17022 #ifdef SUBTARGET_INIT_BUILTINS
17023 SUBTARGET_INIT_BUILTINS;
17024 #endif
17027 /* Returns the rs6000 builtin decl for CODE. */
17029 static tree
17030 rs6000_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED)
17032 HOST_WIDE_INT fnmask;
17034 if (code >= RS6000_BUILTIN_COUNT)
17035 return error_mark_node;
17037 fnmask = rs6000_builtin_info[code].mask;
17038 if ((fnmask & rs6000_builtin_mask) != fnmask)
17040 rs6000_invalid_builtin ((enum rs6000_builtins)code);
17041 return error_mark_node;
17044 return rs6000_builtin_decls[code];
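/* Hedged example of the mask check above: a built-in whose mask
   requires, say, RS6000_BTM_ALTIVEC only resolves to a decl when
   rs6000_builtin_mask (derived from the -m options in force) has that
   bit set; otherwise rs6000_invalid_builtin reports the problem and
   error_mark_node is returned.  */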
17047 static void
17048 paired_init_builtins (void)
17050 const struct builtin_description *d;
17051 size_t i;
17052 HOST_WIDE_INT builtin_mask = rs6000_builtin_mask;
17054 tree int_ftype_int_v2sf_v2sf
17055 = build_function_type_list (integer_type_node,
17056 integer_type_node,
17057 V2SF_type_node,
17058 V2SF_type_node,
17059 NULL_TREE);
17060 tree pcfloat_type_node =
17061 build_pointer_type (build_qualified_type
17062 (float_type_node, TYPE_QUAL_CONST));
17064 tree v2sf_ftype_long_pcfloat = build_function_type_list (V2SF_type_node,
17065 long_integer_type_node,
17066 pcfloat_type_node,
17067 NULL_TREE);
17068 tree void_ftype_v2sf_long_pcfloat =
17069 build_function_type_list (void_type_node,
17070 V2SF_type_node,
17071 long_integer_type_node,
17072 pcfloat_type_node,
17073 NULL_TREE);
17076 def_builtin ("__builtin_paired_lx", v2sf_ftype_long_pcfloat,
17077 PAIRED_BUILTIN_LX);
17080 def_builtin ("__builtin_paired_stx", void_ftype_v2sf_long_pcfloat,
17081 PAIRED_BUILTIN_STX);
17083 /* Predicates. */
17084 d = bdesc_paired_preds;
17085 for (i = 0; i < ARRAY_SIZE (bdesc_paired_preds); ++i, d++)
17087 tree type;
17088 HOST_WIDE_INT mask = d->mask;
17090 if ((mask & builtin_mask) != mask)
17092 if (TARGET_DEBUG_BUILTIN)
17093 fprintf (stderr, "paired_init_builtins, skip predicate %s\n",
17094 d->name);
17095 continue;
17098 /* Cannot define builtin if the instruction is disabled. */
17099 gcc_assert (d->icode != CODE_FOR_nothing);
17101 if (TARGET_DEBUG_BUILTIN)
17102 fprintf (stderr, "paired pred #%d, insn = %s [%d], mode = %s\n",
17103 (int)i, get_insn_name (d->icode), (int)d->icode,
17104 GET_MODE_NAME (insn_data[d->icode].operand[1].mode));
17106 switch (insn_data[d->icode].operand[1].mode)
17108 case V2SFmode:
17109 type = int_ftype_int_v2sf_v2sf;
17110 break;
17111 default:
17112 gcc_unreachable ();
17115 def_builtin (d->name, type, d->code);
17119 static void
17120 altivec_init_builtins (void)
17122 const struct builtin_description *d;
17123 size_t i;
17124 tree ftype;
17125 tree decl;
17126 HOST_WIDE_INT builtin_mask = rs6000_builtin_mask;
17128 tree pvoid_type_node = build_pointer_type (void_type_node);
17130 tree pcvoid_type_node
17131 = build_pointer_type (build_qualified_type (void_type_node,
17132 TYPE_QUAL_CONST));
17134 tree int_ftype_opaque
17135 = build_function_type_list (integer_type_node,
17136 opaque_V4SI_type_node, NULL_TREE);
17137 tree opaque_ftype_opaque
17138 = build_function_type_list (integer_type_node, NULL_TREE);
17139 tree opaque_ftype_opaque_int
17140 = build_function_type_list (opaque_V4SI_type_node,
17141 opaque_V4SI_type_node, integer_type_node, NULL_TREE);
17142 tree opaque_ftype_opaque_opaque_int
17143 = build_function_type_list (opaque_V4SI_type_node,
17144 opaque_V4SI_type_node, opaque_V4SI_type_node,
17145 integer_type_node, NULL_TREE);
17146 tree opaque_ftype_opaque_opaque_opaque
17147 = build_function_type_list (opaque_V4SI_type_node,
17148 opaque_V4SI_type_node, opaque_V4SI_type_node,
17149 opaque_V4SI_type_node, NULL_TREE);
17150 tree opaque_ftype_opaque_opaque
17151 = build_function_type_list (opaque_V4SI_type_node,
17152 opaque_V4SI_type_node, opaque_V4SI_type_node,
17153 NULL_TREE);
17154 tree int_ftype_int_opaque_opaque
17155 = build_function_type_list (integer_type_node,
17156 integer_type_node, opaque_V4SI_type_node,
17157 opaque_V4SI_type_node, NULL_TREE);
17158 tree int_ftype_int_v4si_v4si
17159 = build_function_type_list (integer_type_node,
17160 integer_type_node, V4SI_type_node,
17161 V4SI_type_node, NULL_TREE);
17162 tree int_ftype_int_v2di_v2di
17163 = build_function_type_list (integer_type_node,
17164 integer_type_node, V2DI_type_node,
17165 V2DI_type_node, NULL_TREE);
17166 tree void_ftype_v4si
17167 = build_function_type_list (void_type_node, V4SI_type_node, NULL_TREE);
17168 tree v8hi_ftype_void
17169 = build_function_type_list (V8HI_type_node, NULL_TREE);
17170 tree void_ftype_void
17171 = build_function_type_list (void_type_node, NULL_TREE);
17172 tree void_ftype_int
17173 = build_function_type_list (void_type_node, integer_type_node, NULL_TREE);
17175 tree opaque_ftype_long_pcvoid
17176 = build_function_type_list (opaque_V4SI_type_node,
17177 long_integer_type_node, pcvoid_type_node,
17178 NULL_TREE);
17179 tree v16qi_ftype_long_pcvoid
17180 = build_function_type_list (V16QI_type_node,
17181 long_integer_type_node, pcvoid_type_node,
17182 NULL_TREE);
17183 tree v8hi_ftype_long_pcvoid
17184 = build_function_type_list (V8HI_type_node,
17185 long_integer_type_node, pcvoid_type_node,
17186 NULL_TREE);
17187 tree v4si_ftype_long_pcvoid
17188 = build_function_type_list (V4SI_type_node,
17189 long_integer_type_node, pcvoid_type_node,
17190 NULL_TREE);
17191 tree v4sf_ftype_long_pcvoid
17192 = build_function_type_list (V4SF_type_node,
17193 long_integer_type_node, pcvoid_type_node,
17194 NULL_TREE);
17195 tree v2df_ftype_long_pcvoid
17196 = build_function_type_list (V2DF_type_node,
17197 long_integer_type_node, pcvoid_type_node,
17198 NULL_TREE);
17199 tree v2di_ftype_long_pcvoid
17200 = build_function_type_list (V2DI_type_node,
17201 long_integer_type_node, pcvoid_type_node,
17202 NULL_TREE);
17204 tree void_ftype_opaque_long_pvoid
17205 = build_function_type_list (void_type_node,
17206 opaque_V4SI_type_node, long_integer_type_node,
17207 pvoid_type_node, NULL_TREE);
17208 tree void_ftype_v4si_long_pvoid
17209 = build_function_type_list (void_type_node,
17210 V4SI_type_node, long_integer_type_node,
17211 pvoid_type_node, NULL_TREE);
17212 tree void_ftype_v16qi_long_pvoid
17213 = build_function_type_list (void_type_node,
17214 V16QI_type_node, long_integer_type_node,
17215 pvoid_type_node, NULL_TREE);
17217 tree void_ftype_v16qi_pvoid_long
17218 = build_function_type_list (void_type_node,
17219 V16QI_type_node, pvoid_type_node,
17220 long_integer_type_node, NULL_TREE);
17222 tree void_ftype_v8hi_long_pvoid
17223 = build_function_type_list (void_type_node,
17224 V8HI_type_node, long_integer_type_node,
17225 pvoid_type_node, NULL_TREE);
17226 tree void_ftype_v4sf_long_pvoid
17227 = build_function_type_list (void_type_node,
17228 V4SF_type_node, long_integer_type_node,
17229 pvoid_type_node, NULL_TREE);
17230 tree void_ftype_v2df_long_pvoid
17231 = build_function_type_list (void_type_node,
17232 V2DF_type_node, long_integer_type_node,
17233 pvoid_type_node, NULL_TREE);
17234 tree void_ftype_v2di_long_pvoid
17235 = build_function_type_list (void_type_node,
17236 V2DI_type_node, long_integer_type_node,
17237 pvoid_type_node, NULL_TREE);
17238 tree int_ftype_int_v8hi_v8hi
17239 = build_function_type_list (integer_type_node,
17240 integer_type_node, V8HI_type_node,
17241 V8HI_type_node, NULL_TREE);
17242 tree int_ftype_int_v16qi_v16qi
17243 = build_function_type_list (integer_type_node,
17244 integer_type_node, V16QI_type_node,
17245 V16QI_type_node, NULL_TREE);
17246 tree int_ftype_int_v4sf_v4sf
17247 = build_function_type_list (integer_type_node,
17248 integer_type_node, V4SF_type_node,
17249 V4SF_type_node, NULL_TREE);
17250 tree int_ftype_int_v2df_v2df
17251 = build_function_type_list (integer_type_node,
17252 integer_type_node, V2DF_type_node,
17253 V2DF_type_node, NULL_TREE);
17254 tree v2di_ftype_v2di
17255 = build_function_type_list (V2DI_type_node, V2DI_type_node, NULL_TREE);
17256 tree v4si_ftype_v4si
17257 = build_function_type_list (V4SI_type_node, V4SI_type_node, NULL_TREE);
17258 tree v8hi_ftype_v8hi
17259 = build_function_type_list (V8HI_type_node, V8HI_type_node, NULL_TREE);
17260 tree v16qi_ftype_v16qi
17261 = build_function_type_list (V16QI_type_node, V16QI_type_node, NULL_TREE);
17262 tree v4sf_ftype_v4sf
17263 = build_function_type_list (V4SF_type_node, V4SF_type_node, NULL_TREE);
17264 tree v2df_ftype_v2df
17265 = build_function_type_list (V2DF_type_node, V2DF_type_node, NULL_TREE);
17266 tree void_ftype_pcvoid_int_int
17267 = build_function_type_list (void_type_node,
17268 pcvoid_type_node, integer_type_node,
17269 integer_type_node, NULL_TREE);
17271 def_builtin ("__builtin_altivec_mtvscr", void_ftype_v4si, ALTIVEC_BUILTIN_MTVSCR);
17272 def_builtin ("__builtin_altivec_mfvscr", v8hi_ftype_void, ALTIVEC_BUILTIN_MFVSCR);
17273 def_builtin ("__builtin_altivec_dssall", void_ftype_void, ALTIVEC_BUILTIN_DSSALL);
17274 def_builtin ("__builtin_altivec_dss", void_ftype_int, ALTIVEC_BUILTIN_DSS);
17275 def_builtin ("__builtin_altivec_lvsl", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVSL);
17276 def_builtin ("__builtin_altivec_lvsr", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVSR);
17277 def_builtin ("__builtin_altivec_lvebx", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVEBX);
17278 def_builtin ("__builtin_altivec_lvehx", v8hi_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVEHX);
17279 def_builtin ("__builtin_altivec_lvewx", v4si_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVEWX);
17280 def_builtin ("__builtin_altivec_lvxl", v4si_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVXL);
17281 def_builtin ("__builtin_altivec_lvxl_v2df", v2df_ftype_long_pcvoid,
17282 ALTIVEC_BUILTIN_LVXL_V2DF);
17283 def_builtin ("__builtin_altivec_lvxl_v2di", v2di_ftype_long_pcvoid,
17284 ALTIVEC_BUILTIN_LVXL_V2DI);
17285 def_builtin ("__builtin_altivec_lvxl_v4sf", v4sf_ftype_long_pcvoid,
17286 ALTIVEC_BUILTIN_LVXL_V4SF);
17287 def_builtin ("__builtin_altivec_lvxl_v4si", v4si_ftype_long_pcvoid,
17288 ALTIVEC_BUILTIN_LVXL_V4SI);
17289 def_builtin ("__builtin_altivec_lvxl_v8hi", v8hi_ftype_long_pcvoid,
17290 ALTIVEC_BUILTIN_LVXL_V8HI);
17291 def_builtin ("__builtin_altivec_lvxl_v16qi", v16qi_ftype_long_pcvoid,
17292 ALTIVEC_BUILTIN_LVXL_V16QI);
17293 def_builtin ("__builtin_altivec_lvx", v4si_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVX);
17294 def_builtin ("__builtin_altivec_lvx_v2df", v2df_ftype_long_pcvoid,
17295 ALTIVEC_BUILTIN_LVX_V2DF);
17296 def_builtin ("__builtin_altivec_lvx_v2di", v2di_ftype_long_pcvoid,
17297 ALTIVEC_BUILTIN_LVX_V2DI);
17298 def_builtin ("__builtin_altivec_lvx_v4sf", v4sf_ftype_long_pcvoid,
17299 ALTIVEC_BUILTIN_LVX_V4SF);
17300 def_builtin ("__builtin_altivec_lvx_v4si", v4si_ftype_long_pcvoid,
17301 ALTIVEC_BUILTIN_LVX_V4SI);
17302 def_builtin ("__builtin_altivec_lvx_v8hi", v8hi_ftype_long_pcvoid,
17303 ALTIVEC_BUILTIN_LVX_V8HI);
17304 def_builtin ("__builtin_altivec_lvx_v16qi", v16qi_ftype_long_pcvoid,
17305 ALTIVEC_BUILTIN_LVX_V16QI);
17306 def_builtin ("__builtin_altivec_stvx", void_ftype_v4si_long_pvoid, ALTIVEC_BUILTIN_STVX);
17307 def_builtin ("__builtin_altivec_stvx_v2df", void_ftype_v2df_long_pvoid,
17308 ALTIVEC_BUILTIN_STVX_V2DF);
17309 def_builtin ("__builtin_altivec_stvx_v2di", void_ftype_v2di_long_pvoid,
17310 ALTIVEC_BUILTIN_STVX_V2DI);
17311 def_builtin ("__builtin_altivec_stvx_v4sf", void_ftype_v4sf_long_pvoid,
17312 ALTIVEC_BUILTIN_STVX_V4SF);
17313 def_builtin ("__builtin_altivec_stvx_v4si", void_ftype_v4si_long_pvoid,
17314 ALTIVEC_BUILTIN_STVX_V4SI);
17315 def_builtin ("__builtin_altivec_stvx_v8hi", void_ftype_v8hi_long_pvoid,
17316 ALTIVEC_BUILTIN_STVX_V8HI);
17317 def_builtin ("__builtin_altivec_stvx_v16qi", void_ftype_v16qi_long_pvoid,
17318 ALTIVEC_BUILTIN_STVX_V16QI);
17319 def_builtin ("__builtin_altivec_stvewx", void_ftype_v4si_long_pvoid, ALTIVEC_BUILTIN_STVEWX);
17320 def_builtin ("__builtin_altivec_stvxl", void_ftype_v4si_long_pvoid, ALTIVEC_BUILTIN_STVXL);
17321 def_builtin ("__builtin_altivec_stvxl_v2df", void_ftype_v2df_long_pvoid,
17322 ALTIVEC_BUILTIN_STVXL_V2DF);
17323 def_builtin ("__builtin_altivec_stvxl_v2di", void_ftype_v2di_long_pvoid,
17324 ALTIVEC_BUILTIN_STVXL_V2DI);
17325 def_builtin ("__builtin_altivec_stvxl_v4sf", void_ftype_v4sf_long_pvoid,
17326 ALTIVEC_BUILTIN_STVXL_V4SF);
17327 def_builtin ("__builtin_altivec_stvxl_v4si", void_ftype_v4si_long_pvoid,
17328 ALTIVEC_BUILTIN_STVXL_V4SI);
17329 def_builtin ("__builtin_altivec_stvxl_v8hi", void_ftype_v8hi_long_pvoid,
17330 ALTIVEC_BUILTIN_STVXL_V8HI);
17331 def_builtin ("__builtin_altivec_stvxl_v16qi", void_ftype_v16qi_long_pvoid,
17332 ALTIVEC_BUILTIN_STVXL_V16QI);
17333 def_builtin ("__builtin_altivec_stvebx", void_ftype_v16qi_long_pvoid, ALTIVEC_BUILTIN_STVEBX);
17334 def_builtin ("__builtin_altivec_stvehx", void_ftype_v8hi_long_pvoid, ALTIVEC_BUILTIN_STVEHX);
17335 def_builtin ("__builtin_vec_ld", opaque_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LD);
17336 def_builtin ("__builtin_vec_lde", opaque_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LDE);
17337 def_builtin ("__builtin_vec_ldl", opaque_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LDL);
17338 def_builtin ("__builtin_vec_lvsl", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LVSL);
17339 def_builtin ("__builtin_vec_lvsr", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LVSR);
17340 def_builtin ("__builtin_vec_lvebx", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LVEBX);
17341 def_builtin ("__builtin_vec_lvehx", v8hi_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LVEHX);
17342 def_builtin ("__builtin_vec_lvewx", v4si_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LVEWX);
17343 def_builtin ("__builtin_vec_st", void_ftype_opaque_long_pvoid, ALTIVEC_BUILTIN_VEC_ST);
17344 def_builtin ("__builtin_vec_ste", void_ftype_opaque_long_pvoid, ALTIVEC_BUILTIN_VEC_STE);
17345 def_builtin ("__builtin_vec_stl", void_ftype_opaque_long_pvoid, ALTIVEC_BUILTIN_VEC_STL);
17346 def_builtin ("__builtin_vec_stvewx", void_ftype_opaque_long_pvoid, ALTIVEC_BUILTIN_VEC_STVEWX);
17347 def_builtin ("__builtin_vec_stvebx", void_ftype_opaque_long_pvoid, ALTIVEC_BUILTIN_VEC_STVEBX);
17348 def_builtin ("__builtin_vec_stvehx", void_ftype_opaque_long_pvoid, ALTIVEC_BUILTIN_VEC_STVEHX);
17350 def_builtin ("__builtin_vsx_lxvd2x_v2df", v2df_ftype_long_pcvoid,
17351 VSX_BUILTIN_LXVD2X_V2DF);
17352 def_builtin ("__builtin_vsx_lxvd2x_v2di", v2di_ftype_long_pcvoid,
17353 VSX_BUILTIN_LXVD2X_V2DI);
17354 def_builtin ("__builtin_vsx_lxvw4x_v4sf", v4sf_ftype_long_pcvoid,
17355 VSX_BUILTIN_LXVW4X_V4SF);
17356 def_builtin ("__builtin_vsx_lxvw4x_v4si", v4si_ftype_long_pcvoid,
17357 VSX_BUILTIN_LXVW4X_V4SI);
17358 def_builtin ("__builtin_vsx_lxvw4x_v8hi", v8hi_ftype_long_pcvoid,
17359 VSX_BUILTIN_LXVW4X_V8HI);
17360 def_builtin ("__builtin_vsx_lxvw4x_v16qi", v16qi_ftype_long_pcvoid,
17361 VSX_BUILTIN_LXVW4X_V16QI);
17362 def_builtin ("__builtin_vsx_stxvd2x_v2df", void_ftype_v2df_long_pvoid,
17363 VSX_BUILTIN_STXVD2X_V2DF);
17364 def_builtin ("__builtin_vsx_stxvd2x_v2di", void_ftype_v2di_long_pvoid,
17365 VSX_BUILTIN_STXVD2X_V2DI);
17366 def_builtin ("__builtin_vsx_stxvw4x_v4sf", void_ftype_v4sf_long_pvoid,
17367 VSX_BUILTIN_STXVW4X_V4SF);
17368 def_builtin ("__builtin_vsx_stxvw4x_v4si", void_ftype_v4si_long_pvoid,
17369 VSX_BUILTIN_STXVW4X_V4SI);
17370 def_builtin ("__builtin_vsx_stxvw4x_v8hi", void_ftype_v8hi_long_pvoid,
17371 VSX_BUILTIN_STXVW4X_V8HI);
17372 def_builtin ("__builtin_vsx_stxvw4x_v16qi", void_ftype_v16qi_long_pvoid,
17373 VSX_BUILTIN_STXVW4X_V16QI);
17375 def_builtin ("__builtin_vsx_ld_elemrev_v2df", v2df_ftype_long_pcvoid,
17376 VSX_BUILTIN_LD_ELEMREV_V2DF);
17377 def_builtin ("__builtin_vsx_ld_elemrev_v2di", v2di_ftype_long_pcvoid,
17378 VSX_BUILTIN_LD_ELEMREV_V2DI);
17379 def_builtin ("__builtin_vsx_ld_elemrev_v4sf", v4sf_ftype_long_pcvoid,
17380 VSX_BUILTIN_LD_ELEMREV_V4SF);
17381 def_builtin ("__builtin_vsx_ld_elemrev_v4si", v4si_ftype_long_pcvoid,
17382 VSX_BUILTIN_LD_ELEMREV_V4SI);
17383 def_builtin ("__builtin_vsx_st_elemrev_v2df", void_ftype_v2df_long_pvoid,
17384 VSX_BUILTIN_ST_ELEMREV_V2DF);
17385 def_builtin ("__builtin_vsx_st_elemrev_v2di", void_ftype_v2di_long_pvoid,
17386 VSX_BUILTIN_ST_ELEMREV_V2DI);
17387 def_builtin ("__builtin_vsx_st_elemrev_v4sf", void_ftype_v4sf_long_pvoid,
17388 VSX_BUILTIN_ST_ELEMREV_V4SF);
17389 def_builtin ("__builtin_vsx_st_elemrev_v4si", void_ftype_v4si_long_pvoid,
17390 VSX_BUILTIN_ST_ELEMREV_V4SI);
17392 if (TARGET_P9_VECTOR)
17394 def_builtin ("__builtin_vsx_ld_elemrev_v8hi", v8hi_ftype_long_pcvoid,
17395 VSX_BUILTIN_LD_ELEMREV_V8HI);
17396 def_builtin ("__builtin_vsx_ld_elemrev_v16qi", v16qi_ftype_long_pcvoid,
17397 VSX_BUILTIN_LD_ELEMREV_V16QI);
17398 def_builtin ("__builtin_vsx_st_elemrev_v8hi",
17399 void_ftype_v8hi_long_pvoid, VSX_BUILTIN_ST_ELEMREV_V8HI);
17400 def_builtin ("__builtin_vsx_st_elemrev_v16qi",
17401 void_ftype_v16qi_long_pvoid, VSX_BUILTIN_ST_ELEMREV_V16QI);
17403 else
17405 rs6000_builtin_decls[(int) VSX_BUILTIN_LD_ELEMREV_V8HI]
17406 = rs6000_builtin_decls[(int) VSX_BUILTIN_LXVW4X_V8HI];
17407 rs6000_builtin_decls[(int) VSX_BUILTIN_LD_ELEMREV_V16QI]
17408 = rs6000_builtin_decls[(int) VSX_BUILTIN_LXVW4X_V16QI];
17409 rs6000_builtin_decls[(int) VSX_BUILTIN_ST_ELEMREV_V8HI]
17410 = rs6000_builtin_decls[(int) VSX_BUILTIN_STXVW4X_V8HI];
17411 rs6000_builtin_decls[(int) VSX_BUILTIN_ST_ELEMREV_V16QI]
17412 = rs6000_builtin_decls[(int) VSX_BUILTIN_STXVW4X_V16QI];
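/* Hedged note: without -mpower9-vector the V8HI and V16QI
   element-reversing built-ins are aliased above to the plain VSX
   lxvw4x/stxvw4x decls, so the ELEMREV builtin codes still resolve to
   usable declarations on older processors.  */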
17415 def_builtin ("__builtin_vec_vsx_ld", opaque_ftype_long_pcvoid,
17416 VSX_BUILTIN_VEC_LD);
17417 def_builtin ("__builtin_vec_vsx_st", void_ftype_opaque_long_pvoid,
17418 VSX_BUILTIN_VEC_ST);
17419 def_builtin ("__builtin_vec_xl", opaque_ftype_long_pcvoid,
17420 VSX_BUILTIN_VEC_XL);
17421 def_builtin ("__builtin_vec_xst", void_ftype_opaque_long_pvoid,
17422 VSX_BUILTIN_VEC_XST);
17424 def_builtin ("__builtin_vec_step", int_ftype_opaque, ALTIVEC_BUILTIN_VEC_STEP);
17425 def_builtin ("__builtin_vec_splats", opaque_ftype_opaque, ALTIVEC_BUILTIN_VEC_SPLATS);
17426 def_builtin ("__builtin_vec_promote", opaque_ftype_opaque, ALTIVEC_BUILTIN_VEC_PROMOTE);
17428 def_builtin ("__builtin_vec_sld", opaque_ftype_opaque_opaque_int, ALTIVEC_BUILTIN_VEC_SLD);
17429 def_builtin ("__builtin_vec_splat", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_SPLAT);
17430 def_builtin ("__builtin_vec_extract", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_EXTRACT);
17431 def_builtin ("__builtin_vec_insert", opaque_ftype_opaque_opaque_int, ALTIVEC_BUILTIN_VEC_INSERT);
17432 def_builtin ("__builtin_vec_vspltw", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_VSPLTW);
17433 def_builtin ("__builtin_vec_vsplth", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_VSPLTH);
17434 def_builtin ("__builtin_vec_vspltb", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_VSPLTB);
17435 def_builtin ("__builtin_vec_ctf", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_CTF);
17436 def_builtin ("__builtin_vec_vcfsx", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_VCFSX);
17437 def_builtin ("__builtin_vec_vcfux", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_VCFUX);
17438 def_builtin ("__builtin_vec_cts", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_CTS);
17439 def_builtin ("__builtin_vec_ctu", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_CTU);
17441 def_builtin ("__builtin_vec_adde", opaque_ftype_opaque_opaque_opaque,
17442 ALTIVEC_BUILTIN_VEC_ADDE);
17443 def_builtin ("__builtin_vec_addec", opaque_ftype_opaque_opaque_opaque,
17444 ALTIVEC_BUILTIN_VEC_ADDEC);
17445 def_builtin ("__builtin_vec_cmpne", opaque_ftype_opaque_opaque,
17446 ALTIVEC_BUILTIN_VEC_CMPNE);
17447 def_builtin ("__builtin_vec_mul", opaque_ftype_opaque_opaque,
17448 ALTIVEC_BUILTIN_VEC_MUL);
17449 def_builtin ("__builtin_vec_sube", opaque_ftype_opaque_opaque_opaque,
17450 ALTIVEC_BUILTIN_VEC_SUBE);
17451 def_builtin ("__builtin_vec_subec", opaque_ftype_opaque_opaque_opaque,
17452 ALTIVEC_BUILTIN_VEC_SUBEC);
17454 /* Cell builtins. */
17455 def_builtin ("__builtin_altivec_lvlx", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVLX);
17456 def_builtin ("__builtin_altivec_lvlxl", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVLXL);
17457 def_builtin ("__builtin_altivec_lvrx", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVRX);
17458 def_builtin ("__builtin_altivec_lvrxl", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVRXL);
17460 def_builtin ("__builtin_vec_lvlx", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LVLX);
17461 def_builtin ("__builtin_vec_lvlxl", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LVLXL);
17462 def_builtin ("__builtin_vec_lvrx", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LVRX);
17463 def_builtin ("__builtin_vec_lvrxl", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LVRXL);
17465 def_builtin ("__builtin_altivec_stvlx", void_ftype_v16qi_long_pvoid, ALTIVEC_BUILTIN_STVLX);
17466 def_builtin ("__builtin_altivec_stvlxl", void_ftype_v16qi_long_pvoid, ALTIVEC_BUILTIN_STVLXL);
17467 def_builtin ("__builtin_altivec_stvrx", void_ftype_v16qi_long_pvoid, ALTIVEC_BUILTIN_STVRX);
17468 def_builtin ("__builtin_altivec_stvrxl", void_ftype_v16qi_long_pvoid, ALTIVEC_BUILTIN_STVRXL);
17470 def_builtin ("__builtin_vec_stvlx", void_ftype_v16qi_long_pvoid, ALTIVEC_BUILTIN_VEC_STVLX);
17471 def_builtin ("__builtin_vec_stvlxl", void_ftype_v16qi_long_pvoid, ALTIVEC_BUILTIN_VEC_STVLXL);
17472 def_builtin ("__builtin_vec_stvrx", void_ftype_v16qi_long_pvoid, ALTIVEC_BUILTIN_VEC_STVRX);
17473 def_builtin ("__builtin_vec_stvrxl", void_ftype_v16qi_long_pvoid, ALTIVEC_BUILTIN_VEC_STVRXL);
17475 if (TARGET_P9_VECTOR)
17476 def_builtin ("__builtin_altivec_stxvl", void_ftype_v16qi_pvoid_long,
17477 P9V_BUILTIN_STXVL);
17479 /* Add the DST variants. */
17480 d = bdesc_dst;
17481 for (i = 0; i < ARRAY_SIZE (bdesc_dst); i++, d++)
17483 HOST_WIDE_INT mask = d->mask;
17485 /* It is expected that these dst built-in functions may have
17486 d->icode equal to CODE_FOR_nothing. */
17487 if ((mask & builtin_mask) != mask)
17489 if (TARGET_DEBUG_BUILTIN)
17490 fprintf (stderr, "altivec_init_builtins, skip dst %s\n",
17491 d->name);
17492 continue;
17494 def_builtin (d->name, void_ftype_pcvoid_int_int, d->code);
17497 /* Initialize the predicates. */
17498 d = bdesc_altivec_preds;
17499 for (i = 0; i < ARRAY_SIZE (bdesc_altivec_preds); i++, d++)
17501 machine_mode mode1;
17502 tree type;
17503 HOST_WIDE_INT mask = d->mask;
17505 if ((mask & builtin_mask) != mask)
17507 if (TARGET_DEBUG_BUILTIN)
17508 fprintf (stderr, "altivec_init_builtins, skip predicate %s\n",
17509 d->name);
17510 continue;
17513 if (rs6000_overloaded_builtin_p (d->code))
17514 mode1 = VOIDmode;
17515 else
17517 /* Cannot define builtin if the instruction is disabled. */
17518 gcc_assert (d->icode != CODE_FOR_nothing);
17519 mode1 = insn_data[d->icode].operand[1].mode;
17522 switch (mode1)
17524 case VOIDmode:
17525 type = int_ftype_int_opaque_opaque;
17526 break;
17527 case V2DImode:
17528 type = int_ftype_int_v2di_v2di;
17529 break;
17530 case V4SImode:
17531 type = int_ftype_int_v4si_v4si;
17532 break;
17533 case V8HImode:
17534 type = int_ftype_int_v8hi_v8hi;
17535 break;
17536 case V16QImode:
17537 type = int_ftype_int_v16qi_v16qi;
17538 break;
17539 case V4SFmode:
17540 type = int_ftype_int_v4sf_v4sf;
17541 break;
17542 case V2DFmode:
17543 type = int_ftype_int_v2df_v2df;
17544 break;
17545 default:
17546 gcc_unreachable ();
17549 def_builtin (d->name, type, d->code);
17552 /* Initialize the abs* operators. */
17553 d = bdesc_abs;
17554 for (i = 0; i < ARRAY_SIZE (bdesc_abs); i++, d++)
17556 machine_mode mode0;
17557 tree type;
17558 HOST_WIDE_INT mask = d->mask;
17560 if ((mask & builtin_mask) != mask)
17562 if (TARGET_DEBUG_BUILTIN)
17563 fprintf (stderr, "altivec_init_builtins, skip abs %s\n",
17564 d->name);
17565 continue;
17568 /* Cannot define builtin if the instruction is disabled. */
17569 gcc_assert (d->icode != CODE_FOR_nothing);
17570 mode0 = insn_data[d->icode].operand[0].mode;
17572 switch (mode0)
17574 case V2DImode:
17575 type = v2di_ftype_v2di;
17576 break;
17577 case V4SImode:
17578 type = v4si_ftype_v4si;
17579 break;
17580 case V8HImode:
17581 type = v8hi_ftype_v8hi;
17582 break;
17583 case V16QImode:
17584 type = v16qi_ftype_v16qi;
17585 break;
17586 case V4SFmode:
17587 type = v4sf_ftype_v4sf;
17588 break;
17589 case V2DFmode:
17590 type = v2df_ftype_v2df;
17591 break;
17592 default:
17593 gcc_unreachable ();
17596 def_builtin (d->name, type, d->code);
17599 /* Initialize target builtin that implements
17600 targetm.vectorize.builtin_mask_for_load. */
17602 decl = add_builtin_function ("__builtin_altivec_mask_for_load",
17603 v16qi_ftype_long_pcvoid,
17604 ALTIVEC_BUILTIN_MASK_FOR_LOAD,
17605 BUILT_IN_MD, NULL, NULL_TREE);
17606 TREE_READONLY (decl) = 1;
17607 /* Record the decl. Will be used by rs6000_builtin_mask_for_load. */
17608 altivec_builtin_mask_for_load = decl;
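/* Hedged note: TREE_READONLY marks the function as const, which lets
   the optimizers CSE repeated mask computations.  The recorded decl is
   what the targetm.vectorize.builtin_mask_for_load hook hands back
   when the vectorizer sets up misaligned-load realignment.  */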
17610 /* Access to the vec_init patterns. */
17611 ftype = build_function_type_list (V4SI_type_node, integer_type_node,
17612 integer_type_node, integer_type_node,
17613 integer_type_node, NULL_TREE);
17614 def_builtin ("__builtin_vec_init_v4si", ftype, ALTIVEC_BUILTIN_VEC_INIT_V4SI);
17616 ftype = build_function_type_list (V8HI_type_node, short_integer_type_node,
17617 short_integer_type_node,
17618 short_integer_type_node,
17619 short_integer_type_node,
17620 short_integer_type_node,
17621 short_integer_type_node,
17622 short_integer_type_node,
17623 short_integer_type_node, NULL_TREE);
17624 def_builtin ("__builtin_vec_init_v8hi", ftype, ALTIVEC_BUILTIN_VEC_INIT_V8HI);
17626 ftype = build_function_type_list (V16QI_type_node, char_type_node,
17627 char_type_node, char_type_node,
17628 char_type_node, char_type_node,
17629 char_type_node, char_type_node,
17630 char_type_node, char_type_node,
17631 char_type_node, char_type_node,
17632 char_type_node, char_type_node,
17633 char_type_node, char_type_node,
17634 char_type_node, NULL_TREE);
17635 def_builtin ("__builtin_vec_init_v16qi", ftype,
17636 ALTIVEC_BUILTIN_VEC_INIT_V16QI);
17638 ftype = build_function_type_list (V4SF_type_node, float_type_node,
17639 float_type_node, float_type_node,
17640 float_type_node, NULL_TREE);
17641 def_builtin ("__builtin_vec_init_v4sf", ftype, ALTIVEC_BUILTIN_VEC_INIT_V4SF);
17643 /* VSX builtins. */
17644 ftype = build_function_type_list (V2DF_type_node, double_type_node,
17645 double_type_node, NULL_TREE);
17646 def_builtin ("__builtin_vec_init_v2df", ftype, VSX_BUILTIN_VEC_INIT_V2DF);
17648 ftype = build_function_type_list (V2DI_type_node, intDI_type_node,
17649 intDI_type_node, NULL_TREE);
17650 def_builtin ("__builtin_vec_init_v2di", ftype, VSX_BUILTIN_VEC_INIT_V2DI);
17652 /* Access to the vec_set patterns. */
17653 ftype = build_function_type_list (V4SI_type_node, V4SI_type_node,
17654 intSI_type_node,
17655 integer_type_node, NULL_TREE);
17656 def_builtin ("__builtin_vec_set_v4si", ftype, ALTIVEC_BUILTIN_VEC_SET_V4SI);
17658 ftype = build_function_type_list (V8HI_type_node, V8HI_type_node,
17659 intHI_type_node,
17660 integer_type_node, NULL_TREE);
17661 def_builtin ("__builtin_vec_set_v8hi", ftype, ALTIVEC_BUILTIN_VEC_SET_V8HI);
17663 ftype = build_function_type_list (V16QI_type_node, V16QI_type_node,
17664 intQI_type_node,
17665 integer_type_node, NULL_TREE);
17666 def_builtin ("__builtin_vec_set_v16qi", ftype, ALTIVEC_BUILTIN_VEC_SET_V16QI);
17668 ftype = build_function_type_list (V4SF_type_node, V4SF_type_node,
17669 float_type_node,
17670 integer_type_node, NULL_TREE);
17671 def_builtin ("__builtin_vec_set_v4sf", ftype, ALTIVEC_BUILTIN_VEC_SET_V4SF);
17673 ftype = build_function_type_list (V2DF_type_node, V2DF_type_node,
17674 double_type_node,
17675 integer_type_node, NULL_TREE);
17676 def_builtin ("__builtin_vec_set_v2df", ftype, VSX_BUILTIN_VEC_SET_V2DF);
17678 ftype = build_function_type_list (V2DI_type_node, V2DI_type_node,
17679 intDI_type_node,
17680 integer_type_node, NULL_TREE);
17681 def_builtin ("__builtin_vec_set_v2di", ftype, VSX_BUILTIN_VEC_SET_V2DI);
17683 /* Access to the vec_extract patterns. */
17684 ftype = build_function_type_list (intSI_type_node, V4SI_type_node,
17685 integer_type_node, NULL_TREE);
17686 def_builtin ("__builtin_vec_ext_v4si", ftype, ALTIVEC_BUILTIN_VEC_EXT_V4SI);
17688 ftype = build_function_type_list (intHI_type_node, V8HI_type_node,
17689 integer_type_node, NULL_TREE);
17690 def_builtin ("__builtin_vec_ext_v8hi", ftype, ALTIVEC_BUILTIN_VEC_EXT_V8HI);
17692 ftype = build_function_type_list (intQI_type_node, V16QI_type_node,
17693 integer_type_node, NULL_TREE);
17694 def_builtin ("__builtin_vec_ext_v16qi", ftype, ALTIVEC_BUILTIN_VEC_EXT_V16QI);
17696 ftype = build_function_type_list (float_type_node, V4SF_type_node,
17697 integer_type_node, NULL_TREE);
17698 def_builtin ("__builtin_vec_ext_v4sf", ftype, ALTIVEC_BUILTIN_VEC_EXT_V4SF);
17700 ftype = build_function_type_list (double_type_node, V2DF_type_node,
17701 integer_type_node, NULL_TREE);
17702 def_builtin ("__builtin_vec_ext_v2df", ftype, VSX_BUILTIN_VEC_EXT_V2DF);
17704 ftype = build_function_type_list (intDI_type_node, V2DI_type_node,
17705 integer_type_node, NULL_TREE);
17706 def_builtin ("__builtin_vec_ext_v2di", ftype, VSX_BUILTIN_VEC_EXT_V2DI);
17709 if (V1TI_type_node)
17711 tree v1ti_ftype_long_pcvoid
17712 = build_function_type_list (V1TI_type_node,
17713 long_integer_type_node, pcvoid_type_node,
17714 NULL_TREE);
17715 tree void_ftype_v1ti_long_pvoid
17716 = build_function_type_list (void_type_node,
17717 V1TI_type_node, long_integer_type_node,
17718 pvoid_type_node, NULL_TREE);
17719 def_builtin ("__builtin_vsx_lxvd2x_v1ti", v1ti_ftype_long_pcvoid,
17720 VSX_BUILTIN_LXVD2X_V1TI);
17721 def_builtin ("__builtin_vsx_stxvd2x_v1ti", void_ftype_v1ti_long_pvoid,
17722 VSX_BUILTIN_STXVD2X_V1TI);
17723 ftype = build_function_type_list (V1TI_type_node, intTI_type_node,
17724 NULL_TREE, NULL_TREE);
17725 def_builtin ("__builtin_vec_init_v1ti", ftype, VSX_BUILTIN_VEC_INIT_V1TI);
17726 ftype = build_function_type_list (V1TI_type_node, V1TI_type_node,
17727 intTI_type_node,
17728 integer_type_node, NULL_TREE);
17729 def_builtin ("__builtin_vec_set_v1ti", ftype, VSX_BUILTIN_VEC_SET_V1TI);
17730 ftype = build_function_type_list (intTI_type_node, V1TI_type_node,
17731 integer_type_node, NULL_TREE);
17732 def_builtin ("__builtin_vec_ext_v1ti", ftype, VSX_BUILTIN_VEC_EXT_V1TI);
17737 static void
17738 htm_init_builtins (void)
17740 HOST_WIDE_INT builtin_mask = rs6000_builtin_mask;
17741 const struct builtin_description *d;
17742 size_t i;
17744 d = bdesc_htm;
17745 for (i = 0; i < ARRAY_SIZE (bdesc_htm); i++, d++)
17747 tree op[MAX_HTM_OPERANDS], type;
17748 HOST_WIDE_INT mask = d->mask;
17749 unsigned attr = rs6000_builtin_info[d->code].attr;
17750 bool void_func = (attr & RS6000_BTC_VOID);
17751 int attr_args = (attr & RS6000_BTC_TYPE_MASK);
17752 int nopnds = 0;
17753 tree gpr_type_node;
17754 tree rettype;
17755 tree argtype;
17757 /* It is expected that these htm built-in functions may have
17758 d->icode equal to CODE_FOR_nothing. */
17760 if (TARGET_32BIT && TARGET_POWERPC64)
17761 gpr_type_node = long_long_unsigned_type_node;
17762 else
17763 gpr_type_node = long_unsigned_type_node;
17765 if (attr & RS6000_BTC_SPR)
17767 rettype = gpr_type_node;
17768 argtype = gpr_type_node;
17770 else if (d->code == HTM_BUILTIN_TABORTDC
17771 || d->code == HTM_BUILTIN_TABORTDCI)
17773 rettype = unsigned_type_node;
17774 argtype = gpr_type_node;
17776 else
17778 rettype = unsigned_type_node;
17779 argtype = unsigned_type_node;
17782 if ((mask & builtin_mask) != mask)
17784 if (TARGET_DEBUG_BUILTIN)
17785 fprintf (stderr, "htm_builtin, skip htm %s\n", d->name);
17786 continue;
17789 if (d->name == 0)
17791 if (TARGET_DEBUG_BUILTIN)
17792 fprintf (stderr, "htm_builtin, bdesc_htm[%lu] no name\n",
17793 (long unsigned) i);
17794 continue;
17797 op[nopnds++] = (void_func) ? void_type_node : rettype;
17799 if (attr_args == RS6000_BTC_UNARY)
17800 op[nopnds++] = argtype;
17801 else if (attr_args == RS6000_BTC_BINARY)
17803 op[nopnds++] = argtype;
17804 op[nopnds++] = argtype;
17806 else if (attr_args == RS6000_BTC_TERNARY)
17808 op[nopnds++] = argtype;
17809 op[nopnds++] = argtype;
17810 op[nopnds++] = argtype;
17813 switch (nopnds)
17815 case 1:
17816 type = build_function_type_list (op[0], NULL_TREE);
17817 break;
17818 case 2:
17819 type = build_function_type_list (op[0], op[1], NULL_TREE);
17820 break;
17821 case 3:
17822 type = build_function_type_list (op[0], op[1], op[2], NULL_TREE);
17823 break;
17824 case 4:
17825 type = build_function_type_list (op[0], op[1], op[2], op[3],
17826 NULL_TREE);
17827 break;
17828 default:
17829 gcc_unreachable ();
17832 def_builtin (d->name, type, d->code);
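/* To make the signature assembly above concrete: a hypothetical unary HTM
   builtin marked RS6000_BTC_SPR on a 64-bit target gets

     op[0] = op[1] = long_unsigned_type_node	(nopnds == 2)

   and therefore a type of roughly

     unsigned long builtin (unsigned long);

   while an ordinary unary builtin (neither SPR nor TABORTDC*) uses
   unsigned int for both the return value and the argument.  */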
17836 /* Hash function for builtin functions with up to 3 arguments and a return
17837 type. */
17838 hashval_t
17839 builtin_hasher::hash (builtin_hash_struct *bh)
17841 unsigned ret = 0;
17842 int i;
17844 for (i = 0; i < 4; i++)
17846 ret = (ret * (unsigned)MAX_MACHINE_MODE) + ((unsigned)bh->mode[i]);
17847 ret = (ret * 2) + bh->uns_p[i];
17850 return ret;
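/* Unrolled, the hash above is the mixed-radix value

     ((((((m0*2 + u0)*M + m1)*2 + u1)*M + m2)*2 + u2)*M + m3)*2 + u3

   where M is MAX_MACHINE_MODE, mi is bh->mode[i] and ui is bh->uns_p[i];
   distinct entries can only collide when this value wraps around the
   range of the unsigned accumulator.  */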
17853 /* Compare builtin hash entries H1 and H2 for equivalence. */
17854 bool
17855 builtin_hasher::equal (builtin_hash_struct *p1, builtin_hash_struct *p2)
17857 return ((p1->mode[0] == p2->mode[0])
17858 && (p1->mode[1] == p2->mode[1])
17859 && (p1->mode[2] == p2->mode[2])
17860 && (p1->mode[3] == p2->mode[3])
17861 && (p1->uns_p[0] == p2->uns_p[0])
17862 && (p1->uns_p[1] == p2->uns_p[1])
17863 && (p1->uns_p[2] == p2->uns_p[2])
17864 && (p1->uns_p[3] == p2->uns_p[3]));
17867 /* Map types for builtin functions with an explicit return type and up to 3
17868 arguments.  Functions with fewer than 3 arguments use VOIDmode as the
17869 mode of the unused arguments.  */
17870 static tree
17871 builtin_function_type (machine_mode mode_ret, machine_mode mode_arg0,
17872 machine_mode mode_arg1, machine_mode mode_arg2,
17873 enum rs6000_builtins builtin, const char *name)
17875 struct builtin_hash_struct h;
17876 struct builtin_hash_struct *h2;
17877 int num_args = 3;
17878 int i;
17879 tree ret_type = NULL_TREE;
17880 tree arg_type[3] = { NULL_TREE, NULL_TREE, NULL_TREE };
17882 /* Create builtin_hash_table. */
17883 if (builtin_hash_table == NULL)
17884 builtin_hash_table = hash_table<builtin_hasher>::create_ggc (1500);
17886 h.type = NULL_TREE;
17887 h.mode[0] = mode_ret;
17888 h.mode[1] = mode_arg0;
17889 h.mode[2] = mode_arg1;
17890 h.mode[3] = mode_arg2;
17891 h.uns_p[0] = 0;
17892 h.uns_p[1] = 0;
17893 h.uns_p[2] = 0;
17894 h.uns_p[3] = 0;
17896 /* If the builtin produces unsigned results or takes unsigned arguments,
17897 and it is returned as a decl for the vectorizer (such as widening
17898 multiplies, permute), make sure the arguments and return value are
17899 type correct.  */
17900 switch (builtin)
17902 /* unsigned 1 argument functions. */
17903 case CRYPTO_BUILTIN_VSBOX:
17904 case P8V_BUILTIN_VGBBD:
17905 case MISC_BUILTIN_CDTBCD:
17906 case MISC_BUILTIN_CBCDTD:
17907 h.uns_p[0] = 1;
17908 h.uns_p[1] = 1;
17909 break;
17911 /* unsigned 2 argument functions. */
17912 case ALTIVEC_BUILTIN_VMULEUB:
17913 case ALTIVEC_BUILTIN_VMULEUH:
17914 case ALTIVEC_BUILTIN_VMULEUW:
17915 case ALTIVEC_BUILTIN_VMULOUB:
17916 case ALTIVEC_BUILTIN_VMULOUH:
17917 case ALTIVEC_BUILTIN_VMULOUW:
17918 case CRYPTO_BUILTIN_VCIPHER:
17919 case CRYPTO_BUILTIN_VCIPHERLAST:
17920 case CRYPTO_BUILTIN_VNCIPHER:
17921 case CRYPTO_BUILTIN_VNCIPHERLAST:
17922 case CRYPTO_BUILTIN_VPMSUMB:
17923 case CRYPTO_BUILTIN_VPMSUMH:
17924 case CRYPTO_BUILTIN_VPMSUMW:
17925 case CRYPTO_BUILTIN_VPMSUMD:
17926 case CRYPTO_BUILTIN_VPMSUM:
17927 case MISC_BUILTIN_ADDG6S:
17928 case MISC_BUILTIN_DIVWEU:
17929 case MISC_BUILTIN_DIVWEUO:
17930 case MISC_BUILTIN_DIVDEU:
17931 case MISC_BUILTIN_DIVDEUO:
17932 case VSX_BUILTIN_UDIV_V2DI:
17933 case ALTIVEC_BUILTIN_VMAXUB:
17934 case ALTIVEC_BUILTIN_VMINUB:
17935 case ALTIVEC_BUILTIN_VMAXUH:
17936 case ALTIVEC_BUILTIN_VMINUH:
17937 case ALTIVEC_BUILTIN_VMAXUW:
17938 case ALTIVEC_BUILTIN_VMINUW:
17939 case P8V_BUILTIN_VMAXUD:
17940 case P8V_BUILTIN_VMINUD:
17941 h.uns_p[0] = 1;
17942 h.uns_p[1] = 1;
17943 h.uns_p[2] = 1;
17944 break;
17946 /* unsigned 3 argument functions. */
17947 case ALTIVEC_BUILTIN_VPERM_16QI_UNS:
17948 case ALTIVEC_BUILTIN_VPERM_8HI_UNS:
17949 case ALTIVEC_BUILTIN_VPERM_4SI_UNS:
17950 case ALTIVEC_BUILTIN_VPERM_2DI_UNS:
17951 case ALTIVEC_BUILTIN_VSEL_16QI_UNS:
17952 case ALTIVEC_BUILTIN_VSEL_8HI_UNS:
17953 case ALTIVEC_BUILTIN_VSEL_4SI_UNS:
17954 case ALTIVEC_BUILTIN_VSEL_2DI_UNS:
17955 case VSX_BUILTIN_VPERM_16QI_UNS:
17956 case VSX_BUILTIN_VPERM_8HI_UNS:
17957 case VSX_BUILTIN_VPERM_4SI_UNS:
17958 case VSX_BUILTIN_VPERM_2DI_UNS:
17959 case VSX_BUILTIN_XXSEL_16QI_UNS:
17960 case VSX_BUILTIN_XXSEL_8HI_UNS:
17961 case VSX_BUILTIN_XXSEL_4SI_UNS:
17962 case VSX_BUILTIN_XXSEL_2DI_UNS:
17963 case CRYPTO_BUILTIN_VPERMXOR:
17964 case CRYPTO_BUILTIN_VPERMXOR_V2DI:
17965 case CRYPTO_BUILTIN_VPERMXOR_V4SI:
17966 case CRYPTO_BUILTIN_VPERMXOR_V8HI:
17967 case CRYPTO_BUILTIN_VPERMXOR_V16QI:
17968 case CRYPTO_BUILTIN_VSHASIGMAW:
17969 case CRYPTO_BUILTIN_VSHASIGMAD:
17970 case CRYPTO_BUILTIN_VSHASIGMA:
17971 h.uns_p[0] = 1;
17972 h.uns_p[1] = 1;
17973 h.uns_p[2] = 1;
17974 h.uns_p[3] = 1;
17975 break;
17977 /* signed permute functions with unsigned char mask. */
17978 case ALTIVEC_BUILTIN_VPERM_16QI:
17979 case ALTIVEC_BUILTIN_VPERM_8HI:
17980 case ALTIVEC_BUILTIN_VPERM_4SI:
17981 case ALTIVEC_BUILTIN_VPERM_4SF:
17982 case ALTIVEC_BUILTIN_VPERM_2DI:
17983 case ALTIVEC_BUILTIN_VPERM_2DF:
17984 case VSX_BUILTIN_VPERM_16QI:
17985 case VSX_BUILTIN_VPERM_8HI:
17986 case VSX_BUILTIN_VPERM_4SI:
17987 case VSX_BUILTIN_VPERM_4SF:
17988 case VSX_BUILTIN_VPERM_2DI:
17989 case VSX_BUILTIN_VPERM_2DF:
17990 h.uns_p[3] = 1;
17991 break;
17993 /* unsigned args, signed return. */
17994 case VSX_BUILTIN_XVCVUXDSP:
17995 case VSX_BUILTIN_XVCVUXDDP_UNS:
17996 case ALTIVEC_BUILTIN_UNSFLOAT_V4SI_V4SF:
17997 h.uns_p[1] = 1;
17998 break;
18000 /* signed args, unsigned return. */
18001 case VSX_BUILTIN_XVCVDPUXDS_UNS:
18002 case ALTIVEC_BUILTIN_FIXUNS_V4SF_V4SI:
18003 case MISC_BUILTIN_UNPACK_TD:
18004 case MISC_BUILTIN_UNPACK_V1TI:
18005 h.uns_p[0] = 1;
18006 break;
18008 /* unsigned arguments for 128-bit pack instructions. */
18009 case MISC_BUILTIN_PACK_TD:
18010 case MISC_BUILTIN_PACK_V1TI:
18011 h.uns_p[1] = 1;
18012 h.uns_p[2] = 1;
18013 break;
18015 /* unsigned second arguments (vector shift right). */
18016 case ALTIVEC_BUILTIN_VSRB:
18017 case ALTIVEC_BUILTIN_VSRH:
18018 case ALTIVEC_BUILTIN_VSRW:
18019 case P8V_BUILTIN_VSRD:
18020 h.uns_p[2] = 1;
18021 break;
18023 default:
18024 break;
18027 /* Figure out how many args are present. */
18028 while (num_args > 0 && h.mode[num_args] == VOIDmode)
18029 num_args--;
18031 ret_type = builtin_mode_to_type[h.mode[0]][h.uns_p[0]];
18032 if (!ret_type && h.uns_p[0])
18033 ret_type = builtin_mode_to_type[h.mode[0]][0];
18035 if (!ret_type)
18036 fatal_error (input_location,
18037 "internal error: builtin function %s had an unexpected "
18038 "return type %s", name, GET_MODE_NAME (h.mode[0]));
18040 for (i = 0; i < (int) ARRAY_SIZE (arg_type); i++)
18041 arg_type[i] = NULL_TREE;
18043 for (i = 0; i < num_args; i++)
18045 int m = (int) h.mode[i+1];
18046 int uns_p = h.uns_p[i+1];
18048 arg_type[i] = builtin_mode_to_type[m][uns_p];
18049 if (!arg_type[i] && uns_p)
18050 arg_type[i] = builtin_mode_to_type[m][0];
18052 if (!arg_type[i])
18053 fatal_error (input_location,
18054 "internal error: builtin function %s, argument %d "
18055 "had unexpected argument type %s", name, i,
18056 GET_MODE_NAME (m));
18059 builtin_hash_struct **found = builtin_hash_table->find_slot (&h, INSERT);
18060 if (*found == NULL)
18062 h2 = ggc_alloc<builtin_hash_struct> ();
18063 *h2 = h;
18064 *found = h2;
18066 h2->type = build_function_type_list (ret_type, arg_type[0], arg_type[1],
18067 arg_type[2], NULL_TREE);
18070 return (*found)->type;
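/* For example, the multiply-even-unsigned-byte builtin listed in the
   switch above would be created by a call of roughly this shape:

     builtin_function_type (V8HImode, V16QImode, V16QImode, VOIDmode,
			    ALTIVEC_BUILTIN_VMULEUB,
			    "__builtin_altivec_vmuleub");

   the VMULEUB case sets uns_p[0..2], num_args ends up as 2, and the
   function returns (or reuses from the hash table) the type

     vector unsigned short (vector unsigned char, vector unsigned char).  */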
18073 static void
18074 rs6000_common_init_builtins (void)
18076 const struct builtin_description *d;
18077 size_t i;
18079 tree opaque_ftype_opaque = NULL_TREE;
18080 tree opaque_ftype_opaque_opaque = NULL_TREE;
18081 tree opaque_ftype_opaque_opaque_opaque = NULL_TREE;
18082 tree v2si_ftype = NULL_TREE;
18083 tree v2si_ftype_qi = NULL_TREE;
18084 tree v2si_ftype_v2si_qi = NULL_TREE;
18085 tree v2si_ftype_int_qi = NULL_TREE;
18086 HOST_WIDE_INT builtin_mask = rs6000_builtin_mask;
18088 if (!TARGET_PAIRED_FLOAT)
18090 builtin_mode_to_type[V2SImode][0] = opaque_V2SI_type_node;
18091 builtin_mode_to_type[V2SFmode][0] = opaque_V2SF_type_node;
18094 /* Paired builtins are only available if you build a compiler with the
18095 appropriate options, so only create those builtins with the appropriate
18096 compiler option. Create Altivec and VSX builtins on machines with at
18097 least the general purpose extensions (970 and newer) to allow the use of
18098 the target attribute.  */
18100 if (TARGET_EXTRA_BUILTINS)
18101 builtin_mask |= RS6000_BTM_COMMON;
18103 /* Add the ternary operators. */
18104 d = bdesc_3arg;
18105 for (i = 0; i < ARRAY_SIZE (bdesc_3arg); i++, d++)
18107 tree type;
18108 HOST_WIDE_INT mask = d->mask;
18110 if ((mask & builtin_mask) != mask)
18112 if (TARGET_DEBUG_BUILTIN)
18113 fprintf (stderr, "rs6000_builtin, skip ternary %s\n", d->name);
18114 continue;
18117 if (rs6000_overloaded_builtin_p (d->code))
18119 if (! (type = opaque_ftype_opaque_opaque_opaque))
18120 type = opaque_ftype_opaque_opaque_opaque
18121 = build_function_type_list (opaque_V4SI_type_node,
18122 opaque_V4SI_type_node,
18123 opaque_V4SI_type_node,
18124 opaque_V4SI_type_node,
18125 NULL_TREE);
18127 else
18129 enum insn_code icode = d->icode;
18130 if (d->name == 0)
18132 if (TARGET_DEBUG_BUILTIN)
18133 fprintf (stderr, "rs6000_builtin, bdesc_3arg[%lu] no name\n",
18134 (long unsigned)i);
18136 continue;
18139 if (icode == CODE_FOR_nothing)
18141 if (TARGET_DEBUG_BUILTIN)
18142 fprintf (stderr, "rs6000_builtin, skip ternary %s (no code)\n",
18143 d->name);
18145 continue;
18148 type = builtin_function_type (insn_data[icode].operand[0].mode,
18149 insn_data[icode].operand[1].mode,
18150 insn_data[icode].operand[2].mode,
18151 insn_data[icode].operand[3].mode,
18152 d->code, d->name);
18155 def_builtin (d->name, type, d->code);
18158 /* Add the binary operators. */
18159 d = bdesc_2arg;
18160 for (i = 0; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
18162 machine_mode mode0, mode1, mode2;
18163 tree type;
18164 HOST_WIDE_INT mask = d->mask;
18166 if ((mask & builtin_mask) != mask)
18168 if (TARGET_DEBUG_BUILTIN)
18169 fprintf (stderr, "rs6000_builtin, skip binary %s\n", d->name);
18170 continue;
18173 if (rs6000_overloaded_builtin_p (d->code))
18175 if (! (type = opaque_ftype_opaque_opaque))
18176 type = opaque_ftype_opaque_opaque
18177 = build_function_type_list (opaque_V4SI_type_node,
18178 opaque_V4SI_type_node,
18179 opaque_V4SI_type_node,
18180 NULL_TREE);
18182 else
18184 enum insn_code icode = d->icode;
18185 if (d->name == 0)
18187 if (TARGET_DEBUG_BUILTIN)
18188 fprintf (stderr, "rs6000_builtin, bdesc_2arg[%lu] no name\n",
18189 (long unsigned)i);
18191 continue;
18194 if (icode == CODE_FOR_nothing)
18196 if (TARGET_DEBUG_BUILTIN)
18197 fprintf (stderr, "rs6000_builtin, skip binary %s (no code)\n",
18198 d->name);
18200 continue;
18203 mode0 = insn_data[icode].operand[0].mode;
18204 mode1 = insn_data[icode].operand[1].mode;
18205 mode2 = insn_data[icode].operand[2].mode;
18207 if (mode0 == V2SImode && mode1 == V2SImode && mode2 == QImode)
18209 if (! (type = v2si_ftype_v2si_qi))
18210 type = v2si_ftype_v2si_qi
18211 = build_function_type_list (opaque_V2SI_type_node,
18212 opaque_V2SI_type_node,
18213 char_type_node,
18214 NULL_TREE);
18217 else if (mode0 == V2SImode && GET_MODE_CLASS (mode1) == MODE_INT
18218 && mode2 == QImode)
18220 if (! (type = v2si_ftype_int_qi))
18221 type = v2si_ftype_int_qi
18222 = build_function_type_list (opaque_V2SI_type_node,
18223 integer_type_node,
18224 char_type_node,
18225 NULL_TREE);
18228 else
18229 type = builtin_function_type (mode0, mode1, mode2, VOIDmode,
18230 d->code, d->name);
18233 def_builtin (d->name, type, d->code);
18236 /* Add the simple unary operators. */
18237 d = bdesc_1arg;
18238 for (i = 0; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
18240 machine_mode mode0, mode1;
18241 tree type;
18242 HOST_WIDE_INT mask = d->mask;
18244 if ((mask & builtin_mask) != mask)
18246 if (TARGET_DEBUG_BUILTIN)
18247 fprintf (stderr, "rs6000_builtin, skip unary %s\n", d->name);
18248 continue;
18251 if (rs6000_overloaded_builtin_p (d->code))
18253 if (! (type = opaque_ftype_opaque))
18254 type = opaque_ftype_opaque
18255 = build_function_type_list (opaque_V4SI_type_node,
18256 opaque_V4SI_type_node,
18257 NULL_TREE);
18259 else
18261 enum insn_code icode = d->icode;
18262 if (d->name == 0)
18264 if (TARGET_DEBUG_BUILTIN)
18265 fprintf (stderr, "rs6000_builtin, bdesc_1arg[%lu] no name\n",
18266 (long unsigned)i);
18268 continue;
18271 if (icode == CODE_FOR_nothing)
18273 if (TARGET_DEBUG_BUILTIN)
18274 fprintf (stderr, "rs6000_builtin, skip unary %s (no code)\n",
18275 d->name);
18277 continue;
18280 mode0 = insn_data[icode].operand[0].mode;
18281 mode1 = insn_data[icode].operand[1].mode;
18283 if (mode0 == V2SImode && mode1 == QImode)
18285 if (! (type = v2si_ftype_qi))
18286 type = v2si_ftype_qi
18287 = build_function_type_list (opaque_V2SI_type_node,
18288 char_type_node,
18289 NULL_TREE);
18292 else
18293 type = builtin_function_type (mode0, mode1, VOIDmode, VOIDmode,
18294 d->code, d->name);
18297 def_builtin (d->name, type, d->code);
18300 /* Add the simple no-argument operators. */
18301 d = bdesc_0arg;
18302 for (i = 0; i < ARRAY_SIZE (bdesc_0arg); i++, d++)
18304 machine_mode mode0;
18305 tree type;
18306 HOST_WIDE_INT mask = d->mask;
18308 if ((mask & builtin_mask) != mask)
18310 if (TARGET_DEBUG_BUILTIN)
18311 fprintf (stderr, "rs6000_builtin, skip no-argument %s\n", d->name);
18312 continue;
18314 if (rs6000_overloaded_builtin_p (d->code))
18316 if (!opaque_ftype_opaque)
18317 opaque_ftype_opaque
18318 = build_function_type_list (opaque_V4SI_type_node, NULL_TREE);
18319 type = opaque_ftype_opaque;
18321 else
18323 enum insn_code icode = d->icode;
18324 if (d->name == 0)
18326 if (TARGET_DEBUG_BUILTIN)
18327 fprintf (stderr, "rs6000_builtin, bdesc_0arg[%lu] no name\n",
18328 (long unsigned) i);
18329 continue;
18331 if (icode == CODE_FOR_nothing)
18333 if (TARGET_DEBUG_BUILTIN)
18334 fprintf (stderr,
18335 "rs6000_builtin, skip no-argument %s (no code)\n",
18336 d->name);
18337 continue;
18339 mode0 = insn_data[icode].operand[0].mode;
18340 if (mode0 == V2SImode)
18342 /* Code for paired single.  */
18343 if (! (type = v2si_ftype))
18345 v2si_ftype
18346 = build_function_type_list (opaque_V2SI_type_node,
18347 NULL_TREE);
18348 type = v2si_ftype;
18351 else
18352 type = builtin_function_type (mode0, VOIDmode, VOIDmode, VOIDmode,
18353 d->code, d->name);
18355 def_builtin (d->name, type, d->code);
18359 /* Set up AIX/Darwin/64-bit Linux quad floating point routines. */
18360 static void
18361 init_float128_ibm (machine_mode mode)
18363 if (!TARGET_XL_COMPAT)
18365 set_optab_libfunc (add_optab, mode, "__gcc_qadd");
18366 set_optab_libfunc (sub_optab, mode, "__gcc_qsub");
18367 set_optab_libfunc (smul_optab, mode, "__gcc_qmul");
18368 set_optab_libfunc (sdiv_optab, mode, "__gcc_qdiv");
18370 if (!TARGET_HARD_FLOAT)
18372 set_optab_libfunc (neg_optab, mode, "__gcc_qneg");
18373 set_optab_libfunc (eq_optab, mode, "__gcc_qeq");
18374 set_optab_libfunc (ne_optab, mode, "__gcc_qne");
18375 set_optab_libfunc (gt_optab, mode, "__gcc_qgt");
18376 set_optab_libfunc (ge_optab, mode, "__gcc_qge");
18377 set_optab_libfunc (lt_optab, mode, "__gcc_qlt");
18378 set_optab_libfunc (le_optab, mode, "__gcc_qle");
18379 set_optab_libfunc (unord_optab, mode, "__gcc_qunord");
18381 set_conv_libfunc (sext_optab, mode, SFmode, "__gcc_stoq");
18382 set_conv_libfunc (sext_optab, mode, DFmode, "__gcc_dtoq");
18383 set_conv_libfunc (trunc_optab, SFmode, mode, "__gcc_qtos");
18384 set_conv_libfunc (trunc_optab, DFmode, mode, "__gcc_qtod");
18385 set_conv_libfunc (sfix_optab, SImode, mode, "__gcc_qtoi");
18386 set_conv_libfunc (ufix_optab, SImode, mode, "__gcc_qtou");
18387 set_conv_libfunc (sfloat_optab, mode, SImode, "__gcc_itoq");
18388 set_conv_libfunc (ufloat_optab, mode, SImode, "__gcc_utoq");
18391 else
18393 set_optab_libfunc (add_optab, mode, "_xlqadd");
18394 set_optab_libfunc (sub_optab, mode, "_xlqsub");
18395 set_optab_libfunc (smul_optab, mode, "_xlqmul");
18396 set_optab_libfunc (sdiv_optab, mode, "_xlqdiv");
18399 /* Add various conversions for IFmode to use the traditional TFmode
18400 names. */
18401 if (mode == IFmode)
18403 set_conv_libfunc (sext_optab, mode, SDmode, "__dpd_extendsdtf2");
18404 set_conv_libfunc (sext_optab, mode, DDmode, "__dpd_extendddtf2");
18405 set_conv_libfunc (trunc_optab, mode, TDmode, "__dpd_trunctftd2");
18406 set_conv_libfunc (trunc_optab, SDmode, mode, "__dpd_trunctfsd2");
18407 set_conv_libfunc (trunc_optab, DDmode, mode, "__dpd_trunctfdd2");
18408 set_conv_libfunc (sext_optab, TDmode, mode, "__dpd_extendtdtf2");
18410 if (TARGET_POWERPC64)
18412 set_conv_libfunc (sfix_optab, TImode, mode, "__fixtfti");
18413 set_conv_libfunc (ufix_optab, TImode, mode, "__fixunstfti");
18414 set_conv_libfunc (sfloat_optab, mode, TImode, "__floattitf");
18415 set_conv_libfunc (ufloat_optab, mode, TImode, "__floatuntitf");
18420 /* Set up IEEE 128-bit floating point routines. Use different names if the
18421 arguments can be passed in a vector register. The historical PowerPC
18422 implementation of IEEE 128-bit floating point used _q_<op> for the names, so
18423 continue to use that if we aren't using vector registers to pass IEEE
18424 128-bit floating point. */
18426 static void
18427 init_float128_ieee (machine_mode mode)
18429 if (FLOAT128_VECTOR_P (mode))
18431 set_optab_libfunc (add_optab, mode, "__addkf3");
18432 set_optab_libfunc (sub_optab, mode, "__subkf3");
18433 set_optab_libfunc (neg_optab, mode, "__negkf2");
18434 set_optab_libfunc (smul_optab, mode, "__mulkf3");
18435 set_optab_libfunc (sdiv_optab, mode, "__divkf3");
18436 set_optab_libfunc (sqrt_optab, mode, "__sqrtkf2");
18437 set_optab_libfunc (abs_optab, mode, "__abskf2");
18439 set_optab_libfunc (eq_optab, mode, "__eqkf2");
18440 set_optab_libfunc (ne_optab, mode, "__nekf2");
18441 set_optab_libfunc (gt_optab, mode, "__gtkf2");
18442 set_optab_libfunc (ge_optab, mode, "__gekf2");
18443 set_optab_libfunc (lt_optab, mode, "__ltkf2");
18444 set_optab_libfunc (le_optab, mode, "__lekf2");
18445 set_optab_libfunc (unord_optab, mode, "__unordkf2");
18447 set_conv_libfunc (sext_optab, mode, SFmode, "__extendsfkf2");
18448 set_conv_libfunc (sext_optab, mode, DFmode, "__extenddfkf2");
18449 set_conv_libfunc (trunc_optab, SFmode, mode, "__trunckfsf2");
18450 set_conv_libfunc (trunc_optab, DFmode, mode, "__trunckfdf2");
18452 set_conv_libfunc (sext_optab, mode, IFmode, "__extendtfkf2");
18453 if (mode != TFmode && FLOAT128_IBM_P (TFmode))
18454 set_conv_libfunc (sext_optab, mode, TFmode, "__extendtfkf2");
18456 set_conv_libfunc (trunc_optab, IFmode, mode, "__trunckftf2");
18457 if (mode != TFmode && FLOAT128_IBM_P (TFmode))
18458 set_conv_libfunc (trunc_optab, TFmode, mode, "__trunckftf2");
18460 set_conv_libfunc (sext_optab, mode, SDmode, "__dpd_extendsdkf2");
18461 set_conv_libfunc (sext_optab, mode, DDmode, "__dpd_extendddkf2");
18462 set_conv_libfunc (trunc_optab, mode, TDmode, "__dpd_trunckftd2");
18463 set_conv_libfunc (trunc_optab, SDmode, mode, "__dpd_trunckfsd2");
18464 set_conv_libfunc (trunc_optab, DDmode, mode, "__dpd_trunckfdd2");
18465 set_conv_libfunc (sext_optab, TDmode, mode, "__dpd_extendtdkf2");
18467 set_conv_libfunc (sfix_optab, SImode, mode, "__fixkfsi");
18468 set_conv_libfunc (ufix_optab, SImode, mode, "__fixunskfsi");
18469 set_conv_libfunc (sfix_optab, DImode, mode, "__fixkfdi");
18470 set_conv_libfunc (ufix_optab, DImode, mode, "__fixunskfdi");
18472 set_conv_libfunc (sfloat_optab, mode, SImode, "__floatsikf");
18473 set_conv_libfunc (ufloat_optab, mode, SImode, "__floatunsikf");
18474 set_conv_libfunc (sfloat_optab, mode, DImode, "__floatdikf");
18475 set_conv_libfunc (ufloat_optab, mode, DImode, "__floatundikf");
18477 if (TARGET_POWERPC64)
18479 set_conv_libfunc (sfix_optab, TImode, mode, "__fixkfti");
18480 set_conv_libfunc (ufix_optab, TImode, mode, "__fixunskfti");
18481 set_conv_libfunc (sfloat_optab, mode, TImode, "__floattikf");
18482 set_conv_libfunc (ufloat_optab, mode, TImode, "__floatuntikf");
18486 else
18488 set_optab_libfunc (add_optab, mode, "_q_add");
18489 set_optab_libfunc (sub_optab, mode, "_q_sub");
18490 set_optab_libfunc (neg_optab, mode, "_q_neg");
18491 set_optab_libfunc (smul_optab, mode, "_q_mul");
18492 set_optab_libfunc (sdiv_optab, mode, "_q_div");
18493 if (TARGET_PPC_GPOPT)
18494 set_optab_libfunc (sqrt_optab, mode, "_q_sqrt");
18496 set_optab_libfunc (eq_optab, mode, "_q_feq");
18497 set_optab_libfunc (ne_optab, mode, "_q_fne");
18498 set_optab_libfunc (gt_optab, mode, "_q_fgt");
18499 set_optab_libfunc (ge_optab, mode, "_q_fge");
18500 set_optab_libfunc (lt_optab, mode, "_q_flt");
18501 set_optab_libfunc (le_optab, mode, "_q_fle");
18503 set_conv_libfunc (sext_optab, mode, SFmode, "_q_stoq");
18504 set_conv_libfunc (sext_optab, mode, DFmode, "_q_dtoq");
18505 set_conv_libfunc (trunc_optab, SFmode, mode, "_q_qtos");
18506 set_conv_libfunc (trunc_optab, DFmode, mode, "_q_qtod");
18507 set_conv_libfunc (sfix_optab, SImode, mode, "_q_qtoi");
18508 set_conv_libfunc (ufix_optab, SImode, mode, "_q_qtou");
18509 set_conv_libfunc (sfloat_optab, mode, SImode, "_q_itoq");
18510 set_conv_libfunc (ufloat_optab, mode, SImode, "_q_utoq");
18514 static void
18515 rs6000_init_libfuncs (void)
18517 /* __float128 support. */
18518 if (TARGET_FLOAT128_TYPE)
18520 init_float128_ibm (IFmode);
18521 init_float128_ieee (KFmode);
18524 /* AIX/Darwin/64-bit Linux quad floating point routines. */
18525 if (TARGET_LONG_DOUBLE_128)
18527 if (!TARGET_IEEEQUAD)
18528 init_float128_ibm (TFmode);
18530 /* IEEE 128-bit including 32-bit SVR4 quad floating point routines. */
18531 else
18532 init_float128_ieee (TFmode);
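/* As a concrete illustration: on a 64-bit Linux target where long double
   is IBM extended double, a TFmode addition resolves to __gcc_qadd above
   (or _xlqadd under -mxl-compat), while a KFmode (__float128) addition
   resolves to the libgcc routine __addkf3 chosen in init_float128_ieee.  */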
18536 /* Emit a potentially record-form instruction, setting DST from SRC.
18537 If DOT is 0, that is all; otherwise, set CCREG to the result of the
18538 signed comparison of DST with zero. If DOT is 1, the generated RTL
18539 doesn't care about the DST result; if DOT is 2, it does. If CCREG
18540 is CR0 do a single dot insn (as a PARALLEL); otherwise, do a SET and
18541 a separate COMPARE. */
18543 void
18544 rs6000_emit_dot_insn (rtx dst, rtx src, int dot, rtx ccreg)
18546 if (dot == 0)
18548 emit_move_insn (dst, src);
18549 return;
18552 if (cc_reg_not_cr0_operand (ccreg, CCmode))
18554 emit_move_insn (dst, src);
18555 emit_move_insn (ccreg, gen_rtx_COMPARE (CCmode, dst, const0_rtx));
18556 return;
18559 rtx ccset = gen_rtx_SET (ccreg, gen_rtx_COMPARE (CCmode, src, const0_rtx));
18560 if (dot == 1)
18562 rtx clobber = gen_rtx_CLOBBER (VOIDmode, dst);
18563 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, ccset, clobber)));
18565 else
18567 rtx set = gen_rtx_SET (dst, src);
18568 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, ccset, set)));
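/* For instance, with DST = r3, SRC = (and:DI r4 r5), DOT = 1 and CCREG =
   cr0, the PARALLEL built above is roughly

     (parallel [(set cr0 (compare:CC (and:DI r4 r5) (const_int 0)))
		(clobber (reg:DI 3))])

   which matches the record-form "and." pattern; with DOT = 2 the clobber
   is replaced by (set (reg:DI 3) (and:DI r4 r5)) so the DST value is
   kept as well.  */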
18573 /* A validation routine: say whether CODE, a condition code, and MODE
18574 match. The other alternatives either don't make sense or should
18575 never be generated. */
18577 void
18578 validate_condition_mode (enum rtx_code code, machine_mode mode)
18580 gcc_assert ((GET_RTX_CLASS (code) == RTX_COMPARE
18581 || GET_RTX_CLASS (code) == RTX_COMM_COMPARE)
18582 && GET_MODE_CLASS (mode) == MODE_CC);
18584 /* These don't make sense. */
18585 gcc_assert ((code != GT && code != LT && code != GE && code != LE)
18586 || mode != CCUNSmode);
18588 gcc_assert ((code != GTU && code != LTU && code != GEU && code != LEU)
18589 || mode == CCUNSmode);
18591 gcc_assert (mode == CCFPmode
18592 || (code != ORDERED && code != UNORDERED
18593 && code != UNEQ && code != LTGT
18594 && code != UNGT && code != UNLT
18595 && code != UNGE && code != UNLE));
18597 /* These should never be generated except for
18598 flag_finite_math_only. */
18599 gcc_assert (mode != CCFPmode
18600 || flag_finite_math_only
18601 || (code != LE && code != GE
18602 && code != UNEQ && code != LTGT
18603 && code != UNGT && code != UNLT));
18605 /* These are invalid; the information is not there. */
18606 gcc_assert (mode != CCEQmode || code == EQ || code == NE);
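/* By way of example: (gtu:CCUNS ...) and (eq:CCEQ ...) pass the checks
   above, while (gt:CCUNS ...) or (ltgt:CC ...) would trip an assert,
   since signed comparisons make no sense on CCUNSmode and LTGT is only
   meaningful for floating-point (CCFPmode) comparisons.  */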
18610 /* Return whether MASK (a CONST_INT) is a valid mask for any rlwinm,
18611 rldicl, rldicr, or rldic instruction in mode MODE. If so, if E is
18612 not zero, store there the bit offset (counted from the right) where
18613 the single stretch of 1 bits begins; and similarly for B, the bit
18614 offset where it ends. */
18616 bool
18617 rs6000_is_valid_mask (rtx mask, int *b, int *e, machine_mode mode)
18619 unsigned HOST_WIDE_INT val = INTVAL (mask);
18620 unsigned HOST_WIDE_INT bit;
18621 int nb, ne;
18622 int n = GET_MODE_PRECISION (mode);
18624 if (mode != DImode && mode != SImode)
18625 return false;
18627 if (INTVAL (mask) >= 0)
18629 bit = val & -val;
18630 ne = exact_log2 (bit);
18631 nb = exact_log2 (val + bit);
18633 else if (val + 1 == 0)
18635 nb = n;
18636 ne = 0;
18638 else if (val & 1)
18640 val = ~val;
18641 bit = val & -val;
18642 nb = exact_log2 (bit);
18643 ne = exact_log2 (val + bit);
18645 else
18647 bit = val & -val;
18648 ne = exact_log2 (bit);
18649 if (val + bit == 0)
18650 nb = n;
18651 else
18652 nb = 0;
18655 nb--;
18657 if (nb < 0 || ne < 0 || nb >= n || ne >= n)
18658 return false;
18660 if (b)
18661 *b = nb;
18662 if (e)
18663 *e = ne;
18665 return true;
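/* A worked example of the classification above: for SImode and
   MASK = 0x0ff0 (one stretch of ones, bits 4..11) the first arm computes

     bit = 0x0ff0 & -0x0ff0 = 0x10		so ne = 4
     nb  = exact_log2 (0x0ff0 + 0x10) = exact_log2 (0x1000) = 12

   and after the nb-- the function stores *b = 11 and *e = 4.  A mask
   with two stretches such as 0x0f0f makes exact_log2 return -1, so the
   final range check rejects it.  */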
18668 /* Return whether MASK (a CONST_INT) is a valid mask for any rlwinm, rldicl,
18669 or rldicr instruction, to implement an AND with it in mode MODE. */
18671 bool
18672 rs6000_is_valid_and_mask (rtx mask, machine_mode mode)
18674 int nb, ne;
18676 if (!rs6000_is_valid_mask (mask, &nb, &ne, mode))
18677 return false;
18679 /* For DImode, we need a rldicl, rldicr, or a rlwinm with mask that
18680 does not wrap. */
18681 if (mode == DImode)
18682 return (ne == 0 || nb == 63 || (nb < 32 && ne <= nb));
18684 /* For SImode, rlwinm can do everything. */
18685 if (mode == SImode)
18686 return (nb < 32 && ne < 32);
18688 return false;
18691 /* Return the instruction template for an AND with mask in mode MODE, with
18692 operands OPERANDS. If DOT is true, make it a record-form instruction. */
18694 const char *
18695 rs6000_insn_for_and_mask (machine_mode mode, rtx *operands, bool dot)
18697 int nb, ne;
18699 if (!rs6000_is_valid_mask (operands[2], &nb, &ne, mode))
18700 gcc_unreachable ();
18702 if (mode == DImode && ne == 0)
18704 operands[3] = GEN_INT (63 - nb);
18705 if (dot)
18706 return "rldicl. %0,%1,0,%3";
18707 return "rldicl %0,%1,0,%3";
18710 if (mode == DImode && nb == 63)
18712 operands[3] = GEN_INT (63 - ne);
18713 if (dot)
18714 return "rldicr. %0,%1,0,%3";
18715 return "rldicr %0,%1,0,%3";
18718 if (nb < 32 && ne < 32)
18720 operands[3] = GEN_INT (31 - nb);
18721 operands[4] = GEN_INT (31 - ne);
18722 if (dot)
18723 return "rlwinm. %0,%1,0,%3,%4";
18724 return "rlwinm %0,%1,0,%3,%4";
18727 gcc_unreachable ();
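/* Illustrative template selections: an SImode AND with 0x0ff0 (nb = 11,
   ne = 4, see the example above) emits

     rlwinm %0,%1,0,20,27		(31-nb = 20, 31-ne = 27)

   while a DImode AND with 0xffffffff00000000 (nb = 63, ne = 32) emits

     rldicr %0,%1,0,31		(63-ne = 31).  */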
18730 /* Return whether MASK (a CONST_INT) is a valid mask for any rlw[i]nm,
18731 rld[i]cl, rld[i]cr, or rld[i]c instruction, to implement an AND with
18732 shift SHIFT (a ROTATE, ASHIFT, or LSHIFTRT) in mode MODE. */
18734 bool
18735 rs6000_is_valid_shift_mask (rtx mask, rtx shift, machine_mode mode)
18737 int nb, ne;
18739 if (!rs6000_is_valid_mask (mask, &nb, &ne, mode))
18740 return false;
18742 int n = GET_MODE_PRECISION (mode);
18743 int sh = -1;
18745 if (CONST_INT_P (XEXP (shift, 1)))
18747 sh = INTVAL (XEXP (shift, 1));
18748 if (sh < 0 || sh >= n)
18749 return false;
18752 rtx_code code = GET_CODE (shift);
18754 /* Convert any shift by 0 to a rotate, to simplify below code. */
18755 if (sh == 0)
18756 code = ROTATE;
18758 /* Convert rotate to simple shift if we can, to make analysis simpler. */
18759 if (code == ROTATE && sh >= 0 && nb >= ne && ne >= sh)
18760 code = ASHIFT;
18761 if (code == ROTATE && sh >= 0 && nb >= ne && nb < sh)
18763 code = LSHIFTRT;
18764 sh = n - sh;
18767 /* DImode rotates need rld*. */
18768 if (mode == DImode && code == ROTATE)
18769 return (nb == 63 || ne == 0 || ne == sh);
18771 /* SImode rotates need rlw*. */
18772 if (mode == SImode && code == ROTATE)
18773 return (nb < 32 && ne < 32 && sh < 32);
18775 /* Wrap-around masks are only okay for rotates. */
18776 if (ne > nb)
18777 return false;
18779 /* Variable shifts are only okay for rotates. */
18780 if (sh < 0)
18781 return false;
18783 /* Don't allow ASHIFT if the mask is wrong for that. */
18784 if (code == ASHIFT && ne < sh)
18785 return false;
18787 /* If we can do it with an rlw*, we can do it. Don't allow LSHIFTRT
18788 if the mask is wrong for that. */
18789 if (nb < 32 && ne < 32 && sh < 32
18790 && !(code == LSHIFTRT && nb >= 32 - sh))
18791 return true;
18793 /* If we can do it with an rld*, we can do it. Don't allow LSHIFTRT
18794 if the mask is wrong for that. */
18795 if (code == LSHIFTRT)
18796 sh = 64 - sh;
18797 if (nb == 63 || ne == 0 || ne == sh)
18798 return !(code == LSHIFTRT && nb >= sh);
18800 return false;
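/* Example of the analysis above, for SImode: the combination of
   (lshiftrt:SI (reg) (const_int 24)) with mask 0xff is accepted (it is
   the classic srwi, i.e. an rlwinm), since nb = 7 < 32 - sh = 8; the
   same mask with a variable shift amount is rejected, because only
   rotates may have a non-constant count.  */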
18803 /* Return the instruction template for a shift with mask in mode MODE, with
18804 operands OPERANDS. If DOT is true, make it a record-form instruction. */
18806 const char *
18807 rs6000_insn_for_shift_mask (machine_mode mode, rtx *operands, bool dot)
18809 int nb, ne;
18811 if (!rs6000_is_valid_mask (operands[3], &nb, &ne, mode))
18812 gcc_unreachable ();
18814 if (mode == DImode && ne == 0)
18816 if (GET_CODE (operands[4]) == LSHIFTRT && INTVAL (operands[2]))
18817 operands[2] = GEN_INT (64 - INTVAL (operands[2]));
18818 operands[3] = GEN_INT (63 - nb);
18819 if (dot)
18820 return "rld%I2cl. %0,%1,%2,%3";
18821 return "rld%I2cl %0,%1,%2,%3";
18824 if (mode == DImode && nb == 63)
18826 operands[3] = GEN_INT (63 - ne);
18827 if (dot)
18828 return "rld%I2cr. %0,%1,%2,%3";
18829 return "rld%I2cr %0,%1,%2,%3";
18832 if (mode == DImode
18833 && GET_CODE (operands[4]) != LSHIFTRT
18834 && CONST_INT_P (operands[2])
18835 && ne == INTVAL (operands[2]))
18837 operands[3] = GEN_INT (63 - nb);
18838 if (dot)
18839 return "rld%I2c. %0,%1,%2,%3";
18840 return "rld%I2c %0,%1,%2,%3";
18843 if (nb < 32 && ne < 32)
18845 if (GET_CODE (operands[4]) == LSHIFTRT && INTVAL (operands[2]))
18846 operands[2] = GEN_INT (32 - INTVAL (operands[2]));
18847 operands[3] = GEN_INT (31 - nb);
18848 operands[4] = GEN_INT (31 - ne);
18849 /* This insn can also be a 64-bit rotate with mask that really makes
18850 it just a shift right (with mask); the %h below are to adjust for
18851 that situation (shift count is >= 32 in that case). */
18852 if (dot)
18853 return "rlw%I2nm. %0,%1,%h2,%3,%4";
18854 return "rlw%I2nm %0,%1,%h2,%3,%4";
18857 gcc_unreachable ();
18860 /* Return whether MASK (a CONST_INT) is a valid mask for any rlwimi or
18861 rldimi instruction, to implement an insert with shift SHIFT (a ROTATE,
18862 ASHIFT, or LSHIFTRT) in mode MODE. */
18864 bool
18865 rs6000_is_valid_insert_mask (rtx mask, rtx shift, machine_mode mode)
18867 int nb, ne;
18869 if (!rs6000_is_valid_mask (mask, &nb, &ne, mode))
18870 return false;
18872 int n = GET_MODE_PRECISION (mode);
18874 int sh = INTVAL (XEXP (shift, 1));
18875 if (sh < 0 || sh >= n)
18876 return false;
18878 rtx_code code = GET_CODE (shift);
18880 /* Convert any shift by 0 to a rotate, to simplify below code. */
18881 if (sh == 0)
18882 code = ROTATE;
18884 /* Convert rotate to simple shift if we can, to make analysis simpler. */
18885 if (code == ROTATE && sh >= 0 && nb >= ne && ne >= sh)
18886 code = ASHIFT;
18887 if (code == ROTATE && sh >= 0 && nb >= ne && nb < sh)
18889 code = LSHIFTRT;
18890 sh = n - sh;
18893 /* DImode rotates need rldimi. */
18894 if (mode == DImode && code == ROTATE)
18895 return (ne == sh);
18897 /* SImode rotates need rlwimi. */
18898 if (mode == SImode && code == ROTATE)
18899 return (nb < 32 && ne < 32 && sh < 32);
18901 /* Wrap-around masks are only okay for rotates. */
18902 if (ne > nb)
18903 return false;
18905 /* Don't allow ASHIFT if the mask is wrong for that. */
18906 if (code == ASHIFT && ne < sh)
18907 return false;
18909 /* If we can do it with an rlwimi, we can do it. Don't allow LSHIFTRT
18910 if the mask is wrong for that. */
18911 if (nb < 32 && ne < 32 && sh < 32
18912 && !(code == LSHIFTRT && nb >= 32 - sh))
18913 return true;
18915 /* If we can do it with an rldimi, we can do it. Don't allow LSHIFTRT
18916 if the mask is wrong for that. */
18917 if (code == LSHIFTRT)
18918 sh = 64 - sh;
18919 if (ne == sh)
18920 return !(code == LSHIFTRT && nb >= sh);
18922 return false;
18925 /* Return the instruction template for an insert with mask in mode MODE, with
18926 operands OPERANDS. If DOT is true, make it a record-form instruction. */
18928 const char *
18929 rs6000_insn_for_insert_mask (machine_mode mode, rtx *operands, bool dot)
18931 int nb, ne;
18933 if (!rs6000_is_valid_mask (operands[3], &nb, &ne, mode))
18934 gcc_unreachable ();
18936 /* Prefer rldimi because rlwimi is cracked. */
18937 if (TARGET_POWERPC64
18938 && (!dot || mode == DImode)
18939 && GET_CODE (operands[4]) != LSHIFTRT
18940 && ne == INTVAL (operands[2]))
18942 operands[3] = GEN_INT (63 - nb);
18943 if (dot)
18944 return "rldimi. %0,%1,%2,%3";
18945 return "rldimi %0,%1,%2,%3";
18948 if (nb < 32 && ne < 32)
18950 if (GET_CODE (operands[4]) == LSHIFTRT && INTVAL (operands[2]))
18951 operands[2] = GEN_INT (32 - INTVAL (operands[2]));
18952 operands[3] = GEN_INT (31 - nb);
18953 operands[4] = GEN_INT (31 - ne);
18954 if (dot)
18955 return "rlwimi. %0,%1,%2,%3,%4";
18956 return "rlwimi %0,%1,%2,%3,%4";
18959 gcc_unreachable ();
18962 /* Return whether an AND with C (a CONST_INT) in mode MODE can be done
18963 using two machine instructions. */
18965 bool
18966 rs6000_is_valid_2insn_and (rtx c, machine_mode mode)
18968 /* There are two kinds of AND we can handle with two insns:
18969 1) those we can do with two rl* insns;
18970 2) ori[s];xori[s].
18972 We do not handle that last case yet. */
18974 /* If there is just one stretch of ones, we can do it. */
18975 if (rs6000_is_valid_mask (c, NULL, NULL, mode))
18976 return true;
18978 /* Otherwise, fill in the lowest "hole"; if we can do the result with
18979 one insn, we can do the whole thing with two. */
18980 unsigned HOST_WIDE_INT val = INTVAL (c);
18981 unsigned HOST_WIDE_INT bit1 = val & -val;
18982 unsigned HOST_WIDE_INT bit2 = (val + bit1) & ~val;
18983 unsigned HOST_WIDE_INT val1 = (val + bit1) & val;
18984 unsigned HOST_WIDE_INT bit3 = val1 & -val1;
18985 return rs6000_is_valid_and_mask (GEN_INT (val + bit3 - bit2), mode);
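/* A worked example of the hole-filling trick above, for
   C = 0x000ff0ff (two stretches of ones around a four-bit hole):

     bit1 = 0x00000001		lowest set bit
     bit2 = 0x00000100		lowest bit of the hole
     val1 = 0x000ff000		just the upper stretch
     bit3 = 0x00001000		lowest bit above the hole

   so val + bit3 - bit2 == 0x000fffff, a single stretch that
   rs6000_is_valid_and_mask accepts, and the whole AND can be done with
   two rlwinm-class instructions.  */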
18988 /* Emit the two insns to do an AND in mode MODE, with operands OPERANDS.
18989 If EXPAND is true, split rotate-and-mask instructions we generate to
18990 their constituent parts as well (this is used during expand); if DOT
18991 is 1, make the last insn a record-form instruction clobbering the
18992 destination GPR and setting the CC reg (from operands[3]); if 2, set
18993 that GPR as well as the CC reg. */
18995 void
18996 rs6000_emit_2insn_and (machine_mode mode, rtx *operands, bool expand, int dot)
18998 gcc_assert (!(expand && dot));
19000 unsigned HOST_WIDE_INT val = INTVAL (operands[2]);
19002 /* If it is one stretch of ones, it is DImode; shift left, mask, then
19003 shift right. This generates better code than doing the masks without
19004 shifts, or shifting first right and then left. */
19005 int nb, ne;
19006 if (rs6000_is_valid_mask (operands[2], &nb, &ne, mode) && nb >= ne)
19008 gcc_assert (mode == DImode);
19010 int shift = 63 - nb;
19011 if (expand)
19013 rtx tmp1 = gen_reg_rtx (DImode);
19014 rtx tmp2 = gen_reg_rtx (DImode);
19015 emit_insn (gen_ashldi3 (tmp1, operands[1], GEN_INT (shift)));
19016 emit_insn (gen_anddi3 (tmp2, tmp1, GEN_INT (val << shift)));
19017 emit_insn (gen_lshrdi3 (operands[0], tmp2, GEN_INT (shift)));
19019 else
19021 rtx tmp = gen_rtx_ASHIFT (mode, operands[1], GEN_INT (shift));
19022 tmp = gen_rtx_AND (mode, tmp, GEN_INT (val << shift));
19023 emit_move_insn (operands[0], tmp);
19024 tmp = gen_rtx_LSHIFTRT (mode, operands[0], GEN_INT (shift));
19025 rs6000_emit_dot_insn (operands[0], tmp, dot, dot ? operands[3] : 0);
19027 return;
19030 /* Otherwise, make a mask2 that cuts out the lowest "hole", and a mask1
19031 that does the rest. */
19032 unsigned HOST_WIDE_INT bit1 = val & -val;
19033 unsigned HOST_WIDE_INT bit2 = (val + bit1) & ~val;
19034 unsigned HOST_WIDE_INT val1 = (val + bit1) & val;
19035 unsigned HOST_WIDE_INT bit3 = val1 & -val1;
19037 unsigned HOST_WIDE_INT mask1 = -bit3 + bit2 - 1;
19038 unsigned HOST_WIDE_INT mask2 = val + bit3 - bit2;
19040 gcc_assert (rs6000_is_valid_and_mask (GEN_INT (mask2), mode));
19042 /* Two "no-rotate"-and-mask instructions, for SImode. */
19043 if (rs6000_is_valid_and_mask (GEN_INT (mask1), mode))
19045 gcc_assert (mode == SImode);
19047 rtx reg = expand ? gen_reg_rtx (mode) : operands[0];
19048 rtx tmp = gen_rtx_AND (mode, operands[1], GEN_INT (mask1));
19049 emit_move_insn (reg, tmp);
19050 tmp = gen_rtx_AND (mode, reg, GEN_INT (mask2));
19051 rs6000_emit_dot_insn (operands[0], tmp, dot, dot ? operands[3] : 0);
19052 return;
19055 gcc_assert (mode == DImode);
19057 /* Two "no-rotate"-and-mask instructions, for DImode: both are rlwinm
19058 insns; we have to do the first in SImode, because it wraps. */
19059 if (mask2 <= 0xffffffff
19060 && rs6000_is_valid_and_mask (GEN_INT (mask1), SImode))
19062 rtx reg = expand ? gen_reg_rtx (mode) : operands[0];
19063 rtx tmp = gen_rtx_AND (SImode, gen_lowpart (SImode, operands[1]),
19064 GEN_INT (mask1));
19065 rtx reg_low = gen_lowpart (SImode, reg);
19066 emit_move_insn (reg_low, tmp);
19067 tmp = gen_rtx_AND (mode, reg, GEN_INT (mask2));
19068 rs6000_emit_dot_insn (operands[0], tmp, dot, dot ? operands[3] : 0);
19069 return;
19072 /* Two rld* insns: rotate, clear the hole in the middle (which now is
19073 at the top end), rotate back and clear the other hole. */
19074 int right = exact_log2 (bit3);
19075 int left = 64 - right;
19077 /* Rotate the mask too. */
19078 mask1 = (mask1 >> right) | ((bit2 - 1) << left);
19080 if (expand)
19082 rtx tmp1 = gen_reg_rtx (DImode);
19083 rtx tmp2 = gen_reg_rtx (DImode);
19084 rtx tmp3 = gen_reg_rtx (DImode);
19085 emit_insn (gen_rotldi3 (tmp1, operands[1], GEN_INT (left)));
19086 emit_insn (gen_anddi3 (tmp2, tmp1, GEN_INT (mask1)));
19087 emit_insn (gen_rotldi3 (tmp3, tmp2, GEN_INT (right)));
19088 emit_insn (gen_anddi3 (operands[0], tmp3, GEN_INT (mask2)));
19090 else
19092 rtx tmp = gen_rtx_ROTATE (mode, operands[1], GEN_INT (left));
19093 tmp = gen_rtx_AND (mode, tmp, GEN_INT (mask1));
19094 emit_move_insn (operands[0], tmp);
19095 tmp = gen_rtx_ROTATE (mode, operands[0], GEN_INT (right));
19096 tmp = gen_rtx_AND (mode, tmp, GEN_INT (mask2));
19097 rs6000_emit_dot_insn (operands[0], tmp, dot, dot ? operands[3] : 0);
19101 /* Return 1 if REGNO (reg1) == REGNO (reg2) - 1 making them candidates
19102 for lfq and stfq insns iff the registers are hard registers. */
19104 int
19105 registers_ok_for_quad_peep (rtx reg1, rtx reg2)
19107 /* We might have been passed a SUBREG. */
19108 if (GET_CODE (reg1) != REG || GET_CODE (reg2) != REG)
19109 return 0;
19111 /* We might have been passed non floating point registers. */
19112 if (!FP_REGNO_P (REGNO (reg1))
19113 || !FP_REGNO_P (REGNO (reg2)))
19114 return 0;
19116 return (REGNO (reg1) == REGNO (reg2) - 1);
19119 /* Return 1 if addr1 and addr2 are suitable for lfq or stfq insn.
19120 addr1 and addr2 must be in consecutive memory locations
19121 (addr2 == addr1 + 8). */
19123 int
19124 mems_ok_for_quad_peep (rtx mem1, rtx mem2)
19126 rtx addr1, addr2;
19127 unsigned int reg1, reg2;
19128 int offset1, offset2;
19130 /* The mems cannot be volatile. */
19131 if (MEM_VOLATILE_P (mem1) || MEM_VOLATILE_P (mem2))
19132 return 0;
19134 addr1 = XEXP (mem1, 0);
19135 addr2 = XEXP (mem2, 0);
19137 /* Extract an offset (if used) from the first addr. */
19138 if (GET_CODE (addr1) == PLUS)
19140 /* If not a REG, return zero. */
19141 if (GET_CODE (XEXP (addr1, 0)) != REG)
19142 return 0;
19143 else
19145 reg1 = REGNO (XEXP (addr1, 0));
19146 /* The offset must be constant! */
19147 if (GET_CODE (XEXP (addr1, 1)) != CONST_INT)
19148 return 0;
19149 offset1 = INTVAL (XEXP (addr1, 1));
19152 else if (GET_CODE (addr1) != REG)
19153 return 0;
19154 else
19156 reg1 = REGNO (addr1);
19157 /* This was a simple (mem (reg)) expression. Offset is 0. */
19158 offset1 = 0;
19161 /* And now for the second addr. */
19162 if (GET_CODE (addr2) == PLUS)
19164 /* If not a REG, return zero. */
19165 if (GET_CODE (XEXP (addr2, 0)) != REG)
19166 return 0;
19167 else
19169 reg2 = REGNO (XEXP (addr2, 0));
19170 /* The offset must be constant. */
19171 if (GET_CODE (XEXP (addr2, 1)) != CONST_INT)
19172 return 0;
19173 offset2 = INTVAL (XEXP (addr2, 1));
19176 else if (GET_CODE (addr2) != REG)
19177 return 0;
19178 else
19180 reg2 = REGNO (addr2);
19181 /* This was a simple (mem (reg)) expression. Offset is 0. */
19182 offset2 = 0;
19185 /* Both of these must have the same base register. */
19186 if (reg1 != reg2)
19187 return 0;
19189 /* The offset for the second addr must be 8 more than the first addr. */
19190 if (offset2 != offset1 + 8)
19191 return 0;
19193 /* All the tests passed. addr1 and addr2 are valid for lfq or stfq
19194 instructions. */
19195 return 1;
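/* For example, (mem:DF (plus:DI (reg 9) (const_int 16))) followed by
   (mem:DF (plus:DI (reg 9) (const_int 24))) qualifies: same base
   register, constant offsets differing by exactly 8.  Different base
   registers, or offset2 != offset1 + 8, return 0.  */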
19199 rtx
19200 rs6000_secondary_memory_needed_rtx (machine_mode mode)
19202 static bool eliminated = false;
19203 rtx ret;
19205 if (mode != SDmode || TARGET_NO_SDMODE_STACK)
19206 ret = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
19207 else
19209 rtx mem = cfun->machine->sdmode_stack_slot;
19210 gcc_assert (mem != NULL_RTX);
19212 if (!eliminated)
19214 mem = eliminate_regs (mem, VOIDmode, NULL_RTX);
19215 cfun->machine->sdmode_stack_slot = mem;
19216 eliminated = true;
19218 ret = mem;
19221 if (TARGET_DEBUG_ADDR)
19223 fprintf (stderr, "\nrs6000_secondary_memory_needed_rtx, mode %s, rtx:\n",
19224 GET_MODE_NAME (mode));
19225 if (!ret)
19226 fprintf (stderr, "\tNULL_RTX\n");
19227 else
19228 debug_rtx (ret);
19231 return ret;
19234 /* Return the mode to be used for memory when a secondary memory
19235 location is needed.  For SDmode values we need to use DDmode; in
19236 all other cases we can use the same mode.  */
19237 machine_mode
19238 rs6000_secondary_memory_needed_mode (machine_mode mode)
19240 if (lra_in_progress && mode == SDmode)
19241 return DDmode;
19242 return mode;
19245 static tree
19246 rs6000_check_sdmode (tree *tp, int *walk_subtrees, void *data ATTRIBUTE_UNUSED)
19248 /* Don't walk into types. */
19249 if (*tp == NULL_TREE || *tp == error_mark_node || TYPE_P (*tp))
19251 *walk_subtrees = 0;
19252 return NULL_TREE;
19255 switch (TREE_CODE (*tp))
19257 case VAR_DECL:
19258 case PARM_DECL:
19259 case FIELD_DECL:
19260 case RESULT_DECL:
19261 case SSA_NAME:
19262 case REAL_CST:
19263 case MEM_REF:
19264 case VIEW_CONVERT_EXPR:
19265 if (TYPE_MODE (TREE_TYPE (*tp)) == SDmode)
19266 return *tp;
19267 break;
19268 default:
19269 break;
19272 return NULL_TREE;
19275 /* Classify a register type. Because the FMRGOW/FMRGEW instructions only work
19276 on traditional floating point registers, and the VMRGOW/VMRGEW instructions
19277 only work on the traditional altivec registers, note if an altivec register
19278 was chosen. */
19280 static enum rs6000_reg_type
19281 register_to_reg_type (rtx reg, bool *is_altivec)
19283 HOST_WIDE_INT regno;
19284 enum reg_class rclass;
19286 if (GET_CODE (reg) == SUBREG)
19287 reg = SUBREG_REG (reg);
19289 if (!REG_P (reg))
19290 return NO_REG_TYPE;
19292 regno = REGNO (reg);
19293 if (regno >= FIRST_PSEUDO_REGISTER)
19295 if (!lra_in_progress && !reload_in_progress && !reload_completed)
19296 return PSEUDO_REG_TYPE;
19298 regno = true_regnum (reg);
19299 if (regno < 0 || regno >= FIRST_PSEUDO_REGISTER)
19300 return PSEUDO_REG_TYPE;
19303 gcc_assert (regno >= 0);
19305 if (is_altivec && ALTIVEC_REGNO_P (regno))
19306 *is_altivec = true;
19308 rclass = rs6000_regno_regclass[regno];
19309 return reg_class_to_reg_type[(int)rclass];
19312 /* Helper function to return the cost of adding a TOC entry address. */
19314 static inline int
19315 rs6000_secondary_reload_toc_costs (addr_mask_type addr_mask)
19317 int ret;
19319 if (TARGET_CMODEL != CMODEL_SMALL)
19320 ret = ((addr_mask & RELOAD_REG_OFFSET) == 0) ? 1 : 2;
19322 else
19323 ret = (TARGET_MINIMAL_TOC) ? 6 : 3;
19325 return ret;
19328 /* Helper function for rs6000_secondary_reload to determine whether the memory
19329 address (ADDR) with a given register class (RCLASS) and machine mode (MODE)
19330 needs reloading. Return negative if the memory is not handled by the memory
19331 helper functions and to try a different reload method, 0 if no additional
19332 instructions are needed, and positive to give the extra cost for the
19333 memory. */
19335 static int
19336 rs6000_secondary_reload_memory (rtx addr,
19337 enum reg_class rclass,
19338 machine_mode mode)
19340 int extra_cost = 0;
19341 rtx reg, and_arg, plus_arg0, plus_arg1;
19342 addr_mask_type addr_mask;
19343 const char *type = NULL;
19344 const char *fail_msg = NULL;
19346 if (GPR_REG_CLASS_P (rclass))
19347 addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_GPR];
19349 else if (rclass == FLOAT_REGS)
19350 addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_FPR];
19352 else if (rclass == ALTIVEC_REGS)
19353 addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_VMX];
19355 /* For the combined VSX_REGS, turn off Altivec AND -16. */
19356 else if (rclass == VSX_REGS)
19357 addr_mask = (reg_addr[mode].addr_mask[RELOAD_REG_VMX]
19358 & ~RELOAD_REG_AND_M16);
19360 /* If the register allocator hasn't made up its mind yet on the register
19361 class to use, settle on defaults to use. */
19362 else if (rclass == NO_REGS)
19364 addr_mask = (reg_addr[mode].addr_mask[RELOAD_REG_ANY]
19365 & ~RELOAD_REG_AND_M16);
19367 if ((addr_mask & RELOAD_REG_MULTIPLE) != 0)
19368 addr_mask &= ~(RELOAD_REG_INDEXED
19369 | RELOAD_REG_PRE_INCDEC
19370 | RELOAD_REG_PRE_MODIFY);
19373 else
19374 addr_mask = 0;
19376 /* If the register isn't valid in this register class, just return now. */
19377 if ((addr_mask & RELOAD_REG_VALID) == 0)
19379 if (TARGET_DEBUG_ADDR)
19381 fprintf (stderr,
19382 "rs6000_secondary_reload_memory: mode = %s, class = %s, "
19383 "not valid in class\n",
19384 GET_MODE_NAME (mode), reg_class_names[rclass]);
19385 debug_rtx (addr);
19388 return -1;
19391 switch (GET_CODE (addr))
19393 /* Does the register class support auto update forms for this mode?  We
19394 don't need a scratch register, since the powerpc only supports
19395 PRE_INC, PRE_DEC, and PRE_MODIFY. */
19396 case PRE_INC:
19397 case PRE_DEC:
19398 reg = XEXP (addr, 0);
19399 if (!base_reg_operand (addr, GET_MODE (reg)))
19401 fail_msg = "no base register #1";
19402 extra_cost = -1;
19405 else if ((addr_mask & RELOAD_REG_PRE_INCDEC) == 0)
19407 extra_cost = 1;
19408 type = "update";
19410 break;
19412 case PRE_MODIFY:
19413 reg = XEXP (addr, 0);
19414 plus_arg1 = XEXP (addr, 1);
19415 if (!base_reg_operand (reg, GET_MODE (reg))
19416 || GET_CODE (plus_arg1) != PLUS
19417 || !rtx_equal_p (reg, XEXP (plus_arg1, 0)))
19419 fail_msg = "bad PRE_MODIFY";
19420 extra_cost = -1;
19423 else if ((addr_mask & RELOAD_REG_PRE_MODIFY) == 0)
19425 extra_cost = 1;
19426 type = "update";
19428 break;
19430 /* Do we need to simulate AND -16 to clear the bottom address bits used
19431 in VMX load/stores? Only allow the AND for vector sizes. */
19432 case AND:
19433 and_arg = XEXP (addr, 0);
19434 if (GET_MODE_SIZE (mode) != 16
19435 || GET_CODE (XEXP (addr, 1)) != CONST_INT
19436 || INTVAL (XEXP (addr, 1)) != -16)
19438 fail_msg = "bad Altivec AND #1";
19439 extra_cost = -1;
19442 if (rclass != ALTIVEC_REGS)
19444 if (legitimate_indirect_address_p (and_arg, false))
19445 extra_cost = 1;
19447 else if (legitimate_indexed_address_p (and_arg, false))
19448 extra_cost = 2;
19450 else
19452 fail_msg = "bad Altivec AND #2";
19453 extra_cost = -1;
19456 type = "and";
19458 break;
19460 /* If this is an indirect address, make sure it is a base register. */
19461 case REG:
19462 case SUBREG:
19463 if (!legitimate_indirect_address_p (addr, false))
19465 extra_cost = 1;
19466 type = "move";
19468 break;
19470 /* If this is an indexed address, make sure the register class can handle
19471 indexed addresses for this mode. */
19472 case PLUS:
19473 plus_arg0 = XEXP (addr, 0);
19474 plus_arg1 = XEXP (addr, 1);
19476 /* (plus (plus (reg) (constant)) (constant)) is generated during
19477 push_reload processing, so handle it now. */
19478 if (GET_CODE (plus_arg0) == PLUS && CONST_INT_P (plus_arg1))
19480 if ((addr_mask & RELOAD_REG_OFFSET) == 0)
19482 extra_cost = 1;
19483 type = "offset";
19487 /* (plus (plus (reg) (constant)) (reg)) is also generated during
19488 push_reload processing, so handle it now. */
19489 else if (GET_CODE (plus_arg0) == PLUS && REG_P (plus_arg1))
19491 if ((addr_mask & RELOAD_REG_INDEXED) == 0)
19493 extra_cost = 1;
19494 type = "indexed #2";
19498 else if (!base_reg_operand (plus_arg0, GET_MODE (plus_arg0)))
19500 fail_msg = "no base register #2";
19501 extra_cost = -1;
19504 else if (int_reg_operand (plus_arg1, GET_MODE (plus_arg1)))
19506 if ((addr_mask & RELOAD_REG_INDEXED) == 0
19507 || !legitimate_indexed_address_p (addr, false))
19509 extra_cost = 1;
19510 type = "indexed";
19514 else if ((addr_mask & RELOAD_REG_QUAD_OFFSET) != 0
19515 && CONST_INT_P (plus_arg1))
19517 if (!quad_address_offset_p (INTVAL (plus_arg1)))
19519 extra_cost = 1;
19520 type = "vector d-form offset";
19524 /* Make sure the register class can handle offset addresses. */
19525 else if (rs6000_legitimate_offset_address_p (mode, addr, false, true))
19527 if ((addr_mask & RELOAD_REG_OFFSET) == 0)
19529 extra_cost = 1;
19530 type = "offset #2";
19534 else
19536 fail_msg = "bad PLUS";
19537 extra_cost = -1;
19540 break;
19542 case LO_SUM:
19543 /* Quad offsets are restricted and can't handle normal addresses. */
19544 if ((addr_mask & RELOAD_REG_QUAD_OFFSET) != 0)
19546 extra_cost = -1;
19547 type = "vector d-form lo_sum";
19550 else if (!legitimate_lo_sum_address_p (mode, addr, false))
19552 fail_msg = "bad LO_SUM";
19553 extra_cost = -1;
19556 if ((addr_mask & RELOAD_REG_OFFSET) == 0)
19558 extra_cost = 1;
19559 type = "lo_sum";
19561 break;
19563 /* Static addresses need to create a TOC entry. */
19564 case CONST:
19565 case SYMBOL_REF:
19566 case LABEL_REF:
19567 if ((addr_mask & RELOAD_REG_QUAD_OFFSET) != 0)
19569 extra_cost = -1;
19570 type = "vector d-form lo_sum #2";
19573 else
19575 type = "address";
19576 extra_cost = rs6000_secondary_reload_toc_costs (addr_mask);
19578 break;
19580 /* TOC references look like offsetable memory. */
19581 case UNSPEC:
19582 if (TARGET_CMODEL == CMODEL_SMALL || XINT (addr, 1) != UNSPEC_TOCREL)
19584 fail_msg = "bad UNSPEC";
19585 extra_cost = -1;
19588 else if ((addr_mask & RELOAD_REG_QUAD_OFFSET) != 0)
19590 extra_cost = -1;
19591 type = "vector d-form lo_sum #3";
19594 else if ((addr_mask & RELOAD_REG_OFFSET) == 0)
19596 extra_cost = 1;
19597 type = "toc reference";
19599 break;
19601 default:
19603 fail_msg = "bad address";
19604 extra_cost = -1;
19608 if (TARGET_DEBUG_ADDR /* && extra_cost != 0 */)
19610 if (extra_cost < 0)
19611 fprintf (stderr,
19612 "rs6000_secondary_reload_memory error: mode = %s, "
19613 "class = %s, addr_mask = '%s', %s\n",
19614 GET_MODE_NAME (mode),
19615 reg_class_names[rclass],
19616 rs6000_debug_addr_mask (addr_mask, false),
19617 (fail_msg != NULL) ? fail_msg : "<bad address>");
19619 else
19620 fprintf (stderr,
19621 "rs6000_secondary_reload_memory: mode = %s, class = %s, "
19622 "addr_mask = '%s', extra cost = %d, %s\n",
19623 GET_MODE_NAME (mode),
19624 reg_class_names[rclass],
19625 rs6000_debug_addr_mask (addr_mask, false),
19626 extra_cost,
19627 (type) ? type : "<none>");
19629 debug_rtx (addr);
19632 return extra_cost;
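/* To illustrate the costs above: reloading a 16-byte vector access whose
   address is (and (plus r9 r10) -16) for FLOAT_REGS yields
   extra_cost = 2 (the indexed form has to be rebuilt in a base
   register), while the same address for ALTIVEC_REGS costs nothing
   extra, since the VMX load/store instructions ignore the bottom four
   address bits anyway.  */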
19635 /* Helper function for rs6000_secondary_reload to return true if a move to a
19636 different register class is really a simple move.  */
19638 static bool
19639 rs6000_secondary_reload_simple_move (enum rs6000_reg_type to_type,
19640 enum rs6000_reg_type from_type,
19641 machine_mode mode)
19643 int size = GET_MODE_SIZE (mode);
19645 /* Add support for various direct moves available. In this function, we only
19646 look at cases where we don't need any extra registers, and one or more
19647 simple move insns are issued.  Originally, small integers were not allowed
19648 in FPR/VSX registers.  Single precision binary floating point is not a simple
19649 move because we need to convert to the single precision memory layout.
19650 The 4-byte SDmode can be moved. TDmode values are disallowed since they
19651 need special direct move handling, which we do not support yet. */
19652 if (TARGET_DIRECT_MOVE
19653 && ((to_type == GPR_REG_TYPE && from_type == VSX_REG_TYPE)
19654 || (to_type == VSX_REG_TYPE && from_type == GPR_REG_TYPE)))
19656 if (TARGET_POWERPC64)
19658 /* ISA 2.07: MTVSRD or MFVSRD. */
19659 if (size == 8)
19660 return true;
19662 /* ISA 3.0: MTVSRDD or MFVSRD + MFVSRLD. */
19663 if (size == 16 && TARGET_P9_VECTOR && mode != TDmode)
19664 return true;
19667 /* ISA 2.07: MTVSRWZ or MFVSRWZ. */
19668 if (TARGET_P8_VECTOR)
19670 if (mode == SImode)
19671 return true;
19673 if (TARGET_P9_VECTOR && (mode == HImode || mode == QImode))
19674 return true;
19677 /* ISA 2.07: MTVSRWZ or MFVSRWZ. */
19678 if (mode == SDmode)
19679 return true;
19682 /* Power6+: MFTGPR or MFFGPR. */
19683 else if (TARGET_MFPGPR && TARGET_POWERPC64 && size == 8
19684 && ((to_type == GPR_REG_TYPE && from_type == FPR_REG_TYPE)
19685 || (to_type == FPR_REG_TYPE && from_type == GPR_REG_TYPE)))
19686 return true;
19688 /* Move to/from SPR. */
19689 else if ((size == 4 || (TARGET_POWERPC64 && size == 8))
19690 && ((to_type == GPR_REG_TYPE && from_type == SPR_REG_TYPE)
19691 || (to_type == SPR_REG_TYPE && from_type == GPR_REG_TYPE)))
19692 return true;
19694 return false;
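/* Illustration: on a 64-bit ISA 2.07 target, a DImode move between a GPR
   and a VSX register is a single MTVSRD/MFVSRD and therefore "simple";
   an SFmode move is not, since the value must also be converted between
   the scalar and memory formats (see
   rs6000_secondary_reload_direct_move below).  */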
19697 /* Direct move helper function for rs6000_secondary_reload. Handle all of the
19698 special direct moves that involve allocating an extra register. Return true
19699 and record the helper's insn code and cost in SRI if such a move exists;
19700 return false if not. */
19702 static bool
19703 rs6000_secondary_reload_direct_move (enum rs6000_reg_type to_type,
19704 enum rs6000_reg_type from_type,
19705 machine_mode mode,
19706 secondary_reload_info *sri,
19707 bool altivec_p)
19709 bool ret = false;
19710 enum insn_code icode = CODE_FOR_nothing;
19711 int cost = 0;
19712 int size = GET_MODE_SIZE (mode);
19714 if (TARGET_POWERPC64 && size == 16)
19716 /* Handle moving 128-bit values from GPRs to VSX registers on
19717 ISA 2.07 (power8, power9) when running in 64-bit mode using
19718 XXPERMDI to glue the two 64-bit values back together. */
19719 if (to_type == VSX_REG_TYPE && from_type == GPR_REG_TYPE)
19721 cost = 3; /* 2 mtvsrd's, 1 xxpermdi. */
19722 icode = reg_addr[mode].reload_vsx_gpr;
19725 /* Handle moving 128-bit values from VSX registers to GPRs on
19726 ISA 2.07 when running in 64-bit mode using XXPERMDI to get access to the
19727 bottom 64-bit value. */
19728 else if (to_type == GPR_REG_TYPE && from_type == VSX_REG_TYPE)
19730 cost = 3; /* 2 mfvsrd's, 1 xxpermdi. */
19731 icode = reg_addr[mode].reload_gpr_vsx;
19735 else if (TARGET_POWERPC64 && mode == SFmode)
19737 if (to_type == GPR_REG_TYPE && from_type == VSX_REG_TYPE)
19739 cost = 3; /* xscvdpspn, mfvsrd, and. */
19740 icode = reg_addr[mode].reload_gpr_vsx;
19743 else if (to_type == VSX_REG_TYPE && from_type == GPR_REG_TYPE)
19745 cost = 2; /* mtvsrwz, xscvspdpn. */
19746 icode = reg_addr[mode].reload_vsx_gpr;
19750 else if (!TARGET_POWERPC64 && size == 8)
19752 /* Handle moving 64-bit values from GPRs to floating point registers on
19753 ISA 2.07 when running in 32-bit mode using FMRGOW to glue the two
19754 32-bit values back together. Altivec register classes must be handled
19755 specially since a different instruction is used, and the secondary
19756 reload support requires a single instruction class in the scratch
19757 register constraint. However, right now TFmode is not allowed in
19758 Altivec registers, so the pattern will never match. */
19759 if (to_type == VSX_REG_TYPE && from_type == GPR_REG_TYPE && !altivec_p)
19761 cost = 3; /* 2 mtvsrwz's, 1 fmrgow. */
19762 icode = reg_addr[mode].reload_fpr_gpr;
19766 if (icode != CODE_FOR_nothing)
19768 ret = true;
19769 if (sri)
19771 sri->icode = icode;
19772 sri->extra_cost = cost;
19776 return ret;
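/* When this returns true, reload emits the pattern recorded in SRI->icode
   with one scratch register; e.g. a TImode GPR-to-VSX move on 64-bit
   power8 expands to two MTVSRDs plus an XXPERMDI, matching the cost of 3
   recorded above.  */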
19779 /* Return whether a move between two register classes can be done either
19780 directly (simple move) or via a pattern that uses a single extra temporary
19781 (using ISA 2.07's direct move in this case). */
19783 static bool
19784 rs6000_secondary_reload_move (enum rs6000_reg_type to_type,
19785 enum rs6000_reg_type from_type,
19786 machine_mode mode,
19787 secondary_reload_info *sri,
19788 bool altivec_p)
19790 /* Fall back to load/store reloads if either type is not a register. */
19791 if (to_type == NO_REG_TYPE || from_type == NO_REG_TYPE)
19792 return false;
19794 /* If we haven't allocated registers yet, assume the move can be done for the
19795 standard register types. */
19796 if ((to_type == PSEUDO_REG_TYPE && from_type == PSEUDO_REG_TYPE)
19797 || (to_type == PSEUDO_REG_TYPE && IS_STD_REG_TYPE (from_type))
19798 || (from_type == PSEUDO_REG_TYPE && IS_STD_REG_TYPE (to_type)))
19799 return true;
19801 /* A move within the same set of registers is a simple move for
19802 non-specialized registers. */
19803 if (to_type == from_type && IS_STD_REG_TYPE (to_type))
19804 return true;
19806 /* Check whether a simple move can be done directly. */
19807 if (rs6000_secondary_reload_simple_move (to_type, from_type, mode))
19809 if (sri)
19811 sri->icode = CODE_FOR_nothing;
19812 sri->extra_cost = 0;
19814 return true;
19817 /* Now check if we can do it in a few steps. */
19818 return rs6000_secondary_reload_direct_move (to_type, from_type, mode, sri,
19819 altivec_p);
19822 /* Inform reload about cases where moving X with a mode MODE to a register in
19823 RCLASS requires an extra scratch or immediate register. Return the class
19824 needed for the immediate register.
19826 For VSX and Altivec, we may need a register to convert sp+offset into
19827 reg+reg addressing.
19829 For misaligned 64-bit gpr loads and stores we need a register to
19830 convert an offset address to indirect. */
19832 static reg_class_t
19833 rs6000_secondary_reload (bool in_p,
19834 rtx x,
19835 reg_class_t rclass_i,
19836 machine_mode mode,
19837 secondary_reload_info *sri)
19839 enum reg_class rclass = (enum reg_class) rclass_i;
19840 reg_class_t ret = ALL_REGS;
19841 enum insn_code icode;
19842 bool default_p = false;
19843 bool done_p = false;
19845 /* Allow subreg of memory before/during reload. */
19846 bool memory_p = (MEM_P (x)
19847 || (!reload_completed && GET_CODE (x) == SUBREG
19848 && MEM_P (SUBREG_REG (x))));
19850 sri->icode = CODE_FOR_nothing;
19851 sri->t_icode = CODE_FOR_nothing;
19852 sri->extra_cost = 0;
19853 icode = ((in_p)
19854 ? reg_addr[mode].reload_load
19855 : reg_addr[mode].reload_store);
19857 if (REG_P (x) || register_operand (x, mode))
19859 enum rs6000_reg_type to_type = reg_class_to_reg_type[(int)rclass];
19860 bool altivec_p = (rclass == ALTIVEC_REGS);
19861 enum rs6000_reg_type from_type = register_to_reg_type (x, &altivec_p);
19863 if (!in_p)
19864 std::swap (to_type, from_type);
19866 /* Can we do a direct move of some sort? */
19867 if (rs6000_secondary_reload_move (to_type, from_type, mode, sri,
19868 altivec_p))
19870 icode = (enum insn_code)sri->icode;
19871 default_p = false;
19872 done_p = true;
19873 ret = NO_REGS;
19877 /* Make sure 0.0 is not reloaded or forced into memory. */
19878 if (x == CONST0_RTX (mode) && VSX_REG_CLASS_P (rclass))
19880 ret = NO_REGS;
19881 default_p = false;
19882 done_p = true;
19885 /* If this is a scalar floating point value and we want to load it into the
19886 traditional Altivec registers, move it via a traditional floating
19887 point register, unless we have D-form addressing. Also make sure that
19888 non-zero constants use a FPR. */
19889 if (!done_p && reg_addr[mode].scalar_in_vmx_p
19890 && !mode_supports_vmx_dform (mode)
19891 && (rclass == VSX_REGS || rclass == ALTIVEC_REGS)
19892 && (memory_p || (GET_CODE (x) == CONST_DOUBLE)))
19894 ret = FLOAT_REGS;
19895 default_p = false;
19896 done_p = true;
19899 /* Handle reload of load/stores if we have reload helper functions. */
19900 if (!done_p && icode != CODE_FOR_nothing && memory_p)
19902 int extra_cost = rs6000_secondary_reload_memory (XEXP (x, 0), rclass,
19903 mode);
19905 if (extra_cost >= 0)
19907 done_p = true;
19908 ret = NO_REGS;
19909 if (extra_cost > 0)
19911 sri->extra_cost = extra_cost;
19912 sri->icode = icode;
19917 /* Handle unaligned loads and stores of integer registers. */
19918 if (!done_p && TARGET_POWERPC64
19919 && reg_class_to_reg_type[(int)rclass] == GPR_REG_TYPE
19920 && memory_p
19921 && GET_MODE_SIZE (GET_MODE (x)) >= UNITS_PER_WORD)
19923 rtx addr = XEXP (x, 0);
19924 rtx off = address_offset (addr);
19926 if (off != NULL_RTX)
19928 unsigned int extra = GET_MODE_SIZE (GET_MODE (x)) - UNITS_PER_WORD;
19929 unsigned HOST_WIDE_INT offset = INTVAL (off);
19931 /* We need a secondary reload when our legitimate_address_p
19932 says the address is good (as otherwise the entire address
19933 will be reloaded), and the offset is not a multiple of
19934 four or we have an address wrap. Address wrap will only
19935 occur for LO_SUMs since legitimate_offset_address_p
19936 rejects addresses for 16-byte mems that will wrap. */
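/* Worked example: LD/STD use the DS instruction format, so an 8-byte
   access at offset 6 is in range but fails (offset & 3) == 0 and must go
   through a base register.  For a 16-byte access (extra = 8), a lo_sum
   offset of 0x7ffc gives (0x7ffc ^ 0x8000) = 0xfffc >= 0x10000 - 8, so
   the wrap past the 16-bit displacement is caught as well.  */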
19937 if (GET_CODE (addr) == LO_SUM
19938 ? (1 /* legitimate_address_p allows any offset for lo_sum */
19939 && ((offset & 3) != 0
19940 || ((offset & 0xffff) ^ 0x8000) >= 0x10000 - extra))
19941 : (offset + 0x8000 < 0x10000 - extra /* legitimate_address_p */
19942 && (offset & 3) != 0))
19944 /* -m32 -mpowerpc64 needs to use a 32-bit scratch register. */
19945 if (in_p)
19946 sri->icode = ((TARGET_32BIT) ? CODE_FOR_reload_si_load
19947 : CODE_FOR_reload_di_load);
19948 else
19949 sri->icode = ((TARGET_32BIT) ? CODE_FOR_reload_si_store
19950 : CODE_FOR_reload_di_store);
19951 sri->extra_cost = 2;
19952 ret = NO_REGS;
19953 done_p = true;
19955 else
19956 default_p = true;
19958 else
19959 default_p = true;
19962 if (!done_p && !TARGET_POWERPC64
19963 && reg_class_to_reg_type[(int)rclass] == GPR_REG_TYPE
19964 && memory_p
19965 && GET_MODE_SIZE (GET_MODE (x)) > UNITS_PER_WORD)
19967 rtx addr = XEXP (x, 0);
19968 rtx off = address_offset (addr);
19970 if (off != NULL_RTX)
19972 unsigned int extra = GET_MODE_SIZE (GET_MODE (x)) - UNITS_PER_WORD;
19973 unsigned HOST_WIDE_INT offset = INTVAL (off);
19975 /* We need a secondary reload when our legitimate_address_p
19976 says the address is good (as otherwise the entire address
19977 will be reloaded), and we have a wrap.
19979 legitimate_lo_sum_address_p allows LO_SUM addresses to
19980 have any offset so test for wrap in the low 16 bits.
19982 legitimate_offset_address_p checks for the range
19983 [-0x8000,0x7fff] for mode size of 8 and [-0x8000,0x7ff7]
19984 for mode size of 16. We wrap at [0x7ffc,0x7fff] and
19985 [0x7ff4,0x7fff] respectively, so test for the
19986 intersection of these ranges, [0x7ffc,0x7fff] and
19987 [0x7ff4,0x7ff7] respectively.
19989 Note that the address we see here may have been
19990 manipulated by legitimize_reload_address. */
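/* Worked example (TImode, extra = 12): legitimate_offset_address_p
   accepts offset 0x7ff4, but the four word accesses then land at 0x7ff4,
   0x7ff8, 0x7ffc and 0x8000, the last outside the 16-bit displacement
   range; offset - (0x8000 - 12) < 4 flags exactly 0x7ff4..0x7ff7.  */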
19991 if (GET_CODE (addr) == LO_SUM
19992 ? ((offset & 0xffff) ^ 0x8000) >= 0x10000 - extra
19993 : offset - (0x8000 - extra) < UNITS_PER_WORD)
19995 if (in_p)
19996 sri->icode = CODE_FOR_reload_si_load;
19997 else
19998 sri->icode = CODE_FOR_reload_si_store;
19999 sri->extra_cost = 2;
20000 ret = NO_REGS;
20001 done_p = true;
20003 else
20004 default_p = true;
20006 else
20007 default_p = true;
20010 if (!done_p)
20011 default_p = true;
20013 if (default_p)
20014 ret = default_secondary_reload (in_p, x, rclass, mode, sri);
20016 gcc_assert (ret != ALL_REGS);
20018 if (TARGET_DEBUG_ADDR)
20020 fprintf (stderr,
20021 "\nrs6000_secondary_reload, return %s, in_p = %s, rclass = %s, "
20022 "mode = %s",
20023 reg_class_names[ret],
20024 in_p ? "true" : "false",
20025 reg_class_names[rclass],
20026 GET_MODE_NAME (mode));
20028 if (reload_completed)
20029 fputs (", after reload", stderr);
20031 if (!done_p)
20032 fputs (", done_p not set", stderr);
20034 if (default_p)
20035 fputs (", default secondary reload", stderr);
20037 if (sri->icode != CODE_FOR_nothing)
20038 fprintf (stderr, ", reload func = %s, extra cost = %d",
20039 insn_data[sri->icode].name, sri->extra_cost);
20041 else if (sri->extra_cost > 0)
20042 fprintf (stderr, ", extra cost = %d", sri->extra_cost);
20044 fputs ("\n", stderr);
20045 debug_rtx (x);
20048 return ret;
20051 /* Better tracing for rs6000_secondary_reload_inner. */
20053 static void
20054 rs6000_secondary_reload_trace (int line, rtx reg, rtx mem, rtx scratch,
20055 bool store_p)
20057 rtx set, clobber;
20059 gcc_assert (reg != NULL_RTX && mem != NULL_RTX && scratch != NULL_RTX);
20061 fprintf (stderr, "rs6000_secondary_reload_inner:%d, type = %s\n", line,
20062 store_p ? "store" : "load");
20064 if (store_p)
20065 set = gen_rtx_SET (mem, reg);
20066 else
20067 set = gen_rtx_SET (reg, mem);
20069 clobber = gen_rtx_CLOBBER (VOIDmode, scratch);
20070 debug_rtx (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, set, clobber)));
20073 static void rs6000_secondary_reload_fail (int, rtx, rtx, rtx, bool)
20074 ATTRIBUTE_NORETURN;
20076 static void
20077 rs6000_secondary_reload_fail (int line, rtx reg, rtx mem, rtx scratch,
20078 bool store_p)
20080 rs6000_secondary_reload_trace (line, reg, mem, scratch, store_p);
20081 gcc_unreachable ();
20084 /* Fixup reload addresses for values in GPR, FPR, and VMX registers that have
20085 reload helper functions. These were identified in
20086 rs6000_secondary_reload_memory, and if reload decided to use the secondary
20087 reload, it calls the insns:
20088 reload_<RELOAD:mode>_<P:mptrsize>_store
20089 reload_<RELOAD:mode>_<P:mptrsize>_load
20091 which in turn call this function to do whatever is necessary to create
20092 valid addresses. */
20094 void
20095 rs6000_secondary_reload_inner (rtx reg, rtx mem, rtx scratch, bool store_p)
20097 int regno = true_regnum (reg);
20098 machine_mode mode = GET_MODE (reg);
20099 addr_mask_type addr_mask;
20100 rtx addr;
20101 rtx new_addr;
20102 rtx op_reg, op0, op1;
20103 rtx and_op;
20104 rtx cc_clobber;
20105 rtvec rv;
20107 if (regno < 0 || regno >= FIRST_PSEUDO_REGISTER || !MEM_P (mem)
20108 || !base_reg_operand (scratch, GET_MODE (scratch)))
20109 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
20111 if (IN_RANGE (regno, FIRST_GPR_REGNO, LAST_GPR_REGNO))
20112 addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_GPR];
20114 else if (IN_RANGE (regno, FIRST_FPR_REGNO, LAST_FPR_REGNO))
20115 addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_FPR];
20117 else if (IN_RANGE (regno, FIRST_ALTIVEC_REGNO, LAST_ALTIVEC_REGNO))
20118 addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_VMX];
20120 else
20121 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
20123 /* Make sure the mode is valid in this register class. */
20124 if ((addr_mask & RELOAD_REG_VALID) == 0)
20125 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
20127 if (TARGET_DEBUG_ADDR)
20128 rs6000_secondary_reload_trace (__LINE__, reg, mem, scratch, store_p);
20130 new_addr = addr = XEXP (mem, 0);
20131 switch (GET_CODE (addr))
20133 /* Does the register class support auto update forms for this mode? If
20134 not, do the update now. We don't need a scratch register, since the
20135 powerpc only supports PRE_INC, PRE_DEC, and PRE_MODIFY. */
20136 case PRE_INC:
20137 case PRE_DEC:
20138 op_reg = XEXP (addr, 0);
20139 if (!base_reg_operand (op_reg, Pmode))
20140 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
20142 if ((addr_mask & RELOAD_REG_PRE_INCDEC) == 0)
20144 emit_insn (gen_add2_insn (op_reg, GEN_INT (GET_CODE (addr) == PRE_DEC ? -GET_MODE_SIZE (mode) : GET_MODE_SIZE (mode))));
20145 new_addr = op_reg;
20147 break;
20149 case PRE_MODIFY:
20150 op0 = XEXP (addr, 0);
20151 op1 = XEXP (addr, 1);
20152 if (!base_reg_operand (op0, Pmode)
20153 || GET_CODE (op1) != PLUS
20154 || !rtx_equal_p (op0, XEXP (op1, 0)))
20155 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
20157 if ((addr_mask & RELOAD_REG_PRE_MODIFY) == 0)
20159 emit_insn (gen_rtx_SET (op0, op1));
20160 new_addr = op0;
20162 break;
20164 /* Do we need to simulate AND -16 to clear the bottom address bits used
20165 in VMX load/stores? */
20166 case AND:
20167 op0 = XEXP (addr, 0);
20168 op1 = XEXP (addr, 1);
20169 if ((addr_mask & RELOAD_REG_AND_M16) == 0)
20171 if (REG_P (op0) || GET_CODE (op0) == SUBREG)
20172 op_reg = op0;
20174 else if (GET_CODE (op0) == PLUS)
20176 emit_insn (gen_rtx_SET (scratch, op0));
20177 op_reg = scratch;
20180 else
20181 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
20183 and_op = gen_rtx_AND (GET_MODE (scratch), op_reg, op1);
20184 cc_clobber = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (CCmode));
20185 rv = gen_rtvec (2, gen_rtx_SET (scratch, and_op), cc_clobber);
20186 emit_insn (gen_rtx_PARALLEL (VOIDmode, rv));
20187 new_addr = scratch;
20189 break;
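/* Illustration: for an Altivec-style address (and (plus rA rB) -16) in a
   register class without RELOAD_REG_AND_M16, the inner sum is computed
   into the scratch register and masked with -16 there, so the actual
   access uses a plain base register.  */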
20191 /* If this is an indirect address, make sure it is a base register. */
20192 case REG:
20193 case SUBREG:
20194 if (!base_reg_operand (addr, GET_MODE (addr)))
20196 emit_insn (gen_rtx_SET (scratch, addr));
20197 new_addr = scratch;
20199 break;
20201 /* If this is an indexed address, make sure the register class can handle
20202 indexed addresses for this mode. */
20203 case PLUS:
20204 op0 = XEXP (addr, 0);
20205 op1 = XEXP (addr, 1);
20206 if (!base_reg_operand (op0, Pmode))
20207 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
20209 else if (int_reg_operand (op1, Pmode))
20211 if ((addr_mask & RELOAD_REG_INDEXED) == 0)
20213 emit_insn (gen_rtx_SET (scratch, addr));
20214 new_addr = scratch;
20218 else if (mode_supports_vsx_dform_quad (mode) && CONST_INT_P (op1))
20220 if (((addr_mask & RELOAD_REG_QUAD_OFFSET) == 0)
20221 || !quad_address_p (addr, mode, false))
20223 emit_insn (gen_rtx_SET (scratch, addr));
20224 new_addr = scratch;
20228 /* Make sure the register class can handle offset addresses. */
20229 else if (rs6000_legitimate_offset_address_p (mode, addr, false, true))
20231 if ((addr_mask & RELOAD_REG_OFFSET) == 0)
20233 emit_insn (gen_rtx_SET (scratch, addr));
20234 new_addr = scratch;
20238 else
20239 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
20241 break;
20243 case LO_SUM:
20244 op0 = XEXP (addr, 0);
20245 op1 = XEXP (addr, 1);
20246 if (!base_reg_operand (op0, Pmode))
20247 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
20249 else if (int_reg_operand (op1, Pmode))
20251 if ((addr_mask & RELOAD_REG_INDEXED) == 0)
20253 emit_insn (gen_rtx_SET (scratch, addr));
20254 new_addr = scratch;
20258 /* Quad offsets are restricted and can't handle normal addresses. */
20259 else if (mode_supports_vsx_dform_quad (mode))
20261 emit_insn (gen_rtx_SET (scratch, addr));
20262 new_addr = scratch;
20265 /* Make sure the register class can handle offset addresses. */
20266 else if (legitimate_lo_sum_address_p (mode, addr, false))
20268 if ((addr_mask & RELOAD_REG_OFFSET) == 0)
20270 emit_insn (gen_rtx_SET (scratch, addr));
20271 new_addr = scratch;
20275 else
20276 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
20278 break;
20280 case SYMBOL_REF:
20281 case CONST:
20282 case LABEL_REF:
20283 rs6000_emit_move (scratch, addr, Pmode);
20284 new_addr = scratch;
20285 break;
20287 default:
20288 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
20291 /* Adjust the address if it changed. */
20292 if (addr != new_addr)
20294 mem = replace_equiv_address_nv (mem, new_addr);
20295 if (TARGET_DEBUG_ADDR)
20296 fprintf (stderr, "\nrs6000_secondary_reload_inner, mem adjusted.\n");
20299 /* Now create the move. */
20300 if (store_p)
20301 emit_insn (gen_rtx_SET (mem, reg));
20302 else
20303 emit_insn (gen_rtx_SET (reg, mem));
20305 return;
20308 /* Convert reloads involving 64-bit gprs and misaligned offset
20309 addressing, or multiple 32-bit gprs and offsets that are too large,
20310 to use indirect addressing. */
20312 void
20313 rs6000_secondary_reload_gpr (rtx reg, rtx mem, rtx scratch, bool store_p)
20315 int regno = true_regnum (reg);
20316 enum reg_class rclass;
20317 rtx addr;
20318 rtx scratch_or_premodify = scratch;
20320 if (TARGET_DEBUG_ADDR)
20322 fprintf (stderr, "\nrs6000_secondary_reload_gpr, type = %s\n",
20323 store_p ? "store" : "load");
20324 fprintf (stderr, "reg:\n");
20325 debug_rtx (reg);
20326 fprintf (stderr, "mem:\n");
20327 debug_rtx (mem);
20328 fprintf (stderr, "scratch:\n");
20329 debug_rtx (scratch);
20332 gcc_assert (regno >= 0 && regno < FIRST_PSEUDO_REGISTER);
20333 gcc_assert (GET_CODE (mem) == MEM);
20334 rclass = REGNO_REG_CLASS (regno);
20335 gcc_assert (rclass == GENERAL_REGS || rclass == BASE_REGS);
20336 addr = XEXP (mem, 0);
20338 if (GET_CODE (addr) == PRE_MODIFY)
20340 gcc_assert (REG_P (XEXP (addr, 0))
20341 && GET_CODE (XEXP (addr, 1)) == PLUS
20342 && XEXP (XEXP (addr, 1), 0) == XEXP (addr, 0));
20343 scratch_or_premodify = XEXP (addr, 0);
20344 if (!HARD_REGISTER_P (scratch_or_premodify))
20345 /* If we have a pseudo here then reload will have arranged
20346 to have it replaced, but only in the original insn.
20347 Use the replacement here too. */
20348 scratch_or_premodify = find_replacement (&XEXP (addr, 0));
20350 /* RTL emitted by rs6000_secondary_reload_gpr uses RTL
20351 expressions from the original insn, without unsharing them.
20352 Any RTL that points into the original insn will of course
20353 have register replacements applied. That is why we don't
20354 need to look for replacements under the PLUS. */
20355 addr = XEXP (addr, 1);
20357 gcc_assert (GET_CODE (addr) == PLUS || GET_CODE (addr) == LO_SUM);
20359 rs6000_emit_move (scratch_or_premodify, addr, Pmode);
20361 mem = replace_equiv_address_nv (mem, scratch_or_premodify);
20363 /* Now create the move. */
20364 if (store_p)
20365 emit_insn (gen_rtx_SET (mem, reg));
20366 else
20367 emit_insn (gen_rtx_SET (reg, mem));
20369 return;
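/* For example, a 32-bit DImode load at sp+0x7ffe passes the displacement
   check, but its second word would sit at sp+0x8002, out of range; the
   full address is computed into the scratch register instead and the
   access becomes a simple indirect load.  */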
20372 /* Allocate a 64-bit stack slot to be used for copying SDmode values through if
20373 this function has any SDmode references. If we are on a power7 or later, we
20374 don't need the 64-bit stack slot since the LFIWZX and STFIWX instructions
20375 can load/store the value. */
20377 static void
20378 rs6000_alloc_sdmode_stack_slot (void)
20380 tree t;
20381 basic_block bb;
20382 gimple_stmt_iterator gsi;
20384 gcc_assert (cfun->machine->sdmode_stack_slot == NULL_RTX);
20385 /* We use a different approach for dealing with the secondary
20386 memory in LRA. */
20387 if (ira_use_lra_p)
20388 return;
20390 if (TARGET_NO_SDMODE_STACK)
20391 return;
20393 FOR_EACH_BB_FN (bb, cfun)
20394 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
20396 tree ret = walk_gimple_op (gsi_stmt (gsi), rs6000_check_sdmode, NULL);
20397 if (ret)
20399 rtx stack = assign_stack_local (DDmode, GET_MODE_SIZE (DDmode), 0);
20400 cfun->machine->sdmode_stack_slot = adjust_address_nv (stack,
20401 SDmode, 0);
20402 return;
20406 /* Check for any SDmode parameters of the function. */
20407 for (t = DECL_ARGUMENTS (cfun->decl); t; t = DECL_CHAIN (t))
20409 if (TREE_TYPE (t) == error_mark_node)
20410 continue;
20412 if (TYPE_MODE (TREE_TYPE (t)) == SDmode
20413 || TYPE_MODE (DECL_ARG_TYPE (t)) == SDmode)
20415 rtx stack = assign_stack_local (DDmode, GET_MODE_SIZE (DDmode), 0);
20416 cfun->machine->sdmode_stack_slot = adjust_address_nv (stack,
20417 SDmode, 0);
20418 return;
20423 static void
20424 rs6000_instantiate_decls (void)
20426 if (cfun->machine->sdmode_stack_slot != NULL_RTX)
20427 instantiate_decl_rtl (cfun->machine->sdmode_stack_slot);
20430 /* Given an rtx X being reloaded into a reg required to be
20431 in class CLASS, return the class of reg to actually use.
20432 In general this is just CLASS; but on some machines
20433 in some cases it is preferable to use a more restrictive class.
20435 On the RS/6000, we have to return NO_REGS when we want to reload a
20436 floating-point CONST_DOUBLE to force it to be copied to memory.
20438 We also don't want to reload integer values into floating-point
20439 registers if we can at all help it. In fact, this can
20440 cause reload to die, if it tries to generate a reload of CTR
20441 into a FP register and discovers it doesn't have the memory location
20442 required.
20444 ??? Would it be a good idea to have reload do the converse, that is
20445 try to reload floating modes into FP registers if possible?
20448 static enum reg_class
20449 rs6000_preferred_reload_class (rtx x, enum reg_class rclass)
20451 machine_mode mode = GET_MODE (x);
20452 bool is_constant = CONSTANT_P (x);
20454 /* If a mode can't go in FPR/ALTIVEC/VSX registers, don't return a preferred
20455 reload class for it. */
20456 if ((rclass == ALTIVEC_REGS || rclass == VSX_REGS)
20457 && (reg_addr[mode].addr_mask[RELOAD_REG_VMX] & RELOAD_REG_VALID) == 0)
20458 return NO_REGS;
20460 if ((rclass == FLOAT_REGS || rclass == VSX_REGS)
20461 && (reg_addr[mode].addr_mask[RELOAD_REG_FPR] & RELOAD_REG_VALID) == 0)
20462 return NO_REGS;
20464 /* For VSX, see if we should prefer FLOAT_REGS or ALTIVEC_REGS. Do not allow
20465 the reloading of address expressions using PLUS into floating point
20466 registers. */
20467 if (TARGET_VSX && VSX_REG_CLASS_P (rclass) && GET_CODE (x) != PLUS)
20469 if (is_constant)
20471 /* Zero is always allowed in all VSX registers. */
20472 if (x == CONST0_RTX (mode))
20473 return rclass;
20475 /* If this is a vector constant that can be formed with a few Altivec
20476 instructions, we want altivec registers. */
20477 if (GET_CODE (x) == CONST_VECTOR && easy_vector_constant (x, mode))
20478 return ALTIVEC_REGS;
20480 /* If this is an integer constant that can easily be loaded into
20481 vector registers, allow it. */
20482 if (CONST_INT_P (x))
20484 HOST_WIDE_INT value = INTVAL (x);
20486 /* ISA 2.07 can generate -1 in all registers with XXLORC. ISA
20487 2.06 can generate it in the Altivec registers with
20488 VSPLTI<x>. */
20489 if (value == -1)
20491 if (TARGET_P8_VECTOR)
20492 return rclass;
20493 else if (rclass == ALTIVEC_REGS || rclass == VSX_REGS)
20494 return ALTIVEC_REGS;
20495 else
20496 return NO_REGS;
20499 /* ISA 3.0 can load -128..127 using the XXSPLTIB instruction and
20500 a sign extend in the Altivec registers. */
20501 if (IN_RANGE (value, -128, 127) && TARGET_P9_VECTOR
20502 && (rclass == ALTIVEC_REGS || rclass == VSX_REGS))
20503 return ALTIVEC_REGS;
20506 /* Force constant to memory. */
20507 return NO_REGS;
20510 /* D-form addressing can easily reload the value. */
20511 if (mode_supports_vmx_dform (mode)
20512 || mode_supports_vsx_dform_quad (mode))
20513 return rclass;
20515 /* If this is a scalar floating point value and we don't have D-form
20516 addressing, prefer the traditional floating point registers so that we
20517 can use D-form (register+offset) addressing. */
20518 if (rclass == VSX_REGS
20519 && (mode == SFmode || GET_MODE_SIZE (mode) == 8))
20520 return FLOAT_REGS;
20522 /* Prefer the Altivec registers if Altivec is handling the vector
20523 operations (i.e. V16QI, V8HI, and V4SI), or if we prefer Altivec
20524 loads. */
20525 if (VECTOR_UNIT_ALTIVEC_P (mode) || VECTOR_MEM_ALTIVEC_P (mode)
20526 || mode == V1TImode)
20527 return ALTIVEC_REGS;
20529 return rclass;
20532 if (is_constant || GET_CODE (x) == PLUS)
20534 if (reg_class_subset_p (GENERAL_REGS, rclass))
20535 return GENERAL_REGS;
20536 if (reg_class_subset_p (BASE_REGS, rclass))
20537 return BASE_REGS;
20538 return NO_REGS;
20541 if (GET_MODE_CLASS (mode) == MODE_INT && rclass == NON_SPECIAL_REGS)
20542 return GENERAL_REGS;
20544 return rclass;
20547 /* Debug version of rs6000_preferred_reload_class. */
20548 static enum reg_class
20549 rs6000_debug_preferred_reload_class (rtx x, enum reg_class rclass)
20551 enum reg_class ret = rs6000_preferred_reload_class (x, rclass);
20553 fprintf (stderr,
20554 "\nrs6000_preferred_reload_class, return %s, rclass = %s, "
20555 "mode = %s, x:\n",
20556 reg_class_names[ret], reg_class_names[rclass],
20557 GET_MODE_NAME (GET_MODE (x)));
20558 debug_rtx (x);
20560 return ret;
20563 /* If we are copying between FP or AltiVec registers and anything else, we need
20564 a memory location. The exception is when we are targeting ppc64 and the
20565 direct move instructions between fpr and gpr are available. Also, under VSX, you
20566 can copy vector registers from the FP register set to the Altivec register
20567 set and vice versa. */
20569 static bool
20570 rs6000_secondary_memory_needed (enum reg_class from_class,
20571 enum reg_class to_class,
20572 machine_mode mode)
20574 enum rs6000_reg_type from_type, to_type;
20575 bool altivec_p = ((from_class == ALTIVEC_REGS)
20576 || (to_class == ALTIVEC_REGS));
20578 /* If a simple/direct move is available, we don't need secondary memory */
20579 from_type = reg_class_to_reg_type[(int)from_class];
20580 to_type = reg_class_to_reg_type[(int)to_class];
20582 if (rs6000_secondary_reload_move (to_type, from_type, mode,
20583 (secondary_reload_info *)0, altivec_p))
20584 return false;
20586 /* If we have a floating point or vector register class, we need to use
20587 memory to transfer the data. */
20588 if (IS_FP_VECT_REG_TYPE (from_type) || IS_FP_VECT_REG_TYPE (to_type))
20589 return true;
20591 return false;
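/* Example: without direct-move support, copying an SImode value between a
   GPR and an FPR must bounce through memory, so this returns true; a
   GPR-to-GPR copy of the same value never does.  */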
20594 /* Debug version of rs6000_secondary_memory_needed. */
20595 static bool
20596 rs6000_debug_secondary_memory_needed (enum reg_class from_class,
20597 enum reg_class to_class,
20598 machine_mode mode)
20600 bool ret = rs6000_secondary_memory_needed (from_class, to_class, mode);
20602 fprintf (stderr,
20603 "rs6000_secondary_memory_needed, return: %s, from_class = %s, "
20604 "to_class = %s, mode = %s\n",
20605 ret ? "true" : "false",
20606 reg_class_names[from_class],
20607 reg_class_names[to_class],
20608 GET_MODE_NAME (mode));
20610 return ret;
20613 /* Return the register class of a scratch register needed to copy IN into
20614 or out of a register in RCLASS in MODE. If it can be done directly,
20615 NO_REGS is returned. */
20617 static enum reg_class
20618 rs6000_secondary_reload_class (enum reg_class rclass, machine_mode mode,
20619 rtx in)
20621 int regno;
20623 if (TARGET_ELF || (DEFAULT_ABI == ABI_DARWIN
20624 #if TARGET_MACHO
20625 && MACHOPIC_INDIRECT
20626 #endif
20629 /* We cannot copy a symbolic operand directly into anything
20630 other than BASE_REGS for TARGET_ELF. So indicate that a
20631 register from BASE_REGS is needed as an intermediate
20632 register.
20634 On Darwin, pic addresses require a load from memory, which
20635 needs a base register. */
20636 if (rclass != BASE_REGS
20637 && (GET_CODE (in) == SYMBOL_REF
20638 || GET_CODE (in) == HIGH
20639 || GET_CODE (in) == LABEL_REF
20640 || GET_CODE (in) == CONST))
20641 return BASE_REGS;
20644 if (GET_CODE (in) == REG)
20646 regno = REGNO (in);
20647 if (regno >= FIRST_PSEUDO_REGISTER)
20649 regno = true_regnum (in);
20650 if (regno >= FIRST_PSEUDO_REGISTER)
20651 regno = -1;
20654 else if (GET_CODE (in) == SUBREG)
20656 regno = true_regnum (in);
20657 if (regno >= FIRST_PSEUDO_REGISTER)
20658 regno = -1;
20660 else
20661 regno = -1;
20663 /* If we have VSX register moves, prefer moving scalar values between
20664 Altivec registers and GPR by going via an FPR (and then via memory)
20665 instead of reloading the secondary memory address for Altivec moves. */
20666 if (TARGET_VSX
20667 && GET_MODE_SIZE (mode) < 16
20668 && !mode_supports_vmx_dform (mode)
20669 && (((rclass == GENERAL_REGS || rclass == BASE_REGS)
20670 && (regno >= 0 && ALTIVEC_REGNO_P (regno)))
20671 || ((rclass == VSX_REGS || rclass == ALTIVEC_REGS)
20672 && (regno >= 0 && INT_REGNO_P (regno)))))
20673 return FLOAT_REGS;
20675 /* We can place anything into GENERAL_REGS and can put GENERAL_REGS
20676 into anything. */
20677 if (rclass == GENERAL_REGS || rclass == BASE_REGS
20678 || (regno >= 0 && INT_REGNO_P (regno)))
20679 return NO_REGS;
20681 /* Constants, memory, and VSX registers can go into VSX registers (both the
20682 traditional floating point and the altivec registers). */
20683 if (rclass == VSX_REGS
20684 && (regno == -1 || VSX_REGNO_P (regno)))
20685 return NO_REGS;
20687 /* Constants, memory, and FP registers can go into FP registers. */
20688 if ((regno == -1 || FP_REGNO_P (regno))
20689 && (rclass == FLOAT_REGS || rclass == NON_SPECIAL_REGS))
20690 return (mode != SDmode || lra_in_progress) ? NO_REGS : GENERAL_REGS;
20692 /* Memory, and AltiVec registers can go into AltiVec registers. */
20693 if ((regno == -1 || ALTIVEC_REGNO_P (regno))
20694 && rclass == ALTIVEC_REGS)
20695 return NO_REGS;
20697 /* We can copy among the CR registers. */
20698 if ((rclass == CR_REGS || rclass == CR0_REGS)
20699 && regno >= 0 && CR_REGNO_P (regno))
20700 return NO_REGS;
20702 /* Otherwise, we need GENERAL_REGS. */
20703 return GENERAL_REGS;
20706 /* Debug version of rs6000_secondary_reload_class. */
20707 static enum reg_class
20708 rs6000_debug_secondary_reload_class (enum reg_class rclass,
20709 machine_mode mode, rtx in)
20711 enum reg_class ret = rs6000_secondary_reload_class (rclass, mode, in);
20712 fprintf (stderr,
20713 "\nrs6000_secondary_reload_class, return %s, rclass = %s, "
20714 "mode = %s, input rtx:\n",
20715 reg_class_names[ret], reg_class_names[rclass],
20716 GET_MODE_NAME (mode));
20717 debug_rtx (in);
20719 return ret;
20722 /* Return nonzero if for CLASS a mode change from FROM to TO is invalid. */
20724 static bool
20725 rs6000_cannot_change_mode_class (machine_mode from,
20726 machine_mode to,
20727 enum reg_class rclass)
20729 unsigned from_size = GET_MODE_SIZE (from);
20730 unsigned to_size = GET_MODE_SIZE (to);
20732 if (from_size != to_size)
20734 enum reg_class xclass = (TARGET_VSX) ? VSX_REGS : FLOAT_REGS;
20736 if (reg_classes_intersect_p (xclass, rclass))
20738 unsigned to_nregs = hard_regno_nregs[FIRST_FPR_REGNO][to];
20739 unsigned from_nregs = hard_regno_nregs[FIRST_FPR_REGNO][from];
20740 bool to_float128_vector_p = FLOAT128_VECTOR_P (to);
20741 bool from_float128_vector_p = FLOAT128_VECTOR_P (from);
20743 /* Don't allow 64-bit types to overlap with 128-bit types that take a
20744 single register under VSX because the scalar part of the register
20745 is in the upper 64-bits, and not the lower 64-bits. Types like
20746 TFmode/TDmode that take 2 scalar register can overlap. 128-bit
20747 IEEE floating point can't overlap, and neither can small
20748 values. */
20750 if (to_float128_vector_p && from_float128_vector_p)
20751 return false;
20753 else if (to_float128_vector_p || from_float128_vector_p)
20754 return true;
20756 /* TDmode in floating-mode registers must always go into a register
20757 pair with the most significant word in the even-numbered register
20758 to match ISA requirements. In little-endian mode, this does not
20759 match subreg numbering, so we cannot allow subregs. */
20760 if (!BYTES_BIG_ENDIAN && (to == TDmode || from == TDmode))
20761 return true;
20763 if (from_size < 8 || to_size < 8)
20764 return true;
20766 if (from_size == 8 && (8 * to_nregs) != to_size)
20767 return true;
20769 if (to_size == 8 && (8 * from_nregs) != from_size)
20770 return true;
20772 return false;
20774 else
20775 return false;
20778 /* Since the VSX register set includes traditional floating point registers
20779 and altivec registers, just check for the size being different instead of
20780 trying to check whether the modes are vector modes. Otherwise it won't
20781 allow say DF and DI to change classes. For types like TFmode and TDmode
20782 that take 2 64-bit registers, rather than a single 128-bit register, don't
20783 allow subregs of those types to other 128 bit types. */
20784 if (TARGET_VSX && VSX_REG_CLASS_P (rclass))
20786 unsigned num_regs = (from_size + 15) / 16;
20787 if (hard_regno_nregs[FIRST_FPR_REGNO][to] > num_regs
20788 || hard_regno_nregs[FIRST_FPR_REGNO][from] > num_regs)
20789 return true;
20791 return (from_size != 8 && from_size != 16);
20794 if (TARGET_ALTIVEC && rclass == ALTIVEC_REGS
20795 && (ALTIVEC_VECTOR_MODE (from) + ALTIVEC_VECTOR_MODE (to)) == 1)
20796 return true;
20798 return false;
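/* Example: a DImode subreg of a KFmode value held in a VSX register is
   rejected above, because the 128-bit value occupies a single register
   whose scalar part lives in the upper 64 bits, not where subreg
   numbering expects it.  */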
20801 /* Debug version of rs6000_cannot_change_mode_class. */
20802 static bool
20803 rs6000_debug_cannot_change_mode_class (machine_mode from,
20804 machine_mode to,
20805 enum reg_class rclass)
20807 bool ret = rs6000_cannot_change_mode_class (from, to, rclass);
20809 fprintf (stderr,
20810 "rs6000_cannot_change_mode_class, return %s, from = %s, "
20811 "to = %s, rclass = %s\n",
20812 ret ? "true" : "false",
20813 GET_MODE_NAME (from), GET_MODE_NAME (to),
20814 reg_class_names[rclass]);
20816 return ret;
20819 /* Return a string to do a move operation of 128 bits of data. */
20821 const char *
20822 rs6000_output_move_128bit (rtx operands[])
20824 rtx dest = operands[0];
20825 rtx src = operands[1];
20826 machine_mode mode = GET_MODE (dest);
20827 int dest_regno;
20828 int src_regno;
20829 bool dest_gpr_p, dest_fp_p, dest_vmx_p, dest_vsx_p;
20830 bool src_gpr_p, src_fp_p, src_vmx_p, src_vsx_p;
20832 if (REG_P (dest))
20834 dest_regno = REGNO (dest);
20835 dest_gpr_p = INT_REGNO_P (dest_regno);
20836 dest_fp_p = FP_REGNO_P (dest_regno);
20837 dest_vmx_p = ALTIVEC_REGNO_P (dest_regno);
20838 dest_vsx_p = dest_fp_p | dest_vmx_p;
20840 else
20842 dest_regno = -1;
20843 dest_gpr_p = dest_fp_p = dest_vmx_p = dest_vsx_p = false;
20846 if (REG_P (src))
20848 src_regno = REGNO (src);
20849 src_gpr_p = INT_REGNO_P (src_regno);
20850 src_fp_p = FP_REGNO_P (src_regno);
20851 src_vmx_p = ALTIVEC_REGNO_P (src_regno);
20852 src_vsx_p = src_fp_p | src_vmx_p;
20854 else
20856 src_regno = -1;
20857 src_gpr_p = src_fp_p = src_vmx_p = src_vsx_p = false;
20860 /* Register moves. */
20861 if (dest_regno >= 0 && src_regno >= 0)
20863 if (dest_gpr_p)
20865 if (src_gpr_p)
20866 return "#";
20868 if (TARGET_DIRECT_MOVE_128 && src_vsx_p)
20869 return (WORDS_BIG_ENDIAN
20870 ? "mfvsrd %0,%x1\n\tmfvsrld %L0,%x1"
20871 : "mfvsrd %L0,%x1\n\tmfvsrld %0,%x1");
20873 else if (TARGET_VSX && TARGET_DIRECT_MOVE && src_vsx_p)
20874 return "#";
20877 else if (TARGET_VSX && dest_vsx_p)
20879 if (src_vsx_p)
20880 return "xxlor %x0,%x1,%x1";
20882 else if (TARGET_DIRECT_MOVE_128 && src_gpr_p)
20883 return (WORDS_BIG_ENDIAN
20884 ? "mtvsrdd %x0,%1,%L1"
20885 : "mtvsrdd %x0,%L1,%1");
20887 else if (TARGET_DIRECT_MOVE && src_gpr_p)
20888 return "#";
20891 else if (TARGET_ALTIVEC && dest_vmx_p && src_vmx_p)
20892 return "vor %0,%1,%1";
20894 else if (dest_fp_p && src_fp_p)
20895 return "#";
20898 /* Loads. */
20899 else if (dest_regno >= 0 && MEM_P (src))
20901 if (dest_gpr_p)
20903 if (TARGET_QUAD_MEMORY && quad_load_store_p (dest, src))
20904 return "lq %0,%1";
20905 else
20906 return "#";
20909 else if (TARGET_ALTIVEC && dest_vmx_p
20910 && altivec_indexed_or_indirect_operand (src, mode))
20911 return "lvx %0,%y1";
20913 else if (TARGET_VSX && dest_vsx_p)
20915 if (mode_supports_vsx_dform_quad (mode)
20916 && quad_address_p (XEXP (src, 0), mode, true))
20917 return "lxv %x0,%1";
20919 else if (TARGET_P9_VECTOR)
20920 return "lxvx %x0,%y1";
20922 else if (mode == V16QImode || mode == V8HImode || mode == V4SImode)
20923 return "lxvw4x %x0,%y1";
20925 else
20926 return "lxvd2x %x0,%y1";
20929 else if (TARGET_ALTIVEC && dest_vmx_p)
20930 return "lvx %0,%y1";
20932 else if (dest_fp_p)
20933 return "#";
20936 /* Stores. */
20937 else if (src_regno >= 0 && MEM_P (dest))
20939 if (src_gpr_p)
20941 if (TARGET_QUAD_MEMORY && quad_load_store_p (dest, src))
20942 return "stq %1,%0";
20943 else
20944 return "#";
20947 else if (TARGET_ALTIVEC && src_vmx_p
20948 && altivec_indexed_or_indirect_operand (dest, mode))
20949 return "stvx %1,%y0";
20951 else if (TARGET_VSX && src_vsx_p)
20953 if (mode_supports_vsx_dform_quad (mode)
20954 && quad_address_p (XEXP (dest, 0), mode, true))
20955 return "stxv %x1,%0";
20957 else if (TARGET_P9_VECTOR)
20958 return "stxvx %x1,%y0";
20960 else if (mode == V16QImode || mode == V8HImode || mode == V4SImode)
20961 return "stxvw4x %x1,%y0";
20963 else
20964 return "stxvd2x %x1,%y0";
20967 else if (TARGET_ALTIVEC && src_vmx_p)
20968 return "stvx %1,%y0";
20970 else if (src_fp_p)
20971 return "#";
20974 /* Constants. */
20975 else if (dest_regno >= 0
20976 && (GET_CODE (src) == CONST_INT
20977 || GET_CODE (src) == CONST_WIDE_INT
20978 || GET_CODE (src) == CONST_DOUBLE
20979 || GET_CODE (src) == CONST_VECTOR))
20981 if (dest_gpr_p)
20982 return "#";
20984 else if ((dest_vmx_p && TARGET_ALTIVEC)
20985 || (dest_vsx_p && TARGET_VSX))
20986 return output_vec_const_move (operands);
20989 fatal_insn ("Bad 128-bit move", gen_rtx_SET (dest, src));
20992 /* Validate a 128-bit move. */
20993 bool
20994 rs6000_move_128bit_ok_p (rtx operands[])
20996 machine_mode mode = GET_MODE (operands[0]);
20997 return (gpc_reg_operand (operands[0], mode)
20998 || gpc_reg_operand (operands[1], mode));
21001 /* Return true if a 128-bit move needs to be split. */
21002 bool
21003 rs6000_split_128bit_ok_p (rtx operands[])
21005 if (!reload_completed)
21006 return false;
21008 if (!gpr_or_gpr_p (operands[0], operands[1]))
21009 return false;
21011 if (quad_load_store_p (operands[0], operands[1]))
21012 return false;
21014 return true;
21018 /* Given a comparison operation, return the bit number in CCR to test. We
21019 know this is a valid comparison.
21021 SCC_P is 1 if this is for an scc. That means that %D will have been
21022 used instead of %C, so the bits will be in different places.
21024 Return -1 if OP isn't a valid comparison for some reason. */
21027 ccr_bit (rtx op, int scc_p)
21029 enum rtx_code code = GET_CODE (op);
21030 machine_mode cc_mode;
21031 int cc_regnum;
21032 int base_bit;
21033 rtx reg;
21035 if (!COMPARISON_P (op))
21036 return -1;
21038 reg = XEXP (op, 0);
21040 gcc_assert (GET_CODE (reg) == REG && CR_REGNO_P (REGNO (reg)));
21042 cc_mode = GET_MODE (reg);
21043 cc_regnum = REGNO (reg);
21044 base_bit = 4 * (cc_regnum - CR0_REGNO);
21046 validate_condition_mode (code, cc_mode);
21048 /* When generating a sCOND operation, only positive conditions are
21049 allowed. */
21050 gcc_assert (!scc_p
21051 || code == EQ || code == GT || code == LT || code == UNORDERED
21052 || code == GTU || code == LTU);
21054 switch (code)
21056 case NE:
21057 return scc_p ? base_bit + 3 : base_bit + 2;
21058 case EQ:
21059 return base_bit + 2;
21060 case GT: case GTU: case UNLE:
21061 return base_bit + 1;
21062 case LT: case LTU: case UNGE:
21063 return base_bit;
21064 case ORDERED: case UNORDERED:
21065 return base_bit + 3;
21067 case GE: case GEU:
21068 /* If scc, we will have done a cror to put the bit in the
21069 unordered position. So test that bit. For integer, this is ! LT
21070 unless this is an scc insn. */
21071 return scc_p ? base_bit + 3 : base_bit;
21073 case LE: case LEU:
21074 return scc_p ? base_bit + 3 : base_bit + 1;
21076 default:
21077 gcc_unreachable ();
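/* Example: a GT test of cr3 (SCC_P zero) has BASE_BIT 4 * 3 = 12 and
   returns 13, the CR bit number a conditional branch will test.  */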
21081 /* Return the GOT register. */
21084 rs6000_got_register (rtx value ATTRIBUTE_UNUSED)
21086 /* The second flow pass currently (June 1999) can't update
21087 regs_ever_live without disturbing other parts of the compiler, so
21088 update it here to make the prolog/epilogue code happy. */
21089 if (!can_create_pseudo_p ()
21090 && !df_regs_ever_live_p (RS6000_PIC_OFFSET_TABLE_REGNUM))
21091 df_set_regs_ever_live (RS6000_PIC_OFFSET_TABLE_REGNUM, true);
21093 crtl->uses_pic_offset_table = 1;
21095 return pic_offset_table_rtx;
21098 static rs6000_stack_t stack_info;
21100 /* Function to init struct machine_function.
21101 This will be called, via a pointer variable,
21102 from push_function_context. */
21104 static struct machine_function *
21105 rs6000_init_machine_status (void)
21107 stack_info.reload_completed = 0;
21108 return ggc_cleared_alloc<machine_function> ();
21111 #define INT_P(X) (GET_CODE (X) == CONST_INT && GET_MODE (X) == VOIDmode)
21113 /* Write out a function code label. */
21115 void
21116 rs6000_output_function_entry (FILE *file, const char *fname)
21118 if (fname[0] != '.')
21120 switch (DEFAULT_ABI)
21122 default:
21123 gcc_unreachable ();
21125 case ABI_AIX:
21126 if (DOT_SYMBOLS)
21127 putc ('.', file);
21128 else
21129 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "L.");
21130 break;
21132 case ABI_ELFv2:
21133 case ABI_V4:
21134 case ABI_DARWIN:
21135 break;
21139 RS6000_OUTPUT_BASENAME (file, fname);
21142 /* Print an operand. Recognize special options, documented below. */
21144 #if TARGET_ELF
21145 #define SMALL_DATA_RELOC ((rs6000_sdata == SDATA_EABI) ? "sda21" : "sdarel")
21146 #define SMALL_DATA_REG ((rs6000_sdata == SDATA_EABI) ? 0 : 13)
21147 #else
21148 #define SMALL_DATA_RELOC "sda21"
21149 #define SMALL_DATA_REG 0
21150 #endif
21152 void
21153 print_operand (FILE *file, rtx x, int code)
21155 int i;
21156 unsigned HOST_WIDE_INT uval;
21158 switch (code)
21160 /* %a is output_address. */
21162 /* %c is output_addr_const if a CONSTANT_ADDRESS_P, otherwise
21163 output_operand. */
21165 case 'D':
21166 /* Like 'J' but get to the GT bit only. */
21167 gcc_assert (REG_P (x));
21169 /* Bit 1 is GT bit. */
21170 i = 4 * (REGNO (x) - CR0_REGNO) + 1;
21172 /* Add one for shift count in rlinm for scc. */
21173 fprintf (file, "%d", i + 1);
21174 return;
21176 case 'e':
21177 /* If the low 16 bits are 0, but some other bit is set, write 's'. */
21178 if (! INT_P (x))
21180 output_operand_lossage ("invalid %%e value");
21181 return;
21184 uval = INTVAL (x);
21185 if ((uval & 0xffff) == 0 && uval != 0)
21186 putc ('s', file);
21187 return;
21189 case 'E':
21190 /* X is a CR register. Print the number of the EQ bit of the CR. */
21191 if (GET_CODE (x) != REG || ! CR_REGNO_P (REGNO (x)))
21192 output_operand_lossage ("invalid %%E value");
21193 else
21194 fprintf (file, "%d", 4 * (REGNO (x) - CR0_REGNO) + 2);
21195 return;
21197 case 'f':
21198 /* X is a CR register. Print the shift count needed to move it
21199 to the high-order four bits. */
21200 if (GET_CODE (x) != REG || ! CR_REGNO_P (REGNO (x)))
21201 output_operand_lossage ("invalid %%f value");
21202 else
21203 fprintf (file, "%d", 4 * (REGNO (x) - CR0_REGNO));
21204 return;
21206 case 'F':
21207 /* Similar, but print the count for the rotate in the opposite
21208 direction. */
21209 if (GET_CODE (x) != REG || ! CR_REGNO_P (REGNO (x)))
21210 output_operand_lossage ("invalid %%F value");
21211 else
21212 fprintf (file, "%d", 32 - 4 * (REGNO (x) - CR0_REGNO));
21213 return;
21215 case 'G':
21216 /* X is a constant integer. If it is negative, print "m",
21217 otherwise print "z". This is to make an aze or ame insn. */
21218 if (GET_CODE (x) != CONST_INT)
21219 output_operand_lossage ("invalid %%G value");
21220 else if (INTVAL (x) >= 0)
21221 putc ('z', file);
21222 else
21223 putc ('m', file);
21224 return;
21226 case 'h':
21227 /* If constant, output low-order five bits. Otherwise, write
21228 normally. */
21229 if (INT_P (x))
21230 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x) & 31);
21231 else
21232 print_operand (file, x, 0);
21233 return;
21235 case 'H':
21236 /* If constant, output low-order six bits. Otherwise, write
21237 normally. */
21238 if (INT_P (x))
21239 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x) & 63);
21240 else
21241 print_operand (file, x, 0);
21242 return;
21244 case 'I':
21245 /* Print `i' if this is a constant, else nothing. */
21246 if (INT_P (x))
21247 putc ('i', file);
21248 return;
21250 case 'j':
21251 /* Write the bit number in CCR for jump. */
21252 i = ccr_bit (x, 0);
21253 if (i == -1)
21254 output_operand_lossage ("invalid %%j code");
21255 else
21256 fprintf (file, "%d", i);
21257 return;
21259 case 'J':
21260 /* Similar, but add one for shift count in rlinm for scc and pass
21261 scc flag to `ccr_bit'. */
21262 i = ccr_bit (x, 1);
21263 if (i == -1)
21264 output_operand_lossage ("invalid %%J code");
21265 else
21266 /* If we want bit 31, write a shift count of zero, not 32. */
21267 fprintf (file, "%d", i == 31 ? 0 : i + 1);
21268 return;
21270 case 'k':
21271 /* X must be a constant. Write the 1's complement of the
21272 constant. */
21273 if (! INT_P (x))
21274 output_operand_lossage ("invalid %%k value");
21275 else
21276 fprintf (file, HOST_WIDE_INT_PRINT_DEC, ~ INTVAL (x));
21277 return;
21279 case 'K':
21280 /* X must be a symbolic constant on ELF. Write an
21281 expression suitable for an 'addi' that adds in the low 16
21282 bits of the MEM. */
21283 if (GET_CODE (x) == CONST)
21285 if (GET_CODE (XEXP (x, 0)) != PLUS
21286 || (GET_CODE (XEXP (XEXP (x, 0), 0)) != SYMBOL_REF
21287 && GET_CODE (XEXP (XEXP (x, 0), 0)) != LABEL_REF)
21288 || GET_CODE (XEXP (XEXP (x, 0), 1)) != CONST_INT)
21289 output_operand_lossage ("invalid %%K value");
21291 print_operand_address (file, x);
21292 fputs ("@l", file);
21293 return;
21295 /* %l is output_asm_label. */
21297 case 'L':
21298 /* Write second word of DImode or DFmode reference. Works on register
21299 or non-indexed memory only. */
21300 if (REG_P (x))
21301 fputs (reg_names[REGNO (x) + 1], file);
21302 else if (MEM_P (x))
21304 machine_mode mode = GET_MODE (x);
21305 /* Handle possible auto-increment. Since it is pre-increment and
21306 we have already done it, we can just use an offset of word. */
21307 if (GET_CODE (XEXP (x, 0)) == PRE_INC
21308 || GET_CODE (XEXP (x, 0)) == PRE_DEC)
21309 output_address (mode, plus_constant (Pmode, XEXP (XEXP (x, 0), 0),
21310 UNITS_PER_WORD));
21311 else if (GET_CODE (XEXP (x, 0)) == PRE_MODIFY)
21312 output_address (mode, plus_constant (Pmode, XEXP (XEXP (x, 0), 0),
21313 UNITS_PER_WORD));
21314 else
21315 output_address (mode, XEXP (adjust_address_nv (x, SImode,
21316 UNITS_PER_WORD),
21317 0));
21319 if (small_data_operand (x, GET_MODE (x)))
21320 fprintf (file, "@%s(%s)", SMALL_DATA_RELOC,
21321 reg_names[SMALL_DATA_REG]);
21323 return;
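/* For instance, with a DImode value in GPRs on a 32-bit target, %1 prints
   the first register and %L1 the second; for a non-indexed memory
   operand, %L1 prints the same address displaced by one word.  */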
21325 case 'N':
21326 /* Write the number of elements in the vector times 4. */
21327 if (GET_CODE (x) != PARALLEL)
21328 output_operand_lossage ("invalid %%N value");
21329 else
21330 fprintf (file, "%d", XVECLEN (x, 0) * 4);
21331 return;
21333 case 'O':
21334 /* Similar, but subtract 1 first. */
21335 if (GET_CODE (x) != PARALLEL)
21336 output_operand_lossage ("invalid %%O value");
21337 else
21338 fprintf (file, "%d", (XVECLEN (x, 0) - 1) * 4);
21339 return;
21341 case 'p':
21342 /* X is a CONST_INT that is a power of two. Output the logarithm. */
21343 if (! INT_P (x)
21344 || INTVAL (x) < 0
21345 || (i = exact_log2 (INTVAL (x))) < 0)
21346 output_operand_lossage ("invalid %%p value");
21347 else
21348 fprintf (file, "%d", i);
21349 return;
21351 case 'P':
21352 /* The operand must be an indirect memory reference. The result
21353 is the register name. */
21354 if (GET_CODE (x) != MEM || GET_CODE (XEXP (x, 0)) != REG
21355 || REGNO (XEXP (x, 0)) >= 32)
21356 output_operand_lossage ("invalid %%P value");
21357 else
21358 fputs (reg_names[REGNO (XEXP (x, 0))], file);
21359 return;
21361 case 'q':
21362 /* This outputs the logical code corresponding to a boolean
21363 expression. The expression may have one or both operands
21364 negated (if one, only the first one). For condition register
21365 logical operations, it will also treat the negated
21366 CR codes as NOTs, but not handle NOTs of them. */
21368 const char *const *t = 0;
21369 const char *s;
21370 enum rtx_code code = GET_CODE (x);
21371 static const char * const tbl[3][3] = {
21372 { "and", "andc", "nor" },
21373 { "or", "orc", "nand" },
21374 { "xor", "eqv", "xor" } };
21376 if (code == AND)
21377 t = tbl[0];
21378 else if (code == IOR)
21379 t = tbl[1];
21380 else if (code == XOR)
21381 t = tbl[2];
21382 else
21383 output_operand_lossage ("invalid %%q value");
21385 if (GET_CODE (XEXP (x, 0)) != NOT)
21386 s = t[0];
21387 else
21389 if (GET_CODE (XEXP (x, 1)) == NOT)
21390 s = t[2];
21391 else
21392 s = t[1];
21395 fputs (s, file);
21397 return;
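/* Example: (and (not a) b) prints "andc" and (and (not a) (not b))
   prints "nor"; the IOR and XOR rows of the table work the same way.  */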
21399 case 'Q':
21400 if (! TARGET_MFCRF)
21401 return;
21402 fputc (',', file);
21403 /* FALLTHRU */
21405 case 'R':
21406 /* X is a CR register. Print the mask for `mtcrf'. */
21407 if (GET_CODE (x) != REG || ! CR_REGNO_P (REGNO (x)))
21408 output_operand_lossage ("invalid %%R value");
21409 else
21410 fprintf (file, "%d", 128 >> (REGNO (x) - CR0_REGNO));
21411 return;
21413 case 's':
21414 /* Low 5 bits of 32 - value */
21415 if (! INT_P (x))
21416 output_operand_lossage ("invalid %%s value");
21417 else
21418 fprintf (file, HOST_WIDE_INT_PRINT_DEC, (32 - INTVAL (x)) & 31);
21419 return;
21421 case 't':
21422 /* Like 'J' but get to the OVERFLOW/UNORDERED bit. */
21423 gcc_assert (REG_P (x) && GET_MODE (x) == CCmode);
21425 /* Bit 3 is OV bit. */
21426 i = 4 * (REGNO (x) - CR0_REGNO) + 3;
21428 /* If we want bit 31, write a shift count of zero, not 32. */
21429 fprintf (file, "%d", i == 31 ? 0 : i + 1);
21430 return;
21432 case 'T':
21433 /* Print the symbolic name of a branch target register. */
21434 if (GET_CODE (x) != REG || (REGNO (x) != LR_REGNO
21435 && REGNO (x) != CTR_REGNO))
21436 output_operand_lossage ("invalid %%T value");
21437 else if (REGNO (x) == LR_REGNO)
21438 fputs ("lr", file);
21439 else
21440 fputs ("ctr", file);
21441 return;
21443 case 'u':
21444 /* High-order or low-order 16 bits of constant, whichever is non-zero,
21445 for use in unsigned operand. */
21446 if (! INT_P (x))
21448 output_operand_lossage ("invalid %%u value");
21449 return;
21452 uval = INTVAL (x);
21453 if ((uval & 0xffff) == 0)
21454 uval >>= 16;
21456 fprintf (file, HOST_WIDE_INT_PRINT_HEX, uval & 0xffff);
21457 return;
21459 case 'v':
21460 /* High-order 16 bits of constant for use in signed operand. */
21461 if (! INT_P (x))
21462 output_operand_lossage ("invalid %%v value");
21463 else
21464 fprintf (file, HOST_WIDE_INT_PRINT_HEX,
21465 (INTVAL (x) >> 16) & 0xffff);
21466 return;
21468 case 'U':
21469 /* Print `u' if this has an auto-increment or auto-decrement. */
21470 if (MEM_P (x)
21471 && (GET_CODE (XEXP (x, 0)) == PRE_INC
21472 || GET_CODE (XEXP (x, 0)) == PRE_DEC
21473 || GET_CODE (XEXP (x, 0)) == PRE_MODIFY))
21474 putc ('u', file);
21475 return;
21477 case 'V':
21478 /* Print the trap code for this operand. */
21479 switch (GET_CODE (x))
21481 case EQ:
21482 fputs ("eq", file); /* 4 */
21483 break;
21484 case NE:
21485 fputs ("ne", file); /* 24 */
21486 break;
21487 case LT:
21488 fputs ("lt", file); /* 16 */
21489 break;
21490 case LE:
21491 fputs ("le", file); /* 20 */
21492 break;
21493 case GT:
21494 fputs ("gt", file); /* 8 */
21495 break;
21496 case GE:
21497 fputs ("ge", file); /* 12 */
21498 break;
21499 case LTU:
21500 fputs ("llt", file); /* 2 */
21501 break;
21502 case LEU:
21503 fputs ("lle", file); /* 6 */
21504 break;
21505 case GTU:
21506 fputs ("lgt", file); /* 1 */
21507 break;
21508 case GEU:
21509 fputs ("lge", file); /* 5 */
21510 break;
21511 default:
21512 gcc_unreachable ();
21514 break;
21516 case 'w':
21517 /* If constant, low-order 16 bits of constant, signed. Otherwise, write
21518 normally. */
21519 if (INT_P (x))
21520 fprintf (file, HOST_WIDE_INT_PRINT_DEC,
21521 ((INTVAL (x) & 0xffff) ^ 0x8000) - 0x8000);
21522 else
21523 print_operand (file, x, 0);
21524 return;
21526 case 'x':
21527 /* X is a FPR or Altivec register used in a VSX context. */
21528 if (GET_CODE (x) != REG || !VSX_REGNO_P (REGNO (x)))
21529 output_operand_lossage ("invalid %%x value");
21530 else
21532 int reg = REGNO (x);
21533 int vsx_reg = (FP_REGNO_P (reg)
21534 ? reg - 32
21535 : reg - FIRST_ALTIVEC_REGNO + 32);
21537 #ifdef TARGET_REGNAMES
21538 if (TARGET_REGNAMES)
21539 fprintf (file, "%%vs%d", vsx_reg);
21540 else
21541 #endif
21542 fprintf (file, "%d", vsx_reg);
21544 return;
21546 case 'X':
21547 if (MEM_P (x)
21548 && (legitimate_indexed_address_p (XEXP (x, 0), 0)
21549 || (GET_CODE (XEXP (x, 0)) == PRE_MODIFY
21550 && legitimate_indexed_address_p (XEXP (XEXP (x, 0), 1), 0))))
21551 putc ('x', file);
21552 return;
21554 case 'Y':
21555 /* Like 'L', for third word of TImode/PTImode */
21556 if (REG_P (x))
21557 fputs (reg_names[REGNO (x) + 2], file);
21558 else if (MEM_P (x))
21560 machine_mode mode = GET_MODE (x);
21561 if (GET_CODE (XEXP (x, 0)) == PRE_INC
21562 || GET_CODE (XEXP (x, 0)) == PRE_DEC)
21563 output_address (mode, plus_constant (Pmode,
21564 XEXP (XEXP (x, 0), 0), 8));
21565 else if (GET_CODE (XEXP (x, 0)) == PRE_MODIFY)
21566 output_address (mode, plus_constant (Pmode,
21567 XEXP (XEXP (x, 0), 0), 8));
21568 else
21569 output_address (mode, XEXP (adjust_address_nv (x, SImode, 8), 0));
21570 if (small_data_operand (x, GET_MODE (x)))
21571 fprintf (file, "@%s(%s)", SMALL_DATA_RELOC,
21572 reg_names[SMALL_DATA_REG]);
21574 return;
21576 case 'z':
21577 /* X is a SYMBOL_REF. Write out the name preceded by a
21578 period and without any trailing data in brackets. Used for function
21579 names. If we are configured for System V (or the embedded ABI) on
21580 the PowerPC, do not emit the period, since those systems do not use
21581 TOCs and the like. */
21582 gcc_assert (GET_CODE (x) == SYMBOL_REF);
21584 /* For macho, check to see if we need a stub. */
21585 if (TARGET_MACHO)
21587 const char *name = XSTR (x, 0);
21588 #if TARGET_MACHO
21589 if (darwin_emit_branch_islands
21590 && MACHOPIC_INDIRECT
21591 && machopic_classify_symbol (x) == MACHOPIC_UNDEFINED_FUNCTION)
21592 name = machopic_indirection_name (x, /*stub_p=*/true);
21593 #endif
21594 assemble_name (file, name);
21596 else if (!DOT_SYMBOLS)
21597 assemble_name (file, XSTR (x, 0));
21598 else
21599 rs6000_output_function_entry (file, XSTR (x, 0));
21600 return;
21602 case 'Z':
21603 /* Like 'L', for last word of TImode/PTImode. */
21604 if (REG_P (x))
21605 fputs (reg_names[REGNO (x) + 3], file);
21606 else if (MEM_P (x))
21608 machine_mode mode = GET_MODE (x);
21609 if (GET_CODE (XEXP (x, 0)) == PRE_INC
21610 || GET_CODE (XEXP (x, 0)) == PRE_DEC)
21611 output_address (mode, plus_constant (Pmode,
21612 XEXP (XEXP (x, 0), 0), 12));
21613 else if (GET_CODE (XEXP (x, 0)) == PRE_MODIFY)
21614 output_address (mode, plus_constant (Pmode,
21615 XEXP (XEXP (x, 0), 0), 12));
21616 else
21617 output_address (mode, XEXP (adjust_address_nv (x, SImode, 12), 0));
21618 if (small_data_operand (x, GET_MODE (x)))
21619 fprintf (file, "@%s(%s)", SMALL_DATA_RELOC,
21620 reg_names[SMALL_DATA_REG]);
21622 return;
21624 /* Print AltiVec memory operand. */
21625 case 'y':
21627 rtx tmp;
21629 gcc_assert (MEM_P (x));
21631 tmp = XEXP (x, 0);
21633 if (VECTOR_MEM_ALTIVEC_P (GET_MODE (x))
21634 && GET_CODE (tmp) == AND
21635 && GET_CODE (XEXP (tmp, 1)) == CONST_INT
21636 && INTVAL (XEXP (tmp, 1)) == -16)
21637 tmp = XEXP (tmp, 0);
21638 else if (VECTOR_MEM_VSX_P (GET_MODE (x))
21639 && GET_CODE (tmp) == PRE_MODIFY)
21640 tmp = XEXP (tmp, 1);
21641 if (REG_P (tmp))
21642 fprintf (file, "0,%s", reg_names[REGNO (tmp)]);
21643 else
21645 if (GET_CODE (tmp) != PLUS
21646 || !REG_P (XEXP (tmp, 0))
21647 || !REG_P (XEXP (tmp, 1)))
21649 output_operand_lossage ("invalid %%y value, try using the 'Z' constraint");
21650 break;
21653 if (REGNO (XEXP (tmp, 0)) == 0)
21654 fprintf (file, "%s,%s", reg_names[ REGNO (XEXP (tmp, 1)) ],
21655 reg_names[ REGNO (XEXP (tmp, 0)) ]);
21656 else
21657 fprintf (file, "%s,%s", reg_names[ REGNO (XEXP (tmp, 0)) ],
21658 reg_names[ REGNO (XEXP (tmp, 1)) ]);
21660 break;
21663 case 0:
21664 if (REG_P (x))
21665 fprintf (file, "%s", reg_names[REGNO (x)]);
21666 else if (MEM_P (x))
21668 /* We need to handle PRE_INC and PRE_DEC here, since we need to
21669 know the width from the mode. */
21670 if (GET_CODE (XEXP (x, 0)) == PRE_INC)
21671 fprintf (file, "%d(%s)", GET_MODE_SIZE (GET_MODE (x)),
21672 reg_names[REGNO (XEXP (XEXP (x, 0), 0))]);
21673 else if (GET_CODE (XEXP (x, 0)) == PRE_DEC)
21674 fprintf (file, "%d(%s)", - GET_MODE_SIZE (GET_MODE (x)),
21675 reg_names[REGNO (XEXP (XEXP (x, 0), 0))]);
21676 else if (GET_CODE (XEXP (x, 0)) == PRE_MODIFY)
21677 output_address (GET_MODE (x), XEXP (XEXP (x, 0), 1));
21678 else
21679 output_address (GET_MODE (x), XEXP (x, 0));
21681 else
21683 if (toc_relative_expr_p (x, false, &tocrel_base_oac, &tocrel_offset_oac))
21684 /* This hack along with a corresponding hack in
21685 rs6000_output_addr_const_extra arranges to output addends
21686 where the assembler expects to find them. E.g.
21687 (plus (unspec [(symbol_ref ("x")) (reg 2)] tocrel) 4)
21688 without this hack would be output as "x@toc+4". We
21689 want "x+4@toc". */
21690 output_addr_const (file, CONST_CAST_RTX (tocrel_base_oac));
21691 else
21692 output_addr_const (file, x);
21694 return;
21696 case '&':
21697 if (const char *name = get_some_local_dynamic_name ())
21698 assemble_name (file, name);
21699 else
21700 output_operand_lossage ("'%%&' used without any "
21701 "local dynamic TLS references");
21702 return;
21704 default:
21705 output_operand_lossage ("invalid %%xn code");
21709 /* Print the address of an operand. */
21711 void
21712 print_operand_address (FILE *file, rtx x)
21714 if (REG_P (x))
21715 fprintf (file, "0(%s)", reg_names[ REGNO (x) ]);
21716 else if (GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == CONST
21717 || GET_CODE (x) == LABEL_REF)
21719 output_addr_const (file, x);
21720 if (small_data_operand (x, GET_MODE (x)))
21721 fprintf (file, "@%s(%s)", SMALL_DATA_RELOC,
21722 reg_names[SMALL_DATA_REG]);
21723 else
21724 gcc_assert (!TARGET_TOC);
21726 else if (GET_CODE (x) == PLUS && REG_P (XEXP (x, 0))
21727 && REG_P (XEXP (x, 1)))
21729 if (REGNO (XEXP (x, 0)) == 0)
21730 fprintf (file, "%s,%s", reg_names[ REGNO (XEXP (x, 1)) ],
21731 reg_names[ REGNO (XEXP (x, 0)) ]);
21732 else
21733 fprintf (file, "%s,%s", reg_names[ REGNO (XEXP (x, 0)) ],
21734 reg_names[ REGNO (XEXP (x, 1)) ]);
21736 else if (GET_CODE (x) == PLUS && REG_P (XEXP (x, 0))
21737 && GET_CODE (XEXP (x, 1)) == CONST_INT)
21738 fprintf (file, HOST_WIDE_INT_PRINT_DEC "(%s)",
21739 INTVAL (XEXP (x, 1)), reg_names[ REGNO (XEXP (x, 0)) ]);
21740 #if TARGET_MACHO
21741 else if (GET_CODE (x) == LO_SUM && REG_P (XEXP (x, 0))
21742 && CONSTANT_P (XEXP (x, 1)))
21744 fprintf (file, "lo16(");
21745 output_addr_const (file, XEXP (x, 1));
21746 fprintf (file, ")(%s)", reg_names[ REGNO (XEXP (x, 0)) ]);
21748 #endif
21749 #if TARGET_ELF
21750 else if (GET_CODE (x) == LO_SUM && REG_P (XEXP (x, 0))
21751 && CONSTANT_P (XEXP (x, 1)))
21753 output_addr_const (file, XEXP (x, 1));
21754 fprintf (file, "@l(%s)", reg_names[ REGNO (XEXP (x, 0)) ]);
21756 #endif
21757 else if (toc_relative_expr_p (x, false, &tocrel_base_oac, &tocrel_offset_oac))
21759 /* This hack along with a corresponding hack in
21760 rs6000_output_addr_const_extra arranges to output addends
21761 where the assembler expects to find them. E.g.
21762 (lo_sum (reg 9)
21763 . (plus (unspec [(symbol_ref ("x")) (reg 2)] tocrel) 8))
21764 without this hack would be output as "x@toc+8@l(9)". We
21765 want "x+8@toc@l(9)". */
21766 output_addr_const (file, CONST_CAST_RTX (tocrel_base_oac));
21767 if (GET_CODE (x) == LO_SUM)
21768 fprintf (file, "@l(%s)", reg_names[REGNO (XEXP (x, 0))]);
21769 else
21770 fprintf (file, "(%s)", reg_names[REGNO (XVECEXP (tocrel_base_oac, 0, 1))]);
21772 else
21773 gcc_unreachable ();
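/* Illustrative outputs (register numbers hypothetical): a plain REG prints
   as "0(9)", REG plus CONST_INT as "8(9)", and an indexed REG+REG pair as
   "9,10" -- with r0 forced out of the first slot, since r0 in the base
   position reads as zero.  An ELF LO_SUM prints as "sym@l(9)".  */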
21776 /* Implement TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA. */
21778 static bool
21779 rs6000_output_addr_const_extra (FILE *file, rtx x)
21781 if (GET_CODE (x) == UNSPEC)
21782 switch (XINT (x, 1))
21784 case UNSPEC_TOCREL:
21785 gcc_checking_assert (GET_CODE (XVECEXP (x, 0, 0)) == SYMBOL_REF
21786 && REG_P (XVECEXP (x, 0, 1))
21787 && REGNO (XVECEXP (x, 0, 1)) == TOC_REGISTER);
21788 output_addr_const (file, XVECEXP (x, 0, 0));
21789 if (x == tocrel_base_oac && tocrel_offset_oac != const0_rtx)
21791 if (INTVAL (tocrel_offset_oac) >= 0)
21792 fprintf (file, "+");
21793 output_addr_const (file, CONST_CAST_RTX (tocrel_offset_oac));
21795 if (!TARGET_AIX || (TARGET_ELF && TARGET_MINIMAL_TOC))
21797 putc ('-', file);
21798 assemble_name (file, toc_label_name);
21799 need_toc_init = 1;
21801 else if (TARGET_ELF)
21802 fputs ("@toc", file);
21803 return true;
21805 #if TARGET_MACHO
21806 case UNSPEC_MACHOPIC_OFFSET:
21807 output_addr_const (file, XVECEXP (x, 0, 0));
21808 putc ('-', file);
21809 machopic_output_function_base_name (file);
21810 return true;
21811 #endif
21813 return false;
21816 /* Target hook for assembling integer objects. The PowerPC version has
21817 to handle fixup entries for relocatable code if RELOCATABLE_NEEDS_FIXUP
21818 is defined. It also needs to handle DI-mode objects on 64-bit
21819 targets. */
21821 static bool
21822 rs6000_assemble_integer (rtx x, unsigned int size, int aligned_p)
21824 #ifdef RELOCATABLE_NEEDS_FIXUP
21825 /* Special handling for SI values. */
21826 if (RELOCATABLE_NEEDS_FIXUP && size == 4 && aligned_p)
21828 static int recurse = 0;
21830 /* For -mrelocatable, we mark all addresses that need to be fixed up in
21831 the .fixup section. Since the TOC section is already relocated, we
21832 don't need to mark it here. We used to skip the text section, but it
21833 should never be valid for relocated addresses to be placed in the text
21834 section. */
21835 if (DEFAULT_ABI == ABI_V4
21836 && (TARGET_RELOCATABLE || flag_pic > 1)
21837 && in_section != toc_section
21838 && !recurse
21839 && !CONST_SCALAR_INT_P (x)
21840 && CONSTANT_P (x))
21842 char buf[256];
21844 recurse = 1;
21845 ASM_GENERATE_INTERNAL_LABEL (buf, "LCP", fixuplabelno);
21846 fixuplabelno++;
21847 ASM_OUTPUT_LABEL (asm_out_file, buf);
21848 fprintf (asm_out_file, "\t.long\t(");
21849 output_addr_const (asm_out_file, x);
21850 fprintf (asm_out_file, ")@fixup\n");
21851 fprintf (asm_out_file, "\t.section\t\".fixup\",\"aw\"\n");
21852 ASM_OUTPUT_ALIGN (asm_out_file, 2);
21853 fprintf (asm_out_file, "\t.long\t");
21854 assemble_name (asm_out_file, buf);
21855 fprintf (asm_out_file, "\n\t.previous\n");
21856 recurse = 0;
21857 return true;
21859 /* Remove initial .'s to turn a -mcall-aixdesc function
21860 address into the address of the descriptor, not the function
21861 itself. */
21862 else if (GET_CODE (x) == SYMBOL_REF
21863 && XSTR (x, 0)[0] == '.'
21864 && DEFAULT_ABI == ABI_AIX)
21866 const char *name = XSTR (x, 0);
21867 while (*name == '.')
21868 name++;
21870 fprintf (asm_out_file, "\t.long\t%s\n", name);
21871 return true;
21874 #endif /* RELOCATABLE_NEEDS_FIXUP */
21875 return default_assemble_integer (x, size, aligned_p);
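/* For the -mrelocatable path above, the emitted assembly has roughly this
   shape (label number and address expression are placeholders):

	.LCP7:
		.long	(expr)@fixup
		.section	".fixup","aw"
		.align	2
		.long	.LCP7
		.previous  */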
21878 #if defined (HAVE_GAS_HIDDEN) && !TARGET_MACHO
21879 /* Emit an assembler directive to set symbol visibility for DECL to
21880 VISIBILITY_TYPE. */
21882 static void
21883 rs6000_assemble_visibility (tree decl, int vis)
21885 if (TARGET_XCOFF)
21886 return;
21888 /* Functions need to have their entry point symbol visibility set as
21889 well as their descriptor symbol visibility. */
21890 if (DEFAULT_ABI == ABI_AIX
21891 && DOT_SYMBOLS
21892 && TREE_CODE (decl) == FUNCTION_DECL)
21894 static const char * const visibility_types[] = {
21895 NULL, "protected", "hidden", "internal"
21898 const char *name, *type;
21900 name = ((* targetm.strip_name_encoding)
21901 (IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl))));
21902 type = visibility_types[vis];
21904 fprintf (asm_out_file, "\t.%s\t%s\n", type, name);
21905 fprintf (asm_out_file, "\t.%s\t.%s\n", type, name);
21907 else
21908 default_assemble_visibility (decl, vis);
21910 #endif
21912 enum rtx_code
21913 rs6000_reverse_condition (machine_mode mode, enum rtx_code code)
21915 /* Reversal of FP compares requires care -- an ordered compare
21916 becomes an unordered compare and vice versa. */
21917 if (mode == CCFPmode
21918 && (!flag_finite_math_only
21919 || code == UNLT || code == UNLE || code == UNGT || code == UNGE
21920 || code == UNEQ || code == LTGT))
21921 return reverse_condition_maybe_unordered (code);
21922 else
21923 return reverse_condition (code);
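/* Example: reversing a GE test in CCFPmode yields UNLT rather than LT, so
   an unordered (NaN) result still lands on the branch the original GE would
   not have taken; under flag_finite_math_only the cheaper plain reversal is
   used for the ordered codes.  */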
21926 /* Generate a compare for CODE. Return a brand-new rtx that
21927 represents the result of the compare. */
21929 static rtx
21930 rs6000_generate_compare (rtx cmp, machine_mode mode)
21932 machine_mode comp_mode;
21933 rtx compare_result;
21934 enum rtx_code code = GET_CODE (cmp);
21935 rtx op0 = XEXP (cmp, 0);
21936 rtx op1 = XEXP (cmp, 1);
21938 if (!TARGET_FLOAT128_HW && FLOAT128_VECTOR_P (mode))
21939 comp_mode = CCmode;
21940 else if (FLOAT_MODE_P (mode))
21941 comp_mode = CCFPmode;
21942 else if (code == GTU || code == LTU
21943 || code == GEU || code == LEU)
21944 comp_mode = CCUNSmode;
21945 else if ((code == EQ || code == NE)
21946 && unsigned_reg_p (op0)
21947 && (unsigned_reg_p (op1)
21948 || (CONST_INT_P (op1) && INTVAL (op1) != 0)))
21949 /* These are unsigned values; perhaps there will be a later
21950 ordering compare that can be shared with this one. */
21951 comp_mode = CCUNSmode;
21952 else
21953 comp_mode = CCmode;
21955 /* If we have an unsigned compare, make sure we don't have a signed value as
21956 an immediate. */
21957 if (comp_mode == CCUNSmode && GET_CODE (op1) == CONST_INT
21958 && INTVAL (op1) < 0)
21960 op0 = copy_rtx_if_shared (op0);
21961 op1 = force_reg (GET_MODE (op0), op1);
21962 cmp = gen_rtx_fmt_ee (code, GET_MODE (cmp), op0, op1);
21965 /* First, the compare. */
21966 compare_result = gen_reg_rtx (comp_mode);
21968 /* IEEE 128-bit support in VSX registers when we do not have hardware
21969 support. */
21970 if (!TARGET_FLOAT128_HW && FLOAT128_VECTOR_P (mode))
21972 rtx libfunc = NULL_RTX;
21973 bool check_nan = false;
21974 rtx dest;
21976 switch (code)
21978 case EQ:
21979 case NE:
21980 libfunc = optab_libfunc (eq_optab, mode);
21981 break;
21983 case GT:
21984 case GE:
21985 libfunc = optab_libfunc (ge_optab, mode);
21986 break;
21988 case LT:
21989 case LE:
21990 libfunc = optab_libfunc (le_optab, mode);
21991 break;
21993 case UNORDERED:
21994 case ORDERED:
21995 libfunc = optab_libfunc (unord_optab, mode);
21996 code = (code == UNORDERED) ? NE : EQ;
21997 break;
21999 case UNGE:
22000 case UNGT:
22001 check_nan = true;
22002 libfunc = optab_libfunc (ge_optab, mode);
22003 code = (code == UNGE) ? GE : GT;
22004 break;
22006 case UNLE:
22007 case UNLT:
22008 check_nan = true;
22009 libfunc = optab_libfunc (le_optab, mode);
22010 code = (code == UNLE) ? LE : LT;
22011 break;
22013 case UNEQ:
22014 case LTGT:
22015 check_nan = true;
22016 libfunc = optab_libfunc (eq_optab, mode);
22017 code = (code == UNEQ) ? EQ : NE;
22018 break;
22020 default:
22021 gcc_unreachable ();
22024 gcc_assert (libfunc);
22026 if (!check_nan)
22027 dest = emit_library_call_value (libfunc, NULL_RTX, LCT_CONST,
22028 SImode, 2, op0, mode, op1, mode);
22030 /* The library signals an exception for signalling NaNs, so we need to
22031 handle isgreater, etc. by first checking isordered. */
22032 else
22034 rtx ne_rtx, normal_dest, unord_dest;
22035 rtx unord_func = optab_libfunc (unord_optab, mode);
22036 rtx join_label = gen_label_rtx ();
22037 rtx join_ref = gen_rtx_LABEL_REF (VOIDmode, join_label);
22038 rtx unord_cmp = gen_reg_rtx (comp_mode);
22041 /* Test for either value being a NaN. */
22042 gcc_assert (unord_func);
22043 unord_dest = emit_library_call_value (unord_func, NULL_RTX, LCT_CONST,
22044 SImode, 2, op0, mode, op1,
22045 mode);
22047 /* Set the value to nonzero (1) if either value is a NaN, and jump
22048 to the join label. */
22049 dest = gen_reg_rtx (SImode);
22050 emit_move_insn (dest, const1_rtx);
22051 emit_insn (gen_rtx_SET (unord_cmp,
22052 gen_rtx_COMPARE (comp_mode, unord_dest,
22053 const0_rtx)));
22055 ne_rtx = gen_rtx_NE (comp_mode, unord_cmp, const0_rtx);
22056 emit_jump_insn (gen_rtx_SET (pc_rtx,
22057 gen_rtx_IF_THEN_ELSE (VOIDmode, ne_rtx,
22058 join_ref,
22059 pc_rtx)));
22061 /* Do the normal comparison, knowing that the values are not
22062 NaNs. */
22063 normal_dest = emit_library_call_value (libfunc, NULL_RTX, LCT_CONST,
22064 SImode, 2, op0, mode, op1,
22065 mode);
22067 emit_insn (gen_cstoresi4 (dest,
22068 gen_rtx_fmt_ee (code, SImode, normal_dest,
22069 const0_rtx),
22070 normal_dest, const0_rtx));
22072 /* Join NaN and non-NaN paths. Compare dest against 0. */
22073 emit_label (join_label);
22074 code = NE;
22077 emit_insn (gen_rtx_SET (compare_result,
22078 gen_rtx_COMPARE (comp_mode, dest, const0_rtx)));
22081 else
22083 /* Generate XLC-compatible TFmode compare as PARALLEL with extra
22084 CLOBBERs to match cmptf_internal2 pattern. */
22085 if (comp_mode == CCFPmode && TARGET_XL_COMPAT
22086 && FLOAT128_IBM_P (GET_MODE (op0))
22087 && TARGET_HARD_FLOAT)
22088 emit_insn (gen_rtx_PARALLEL (VOIDmode,
22089 gen_rtvec (10,
22090 gen_rtx_SET (compare_result,
22091 gen_rtx_COMPARE (comp_mode, op0, op1)),
22092 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
22093 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
22094 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
22095 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
22096 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
22097 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
22098 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
22099 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
22100 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (Pmode)))));
22101 else if (GET_CODE (op1) == UNSPEC
22102 && XINT (op1, 1) == UNSPEC_SP_TEST)
22104 rtx op1b = XVECEXP (op1, 0, 0);
22105 comp_mode = CCEQmode;
22106 compare_result = gen_reg_rtx (CCEQmode);
22107 if (TARGET_64BIT)
22108 emit_insn (gen_stack_protect_testdi (compare_result, op0, op1b));
22109 else
22110 emit_insn (gen_stack_protect_testsi (compare_result, op0, op1b));
22112 else
22113 emit_insn (gen_rtx_SET (compare_result,
22114 gen_rtx_COMPARE (comp_mode, op0, op1)));
22117 /* Some kinds of FP comparisons need an OR operation;
22118 under flag_finite_math_only we don't bother. */
22119 if (FLOAT_MODE_P (mode)
22120 && (!FLOAT128_IEEE_P (mode) || TARGET_FLOAT128_HW)
22121 && !flag_finite_math_only
22122 && (code == LE || code == GE
22123 || code == UNEQ || code == LTGT
22124 || code == UNGT || code == UNLT))
22126 enum rtx_code or1, or2;
22127 rtx or1_rtx, or2_rtx, compare2_rtx;
22128 rtx or_result = gen_reg_rtx (CCEQmode);
22130 switch (code)
22132 case LE: or1 = LT; or2 = EQ; break;
22133 case GE: or1 = GT; or2 = EQ; break;
22134 case UNEQ: or1 = UNORDERED; or2 = EQ; break;
22135 case LTGT: or1 = LT; or2 = GT; break;
22136 case UNGT: or1 = UNORDERED; or2 = GT; break;
22137 case UNLT: or1 = UNORDERED; or2 = LT; break;
22138 default: gcc_unreachable ();
22140 validate_condition_mode (or1, comp_mode);
22141 validate_condition_mode (or2, comp_mode);
22142 or1_rtx = gen_rtx_fmt_ee (or1, SImode, compare_result, const0_rtx);
22143 or2_rtx = gen_rtx_fmt_ee (or2, SImode, compare_result, const0_rtx);
22144 compare2_rtx = gen_rtx_COMPARE (CCEQmode,
22145 gen_rtx_IOR (SImode, or1_rtx, or2_rtx),
22146 const_true_rtx);
22147 emit_insn (gen_rtx_SET (or_result, compare2_rtx));
22149 compare_result = or_result;
22150 code = EQ;
22153 validate_condition_mode (code, GET_MODE (compare_result));
22155 return gen_rtx_fmt_ee (code, VOIDmode, compare_result, const0_rtx);
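/* Worked example of the OR block above: a CCFPmode LE test splits into
   or1 = LT and or2 = EQ; the IOR of the two CR tests lands in a CCEQ
   register, and the returned comparison collapses to a plain EQ against
   zero (presumably realized as a cror of the two CR bits, going by the
   CCEQ patterns in rs6000.md).  */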
22159 /* Return the diagnostic message string if the binary operation OP is
22160 not permitted on TYPE1 and TYPE2, NULL otherwise. */
22162 static const char*
22163 rs6000_invalid_binary_op (int op ATTRIBUTE_UNUSED,
22164 const_tree type1,
22165 const_tree type2)
22167 machine_mode mode1 = TYPE_MODE (type1);
22168 machine_mode mode2 = TYPE_MODE (type2);
22170 /* For complex modes, use the inner type. */
22171 if (COMPLEX_MODE_P (mode1))
22172 mode1 = GET_MODE_INNER (mode1);
22174 if (COMPLEX_MODE_P (mode2))
22175 mode2 = GET_MODE_INNER (mode2);
22177 /* Don't allow IEEE 754R 128-bit binary floating point and IBM extended
22178 double to intermix unless -mfloat128-convert. */
22179 if (mode1 == mode2)
22180 return NULL;
22182 if (!TARGET_FLOAT128_CVT)
22184 if ((mode1 == KFmode && mode2 == IFmode)
22185 || (mode1 == IFmode && mode2 == KFmode))
22186 return N_("__float128 and __ibm128 cannot be used in the same "
22187 "expression");
22189 if (TARGET_IEEEQUAD
22190 && ((mode1 == IFmode && mode2 == TFmode)
22191 || (mode1 == TFmode && mode2 == IFmode)))
22192 return N_("__ibm128 and long double cannot be used in the same "
22193 "expression");
22195 if (!TARGET_IEEEQUAD
22196 && ((mode1 == KFmode && mode2 == TFmode)
22197 || (mode1 == TFmode && mode2 == KFmode)))
22198 return N_("__float128 and long double cannot be used in the same "
22199 "expression");
22202 return NULL;
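/* A minimal example of what gets rejected here, assuming both types are
   enabled:

	__float128 a;
	__ibm128   b;
	... a + b ...		rejected unless -mfloat128-convert,
				since the two 128-bit formats cannot be
				mixed in one expression.  */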
22206 /* Expand floating point conversion to/from __float128 and __ibm128. */
22208 void
22209 rs6000_expand_float128_convert (rtx dest, rtx src, bool unsigned_p)
22211 machine_mode dest_mode = GET_MODE (dest);
22212 machine_mode src_mode = GET_MODE (src);
22213 convert_optab cvt = unknown_optab;
22214 bool do_move = false;
22215 rtx libfunc = NULL_RTX;
22216 rtx dest2;
22217 typedef rtx (*rtx_2func_t) (rtx, rtx);
22218 rtx_2func_t hw_convert = (rtx_2func_t)0;
22219 size_t kf_or_tf;
22221 struct hw_conv_t {
22222 rtx_2func_t from_df;
22223 rtx_2func_t from_sf;
22224 rtx_2func_t from_si_sign;
22225 rtx_2func_t from_si_uns;
22226 rtx_2func_t from_di_sign;
22227 rtx_2func_t from_di_uns;
22228 rtx_2func_t to_df;
22229 rtx_2func_t to_sf;
22230 rtx_2func_t to_si_sign;
22231 rtx_2func_t to_si_uns;
22232 rtx_2func_t to_di_sign;
22233 rtx_2func_t to_di_uns;
22234 } hw_conversions[2] = {
22235 /* conversions to/from KFmode */
22237 gen_extenddfkf2_hw, /* KFmode <- DFmode. */
22238 gen_extendsfkf2_hw, /* KFmode <- SFmode. */
22239 gen_float_kfsi2_hw, /* KFmode <- SImode (signed). */
22240 gen_floatuns_kfsi2_hw, /* KFmode <- SImode (unsigned). */
22241 gen_float_kfdi2_hw, /* KFmode <- DImode (signed). */
22242 gen_floatuns_kfdi2_hw, /* KFmode <- DImode (unsigned). */
22243 gen_trunckfdf2_hw, /* DFmode <- KFmode. */
22244 gen_trunckfsf2_hw, /* SFmode <- KFmode. */
22245 gen_fix_kfsi2_hw, /* SImode <- KFmode (signed). */
22246 gen_fixuns_kfsi2_hw, /* SImode <- KFmode (unsigned). */
22247 gen_fix_kfdi2_hw, /* DImode <- KFmode (signed). */
22248 gen_fixuns_kfdi2_hw, /* DImode <- KFmode (unsigned). */
22251 /* conversions to/from TFmode */
22253 gen_extenddftf2_hw, /* TFmode <- DFmode. */
22254 gen_extendsftf2_hw, /* TFmode <- SFmode. */
22255 gen_float_tfsi2_hw, /* TFmode <- SImode (signed). */
22256 gen_floatuns_tfsi2_hw, /* TFmode <- SImode (unsigned). */
22257 gen_float_tfdi2_hw, /* TFmode <- DImode (signed). */
22258 gen_floatuns_tfdi2_hw, /* TFmode <- DImode (unsigned). */
22259 gen_trunctfdf2_hw, /* DFmode <- TFmode. */
22260 gen_trunctfsf2_hw, /* SFmode <- TFmode. */
22261 gen_fix_tfsi2_hw, /* SImode <- TFmode (signed). */
22262 gen_fixuns_tfsi2_hw, /* SImode <- TFmode (unsigned). */
22263 gen_fix_tfdi2_hw, /* DImode <- TFmode (signed). */
22264 gen_fixuns_tfdi2_hw, /* DImode <- TFmode (unsigned). */
22268 if (dest_mode == src_mode)
22269 gcc_unreachable ();
22271 /* Eliminate memory operations. */
22272 if (MEM_P (src))
22273 src = force_reg (src_mode, src);
22275 if (MEM_P (dest))
22277 rtx tmp = gen_reg_rtx (dest_mode);
22278 rs6000_expand_float128_convert (tmp, src, unsigned_p);
22279 rs6000_emit_move (dest, tmp, dest_mode);
22280 return;
22283 /* Convert to IEEE 128-bit floating point. */
22284 if (FLOAT128_IEEE_P (dest_mode))
22286 if (dest_mode == KFmode)
22287 kf_or_tf = 0;
22288 else if (dest_mode == TFmode)
22289 kf_or_tf = 1;
22290 else
22291 gcc_unreachable ();
22293 switch (src_mode)
22295 case DFmode:
22296 cvt = sext_optab;
22297 hw_convert = hw_conversions[kf_or_tf].from_df;
22298 break;
22300 case SFmode:
22301 cvt = sext_optab;
22302 hw_convert = hw_conversions[kf_or_tf].from_sf;
22303 break;
22305 case KFmode:
22306 case IFmode:
22307 case TFmode:
22308 if (FLOAT128_IBM_P (src_mode))
22309 cvt = sext_optab;
22310 else
22311 do_move = true;
22312 break;
22314 case SImode:
22315 if (unsigned_p)
22317 cvt = ufloat_optab;
22318 hw_convert = hw_conversions[kf_or_tf].from_si_uns;
22320 else
22322 cvt = sfloat_optab;
22323 hw_convert = hw_conversions[kf_or_tf].from_si_sign;
22325 break;
22327 case DImode:
22328 if (unsigned_p)
22330 cvt = ufloat_optab;
22331 hw_convert = hw_conversions[kf_or_tf].from_di_uns;
22333 else
22335 cvt = sfloat_optab;
22336 hw_convert = hw_conversions[kf_or_tf].from_di_sign;
22338 break;
22340 default:
22341 gcc_unreachable ();
22345 /* Convert from IEEE 128-bit floating point. */
22346 else if (FLOAT128_IEEE_P (src_mode))
22348 if (src_mode == KFmode)
22349 kf_or_tf = 0;
22350 else if (src_mode == TFmode)
22351 kf_or_tf = 1;
22352 else
22353 gcc_unreachable ();
22355 switch (dest_mode)
22357 case DFmode:
22358 cvt = trunc_optab;
22359 hw_convert = hw_conversions[kf_or_tf].to_df;
22360 break;
22362 case SFmode:
22363 cvt = trunc_optab;
22364 hw_convert = hw_conversions[kf_or_tf].to_sf;
22365 break;
22367 case KFmode:
22368 case IFmode:
22369 case TFmode:
22370 if (FLOAT128_IBM_P (dest_mode))
22371 cvt = trunc_optab;
22372 else
22373 do_move = true;
22374 break;
22376 case SImode:
22377 if (unsigned_p)
22379 cvt = ufix_optab;
22380 hw_convert = hw_conversions[kf_or_tf].to_si_uns;
22382 else
22384 cvt = sfix_optab;
22385 hw_convert = hw_conversions[kf_or_tf].to_si_sign;
22387 break;
22389 case DImode:
22390 if (unsigned_p)
22392 cvt = ufix_optab;
22393 hw_convert = hw_conversions[kf_or_tf].to_di_uns;
22395 else
22397 cvt = sfix_optab;
22398 hw_convert = hw_conversions[kf_or_tf].to_di_sign;
22400 break;
22402 default:
22403 gcc_unreachable ();
22407 /* Both IBM format. */
22408 else if (FLOAT128_IBM_P (dest_mode) && FLOAT128_IBM_P (src_mode))
22409 do_move = true;
22411 else
22412 gcc_unreachable ();
22414 /* Handle conversion between TFmode/KFmode. */
22415 if (do_move)
22416 emit_move_insn (dest, gen_lowpart (dest_mode, src));
22418 /* Handle conversion if we have hardware support. */
22419 else if (TARGET_FLOAT128_HW && hw_convert)
22420 emit_insn ((hw_convert) (dest, src));
22422 /* Call an external function to do the conversion. */
22423 else if (cvt != unknown_optab)
22425 libfunc = convert_optab_libfunc (cvt, dest_mode, src_mode);
22426 gcc_assert (libfunc != NULL_RTX);
22428 dest2 = emit_library_call_value (libfunc, dest, LCT_CONST, dest_mode, 1, src,
22429 src_mode);
22431 gcc_assert (dest2 != NULL_RTX);
22432 if (!rtx_equal_p (dest, dest2))
22433 emit_move_insn (dest, dest2);
22436 else
22437 gcc_unreachable ();
22439 return;
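/* For example, a signed DImode -> KFmode conversion selects
   hw_conversions[0].from_di_sign (gen_float_kfdi2_hw) under
   TARGET_FLOAT128_HW, and otherwise goes through sfloat_optab to the
   corresponding libgcc routine (__floatdikf, assuming the usual soft-fp
   naming).  */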
22443 /* Emit the RTL for an sISEL pattern. */
22445 void
22446 rs6000_emit_sISEL (machine_mode mode ATTRIBUTE_UNUSED, rtx operands[])
22448 rs6000_emit_int_cmove (operands[0], operands[1], const1_rtx, const0_rtx);
22451 /* Emit RTL that sets a register to zero if OP1 and OP2 are equal. SCRATCH
22452 can be used as that dest register. Return the dest register. */
22455 rs6000_emit_eqne (machine_mode mode, rtx op1, rtx op2, rtx scratch)
22457 if (op2 == const0_rtx)
22458 return op1;
22460 if (GET_CODE (scratch) == SCRATCH)
22461 scratch = gen_reg_rtx (mode);
22463 if (logical_operand (op2, mode))
22464 emit_insn (gen_rtx_SET (scratch, gen_rtx_XOR (mode, op1, op2)));
22465 else
22466 emit_insn (gen_rtx_SET (scratch,
22467 gen_rtx_PLUS (mode, op1, negate_rtx (mode, op2))));
22469 return scratch;
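/* E.g. for op1 = r9 and op2 = 17 (a logical_operand, i.e. roughly the
   xori/xoris immediate range) this emits scratch = r9 ^ 17, which is zero
   exactly when r9 == 17; for other constants it emits the subtraction
   scratch = r9 - 17 instead.  */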
22472 void
22473 rs6000_emit_sCOND (machine_mode mode, rtx operands[])
22475 rtx condition_rtx;
22476 machine_mode op_mode;
22477 enum rtx_code cond_code;
22478 rtx result = operands[0];
22480 condition_rtx = rs6000_generate_compare (operands[1], mode);
22481 cond_code = GET_CODE (condition_rtx);
22483 if (cond_code == NE
22484 || cond_code == GE || cond_code == LE
22485 || cond_code == GEU || cond_code == LEU
22486 || cond_code == ORDERED || cond_code == UNGE || cond_code == UNLE)
22488 rtx not_result = gen_reg_rtx (CCEQmode);
22489 rtx not_op, rev_cond_rtx;
22490 machine_mode cc_mode;
22492 cc_mode = GET_MODE (XEXP (condition_rtx, 0));
22494 rev_cond_rtx = gen_rtx_fmt_ee (rs6000_reverse_condition (cc_mode, cond_code),
22495 SImode, XEXP (condition_rtx, 0), const0_rtx);
22496 not_op = gen_rtx_COMPARE (CCEQmode, rev_cond_rtx, const0_rtx);
22497 emit_insn (gen_rtx_SET (not_result, not_op));
22498 condition_rtx = gen_rtx_EQ (VOIDmode, not_result, const0_rtx);
22501 op_mode = GET_MODE (XEXP (operands[1], 0));
22502 if (op_mode == VOIDmode)
22503 op_mode = GET_MODE (XEXP (operands[1], 1));
22505 if (TARGET_POWERPC64 && (op_mode == DImode || FLOAT_MODE_P (mode)))
22507 PUT_MODE (condition_rtx, DImode);
22508 convert_move (result, condition_rtx, 0);
22510 else
22512 PUT_MODE (condition_rtx, SImode);
22513 emit_insn (gen_rtx_SET (result, condition_rtx));
22517 /* Emit a conditional branch: compare per OPERANDS[0], branching to the label in OPERANDS[3]. */
22519 void
22520 rs6000_emit_cbranch (machine_mode mode, rtx operands[])
22522 rtx condition_rtx, loc_ref;
22524 condition_rtx = rs6000_generate_compare (operands[0], mode);
22525 loc_ref = gen_rtx_LABEL_REF (VOIDmode, operands[3]);
22526 emit_jump_insn (gen_rtx_SET (pc_rtx,
22527 gen_rtx_IF_THEN_ELSE (VOIDmode, condition_rtx,
22528 loc_ref, pc_rtx)));
22531 /* Return the string to output a conditional branch to LABEL, which is
22532 the operand template of the label, or NULL if the branch is really a
22533 conditional return.
22535 OP is the conditional expression. XEXP (OP, 0) is assumed to be a
22536 condition code register and its mode specifies what kind of
22537 comparison we made.
22539 REVERSED is nonzero if we should reverse the sense of the comparison.
22541 INSN is the insn. */
22543 char *
22544 output_cbranch (rtx op, const char *label, int reversed, rtx_insn *insn)
22546 static char string[64];
22547 enum rtx_code code = GET_CODE (op);
22548 rtx cc_reg = XEXP (op, 0);
22549 machine_mode mode = GET_MODE (cc_reg);
22550 int cc_regno = REGNO (cc_reg) - CR0_REGNO;
22551 int need_longbranch = label != NULL && get_attr_length (insn) == 8;
22552 int really_reversed = reversed ^ need_longbranch;
22553 char *s = string;
22554 const char *ccode;
22555 const char *pred;
22556 rtx note;
22558 validate_condition_mode (code, mode);
22560 /* Work out which way this really branches. We could use
22561 reverse_condition_maybe_unordered here always but this
22562 makes the resulting assembler clearer. */
22563 if (really_reversed)
22565 /* Reversal of FP compares requires care -- an ordered compare
22566 becomes an unordered compare and vice versa. */
22567 if (mode == CCFPmode)
22568 code = reverse_condition_maybe_unordered (code);
22569 else
22570 code = reverse_condition (code);
22573 switch (code)
22575 /* Not all of these are actually distinct opcodes, but
22576 we distinguish them for clarity of the resulting assembler. */
22577 case NE: case LTGT:
22578 ccode = "ne"; break;
22579 case EQ: case UNEQ:
22580 ccode = "eq"; break;
22581 case GE: case GEU:
22582 ccode = "ge"; break;
22583 case GT: case GTU: case UNGT:
22584 ccode = "gt"; break;
22585 case LE: case LEU:
22586 ccode = "le"; break;
22587 case LT: case LTU: case UNLT:
22588 ccode = "lt"; break;
22589 case UNORDERED: ccode = "un"; break;
22590 case ORDERED: ccode = "nu"; break;
22591 case UNGE: ccode = "nl"; break;
22592 case UNLE: ccode = "ng"; break;
22593 default:
22594 gcc_unreachable ();
22597 /* Maybe we have a guess as to how likely the branch is. */
22598 pred = "";
22599 note = find_reg_note (insn, REG_BR_PROB, NULL_RTX);
22600 if (note != NULL_RTX)
22602 /* PROB is the difference from 50%. */
22603 int prob = profile_probability::from_reg_br_prob_note (XINT (note, 0))
22604 .to_reg_br_prob_base () - REG_BR_PROB_BASE / 2;
22606 /* Only hint for highly probable/improbable branches on newer cpus when
22607 we have real profile data, as static prediction overrides processor
22608 dynamic prediction. For older cpus we may as well always hint, but
22609 assume not taken for branches that are very close to 50% as a
22610 mispredicted taken branch is more expensive than a
22611 mispredicted not-taken branch. */
22612 if (rs6000_always_hint
22613 || (abs (prob) > REG_BR_PROB_BASE / 100 * 48
22614 && (profile_status_for_fn (cfun) != PROFILE_GUESSED)
22615 && br_prob_note_reliable_p (note)))
22617 if (abs (prob) > REG_BR_PROB_BASE / 20
22618 && ((prob > 0) ^ need_longbranch))
22619 pred = "+";
22620 else
22621 pred = "-";
22625 if (label == NULL)
22626 s += sprintf (s, "b%slr%s ", ccode, pred);
22627 else
22628 s += sprintf (s, "b%s%s ", ccode, pred);
22630 /* We need to escape any '%' characters in the reg_names string.
22631 Assume they'd only be the first character.... */
22632 if (reg_names[cc_regno + CR0_REGNO][0] == '%')
22633 *s++ = '%';
22634 s += sprintf (s, "%s", reg_names[cc_regno + CR0_REGNO]);
22636 if (label != NULL)
22638 /* If the branch distance was too far, we may have to use an
22639 unconditional branch to go the distance. */
22640 if (need_longbranch)
22641 s += sprintf (s, ",$+8\n\tb %s", label);
22642 else
22643 s += sprintf (s, ",%s", label);
22646 return string;
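/* Sample results (CR field, label and hint all illustrative, and assuming
   -mregnames-style register names): a short, likely-taken branch comes out
   as "beq+ cr0,.L5"; when the target is out of range the sense is inverted
   and an unconditional branch covers the distance, giving
   "bne cr0,$+8" followed by "b .L5".  */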
22649 /* Return insn for VSX or Altivec comparisons. */
22651 static rtx
22652 rs6000_emit_vector_compare_inner (enum rtx_code code, rtx op0, rtx op1)
22654 rtx mask;
22655 machine_mode mode = GET_MODE (op0);
22657 switch (code)
22659 default:
22660 break;
22662 case GE:
22663 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
22664 return NULL_RTX;
22665 /* FALLTHRU */
22667 case EQ:
22668 case GT:
22669 case GTU:
22670 case ORDERED:
22671 case UNORDERED:
22672 case UNEQ:
22673 case LTGT:
22674 mask = gen_reg_rtx (mode);
22675 emit_insn (gen_rtx_SET (mask, gen_rtx_fmt_ee (code, mode, op0, op1)));
22676 return mask;
22679 return NULL_RTX;
22682 /* Emit vector compare for operands OP0 and OP1 using code RCODE.
22683 DMODE is the expected destination mode. This is a recursive function. */
22685 static rtx
22686 rs6000_emit_vector_compare (enum rtx_code rcode,
22687 rtx op0, rtx op1,
22688 machine_mode dmode)
22690 rtx mask;
22691 bool swap_operands = false;
22692 bool try_again = false;
22694 gcc_assert (VECTOR_UNIT_ALTIVEC_OR_VSX_P (dmode));
22695 gcc_assert (GET_MODE (op0) == GET_MODE (op1));
22697 /* See if the comparison works as is. */
22698 mask = rs6000_emit_vector_compare_inner (rcode, op0, op1);
22699 if (mask)
22700 return mask;
22702 switch (rcode)
22704 case LT:
22705 rcode = GT;
22706 swap_operands = true;
22707 try_again = true;
22708 break;
22709 case LTU:
22710 rcode = GTU;
22711 swap_operands = true;
22712 try_again = true;
22713 break;
22714 case NE:
22715 case UNLE:
22716 case UNLT:
22717 case UNGE:
22718 case UNGT:
22719 /* Invert condition and try again.
22720 e.g., A != B becomes ~(A==B). */
22722 enum rtx_code rev_code;
22723 enum insn_code nor_code;
22724 rtx mask2;
22726 rev_code = reverse_condition_maybe_unordered (rcode);
22727 if (rev_code == UNKNOWN)
22728 return NULL_RTX;
22730 nor_code = optab_handler (one_cmpl_optab, dmode);
22731 if (nor_code == CODE_FOR_nothing)
22732 return NULL_RTX;
22734 mask2 = rs6000_emit_vector_compare (rev_code, op0, op1, dmode);
22735 if (!mask2)
22736 return NULL_RTX;
22738 mask = gen_reg_rtx (dmode);
22739 emit_insn (GEN_FCN (nor_code) (mask, mask2));
22740 return mask;
22742 break;
22743 case GE:
22744 case GEU:
22745 case LE:
22746 case LEU:
22747 /* Try GT/GTU/LT/LTU OR EQ */
22749 rtx c_rtx, eq_rtx;
22750 enum insn_code ior_code;
22751 enum rtx_code new_code;
22753 switch (rcode)
22755 case GE:
22756 new_code = GT;
22757 break;
22759 case GEU:
22760 new_code = GTU;
22761 break;
22763 case LE:
22764 new_code = LT;
22765 break;
22767 case LEU:
22768 new_code = LTU;
22769 break;
22771 default:
22772 gcc_unreachable ();
22775 ior_code = optab_handler (ior_optab, dmode);
22776 if (ior_code == CODE_FOR_nothing)
22777 return NULL_RTX;
22779 c_rtx = rs6000_emit_vector_compare (new_code, op0, op1, dmode);
22780 if (!c_rtx)
22781 return NULL_RTX;
22783 eq_rtx = rs6000_emit_vector_compare (EQ, op0, op1, dmode);
22784 if (!eq_rtx)
22785 return NULL_RTX;
22787 mask = gen_reg_rtx (dmode);
22788 emit_insn (GEN_FCN (ior_code) (mask, c_rtx, eq_rtx));
22789 return mask;
22791 break;
22792 default:
22793 return NULL_RTX;
22796 if (try_again)
22798 if (swap_operands)
22799 std::swap (op0, op1);
22801 mask = rs6000_emit_vector_compare_inner (rcode, op0, op1);
22802 if (mask)
22803 return mask;
22806 /* You only get two chances. */
22807 return NULL_RTX;
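/* Worked example of the recursion: an integer vector LE is handled as
   LT OR EQ; the LT leg is not directly available either, so it recurses
   once more into GT with the operands swapped.  The final mask is
   ior (gt (op1, op0), eq (op0, op1)).  */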
22810 /* Emit vector conditional expression. DEST is destination. OP_TRUE and
22811 OP_FALSE are two VEC_COND_EXPR operands. CC_OP0 and CC_OP1 are the two
22812 operands for the relation operation COND. */
22815 rs6000_emit_vector_cond_expr (rtx dest, rtx op_true, rtx op_false,
22816 rtx cond, rtx cc_op0, rtx cc_op1)
22818 machine_mode dest_mode = GET_MODE (dest);
22819 machine_mode mask_mode = GET_MODE (cc_op0);
22820 enum rtx_code rcode = GET_CODE (cond);
22821 machine_mode cc_mode = CCmode;
22822 rtx mask;
22823 rtx cond2;
22824 bool invert_move = false;
22826 if (VECTOR_UNIT_NONE_P (dest_mode))
22827 return 0;
22829 gcc_assert (GET_MODE_SIZE (dest_mode) == GET_MODE_SIZE (mask_mode)
22830 && GET_MODE_NUNITS (dest_mode) == GET_MODE_NUNITS (mask_mode));
22832 switch (rcode)
22834 /* Swap operands if we can, and fall back to doing the operation as
22835 specified, and doing a NOR to invert the test. */
22836 case NE:
22837 case UNLE:
22838 case UNLT:
22839 case UNGE:
22840 case UNGT:
22841 /* Invert condition and try again.
22842 e.g., A = (B != C) ? D : E becomes A = (B == C) ? E : D. */
22843 invert_move = true;
22844 rcode = reverse_condition_maybe_unordered (rcode);
22845 if (rcode == UNKNOWN)
22846 return 0;
22847 break;
22849 case GE:
22850 case LE:
22851 if (GET_MODE_CLASS (mask_mode) == MODE_VECTOR_INT)
22853 /* Invert condition to avoid compound test. */
22854 invert_move = true;
22855 rcode = reverse_condition (rcode);
22857 break;
22859 case GTU:
22860 case GEU:
22861 case LTU:
22862 case LEU:
22863 /* Mark unsigned tests with CCUNSmode. */
22864 cc_mode = CCUNSmode;
22866 /* Invert condition to avoid compound test if necessary. */
22867 if (rcode == GEU || rcode == LEU)
22869 invert_move = true;
22870 rcode = reverse_condition (rcode);
22872 break;
22874 default:
22875 break;
22878 /* Get the vector mask for the given relational operations. */
22879 mask = rs6000_emit_vector_compare (rcode, cc_op0, cc_op1, mask_mode);
22881 if (!mask)
22882 return 0;
22884 if (invert_move)
22885 std::swap (op_true, op_false);
22887 /* Optimize vec1 == vec2, to know the mask generates -1/0. */
22888 if (GET_MODE_CLASS (dest_mode) == MODE_VECTOR_INT
22889 && (GET_CODE (op_true) == CONST_VECTOR
22890 || GET_CODE (op_false) == CONST_VECTOR))
22892 rtx constant_0 = CONST0_RTX (dest_mode);
22893 rtx constant_m1 = CONSTM1_RTX (dest_mode);
22895 if (op_true == constant_m1 && op_false == constant_0)
22897 emit_move_insn (dest, mask);
22898 return 1;
22901 else if (op_true == constant_0 && op_false == constant_m1)
22903 emit_insn (gen_rtx_SET (dest, gen_rtx_NOT (dest_mode, mask)));
22904 return 1;
22907 /* If we can't use the vector comparison directly, perhaps we can use
22908 the mask for the true or false fields, instead of loading up a
22909 constant. */
22910 if (op_true == constant_m1)
22911 op_true = mask;
22913 if (op_false == constant_0)
22914 op_false = mask;
22917 if (!REG_P (op_true) && !SUBREG_P (op_true))
22918 op_true = force_reg (dest_mode, op_true);
22920 if (!REG_P (op_false) && !SUBREG_P (op_false))
22921 op_false = force_reg (dest_mode, op_false);
22923 cond2 = gen_rtx_fmt_ee (NE, cc_mode, gen_lowpart (dest_mode, mask),
22924 CONST0_RTX (dest_mode));
22925 emit_insn (gen_rtx_SET (dest,
22926 gen_rtx_IF_THEN_ELSE (dest_mode,
22927 cond2,
22928 op_true,
22929 op_false)));
22930 return 1;
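/* The constant-vector special case above relies on the compare mask being
   -1/0 per element: for a = (b > c) ? -1 : 0 the mask is copied to the
   destination directly, and for the inverted arms a single vector NOT of
   the mask suffices.  */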
22933 /* ISA 3.0 (power9) minmax subcase to emit an XSMAXCDP or XSMINCDP instruction
22934 for SF/DF scalars. Move TRUE_COND to DEST if OP applied to the operands of
22935 the last comparison is nonzero/true, FALSE_COND if it is zero/false. Return
22936 0 if the hardware has no such operation. */
22938 static int
22939 rs6000_emit_p9_fp_minmax (rtx dest, rtx op, rtx true_cond, rtx false_cond)
22941 enum rtx_code code = GET_CODE (op);
22942 rtx op0 = XEXP (op, 0);
22943 rtx op1 = XEXP (op, 1);
22944 machine_mode compare_mode = GET_MODE (op0);
22945 machine_mode result_mode = GET_MODE (dest);
22946 bool max_p = false;
22948 if (result_mode != compare_mode)
22949 return 0;
22951 if (code == GE || code == GT)
22952 max_p = true;
22953 else if (code == LE || code == LT)
22954 max_p = false;
22955 else
22956 return 0;
22958 if (rtx_equal_p (op0, true_cond) && rtx_equal_p (op1, false_cond))
22961 else if (rtx_equal_p (op1, true_cond) && rtx_equal_p (op0, false_cond))
22962 max_p = !max_p;
22964 else
22965 return 0;
22967 rs6000_emit_minmax (dest, max_p ? SMAX : SMIN, op0, op1);
22968 return 1;
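/* E.g. DFmode (a >= b ? a : b) becomes a single XSMAXCDP, while
   (a >= b ? b : a) flips max_p and becomes XSMINCDP.  */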
22971 /* ISA 3.0 (power9) conditional move subcase to emit XSCMP{EQ,GE,GT,NE}DP and
22972 XXSEL instructions for SF/DF scalars. Move TRUE_COND to DEST if OP applied
22973 to the operands of the last comparison is nonzero/true, FALSE_COND if it is
22974 zero/false. Return 0 if the hardware has no such operation. */
22976 static int
22977 rs6000_emit_p9_fp_cmove (rtx dest, rtx op, rtx true_cond, rtx false_cond)
22979 enum rtx_code code = GET_CODE (op);
22980 rtx op0 = XEXP (op, 0);
22981 rtx op1 = XEXP (op, 1);
22982 machine_mode result_mode = GET_MODE (dest);
22983 rtx compare_rtx;
22984 rtx cmove_rtx;
22985 rtx clobber_rtx;
22987 if (!can_create_pseudo_p ())
22988 return 0;
22990 switch (code)
22992 case EQ:
22993 case GE:
22994 case GT:
22995 break;
22997 case NE:
22998 case LT:
22999 case LE:
23000 code = swap_condition (code);
23001 std::swap (op0, op1);
23002 break;
23004 default:
23005 return 0;
23008 /* Generate: [(parallel [(set (dest)
23009 (if_then_else (op (cmp1) (cmp2))
23010 (true)
23011 (false)))
23012 (clobber (scratch))])]. */
23014 compare_rtx = gen_rtx_fmt_ee (code, CCFPmode, op0, op1);
23015 cmove_rtx = gen_rtx_SET (dest,
23016 gen_rtx_IF_THEN_ELSE (result_mode,
23017 compare_rtx,
23018 true_cond,
23019 false_cond));
23021 clobber_rtx = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (V2DImode));
23022 emit_insn (gen_rtx_PARALLEL (VOIDmode,
23023 gen_rtvec (2, cmove_rtx, clobber_rtx)));
23025 return 1;
23028 /* Emit a conditional move: move TRUE_COND to DEST if OP applied to the
23029 operands of the last comparison is nonzero/true, FALSE_COND if it
23030 is zero/false. Return 0 if the hardware has no such operation. */
23033 rs6000_emit_cmove (rtx dest, rtx op, rtx true_cond, rtx false_cond)
23035 enum rtx_code code = GET_CODE (op);
23036 rtx op0 = XEXP (op, 0);
23037 rtx op1 = XEXP (op, 1);
23038 machine_mode compare_mode = GET_MODE (op0);
23039 machine_mode result_mode = GET_MODE (dest);
23040 rtx temp;
23041 bool is_against_zero;
23043 /* These modes should always match. */
23044 if (GET_MODE (op1) != compare_mode
23045 /* In the isel case however, we can use a compare immediate, so
23046 op1 may be a small constant. */
23047 && (!TARGET_ISEL || !short_cint_operand (op1, VOIDmode)))
23048 return 0;
23049 if (GET_MODE (true_cond) != result_mode)
23050 return 0;
23051 if (GET_MODE (false_cond) != result_mode)
23052 return 0;
23054 /* See if we can use the ISA 3.0 (power9) min/max/compare functions. */
23055 if (TARGET_P9_MINMAX
23056 && (compare_mode == SFmode || compare_mode == DFmode)
23057 && (result_mode == SFmode || result_mode == DFmode))
23059 if (rs6000_emit_p9_fp_minmax (dest, op, true_cond, false_cond))
23060 return 1;
23062 if (rs6000_emit_p9_fp_cmove (dest, op, true_cond, false_cond))
23063 return 1;
23066 /* Don't allow using floating point comparisons for integer results for
23067 now. */
23068 if (FLOAT_MODE_P (compare_mode) && !FLOAT_MODE_P (result_mode))
23069 return 0;
23071 /* First, work out if the hardware can do this at all, or
23072 if it's too slow.... */
23073 if (!FLOAT_MODE_P (compare_mode))
23075 if (TARGET_ISEL)
23076 return rs6000_emit_int_cmove (dest, op, true_cond, false_cond);
23077 return 0;
23080 is_against_zero = op1 == CONST0_RTX (compare_mode);
23082 /* A floating-point subtract might overflow, underflow, or produce
23083 an inexact result, thus changing the floating-point flags, so it
23084 can't be generated if we care about that. It's safe if one side
23085 of the construct is zero, since then no subtract will be
23086 generated. */
23087 if (SCALAR_FLOAT_MODE_P (compare_mode)
23088 && flag_trapping_math && ! is_against_zero)
23089 return 0;
23091 /* Eliminate half of the comparisons by switching operands; this
23092 makes the remaining code simpler. */
23093 if (code == UNLT || code == UNGT || code == UNORDERED || code == NE
23094 || code == LTGT || code == LT || code == UNLE)
23096 code = reverse_condition_maybe_unordered (code);
23097 temp = true_cond;
23098 true_cond = false_cond;
23099 false_cond = temp;
23102 /* UNEQ and LTGT take four instructions for a comparison with zero;
23103 it'll probably be faster to use a branch here too. */
23104 if (code == UNEQ && HONOR_NANS (compare_mode))
23105 return 0;
23107 /* We're going to try to implement comparisons by performing
23108 a subtract, then comparing against zero. Unfortunately,
23109 Inf - Inf is NaN which is not zero, and so if we don't
23110 know that the operand is finite and the comparison
23111 would treat EQ differently from UNORDERED, we can't do it. */
23112 if (HONOR_INFINITIES (compare_mode)
23113 && code != GT && code != UNGE
23114 && (GET_CODE (op1) != CONST_DOUBLE
23115 || real_isinf (CONST_DOUBLE_REAL_VALUE (op1)))
23116 /* Constructs of the form (a OP b ? a : b) are safe. */
23117 && ((! rtx_equal_p (op0, false_cond) && ! rtx_equal_p (op1, false_cond))
23118 || (! rtx_equal_p (op0, true_cond)
23119 && ! rtx_equal_p (op1, true_cond))))
23120 return 0;
23122 /* At this point we know we can use fsel. */
23124 /* Reduce the comparison to a comparison against zero. */
23125 if (! is_against_zero)
23127 temp = gen_reg_rtx (compare_mode);
23128 emit_insn (gen_rtx_SET (temp, gen_rtx_MINUS (compare_mode, op0, op1)));
23129 op0 = temp;
23130 op1 = CONST0_RTX (compare_mode);
23133 /* If we don't care about NaNs we can reduce some of the comparisons
23134 down to faster ones. */
23135 if (! HONOR_NANS (compare_mode))
23136 switch (code)
23138 case GT:
23139 code = LE;
23140 temp = true_cond;
23141 true_cond = false_cond;
23142 false_cond = temp;
23143 break;
23144 case UNGE:
23145 code = GE;
23146 break;
23147 case UNEQ:
23148 code = EQ;
23149 break;
23150 default:
23151 break;
23154 /* Now, reduce everything down to a GE. */
23155 switch (code)
23157 case GE:
23158 break;
23160 case LE:
23161 temp = gen_reg_rtx (compare_mode);
23162 emit_insn (gen_rtx_SET (temp, gen_rtx_NEG (compare_mode, op0)));
23163 op0 = temp;
23164 break;
23166 case ORDERED:
23167 temp = gen_reg_rtx (compare_mode);
23168 emit_insn (gen_rtx_SET (temp, gen_rtx_ABS (compare_mode, op0)));
23169 op0 = temp;
23170 break;
23172 case EQ:
23173 temp = gen_reg_rtx (compare_mode);
23174 emit_insn (gen_rtx_SET (temp,
23175 gen_rtx_NEG (compare_mode,
23176 gen_rtx_ABS (compare_mode, op0))));
23177 op0 = temp;
23178 break;
23180 case UNGE:
23181 /* a UNGE 0 <-> (a GE 0 || -a UNLT 0) */
23182 temp = gen_reg_rtx (result_mode);
23183 emit_insn (gen_rtx_SET (temp,
23184 gen_rtx_IF_THEN_ELSE (result_mode,
23185 gen_rtx_GE (VOIDmode,
23186 op0, op1),
23187 true_cond, false_cond)));
23188 false_cond = true_cond;
23189 true_cond = temp;
23191 temp = gen_reg_rtx (compare_mode);
23192 emit_insn (gen_rtx_SET (temp, gen_rtx_NEG (compare_mode, op0)));
23193 op0 = temp;
23194 break;
23196 case GT:
23197 /* a GT 0 <-> (a GE 0 && -a UNLT 0) */
23198 temp = gen_reg_rtx (result_mode);
23199 emit_insn (gen_rtx_SET (temp,
23200 gen_rtx_IF_THEN_ELSE (result_mode,
23201 gen_rtx_GE (VOIDmode,
23202 op0, op1),
23203 true_cond, false_cond)));
23204 true_cond = false_cond;
23205 false_cond = temp;
23207 temp = gen_reg_rtx (compare_mode);
23208 emit_insn (gen_rtx_SET (temp, gen_rtx_NEG (compare_mode, op0)));
23209 op0 = temp;
23210 break;
23212 default:
23213 gcc_unreachable ();
23216 emit_insn (gen_rtx_SET (dest,
23217 gen_rtx_IF_THEN_ELSE (result_mode,
23218 gen_rtx_GE (VOIDmode,
23219 op0, op1),
23220 true_cond, false_cond)));
23221 return 1;
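/* A sketch of the fsel reduction above (register names illustrative): for
   double r = (a >= b) ? x : y, the comparison is first rewritten as
   (a - b) >= 0, and the final GE-against-zero if_then_else is what the
   fsel pattern matches, giving roughly

	fsub	f0,f1,f2
	fsel	f3,f0,f4,f5  */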
23224 /* Same as above, but for ints (isel). */
23226 static int
23227 rs6000_emit_int_cmove (rtx dest, rtx op, rtx true_cond, rtx false_cond)
23229 rtx condition_rtx, cr;
23230 machine_mode mode = GET_MODE (dest);
23231 enum rtx_code cond_code;
23232 rtx (*isel_func) (rtx, rtx, rtx, rtx, rtx);
23233 bool signedp;
23235 if (mode != SImode && (!TARGET_POWERPC64 || mode != DImode))
23236 return 0;
23238 /* We still have to do the compare, because isel doesn't do a
23239 compare, it just looks at the CRx bits set by a previous compare
23240 instruction. */
23241 condition_rtx = rs6000_generate_compare (op, mode);
23242 cond_code = GET_CODE (condition_rtx);
23243 cr = XEXP (condition_rtx, 0);
23244 signedp = GET_MODE (cr) == CCmode;
23246 isel_func = (mode == SImode
23247 ? (signedp ? gen_isel_signed_si : gen_isel_unsigned_si)
23248 : (signedp ? gen_isel_signed_di : gen_isel_unsigned_di));
23250 switch (cond_code)
23252 case LT: case GT: case LTU: case GTU: case EQ:
23253 /* isel handles these directly. */
23254 break;
23256 default:
23257 /* We need to swap the sense of the comparison. */
23259 std::swap (false_cond, true_cond);
23260 PUT_CODE (condition_rtx, reverse_condition (cond_code));
23262 break;
23265 false_cond = force_reg (mode, false_cond);
23266 if (true_cond != const0_rtx)
23267 true_cond = force_reg (mode, true_cond);
23269 emit_insn (isel_func (dest, condition_rtx, true_cond, false_cond, cr));
23271 return 1;
23274 const char *
23275 output_isel (rtx *operands)
23277 enum rtx_code code;
23279 code = GET_CODE (operands[1]);
23281 if (code == GE || code == GEU || code == LE || code == LEU || code == NE)
23283 gcc_assert (GET_CODE (operands[2]) == REG
23284 && GET_CODE (operands[3]) == REG);
23285 PUT_CODE (operands[1], reverse_condition (code));
23286 return "isel %0,%3,%2,%j1";
23289 return "isel %0,%2,%3,%j1";
23292 void
23293 rs6000_emit_minmax (rtx dest, enum rtx_code code, rtx op0, rtx op1)
23295 machine_mode mode = GET_MODE (op0);
23296 enum rtx_code c;
23297 rtx target;
23299 /* VSX/altivec have direct min/max insns. */
23300 if ((code == SMAX || code == SMIN)
23301 && (VECTOR_UNIT_ALTIVEC_OR_VSX_P (mode)
23302 || (mode == SFmode && VECTOR_UNIT_VSX_P (DFmode))))
23304 emit_insn (gen_rtx_SET (dest, gen_rtx_fmt_ee (code, mode, op0, op1)));
23305 return;
23308 if (code == SMAX || code == SMIN)
23309 c = GE;
23310 else
23311 c = GEU;
23313 if (code == SMAX || code == UMAX)
23314 target = emit_conditional_move (dest, c, op0, op1, mode,
23315 op0, op1, mode, 0);
23316 else
23317 target = emit_conditional_move (dest, c, op0, op1, mode,
23318 op1, op0, mode, 0);
23319 gcc_assert (target);
23320 if (target != dest)
23321 emit_move_insn (dest, target);
23324 /* Split a signbit operation on 64-bit machines with direct move. Also handle
23325 the value coming from memory, or already being loaded into a GPR. */
23327 void
23328 rs6000_split_signbit (rtx dest, rtx src)
23330 machine_mode d_mode = GET_MODE (dest);
23331 machine_mode s_mode = GET_MODE (src);
23332 rtx dest_di = (d_mode == DImode) ? dest : gen_lowpart (DImode, dest);
23333 rtx shift_reg = dest_di;
23335 gcc_assert (FLOAT128_IEEE_P (s_mode) && TARGET_POWERPC64);
23337 if (MEM_P (src))
23339 rtx mem = (WORDS_BIG_ENDIAN
23340 ? adjust_address (src, DImode, 0)
23341 : adjust_address (src, DImode, 8));
23342 emit_insn (gen_rtx_SET (dest_di, mem));
23345 else
23347 unsigned int r = reg_or_subregno (src);
23349 if (INT_REGNO_P (r))
23350 shift_reg = gen_rtx_REG (DImode, r + (BYTES_BIG_ENDIAN == 0));
23352 else
23354 /* Generate the special mfvsrd instruction to get it in a GPR. */
23355 gcc_assert (VSX_REGNO_P (r));
23356 if (s_mode == KFmode)
23357 emit_insn (gen_signbitkf2_dm2 (dest_di, src));
23358 else
23359 emit_insn (gen_signbittf2_dm2 (dest_di, src));
23363 emit_insn (gen_lshrdi3 (dest_di, shift_reg, GEN_INT (63)));
23364 return;
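/* E.g. for a KFmode value already in a VSX register this emits the
   mfvsrd-based signbit pattern to move the word holding the sign into a
   GPR, then one shift (srdi dest,dest,63) leaves the sign in the low bit
   of DEST.  */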
23367 /* A subroutine of the atomic operation splitters. Jump to LABEL if
23368 COND is true. Mark the jump as unlikely to be taken. */
23370 static void
23371 emit_unlikely_jump (rtx cond, rtx label)
23373 rtx x = gen_rtx_IF_THEN_ELSE (VOIDmode, cond, label, pc_rtx);
23374 rtx_insn *insn = emit_jump_insn (gen_rtx_SET (pc_rtx, x));
23375 add_reg_br_prob_note (insn, profile_probability::very_unlikely ());
23378 /* A subroutine of the atomic operation splitters. Emit a load-locked
23379 instruction in MODE. For QI/HImode, possibly use a pattern that includes
23380 the zero_extend operation. */
23382 static void
23383 emit_load_locked (machine_mode mode, rtx reg, rtx mem)
23385 rtx (*fn) (rtx, rtx) = NULL;
23387 switch (mode)
23389 case QImode:
23390 fn = gen_load_lockedqi;
23391 break;
23392 case HImode:
23393 fn = gen_load_lockedhi;
23394 break;
23395 case SImode:
23396 if (GET_MODE (mem) == QImode)
23397 fn = gen_load_lockedqi_si;
23398 else if (GET_MODE (mem) == HImode)
23399 fn = gen_load_lockedhi_si;
23400 else
23401 fn = gen_load_lockedsi;
23402 break;
23403 case DImode:
23404 fn = gen_load_lockeddi;
23405 break;
23406 case TImode:
23407 fn = gen_load_lockedti;
23408 break;
23409 default:
23410 gcc_unreachable ();
23412 emit_insn (fn (reg, mem));
23415 /* A subroutine of the atomic operation splitters. Emit a store-conditional
23416 instruction in MODE. */
23418 static void
23419 emit_store_conditional (machine_mode mode, rtx res, rtx mem, rtx val)
23421 rtx (*fn) (rtx, rtx, rtx) = NULL;
23423 switch (mode)
23425 case QImode:
23426 fn = gen_store_conditionalqi;
23427 break;
23428 case HImode:
23429 fn = gen_store_conditionalhi;
23430 break;
23431 case SImode:
23432 fn = gen_store_conditionalsi;
23433 break;
23434 case DImode:
23435 fn = gen_store_conditionaldi;
23436 break;
23437 case TImode:
23438 fn = gen_store_conditionalti;
23439 break;
23440 default:
23441 gcc_unreachable ();
23444 /* Emit sync before stwcx. to address PPC405 Erratum. */
23445 if (PPC405_ERRATUM77)
23446 emit_insn (gen_hwsync ());
23448 emit_insn (fn (res, mem, val));
23451 /* Expand barriers before and after a load_locked/store_cond sequence. */
23453 static rtx
23454 rs6000_pre_atomic_barrier (rtx mem, enum memmodel model)
23456 rtx addr = XEXP (mem, 0);
23457 int strict_p = (reload_in_progress || reload_completed);
23459 if (!legitimate_indirect_address_p (addr, strict_p)
23460 && !legitimate_indexed_address_p (addr, strict_p))
23462 addr = force_reg (Pmode, addr);
23463 mem = replace_equiv_address_nv (mem, addr);
23466 switch (model)
23468 case MEMMODEL_RELAXED:
23469 case MEMMODEL_CONSUME:
23470 case MEMMODEL_ACQUIRE:
23471 break;
23472 case MEMMODEL_RELEASE:
23473 case MEMMODEL_ACQ_REL:
23474 emit_insn (gen_lwsync ());
23475 break;
23476 case MEMMODEL_SEQ_CST:
23477 emit_insn (gen_hwsync ());
23478 break;
23479 default:
23480 gcc_unreachable ();
23482 return mem;
23485 static void
23486 rs6000_post_atomic_barrier (enum memmodel model)
23488 switch (model)
23490 case MEMMODEL_RELAXED:
23491 case MEMMODEL_CONSUME:
23492 case MEMMODEL_RELEASE:
23493 break;
23494 case MEMMODEL_ACQUIRE:
23495 case MEMMODEL_ACQ_REL:
23496 case MEMMODEL_SEQ_CST:
23497 emit_insn (gen_isync ());
23498 break;
23499 default:
23500 gcc_unreachable ();
23504 /* A subroutine of the various atomic expanders. For sub-word operations,
23505 we must adjust things to operate on SImode. Given the original MEM,
23506 return a new aligned memory. Also build and return the quantities by
23507 which to shift and mask. */
23509 static rtx
23510 rs6000_adjust_atomic_subword (rtx orig_mem, rtx *pshift, rtx *pmask)
23512 rtx addr, align, shift, mask, mem;
23513 HOST_WIDE_INT shift_mask;
23514 machine_mode mode = GET_MODE (orig_mem);
23516 /* For smaller modes, we have to implement this via SImode. */
23517 shift_mask = (mode == QImode ? 0x18 : 0x10);
23519 addr = XEXP (orig_mem, 0);
23520 addr = force_reg (GET_MODE (addr), addr);
23522 /* Aligned memory containing subword. Generate a new memory. We
23523 do not want any of the existing MEM_ATTR data, as we're now
23524 accessing memory outside the original object. */
23525 align = expand_simple_binop (Pmode, AND, addr, GEN_INT (-4),
23526 NULL_RTX, 1, OPTAB_LIB_WIDEN);
23527 mem = gen_rtx_MEM (SImode, align);
23528 MEM_VOLATILE_P (mem) = MEM_VOLATILE_P (orig_mem);
23529 if (MEM_ALIAS_SET (orig_mem) == ALIAS_SET_MEMORY_BARRIER)
23530 set_mem_alias_set (mem, ALIAS_SET_MEMORY_BARRIER);
23532 /* Shift amount for subword relative to aligned word. */
23533 shift = gen_reg_rtx (SImode);
23534 addr = gen_lowpart (SImode, addr);
23535 rtx tmp = gen_reg_rtx (SImode);
23536 emit_insn (gen_ashlsi3 (tmp, addr, GEN_INT (3)));
23537 emit_insn (gen_andsi3 (shift, tmp, GEN_INT (shift_mask)));
23538 if (BYTES_BIG_ENDIAN)
23539 shift = expand_simple_binop (SImode, XOR, shift, GEN_INT (shift_mask),
23540 shift, 1, OPTAB_LIB_WIDEN);
23541 *pshift = shift;
23543 /* Mask for insertion. */
23544 mask = expand_simple_binop (SImode, ASHIFT, GEN_INT (GET_MODE_MASK (mode)),
23545 shift, NULL_RTX, 1, OPTAB_LIB_WIDEN);
23546 *pmask = mask;
23548 return mem;
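/* Worked example (little-endian HImode at address A): the aligned word is
   A & -4; the shift is (A << 3) & 0x10, i.e. 0 or 16 bits depending on
   which halfword of the word A names; and the insertion mask is
   0xffff << shift.  On big-endian the shift is XORed with 0x10, since the
   subword then sits at the opposite end of the word.  */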
23551 /* A subroutine of the various atomic expanders. For sub-word operands,
23552 combine OLDVAL and NEWVAL via MASK. Returns a new pseudo. */
23554 static rtx
23555 rs6000_mask_atomic_subword (rtx oldval, rtx newval, rtx mask)
23557 rtx x;
23559 x = gen_reg_rtx (SImode);
23560 emit_insn (gen_rtx_SET (x, gen_rtx_AND (SImode,
23561 gen_rtx_NOT (SImode, mask),
23562 oldval)));
23564 x = expand_simple_binop (SImode, IOR, newval, x, x, 1, OPTAB_LIB_WIDEN);
23566 return x;
23569 /* A subroutine of the various atomic expanders. For sub-word operands,
23570 extract WIDE to NARROW via SHIFT. */
23572 static void
23573 rs6000_finish_atomic_subword (rtx narrow, rtx wide, rtx shift)
23575 wide = expand_simple_binop (SImode, LSHIFTRT, wide, shift,
23576 wide, 1, OPTAB_LIB_WIDEN);
23577 emit_move_insn (narrow, gen_lowpart (GET_MODE (narrow), wide));
23580 /* Expand an atomic compare and swap operation. */
23582 void
23583 rs6000_expand_atomic_compare_and_swap (rtx operands[])
23585 rtx boolval, retval, mem, oldval, newval, cond;
23586 rtx label1, label2, x, mask, shift;
23587 machine_mode mode, orig_mode;
23588 enum memmodel mod_s, mod_f;
23589 bool is_weak;
23591 boolval = operands[0];
23592 retval = operands[1];
23593 mem = operands[2];
23594 oldval = operands[3];
23595 newval = operands[4];
23596 is_weak = (INTVAL (operands[5]) != 0);
23597 mod_s = memmodel_base (INTVAL (operands[6]));
23598 mod_f = memmodel_base (INTVAL (operands[7]));
23599 orig_mode = mode = GET_MODE (mem);
23601 mask = shift = NULL_RTX;
23602 if (mode == QImode || mode == HImode)
23604 /* Before power8, we didn't have access to lbarx/lharx, so we generate a
23605 lwarx plus shift/mask operations. With power8, we need to do the
23606 comparison in SImode, but the store is still done in QI/HImode. */
23607 oldval = convert_modes (SImode, mode, oldval, 1);
23609 if (!TARGET_SYNC_HI_QI)
23611 mem = rs6000_adjust_atomic_subword (mem, &shift, &mask);
23613 /* Shift and mask OLDVAL into position within the word. */
23614 oldval = expand_simple_binop (SImode, ASHIFT, oldval, shift,
23615 NULL_RTX, 1, OPTAB_LIB_WIDEN);
23617 /* Shift and mask NEWVAL into position within the word. */
23618 newval = convert_modes (SImode, mode, newval, 1);
23619 newval = expand_simple_binop (SImode, ASHIFT, newval, shift,
23620 NULL_RTX, 1, OPTAB_LIB_WIDEN);
23623 /* Prepare to adjust the return value. */
23624 retval = gen_reg_rtx (SImode);
23625 mode = SImode;
23627 else if (reg_overlap_mentioned_p (retval, oldval))
23628 oldval = copy_to_reg (oldval);
23630 if (mode != TImode && !reg_or_short_operand (oldval, mode))
23631 oldval = copy_to_mode_reg (mode, oldval);
23633 if (reg_overlap_mentioned_p (retval, newval))
23634 newval = copy_to_reg (newval);
23636 mem = rs6000_pre_atomic_barrier (mem, mod_s);
23638 label1 = NULL_RTX;
23639 if (!is_weak)
23641 label1 = gen_rtx_LABEL_REF (VOIDmode, gen_label_rtx ());
23642 emit_label (XEXP (label1, 0));
23644 label2 = gen_rtx_LABEL_REF (VOIDmode, gen_label_rtx ());
23646 emit_load_locked (mode, retval, mem);
23648 x = retval;
23649 if (mask)
23650 x = expand_simple_binop (SImode, AND, retval, mask,
23651 NULL_RTX, 1, OPTAB_LIB_WIDEN);
23653 cond = gen_reg_rtx (CCmode);
23654 /* If we have TImode, synthesize a comparison. */
23655 if (mode != TImode)
23656 x = gen_rtx_COMPARE (CCmode, x, oldval);
23657 else
23659 rtx xor1_result = gen_reg_rtx (DImode);
23660 rtx xor2_result = gen_reg_rtx (DImode);
23661 rtx or_result = gen_reg_rtx (DImode);
23662 rtx new_word0 = simplify_gen_subreg (DImode, x, TImode, 0);
23663 rtx new_word1 = simplify_gen_subreg (DImode, x, TImode, 8);
23664 rtx old_word0 = simplify_gen_subreg (DImode, oldval, TImode, 0);
23665 rtx old_word1 = simplify_gen_subreg (DImode, oldval, TImode, 8);
23667 emit_insn (gen_xordi3 (xor1_result, new_word0, old_word0));
23668 emit_insn (gen_xordi3 (xor2_result, new_word1, old_word1));
23669 emit_insn (gen_iordi3 (or_result, xor1_result, xor2_result));
23670 x = gen_rtx_COMPARE (CCmode, or_result, const0_rtx);
23673 emit_insn (gen_rtx_SET (cond, x));
23675 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
23676 emit_unlikely_jump (x, label2);
23678 x = newval;
23679 if (mask)
23680 x = rs6000_mask_atomic_subword (retval, newval, mask);
23682 emit_store_conditional (orig_mode, cond, mem, x);
23684 if (!is_weak)
23686 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
23687 emit_unlikely_jump (x, label1);
23690 if (!is_mm_relaxed (mod_f))
23691 emit_label (XEXP (label2, 0));
23693 rs6000_post_atomic_barrier (mod_s);
23695 if (is_mm_relaxed (mod_f))
23696 emit_label (XEXP (label2, 0));
23698 if (shift)
23699 rs6000_finish_atomic_subword (operands[1], retval, shift);
23700 else if (mode != GET_MODE (operands[1]))
23701 convert_move (operands[1], retval, 1);
23703 /* In all cases, CR0 contains EQ on success, and NE on failure. */
23704 x = gen_rtx_EQ (SImode, cond, const0_rtx);
23705 emit_insn (gen_rtx_SET (boolval, x));
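/* [Editorial sketch, not part of the original source.]  The RTL emitted
   above for a word-sized strong CAS corresponds roughly to this loop,
   where load_locked stands for lwarx and store_conditional for stwcx.
   (both hypothetical stand-ins).  A weak CAS omits the branch back on a
   failed store-conditional.  */
#if 0
extern int load_locked (int *);              /* lwarx stand-in */
extern int store_conditional (int *, int);   /* stwcx. stand-in */

static int
cas_word_sketch (int *mem, int *expected, int desired)
{
  int observed;
 retry:
  observed = load_locked (mem);
  if (observed != *expected)                 /* cmpw; bne -> fail */
    goto fail;
  if (!store_conditional (mem, desired))     /* may fail spuriously */
    goto retry;                              /* strong CAS retries */
  return 1;                                  /* CR0 EQ: success */
 fail:
  *expected = observed;                      /* hand back what we saw */
  return 0;                                  /* CR0 NE: failure */
}
#endif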
23708 /* Expand an atomic exchange operation. */
23710 void
23711 rs6000_expand_atomic_exchange (rtx operands[])
23713 rtx retval, mem, val, cond;
23714 machine_mode mode;
23715 enum memmodel model;
23716 rtx label, x, mask, shift;
23718 retval = operands[0];
23719 mem = operands[1];
23720 val = operands[2];
23721 model = memmodel_base (INTVAL (operands[3]));
23722 mode = GET_MODE (mem);
23724 mask = shift = NULL_RTX;
23725 if (!TARGET_SYNC_HI_QI && (mode == QImode || mode == HImode))
23727 mem = rs6000_adjust_atomic_subword (mem, &shift, &mask);
23729 /* Shift and mask VAL into position within the word. */
23730 val = convert_modes (SImode, mode, val, 1);
23731 val = expand_simple_binop (SImode, ASHIFT, val, shift,
23732 NULL_RTX, 1, OPTAB_LIB_WIDEN);
23734 /* Prepare to adjust the return value. */
23735 retval = gen_reg_rtx (SImode);
23736 mode = SImode;
23739 mem = rs6000_pre_atomic_barrier (mem, model);
23741 label = gen_rtx_LABEL_REF (VOIDmode, gen_label_rtx ());
23742 emit_label (XEXP (label, 0));
23744 emit_load_locked (mode, retval, mem);
23746 x = val;
23747 if (mask)
23748 x = rs6000_mask_atomic_subword (retval, val, mask);
23750 cond = gen_reg_rtx (CCmode);
23751 emit_store_conditional (mode, cond, mem, x);
23753 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
23754 emit_unlikely_jump (x, label);
23756 rs6000_post_atomic_barrier (model);
23758 if (shift)
23759 rs6000_finish_atomic_subword (operands[0], retval, shift);
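/* [Editorial note, not part of the original source.]  This expander is
   reached from __atomic_exchange_n via the atomic_exchange optab; on
   targets without lbarx/lharx a subword exchange like the following is
   performed as a lwarx/stwcx. loop on the containing word, using the
   subword helpers above.  */
#if 0
unsigned char
swap_byte (unsigned char *p, unsigned char v)
{
  return __atomic_exchange_n (p, v, __ATOMIC_ACQ_REL);
}
#endif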
23762 /* Expand an atomic fetch-and-operate pattern. CODE is the binary operation
23763 to perform. MEM is the memory on which to operate. VAL is the second
23764 operand of the binary operator. BEFORE and AFTER are optional locations to
23765 return the value of MEM either before or after the operation. MODEL_RTX
23766 is a CONST_INT containing the memory model to use. */
23768 void
23769 rs6000_expand_atomic_op (enum rtx_code code, rtx mem, rtx val,
23770 rtx orig_before, rtx orig_after, rtx model_rtx)
23772 enum memmodel model = memmodel_base (INTVAL (model_rtx));
23773 machine_mode mode = GET_MODE (mem);
23774 machine_mode store_mode = mode;
23775 rtx label, x, cond, mask, shift;
23776 rtx before = orig_before, after = orig_after;
23778 mask = shift = NULL_RTX;
23779 /* On power8, we want to use SImode for the operation. On previous systems,
23780 do the operation on the containing word and shift/mask to extract
23781 the proper byte or halfword. */
23782 if (mode == QImode || mode == HImode)
23784 if (TARGET_SYNC_HI_QI)
23786 val = convert_modes (SImode, mode, val, 1);
23788 /* Prepare to adjust the return value. */
23789 before = gen_reg_rtx (SImode);
23790 if (after)
23791 after = gen_reg_rtx (SImode);
23792 mode = SImode;
23794 else
23796 mem = rs6000_adjust_atomic_subword (mem, &shift, &mask);
23798 /* Shift and mask VAL into position within the word. */
23799 val = convert_modes (SImode, mode, val, 1);
23800 val = expand_simple_binop (SImode, ASHIFT, val, shift,
23801 NULL_RTX, 1, OPTAB_LIB_WIDEN);
23803 switch (code)
23805 case IOR:
23806 case XOR:
23807 /* We've already zero-extended VAL. That is sufficient to
23808 make certain that it does not affect other bits. */
23809 mask = NULL;
23810 break;
23812 case AND:
23813 /* If we make certain that all of the other bits in VAL are
23814 set, that will be sufficient to not affect other bits. */
23815 x = gen_rtx_NOT (SImode, mask);
23816 x = gen_rtx_IOR (SImode, x, val);
23817 emit_insn (gen_rtx_SET (val, x));
23818 mask = NULL;
23819 break;
23821 case NOT:
23822 case PLUS:
23823 case MINUS:
23824 /* These will all affect bits outside the field and need
23825 adjustment via MASK within the loop. */
23826 break;
23828 default:
23829 gcc_unreachable ();
23832 /* Prepare to adjust the return value. */
23833 before = gen_reg_rtx (SImode);
23834 if (after)
23835 after = gen_reg_rtx (SImode);
23836 store_mode = mode = SImode;
23840 mem = rs6000_pre_atomic_barrier (mem, model);
23842 label = gen_label_rtx ();
23843 emit_label (label);
23844 label = gen_rtx_LABEL_REF (VOIDmode, label);
23846 if (before == NULL_RTX)
23847 before = gen_reg_rtx (mode);
23849 emit_load_locked (mode, before, mem);
23851 if (code == NOT)
23853 x = expand_simple_binop (mode, AND, before, val,
23854 NULL_RTX, 1, OPTAB_LIB_WIDEN);
23855 after = expand_simple_unop (mode, NOT, x, after, 1);
23857 else
23859 after = expand_simple_binop (mode, code, before, val,
23860 after, 1, OPTAB_LIB_WIDEN);
23863 x = after;
23864 if (mask)
23866 x = expand_simple_binop (SImode, AND, after, mask,
23867 NULL_RTX, 1, OPTAB_LIB_WIDEN);
23868 x = rs6000_mask_atomic_subword (before, x, mask);
23870 else if (store_mode != mode)
23871 x = convert_modes (store_mode, mode, x, 1);
23873 cond = gen_reg_rtx (CCmode);
23874 emit_store_conditional (store_mode, cond, mem, x);
23876 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
23877 emit_unlikely_jump (x, label);
23879 rs6000_post_atomic_barrier (model);
23881 if (shift)
23883 /* QImode/HImode on machines without lbarx/lharx where we do a lwarx and
23884 then do the calculations in a SImode register. */
23885 if (orig_before)
23886 rs6000_finish_atomic_subword (orig_before, before, shift);
23887 if (orig_after)
23888 rs6000_finish_atomic_subword (orig_after, after, shift);
23890 else if (store_mode != mode)
23892 /* QImode/HImode on machines with lbarx/lharx where we do the native
23893 operation and then do the calculations in a SImode register. */
23894 if (orig_before)
23895 convert_move (orig_before, before, 1);
23896 if (orig_after)
23897 convert_move (orig_after, after, 1);
23899 else if (orig_after && after != orig_after)
23900 emit_move_insn (orig_after, after);
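/* [Editorial note, not part of the original source.]  Why IOR/XOR drop
   the mask above while AND keeps a fixup: with VAL zero-extended and
   shifted into the subword's bit positions, the bits outside the field
   are zero, which is the identity for IOR and XOR.  The identity for
   AND is all-ones, hence the val |= ~mask step.  PLUS, MINUS and NOT
   can carry into or flip bits outside the field, so they retain MASK
   and re-merge with the loaded word inside the loop.  */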
23903 /* Emit instructions to move SRC to DST. Called by splitters for
23904 multi-register moves. It will emit at most one instruction for
23905 each register that is accessed; that is, it won't emit li/lis pairs
23906 (or equivalent for 64-bit code). One of SRC or DST must be a hard
23907 register. */
23909 void
23910 rs6000_split_multireg_move (rtx dst, rtx src)
23912 /* The register number of the first register being moved. */
23913 int reg;
23914 /* The mode that is to be moved. */
23915 machine_mode mode;
23916 /* The mode that the move is being done in, and its size. */
23917 machine_mode reg_mode;
23918 int reg_mode_size;
23919 /* The number of registers that will be moved. */
23920 int nregs;
23922 reg = REG_P (dst) ? REGNO (dst) : REGNO (src);
23923 mode = GET_MODE (dst);
23924 nregs = hard_regno_nregs[reg][mode];
23925 if (FP_REGNO_P (reg))
23926 reg_mode = DECIMAL_FLOAT_MODE_P (mode) ? DDmode :
23927 ((TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT) ? DFmode : SFmode);
23928 else if (ALTIVEC_REGNO_P (reg))
23929 reg_mode = V16QImode;
23930 else
23931 reg_mode = word_mode;
23932 reg_mode_size = GET_MODE_SIZE (reg_mode);
23934 gcc_assert (reg_mode_size * nregs == GET_MODE_SIZE (mode));
23936 /* TDmode residing in FP registers is special, since the ISA requires that
23937 the lower-numbered word of a register pair is always the most significant
23938 word, even in little-endian mode. This does not match the usual subreg
23939 semantics, so we cannot use simplify_gen_subreg in those cases. Access
23940 the appropriate constituent registers "by hand" in little-endian mode.
23942 Note we do not need to check for destructive overlap here since TDmode
23943 can only reside in even/odd register pairs. */
23944 if (FP_REGNO_P (reg) && DECIMAL_FLOAT_MODE_P (mode) && !BYTES_BIG_ENDIAN)
23946 rtx p_src, p_dst;
23947 int i;
23949 for (i = 0; i < nregs; i++)
23951 if (REG_P (src) && FP_REGNO_P (REGNO (src)))
23952 p_src = gen_rtx_REG (reg_mode, REGNO (src) + nregs - 1 - i);
23953 else
23954 p_src = simplify_gen_subreg (reg_mode, src, mode,
23955 i * reg_mode_size);
23957 if (REG_P (dst) && FP_REGNO_P (REGNO (dst)))
23958 p_dst = gen_rtx_REG (reg_mode, REGNO (dst) + nregs - 1 - i);
23959 else
23960 p_dst = simplify_gen_subreg (reg_mode, dst, mode,
23961 i * reg_mode_size);
23963 emit_insn (gen_rtx_SET (p_dst, p_src));
23966 return;
23969 if (REG_P (src) && REG_P (dst) && (REGNO (src) < REGNO (dst)))
23971 /* Move register range backwards, if we might have destructive
23972 overlap. */
23973 int i;
23974 for (i = nregs - 1; i >= 0; i--)
23975 emit_insn (gen_rtx_SET (simplify_gen_subreg (reg_mode, dst, mode,
23976 i * reg_mode_size),
23977 simplify_gen_subreg (reg_mode, src, mode,
23978 i * reg_mode_size)));
23980 else
23982 int i;
23983 int j = -1;
23984 bool used_update = false;
23985 rtx restore_basereg = NULL_RTX;
23987 if (MEM_P (src) && INT_REGNO_P (reg))
23989 rtx breg;
23991 if (GET_CODE (XEXP (src, 0)) == PRE_INC
23992 || GET_CODE (XEXP (src, 0)) == PRE_DEC)
23994 rtx delta_rtx;
23995 breg = XEXP (XEXP (src, 0), 0);
23996 delta_rtx = (GET_CODE (XEXP (src, 0)) == PRE_INC
23997 ? GEN_INT (GET_MODE_SIZE (GET_MODE (src)))
23998 : GEN_INT (-GET_MODE_SIZE (GET_MODE (src))));
23999 emit_insn (gen_add3_insn (breg, breg, delta_rtx));
24000 src = replace_equiv_address (src, breg);
24002 else if (! rs6000_offsettable_memref_p (src, reg_mode))
24004 if (GET_CODE (XEXP (src, 0)) == PRE_MODIFY)
24006 rtx basereg = XEXP (XEXP (src, 0), 0);
24007 if (TARGET_UPDATE)
24009 rtx ndst = simplify_gen_subreg (reg_mode, dst, mode, 0);
24010 emit_insn (gen_rtx_SET (ndst,
24011 gen_rtx_MEM (reg_mode,
24012 XEXP (src, 0))));
24013 used_update = true;
24015 else
24016 emit_insn (gen_rtx_SET (basereg,
24017 XEXP (XEXP (src, 0), 1)));
24018 src = replace_equiv_address (src, basereg);
24020 else
24022 rtx basereg = gen_rtx_REG (Pmode, reg);
24023 emit_insn (gen_rtx_SET (basereg, XEXP (src, 0)));
24024 src = replace_equiv_address (src, basereg);
24028 breg = XEXP (src, 0);
24029 if (GET_CODE (breg) == PLUS || GET_CODE (breg) == LO_SUM)
24030 breg = XEXP (breg, 0);
24032 /* If the base register we are using to address memory is
24033 also a destination reg, then change that register last. */
24034 if (REG_P (breg)
24035 && REGNO (breg) >= REGNO (dst)
24036 && REGNO (breg) < REGNO (dst) + nregs)
24037 j = REGNO (breg) - REGNO (dst);
24039 else if (MEM_P (dst) && INT_REGNO_P (reg))
24041 rtx breg;
24043 if (GET_CODE (XEXP (dst, 0)) == PRE_INC
24044 || GET_CODE (XEXP (dst, 0)) == PRE_DEC)
24046 rtx delta_rtx;
24047 breg = XEXP (XEXP (dst, 0), 0);
24048 delta_rtx = (GET_CODE (XEXP (dst, 0)) == PRE_INC
24049 ? GEN_INT (GET_MODE_SIZE (GET_MODE (dst)))
24050 : GEN_INT (-GET_MODE_SIZE (GET_MODE (dst))));
24052 /* We have to update the breg before doing the store.
24053 Use store with update, if available. */
24055 if (TARGET_UPDATE)
24057 rtx nsrc = simplify_gen_subreg (reg_mode, src, mode, 0);
24058 emit_insn (TARGET_32BIT
24059 ? (TARGET_POWERPC64
24060 ? gen_movdi_si_update (breg, breg, delta_rtx, nsrc)
24061 : gen_movsi_update (breg, breg, delta_rtx, nsrc))
24062 : gen_movdi_di_update (breg, breg, delta_rtx, nsrc));
24063 used_update = true;
24065 else
24066 emit_insn (gen_add3_insn (breg, breg, delta_rtx));
24067 dst = replace_equiv_address (dst, breg);
24069 else if (!rs6000_offsettable_memref_p (dst, reg_mode)
24070 && GET_CODE (XEXP (dst, 0)) != LO_SUM)
24072 if (GET_CODE (XEXP (dst, 0)) == PRE_MODIFY)
24074 rtx basereg = XEXP (XEXP (dst, 0), 0);
24075 if (TARGET_UPDATE)
24077 rtx nsrc = simplify_gen_subreg (reg_mode, src, mode, 0);
24078 emit_insn (gen_rtx_SET (gen_rtx_MEM (reg_mode,
24079 XEXP (dst, 0)),
24080 nsrc));
24081 used_update = true;
24083 else
24084 emit_insn (gen_rtx_SET (basereg,
24085 XEXP (XEXP (dst, 0), 1)));
24086 dst = replace_equiv_address (dst, basereg);
24088 else
24090 rtx basereg = XEXP (XEXP (dst, 0), 0);
24091 rtx offsetreg = XEXP (XEXP (dst, 0), 1);
24092 gcc_assert (GET_CODE (XEXP (dst, 0)) == PLUS
24093 && REG_P (basereg)
24094 && REG_P (offsetreg)
24095 && REGNO (basereg) != REGNO (offsetreg));
24096 if (REGNO (basereg) == 0)
24098 rtx tmp = offsetreg;
24099 offsetreg = basereg;
24100 basereg = tmp;
24102 emit_insn (gen_add3_insn (basereg, basereg, offsetreg));
24103 restore_basereg = gen_sub3_insn (basereg, basereg, offsetreg);
24104 dst = replace_equiv_address (dst, basereg);
24107 else if (GET_CODE (XEXP (dst, 0)) != LO_SUM)
24108 gcc_assert (rs6000_offsettable_memref_p (dst, reg_mode));
24111 for (i = 0; i < nregs; i++)
24113 /* Calculate index to next subword. */
24114 ++j;
24115 if (j == nregs)
24116 j = 0;
24118 /* If the compiler already emitted the move of the first word by
24119 store with update, there is no need to do anything. */
24120 if (j == 0 && used_update)
24121 continue;
24123 emit_insn (gen_rtx_SET (simplify_gen_subreg (reg_mode, dst, mode,
24124 j * reg_mode_size),
24125 simplify_gen_subreg (reg_mode, src, mode,
24126 j * reg_mode_size)));
24128 if (restore_basereg != NULL_RTX)
24129 emit_insn (restore_basereg);
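/* [Editorial example, not part of the original source.]  The backward
   copy above guards against destructive overlap: moving the pair
   {r3,r4} into {r4,r5} forwards would clobber r4 before it is read, so
   when REGNO (src) < REGNO (dst) the move is emitted from the highest
   register down:

        mr r5, r4       (high word first)
        mr r4, r3       (low word still intact)  */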
24134 /* This page contains routines that are used to determine what the
24135 function prologue and epilogue code will do and write them out. */
24137 static inline bool
24138 save_reg_p (int r)
24140 return !call_used_regs[r] && df_regs_ever_live_p (r);
24143 /* Determine whether GP register REG is really used. */
24145 static bool
24146 rs6000_reg_live_or_pic_offset_p (int reg)
24148 /* We need to mark the PIC offset register live for the same conditions
24149 as it is set up, or otherwise it won't be saved before we clobber it. */
24151 if (reg == RS6000_PIC_OFFSET_TABLE_REGNUM && !TARGET_SINGLE_PIC_BASE)
24153 if (TARGET_TOC && TARGET_MINIMAL_TOC
24154 && (crtl->calls_eh_return
24155 || df_regs_ever_live_p (reg)
24156 || !constant_pool_empty_p ()))
24157 return true;
24159 if ((DEFAULT_ABI == ABI_V4 || DEFAULT_ABI == ABI_DARWIN)
24160 && flag_pic)
24161 return true;
24164 /* If the function calls eh_return, treat all the registers that would
24165 otherwise be checked for liveness as used. */
24167 return ((crtl->calls_eh_return || df_regs_ever_live_p (reg))
24168 && !call_used_regs[reg]);
24171 /* Return the first fixed-point register that is required to be
24172 saved. 32 if none. */
24174 static int
24175 first_reg_to_save (void)
24177 int first_reg;
24179 /* Find lowest numbered live register. */
24180 for (first_reg = 13; first_reg <= 31; first_reg++)
24181 if (save_reg_p (first_reg))
24182 break;
24184 if (first_reg > RS6000_PIC_OFFSET_TABLE_REGNUM
24185 && ((DEFAULT_ABI == ABI_V4 && flag_pic != 0)
24186 || (DEFAULT_ABI == ABI_DARWIN && flag_pic)
24187 || (TARGET_TOC && TARGET_MINIMAL_TOC))
24188 && rs6000_reg_live_or_pic_offset_p (RS6000_PIC_OFFSET_TABLE_REGNUM))
24189 first_reg = RS6000_PIC_OFFSET_TABLE_REGNUM;
24191 #if TARGET_MACHO
24192 if (flag_pic
24193 && crtl->uses_pic_offset_table
24194 && first_reg > RS6000_PIC_OFFSET_TABLE_REGNUM)
24195 return RS6000_PIC_OFFSET_TABLE_REGNUM;
24196 #endif
24198 return first_reg;
24201 /* Similar, for FP regs. */
24203 static int
24204 first_fp_reg_to_save (void)
24206 int first_reg;
24208 /* Find lowest numbered live register. */
24209 for (first_reg = 14 + 32; first_reg <= 63; first_reg++)
24210 if (save_reg_p (first_reg))
24211 break;
24213 return first_reg;
24216 /* Similar, for AltiVec regs. */
24218 static int
24219 first_altivec_reg_to_save (void)
24221 int i;
24223 /* Stack frame remains as is unless we are in AltiVec ABI. */
24224 if (! TARGET_ALTIVEC_ABI)
24225 return LAST_ALTIVEC_REGNO + 1;
24227 /* On Darwin, the unwind routines are compiled without
24228 TARGET_ALTIVEC, and use save_world to save/restore the
24229 altivec registers when necessary. */
24230 if (DEFAULT_ABI == ABI_DARWIN && crtl->calls_eh_return
24231 && ! TARGET_ALTIVEC)
24232 return FIRST_ALTIVEC_REGNO + 20;
24234 /* Find lowest numbered live register. */
24235 for (i = FIRST_ALTIVEC_REGNO + 20; i <= LAST_ALTIVEC_REGNO; ++i)
24236 if (save_reg_p (i))
24237 break;
24239 return i;
24242 /* Return a 32-bit mask of the AltiVec registers we need to set in
24243 VRSAVE. Bit n of the return value is 1 if Vn is live; the bit
24244 numbering is IBM-style, so the MSB of the 32-bit word is bit 0. */
24246 static unsigned int
24247 compute_vrsave_mask (void)
24249 unsigned int i, mask = 0;
24251 /* On Darwin, the unwind routines are compiled without
24252 TARGET_ALTIVEC, and use save_world to save/restore the
24253 call-saved altivec registers when necessary. */
24254 if (DEFAULT_ABI == ABI_DARWIN && crtl->calls_eh_return
24255 && ! TARGET_ALTIVEC)
24256 mask |= 0xFFF;
24258 /* First, find out if we use _any_ altivec registers. */
24259 for (i = FIRST_ALTIVEC_REGNO; i <= LAST_ALTIVEC_REGNO; ++i)
24260 if (df_regs_ever_live_p (i))
24261 mask |= ALTIVEC_REG_BIT (i);
24263 if (mask == 0)
24264 return mask;
24266 /* Next, remove the argument registers from the set. These must
24267 be in the VRSAVE mask set by the caller, so we don't need to add
24268 them in again. More importantly, the mask we compute here is
24269 used to generate CLOBBERs in the set_vrsave insn, and we do not
24270 wish the argument registers to die. */
24271 for (i = ALTIVEC_ARG_MIN_REG; i < (unsigned) crtl->args.info.vregno; i++)
24272 mask &= ~ALTIVEC_REG_BIT (i);
24274 /* Similarly, remove the return value from the set. */
24276 bool yes = false;
24277 diddle_return_value (is_altivec_return_reg, &yes);
24278 if (yes)
24279 mask &= ~ALTIVEC_REG_BIT (ALTIVEC_ARG_RETURN);
24282 return mask;
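/* [Editorial example, not part of the original source.]  Assuming
   ALTIVEC_REG_BIT maps Vn to the value bit 1u << (31 - n), consistent
   with the Darwin case above where 0xFFF covers exactly V20..V31, a
   function whose only live vector registers are V20 and V21 computes
   mask = 0x800 | 0x400 = 0xc00.  */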
24285 /* For a very restricted set of circumstances, we can cut down the
24286 size of prologues/epilogues by calling our own save/restore-the-world
24287 routines. */
24289 static void
24290 compute_save_world_info (rs6000_stack_t *info)
24292 info->world_save_p = 1;
24293 info->world_save_p
24294 = (WORLD_SAVE_P (info)
24295 && DEFAULT_ABI == ABI_DARWIN
24296 && !cfun->has_nonlocal_label
24297 && info->first_fp_reg_save == FIRST_SAVED_FP_REGNO
24298 && info->first_gp_reg_save == FIRST_SAVED_GP_REGNO
24299 && info->first_altivec_reg_save == FIRST_SAVED_ALTIVEC_REGNO
24300 && info->cr_save_p);
24302 /* This will not work in conjunction with sibcalls. Make sure there
24303 are none. (This check is expensive, but seldom executed.) */
24304 if (WORLD_SAVE_P (info))
24306 rtx_insn *insn;
24307 for (insn = get_last_insn_anywhere (); insn; insn = PREV_INSN (insn))
24308 if (CALL_P (insn) && SIBLING_CALL_P (insn))
24310 info->world_save_p = 0;
24311 break;
24315 if (WORLD_SAVE_P (info))
24317 /* Even if we're not touching VRsave, make sure there's room on the
24318 stack for it, if it looks like we're calling SAVE_WORLD, which
24319 will attempt to save it. */
24320 info->vrsave_size = 4;
24322 /* If we are going to save the world, we need to save the link register too. */
24323 info->lr_save_p = 1;
24325 /* "Save" the VRsave register too if we're saving the world. */
24326 if (info->vrsave_mask == 0)
24327 info->vrsave_mask = compute_vrsave_mask ();
24329 /* Because the Darwin register save/restore routines only handle
24330 F14 .. F31 and V20 .. V31 as per the ABI, perform a consistency
24331 check. */
24332 gcc_assert (info->first_fp_reg_save >= FIRST_SAVED_FP_REGNO
24333 && (info->first_altivec_reg_save
24334 >= FIRST_SAVED_ALTIVEC_REGNO));
24337 return;
24341 static void
24342 is_altivec_return_reg (rtx reg, void *xyes)
24344 bool *yes = (bool *) xyes;
24345 if (REGNO (reg) == ALTIVEC_ARG_RETURN)
24346 *yes = true;
24350 /* Return whether REG is a global user reg or has been specified by
24351 -ffixed-REG. We should not restore these, and so cannot use
24352 lmw or out-of-line restore functions if there are any. We also
24353 can't save them (well, emit frame notes for them), because frame
24354 unwinding during exception handling will restore saved registers. */
24356 static bool
24357 fixed_reg_p (int reg)
24359 /* Ignore fixed_regs[RS6000_PIC_OFFSET_TABLE_REGNUM] when the
24360 backend sets it, overriding anything the user might have given. */
24361 if (reg == RS6000_PIC_OFFSET_TABLE_REGNUM
24362 && ((DEFAULT_ABI == ABI_V4 && flag_pic)
24363 || (DEFAULT_ABI == ABI_DARWIN && flag_pic)
24364 || (TARGET_TOC && TARGET_MINIMAL_TOC)))
24365 return false;
24367 return fixed_regs[reg];
24370 /* Determine the strategy for savings/restoring registers. */
24372 enum {
24373 SAVE_MULTIPLE = 0x1,
24374 SAVE_INLINE_GPRS = 0x2,
24375 SAVE_INLINE_FPRS = 0x4,
24376 SAVE_NOINLINE_GPRS_SAVES_LR = 0x8,
24377 SAVE_NOINLINE_FPRS_SAVES_LR = 0x10,
24378 SAVE_INLINE_VRS = 0x20,
24379 REST_MULTIPLE = 0x100,
24380 REST_INLINE_GPRS = 0x200,
24381 REST_INLINE_FPRS = 0x400,
24382 REST_NOINLINE_FPRS_DOESNT_RESTORE_LR = 0x800,
24383 REST_INLINE_VRS = 0x1000
24386 static int
24387 rs6000_savres_strategy (rs6000_stack_t *info,
24388 bool using_static_chain_p)
24390 int strategy = 0;
24392 /* Select between in-line and out-of-line save and restore of regs.
24393 First, all the obvious cases where we don't use out-of-line. */
24394 if (crtl->calls_eh_return
24395 || cfun->machine->ra_need_lr)
24396 strategy |= (SAVE_INLINE_FPRS | REST_INLINE_FPRS
24397 | SAVE_INLINE_GPRS | REST_INLINE_GPRS
24398 | SAVE_INLINE_VRS | REST_INLINE_VRS);
24400 if (info->first_gp_reg_save == 32)
24401 strategy |= SAVE_INLINE_GPRS | REST_INLINE_GPRS;
24403 if (info->first_fp_reg_save == 64
24404 /* The out-of-line FP routines use double-precision stores;
24405 we can't use those routines if we don't have such stores. */
24406 || (TARGET_HARD_FLOAT && !TARGET_DOUBLE_FLOAT))
24407 strategy |= SAVE_INLINE_FPRS | REST_INLINE_FPRS;
24409 if (info->first_altivec_reg_save == LAST_ALTIVEC_REGNO + 1)
24410 strategy |= SAVE_INLINE_VRS | REST_INLINE_VRS;
24412 /* Define cutoff for using out-of-line functions to save registers. */
24413 if (DEFAULT_ABI == ABI_V4 || TARGET_ELF)
24415 if (!optimize_size)
24417 strategy |= SAVE_INLINE_FPRS | REST_INLINE_FPRS;
24418 strategy |= SAVE_INLINE_GPRS | REST_INLINE_GPRS;
24419 strategy |= SAVE_INLINE_VRS | REST_INLINE_VRS;
24421 else
24423 /* Prefer out-of-line restore if it can also perform the function exit. */
24424 if (info->first_fp_reg_save > 61)
24425 strategy |= SAVE_INLINE_FPRS;
24426 if (info->first_gp_reg_save > 29)
24428 if (info->first_fp_reg_save == 64)
24429 strategy |= SAVE_INLINE_GPRS;
24430 else
24431 strategy |= SAVE_INLINE_GPRS | REST_INLINE_GPRS;
24433 if (info->first_altivec_reg_save == LAST_ALTIVEC_REGNO)
24434 strategy |= SAVE_INLINE_VRS | REST_INLINE_VRS;
24437 else if (DEFAULT_ABI == ABI_DARWIN)
24439 if (info->first_fp_reg_save > 60)
24440 strategy |= SAVE_INLINE_FPRS | REST_INLINE_FPRS;
24441 if (info->first_gp_reg_save > 29)
24442 strategy |= SAVE_INLINE_GPRS | REST_INLINE_GPRS;
24443 strategy |= SAVE_INLINE_VRS | REST_INLINE_VRS;
24445 else
24447 gcc_checking_assert (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2);
24448 if ((flag_shrink_wrap_separate && optimize_function_for_speed_p (cfun))
24449 || info->first_fp_reg_save > 61)
24450 strategy |= SAVE_INLINE_FPRS | REST_INLINE_FPRS;
24451 strategy |= SAVE_INLINE_GPRS | REST_INLINE_GPRS;
24452 strategy |= SAVE_INLINE_VRS | REST_INLINE_VRS;
24455 /* Don't bother to try to save things out-of-line if r11 is occupied
24456 by the static chain. It would require too much fiddling and the
24457 static chain is rarely used anyway. FPRs are saved w.r.t the stack
24458 pointer on Darwin, and AIX uses r1 or r12. */
24459 if (using_static_chain_p
24460 && (DEFAULT_ABI == ABI_V4 || DEFAULT_ABI == ABI_DARWIN))
24461 strategy |= ((DEFAULT_ABI == ABI_DARWIN ? 0 : SAVE_INLINE_FPRS)
24462 | SAVE_INLINE_GPRS
24463 | SAVE_INLINE_VRS);
24465 /* We can only use the out-of-line routines to restore fprs if we've
24466 saved all the registers from first_fp_reg_save in the prologue.
24467 Otherwise, we risk loading garbage. Of course, if we have saved
24468 out-of-line then we know we haven't skipped any fprs. */
24469 if ((strategy & SAVE_INLINE_FPRS)
24470 && !(strategy & REST_INLINE_FPRS))
24472 int i;
24474 for (i = info->first_fp_reg_save; i < 64; i++)
24475 if (fixed_regs[i] || !save_reg_p (i))
24477 strategy |= REST_INLINE_FPRS;
24478 break;
24482 /* Similarly, for altivec regs. */
24483 if ((strategy & SAVE_INLINE_VRS)
24484 && !(strategy & REST_INLINE_VRS))
24486 int i;
24488 for (i = info->first_altivec_reg_save; i < LAST_ALTIVEC_REGNO + 1; i++)
24489 if (fixed_regs[i] || !save_reg_p (i))
24491 strategy |= REST_INLINE_VRS;
24492 break;
24496 /* info->lr_save_p isn't yet set if the only reason lr needs to be
24497 saved is an out-of-line save or restore. Set up the value for
24498 the next test (excluding out-of-line gprs). */
24499 bool lr_save_p = (info->lr_save_p
24500 || !(strategy & SAVE_INLINE_FPRS)
24501 || !(strategy & SAVE_INLINE_VRS)
24502 || !(strategy & REST_INLINE_FPRS)
24503 || !(strategy & REST_INLINE_VRS));
24505 if (TARGET_MULTIPLE
24506 && !TARGET_POWERPC64
24507 && info->first_gp_reg_save < 31
24508 && !(flag_shrink_wrap
24509 && flag_shrink_wrap_separate
24510 && optimize_function_for_speed_p (cfun)))
24512 /* Prefer store multiple for saves over out-of-line routines,
24513 since the store-multiple instruction will always be smaller. */
24514 strategy |= SAVE_INLINE_GPRS | SAVE_MULTIPLE;
24516 /* The situation is more complicated with load multiple. We'd
24517 prefer to use the out-of-line routines for restores, since the
24518 "exit" out-of-line routines can handle the restore of LR and the
24519 frame teardown. However, it doesn't make sense to use the
24520 out-of-line routine if that is the only reason we'd need to save
24521 LR, and we can't use the "exit" out-of-line gpr restore if we
24522 have saved some fprs; in those cases it is advantageous to use
24523 load multiple when available. */
24524 if (info->first_fp_reg_save != 64 || !lr_save_p)
24525 strategy |= REST_INLINE_GPRS | REST_MULTIPLE;
24528 /* Using the "exit" out-of-line routine does not improve code size
24529 if using it would require lr to be saved and if only saving one
24530 or two gprs. */
24531 else if (!lr_save_p && info->first_gp_reg_save > 29)
24532 strategy |= SAVE_INLINE_GPRS | REST_INLINE_GPRS;
24534 /* We can only use load multiple or the out-of-line routines to
24535 restore gprs if we've saved all the registers from
24536 first_gp_reg_save. Otherwise, we risk loading garbage.
24537 Of course, if we have saved out-of-line or used stmw then we know
24538 we haven't skipped any gprs. */
24539 if ((strategy & (SAVE_INLINE_GPRS | SAVE_MULTIPLE)) == SAVE_INLINE_GPRS
24540 && (strategy & (REST_INLINE_GPRS | REST_MULTIPLE)) != REST_INLINE_GPRS)
24542 int i;
24544 for (i = info->first_gp_reg_save; i < 32; i++)
24545 if (fixed_reg_p (i) || !save_reg_p (i))
24547 strategy |= REST_INLINE_GPRS;
24548 strategy &= ~REST_MULTIPLE;
24549 break;
24553 if (TARGET_ELF && TARGET_64BIT)
24555 if (!(strategy & SAVE_INLINE_FPRS))
24556 strategy |= SAVE_NOINLINE_FPRS_SAVES_LR;
24557 else if (!(strategy & SAVE_INLINE_GPRS)
24558 && info->first_fp_reg_save == 64)
24559 strategy |= SAVE_NOINLINE_GPRS_SAVES_LR;
24561 else if (TARGET_AIX && !(strategy & REST_INLINE_FPRS))
24562 strategy |= REST_NOINLINE_FPRS_DOESNT_RESTORE_LR;
24564 if (TARGET_MACHO && !(strategy & SAVE_INLINE_FPRS))
24565 strategy |= SAVE_NOINLINE_FPRS_SAVES_LR;
24567 return strategy;
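/* [Editorial sketch, not part of the original source.]  The returned
   STRATEGY is a bitmask over the enum above, so callers decode it with
   plain bit tests; hypothetical helper:  */
#if 0
static int
uses_out_of_line_gpr_save (int strategy)
{
  return (strategy & SAVE_INLINE_GPRS) == 0;
}
#endif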
24570 /* Calculate the stack information for the current function. This is
24571 complicated by having two separate calling sequences, the AIX calling
24572 sequence and the V.4 calling sequence.
24574 AIX (and Darwin/Mac OS X) stack frames look like:
24575 32-bit 64-bit
24576 SP----> +---------------------------------------+
24577 | back chain to caller | 0 0
24578 +---------------------------------------+
24579 | saved CR | 4 8 (8-11)
24580 +---------------------------------------+
24581 | saved LR | 8 16
24582 +---------------------------------------+
24583 | reserved for compilers | 12 24
24584 +---------------------------------------+
24585 | reserved for binders | 16 32
24586 +---------------------------------------+
24587 | saved TOC pointer | 20 40
24588 +---------------------------------------+
24589 | Parameter save area (+padding*) (P) | 24 48
24590 +---------------------------------------+
24591 | Alloca space (A) | 24+P etc.
24592 +---------------------------------------+
24593 | Local variable space (L) | 24+P+A
24594 +---------------------------------------+
24595 | Float/int conversion temporary (X) | 24+P+A+L
24596 +---------------------------------------+
24597 | Save area for AltiVec registers (W) | 24+P+A+L+X
24598 +---------------------------------------+
24599 | AltiVec alignment padding (Y) | 24+P+A+L+X+W
24600 +---------------------------------------+
24601 | Save area for VRSAVE register (Z) | 24+P+A+L+X+W+Y
24602 +---------------------------------------+
24603 | Save area for GP registers (G) | 24+P+A+L+X+W+Y+Z
24604 +---------------------------------------+
24605 | Save area for FP registers (F) | 24+P+A+L+X+W+Y+Z+G
24606 +---------------------------------------+
24607 old SP->| back chain to caller's caller |
24608 +---------------------------------------+
24610 * If the alloca area is present, the parameter save area is
24611 padded so that the former starts 16-byte aligned.
24613 The required alignment for AIX configurations is two words (i.e., 8
24614 or 16 bytes).
24616 The ELFv2 ABI is a variant of the AIX ABI. Stack frames look like:
24618 SP----> +---------------------------------------+
24619 | Back chain to caller | 0
24620 +---------------------------------------+
24621 | Save area for CR | 8
24622 +---------------------------------------+
24623 | Saved LR | 16
24624 +---------------------------------------+
24625 | Saved TOC pointer | 24
24626 +---------------------------------------+
24627 | Parameter save area (+padding*) (P) | 32
24628 +---------------------------------------+
24629 | Alloca space (A) | 32+P
24630 +---------------------------------------+
24631 | Local variable space (L) | 32+P+A
24632 +---------------------------------------+
24633 | Save area for AltiVec registers (W) | 32+P+A+L
24634 +---------------------------------------+
24635 | AltiVec alignment padding (Y) | 32+P+A+L+W
24636 +---------------------------------------+
24637 | Save area for GP registers (G) | 32+P+A+L+W+Y
24638 +---------------------------------------+
24639 | Save area for FP registers (F) | 32+P+A+L+W+Y+G
24640 +---------------------------------------+
24641 old SP->| back chain to caller's caller | 32+P+A+L+W+Y+G+F
24642 +---------------------------------------+
24644 * If the alloca area is present, the parameter save area is
24645 padded so that the former starts 16-byte aligned.
24647 V.4 stack frames look like:
24649 SP----> +---------------------------------------+
24650 | back chain to caller | 0
24651 +---------------------------------------+
24652 | caller's saved LR | 4
24653 +---------------------------------------+
24654 | Parameter save area (+padding*) (P) | 8
24655 +---------------------------------------+
24656 | Alloca space (A) | 8+P
24657 +---------------------------------------+
24658 | Varargs save area (V) | 8+P+A
24659 +---------------------------------------+
24660 | Local variable space (L) | 8+P+A+V
24661 +---------------------------------------+
24662 | Float/int conversion temporary (X) | 8+P+A+V+L
24663 +---------------------------------------+
24664 | Save area for AltiVec registers (W) | 8+P+A+V+L+X
24665 +---------------------------------------+
24666 | AltiVec alignment padding (Y) | 8+P+A+V+L+X+W
24667 +---------------------------------------+
24668 | Save area for VRSAVE register (Z) | 8+P+A+V+L+X+W+Y
24669 +---------------------------------------+
24670 | saved CR (C) | 8+P+A+V+L+X+W+Y+Z
24671 +---------------------------------------+
24672 | Save area for GP registers (G) | 8+P+A+V+L+X+W+Y+Z+C
24673 +---------------------------------------+
24674 | Save area for FP registers (F) | 8+P+A+V+L+X+W+Y+Z+C+G
24675 +---------------------------------------+
24676 old SP->| back chain to caller's caller |
24677 +---------------------------------------+
24679 * If the alloca area is present and the required alignment is
24680 16 bytes, the parameter save area is padded so that the
24681 alloca area starts 16-byte aligned.
24683 The required alignment for V.4 is 16 bytes, or 8 bytes if -meabi is
24684 given. (But note below and in sysv4.h that we require only 8 and
24685 may round up the size of our stack frame anyway. The historical
24686 reason is early versions of powerpc-linux which didn't properly
24687 align the stack at program startup. A happy side-effect is that
24688 -mno-eabi libraries can be used with -meabi programs.)
24690 The EABI configuration defaults to the V.4 layout. However,
24691 the stack alignment requirements may differ. If -mno-eabi is not
24692 given, the required stack alignment is 8 bytes; if -mno-eabi is
24693 given, the required alignment is 16 bytes. (But see V.4 comment
24694 above.) */
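/* [Editorial example, not part of the original source.]  Plugging
   concrete sizes into the ELFv2 layout above -- P = 64, A = 0, L = 48,
   no vector state (W = Y = 0), two saved GPRs (G = 16) and two saved
   FPRs (F = 16) -- puts the GP save area at SP+144, the FP save area
   at SP+160, and the caller's back chain at SP+176.  */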
24696 #ifndef ABI_STACK_BOUNDARY
24697 #define ABI_STACK_BOUNDARY STACK_BOUNDARY
24698 #endif
24700 static rs6000_stack_t *
24701 rs6000_stack_info (void)
24703 /* We should never be called for thunks, we are not set up for that. */
24704 gcc_assert (!cfun->is_thunk);
24706 rs6000_stack_t *info = &stack_info;
24707 int reg_size = TARGET_32BIT ? 4 : 8;
24708 int ehrd_size;
24709 int ehcr_size;
24710 int save_align;
24711 int first_gp;
24712 HOST_WIDE_INT non_fixed_size;
24713 bool using_static_chain_p;
24715 if (reload_completed && info->reload_completed)
24716 return info;
24718 memset (info, 0, sizeof (*info));
24719 info->reload_completed = reload_completed;
24721 /* Select which calling sequence. */
24722 info->abi = DEFAULT_ABI;
24724 /* Calculate which registers need to be saved & save area size. */
24725 info->first_gp_reg_save = first_reg_to_save ();
24726 /* Assume that we will have to save RS6000_PIC_OFFSET_TABLE_REGNUM,
24727 even if it currently looks like we won't. Reload may need it to
24728 get at a constant; if so, it will have already created a constant
24729 pool entry for it. */
24730 if (((TARGET_TOC && TARGET_MINIMAL_TOC)
24731 || (flag_pic == 1 && DEFAULT_ABI == ABI_V4)
24732 || (flag_pic && DEFAULT_ABI == ABI_DARWIN))
24733 && crtl->uses_const_pool
24734 && info->first_gp_reg_save > RS6000_PIC_OFFSET_TABLE_REGNUM)
24735 first_gp = RS6000_PIC_OFFSET_TABLE_REGNUM;
24736 else
24737 first_gp = info->first_gp_reg_save;
24739 info->gp_size = reg_size * (32 - first_gp);
24741 info->first_fp_reg_save = first_fp_reg_to_save ();
24742 info->fp_size = 8 * (64 - info->first_fp_reg_save);
24744 info->first_altivec_reg_save = first_altivec_reg_to_save ();
24745 info->altivec_size = 16 * (LAST_ALTIVEC_REGNO + 1
24746 - info->first_altivec_reg_save);
24748 /* Does this function call anything? */
24749 info->calls_p = (!crtl->is_leaf || cfun->machine->ra_needs_full_frame);
24751 /* Determine if we need to save the condition code registers. */
24752 if (save_reg_p (CR2_REGNO)
24753 || save_reg_p (CR3_REGNO)
24754 || save_reg_p (CR4_REGNO))
24756 info->cr_save_p = 1;
24757 if (DEFAULT_ABI == ABI_V4)
24758 info->cr_size = reg_size;
24761 /* If the current function calls __builtin_eh_return, then we need
24762 to allocate stack space for registers that will hold data for
24763 the exception handler. */
24764 if (crtl->calls_eh_return)
24766 unsigned int i;
24767 for (i = 0; EH_RETURN_DATA_REGNO (i) != INVALID_REGNUM; ++i)
24768 continue;
24770 ehrd_size = i * UNITS_PER_WORD;
24772 else
24773 ehrd_size = 0;
24775 /* In the ELFv2 ABI, we also need to allocate space for separate
24776 CR field save areas if the function calls __builtin_eh_return. */
24777 if (DEFAULT_ABI == ABI_ELFv2 && crtl->calls_eh_return)
24779 /* This hard-codes that we have three call-saved CR fields. */
24780 ehcr_size = 3 * reg_size;
24781 /* We do *not* use the regular CR save mechanism. */
24782 info->cr_save_p = 0;
24784 else
24785 ehcr_size = 0;
24787 /* Determine various sizes. */
24788 info->reg_size = reg_size;
24789 info->fixed_size = RS6000_SAVE_AREA;
24790 info->vars_size = RS6000_ALIGN (get_frame_size (), 8);
24791 if (cfun->calls_alloca)
24792 info->parm_size =
24793 RS6000_ALIGN (crtl->outgoing_args_size + info->fixed_size,
24794 STACK_BOUNDARY / BITS_PER_UNIT) - info->fixed_size;
24795 else
24796 info->parm_size = RS6000_ALIGN (crtl->outgoing_args_size,
24797 TARGET_ALTIVEC ? 16 : 8);
24798 if (FRAME_GROWS_DOWNWARD)
24799 info->vars_size
24800 += RS6000_ALIGN (info->fixed_size + info->vars_size + info->parm_size,
24801 ABI_STACK_BOUNDARY / BITS_PER_UNIT)
24802 - (info->fixed_size + info->vars_size + info->parm_size);
24804 if (TARGET_ALTIVEC_ABI)
24805 info->vrsave_mask = compute_vrsave_mask ();
24807 if (TARGET_ALTIVEC_VRSAVE && info->vrsave_mask)
24808 info->vrsave_size = 4;
24810 compute_save_world_info (info);
24812 /* Calculate the offsets. */
24813 switch (DEFAULT_ABI)
24815 case ABI_NONE:
24816 default:
24817 gcc_unreachable ();
24819 case ABI_AIX:
24820 case ABI_ELFv2:
24821 case ABI_DARWIN:
24822 info->fp_save_offset = -info->fp_size;
24823 info->gp_save_offset = info->fp_save_offset - info->gp_size;
24825 if (TARGET_ALTIVEC_ABI)
24827 info->vrsave_save_offset = info->gp_save_offset - info->vrsave_size;
24829 /* Align stack so vector save area is on a quadword boundary.
24830 The padding goes above the vectors. */
24831 if (info->altivec_size != 0)
24832 info->altivec_padding_size = info->vrsave_save_offset & 0xF;
24834 info->altivec_save_offset = info->vrsave_save_offset
24835 - info->altivec_padding_size
24836 - info->altivec_size;
24837 gcc_assert (info->altivec_size == 0
24838 || info->altivec_save_offset % 16 == 0);
24840 /* Adjust for AltiVec case. */
24841 info->ehrd_offset = info->altivec_save_offset - ehrd_size;
24843 else
24844 info->ehrd_offset = info->gp_save_offset - ehrd_size;
24846 info->ehcr_offset = info->ehrd_offset - ehcr_size;
24847 info->cr_save_offset = reg_size; /* first word when 64-bit. */
24848 info->lr_save_offset = 2*reg_size;
24849 break;
24851 case ABI_V4:
24852 info->fp_save_offset = -info->fp_size;
24853 info->gp_save_offset = info->fp_save_offset - info->gp_size;
24854 info->cr_save_offset = info->gp_save_offset - info->cr_size;
24856 if (TARGET_ALTIVEC_ABI)
24858 info->vrsave_save_offset = info->cr_save_offset - info->vrsave_size;
24860 /* Align stack so vector save area is on a quadword boundary. */
24861 if (info->altivec_size != 0)
24862 info->altivec_padding_size = 16 - (-info->vrsave_save_offset % 16);
24864 info->altivec_save_offset = info->vrsave_save_offset
24865 - info->altivec_padding_size
24866 - info->altivec_size;
24868 /* Adjust for AltiVec case. */
24869 info->ehrd_offset = info->altivec_save_offset;
24871 else
24872 info->ehrd_offset = info->cr_save_offset;
24874 info->ehrd_offset -= ehrd_size;
24875 info->lr_save_offset = reg_size;
24878 save_align = (TARGET_ALTIVEC_ABI || DEFAULT_ABI == ABI_DARWIN) ? 16 : 8;
24879 info->save_size = RS6000_ALIGN (info->fp_size
24880 + info->gp_size
24881 + info->altivec_size
24882 + info->altivec_padding_size
24883 + ehrd_size
24884 + ehcr_size
24885 + info->cr_size
24886 + info->vrsave_size,
24887 save_align);
24889 non_fixed_size = info->vars_size + info->parm_size + info->save_size;
24891 info->total_size = RS6000_ALIGN (non_fixed_size + info->fixed_size,
24892 ABI_STACK_BOUNDARY / BITS_PER_UNIT);
24894 /* Determine if we need to save the link register. */
24895 if (info->calls_p
24896 || ((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
24897 && crtl->profile
24898 && !TARGET_PROFILE_KERNEL)
24899 || (DEFAULT_ABI == ABI_V4 && cfun->calls_alloca)
24900 #ifdef TARGET_RELOCATABLE
24901 || (DEFAULT_ABI == ABI_V4
24902 && (TARGET_RELOCATABLE || flag_pic > 1)
24903 && !constant_pool_empty_p ())
24904 #endif
24905 || rs6000_ra_ever_killed ())
24906 info->lr_save_p = 1;
24908 using_static_chain_p = (cfun->static_chain_decl != NULL_TREE
24909 && df_regs_ever_live_p (STATIC_CHAIN_REGNUM)
24910 && call_used_regs[STATIC_CHAIN_REGNUM]);
24911 info->savres_strategy = rs6000_savres_strategy (info, using_static_chain_p);
24913 if (!(info->savres_strategy & SAVE_INLINE_GPRS)
24914 || !(info->savres_strategy & SAVE_INLINE_FPRS)
24915 || !(info->savres_strategy & SAVE_INLINE_VRS)
24916 || !(info->savres_strategy & REST_INLINE_GPRS)
24917 || !(info->savres_strategy & REST_INLINE_FPRS)
24918 || !(info->savres_strategy & REST_INLINE_VRS))
24919 info->lr_save_p = 1;
24921 if (info->lr_save_p)
24922 df_set_regs_ever_live (LR_REGNO, true);
24924 /* Determine if we need to allocate any stack frame:
24926 For AIX we need to push the stack if a frame pointer is needed
24927 (because the stack might be dynamically adjusted), if we are
24928 debugging, if we make calls, or if the sum of fp_save, gp_save,
24929 and local variables is more than the space needed to save all
24930 non-volatile registers: 32-bit: 18*8 + 19*4 = 220 or 64-bit: 18*8
24931 + 18*8 = 288 (GPR13 reserved).
24933 For V.4 we don't have the stack cushion that AIX uses, but assume
24934 that the debugger can handle stackless frames. */
24936 if (info->calls_p)
24937 info->push_p = 1;
24939 else if (DEFAULT_ABI == ABI_V4)
24940 info->push_p = non_fixed_size != 0;
24942 else if (frame_pointer_needed)
24943 info->push_p = 1;
24945 else if (TARGET_XCOFF && write_symbols != NO_DEBUG)
24946 info->push_p = 1;
24948 else
24949 info->push_p = non_fixed_size > (TARGET_32BIT ? 220 : 288);
24951 return info;
24954 static void
24955 debug_stack_info (rs6000_stack_t *info)
24957 const char *abi_string;
24959 if (! info)
24960 info = rs6000_stack_info ();
24962 fprintf (stderr, "\nStack information for function %s:\n",
24963 ((current_function_decl && DECL_NAME (current_function_decl))
24964 ? IDENTIFIER_POINTER (DECL_NAME (current_function_decl))
24965 : "<unknown>"));
24967 switch (info->abi)
24969 default: abi_string = "Unknown"; break;
24970 case ABI_NONE: abi_string = "NONE"; break;
24971 case ABI_AIX: abi_string = "AIX"; break;
24972 case ABI_ELFv2: abi_string = "ELFv2"; break;
24973 case ABI_DARWIN: abi_string = "Darwin"; break;
24974 case ABI_V4: abi_string = "V.4"; break;
24977 fprintf (stderr, "\tABI = %5s\n", abi_string);
24979 if (TARGET_ALTIVEC_ABI)
24980 fprintf (stderr, "\tALTIVEC ABI extensions enabled.\n");
24982 if (info->first_gp_reg_save != 32)
24983 fprintf (stderr, "\tfirst_gp_reg_save = %5d\n", info->first_gp_reg_save);
24985 if (info->first_fp_reg_save != 64)
24986 fprintf (stderr, "\tfirst_fp_reg_save = %5d\n", info->first_fp_reg_save);
24988 if (info->first_altivec_reg_save <= LAST_ALTIVEC_REGNO)
24989 fprintf (stderr, "\tfirst_altivec_reg_save = %5d\n",
24990 info->first_altivec_reg_save);
24992 if (info->lr_save_p)
24993 fprintf (stderr, "\tlr_save_p = %5d\n", info->lr_save_p);
24995 if (info->cr_save_p)
24996 fprintf (stderr, "\tcr_save_p = %5d\n", info->cr_save_p);
24998 if (info->vrsave_mask)
24999 fprintf (stderr, "\tvrsave_mask = 0x%x\n", info->vrsave_mask);
25001 if (info->push_p)
25002 fprintf (stderr, "\tpush_p = %5d\n", info->push_p);
25004 if (info->calls_p)
25005 fprintf (stderr, "\tcalls_p = %5d\n", info->calls_p);
25007 if (info->gp_size)
25008 fprintf (stderr, "\tgp_save_offset = %5d\n", info->gp_save_offset);
25010 if (info->fp_size)
25011 fprintf (stderr, "\tfp_save_offset = %5d\n", info->fp_save_offset);
25013 if (info->altivec_size)
25014 fprintf (stderr, "\taltivec_save_offset = %5d\n",
25015 info->altivec_save_offset);
25017 if (info->vrsave_size)
25018 fprintf (stderr, "\tvrsave_save_offset = %5d\n",
25019 info->vrsave_save_offset);
25021 if (info->lr_save_p)
25022 fprintf (stderr, "\tlr_save_offset = %5d\n", info->lr_save_offset);
25024 if (info->cr_save_p)
25025 fprintf (stderr, "\tcr_save_offset = %5d\n", info->cr_save_offset);
25027 if (info->varargs_save_offset)
25028 fprintf (stderr, "\tvarargs_save_offset = %5d\n", info->varargs_save_offset);
25030 if (info->total_size)
25031 fprintf (stderr, "\ttotal_size = " HOST_WIDE_INT_PRINT_DEC"\n",
25032 info->total_size);
25034 if (info->vars_size)
25035 fprintf (stderr, "\tvars_size = " HOST_WIDE_INT_PRINT_DEC"\n",
25036 info->vars_size);
25038 if (info->parm_size)
25039 fprintf (stderr, "\tparm_size = %5d\n", info->parm_size);
25041 if (info->fixed_size)
25042 fprintf (stderr, "\tfixed_size = %5d\n", info->fixed_size);
25044 if (info->gp_size)
25045 fprintf (stderr, "\tgp_size = %5d\n", info->gp_size);
25047 if (info->fp_size)
25048 fprintf (stderr, "\tfp_size = %5d\n", info->fp_size);
25050 if (info->altivec_size)
25051 fprintf (stderr, "\taltivec_size = %5d\n", info->altivec_size);
25053 if (info->vrsave_size)
25054 fprintf (stderr, "\tvrsave_size = %5d\n", info->vrsave_size);
25056 if (info->altivec_padding_size)
25057 fprintf (stderr, "\taltivec_padding_size= %5d\n",
25058 info->altivec_padding_size);
25060 if (info->cr_size)
25061 fprintf (stderr, "\tcr_size = %5d\n", info->cr_size);
25063 if (info->save_size)
25064 fprintf (stderr, "\tsave_size = %5d\n", info->save_size);
25066 if (info->reg_size != 4)
25067 fprintf (stderr, "\treg_size = %5d\n", info->reg_size);
25069 fprintf (stderr, "\tsave-strategy = %04x\n", info->savres_strategy);
25071 fprintf (stderr, "\n");
25074 rtx
25075 rs6000_return_addr (int count, rtx frame)
25077 /* Currently we don't optimize very well between prolog and body
25078 code and for PIC code the code can actually be quite bad, so
25079 don't try to be too clever here. */
25080 if (count != 0
25081 || ((DEFAULT_ABI == ABI_V4 || DEFAULT_ABI == ABI_DARWIN) && flag_pic))
25083 cfun->machine->ra_needs_full_frame = 1;
25085 return
25086 gen_rtx_MEM
25087 (Pmode,
25088 memory_address
25089 (Pmode,
25090 plus_constant (Pmode,
25091 copy_to_reg
25092 (gen_rtx_MEM (Pmode,
25093 memory_address (Pmode, frame))),
25094 RETURN_ADDRESS_OFFSET)));
25097 cfun->machine->ra_need_lr = 1;
25098 return get_hard_reg_initial_val (Pmode, LR_REGNO);
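/* [Editorial note, not part of the original source.]  COUNT reaches
   this function from __builtin_return_address via RETURN_ADDR_RTX; a
   nonzero COUNT (or any PIC V.4/Darwin function) takes the back-chain
   walk above and forces ra_needs_full_frame.  */
#if 0
void *
caller_of_caller (void)
{
  return __builtin_return_address (1);  /* count == 1 */
}
#endif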
25101 /* Say whether a function is a candidate for sibcall handling or not. */
25103 static bool
25104 rs6000_function_ok_for_sibcall (tree decl, tree exp)
25106 tree fntype;
25108 if (decl)
25109 fntype = TREE_TYPE (decl);
25110 else
25111 fntype = TREE_TYPE (TREE_TYPE (CALL_EXPR_FN (exp)));
25113 /* We can't do it if the called function has more vector parameters
25114 than the current function; there's nowhere to put the VRsave code. */
25115 if (TARGET_ALTIVEC_ABI
25116 && TARGET_ALTIVEC_VRSAVE
25117 && !(decl && decl == current_function_decl))
25119 function_args_iterator args_iter;
25120 tree type;
25121 int nvreg = 0;
25123 /* Functions with vector parameters are required to have a
25124 prototype, so the argument type info must be available
25125 here. */
25126 FOREACH_FUNCTION_ARGS(fntype, type, args_iter)
25127 if (TREE_CODE (type) == VECTOR_TYPE
25128 && ALTIVEC_OR_VSX_VECTOR_MODE (TYPE_MODE (type)))
25129 nvreg++;
25131 FOREACH_FUNCTION_ARGS(TREE_TYPE (current_function_decl), type, args_iter)
25132 if (TREE_CODE (type) == VECTOR_TYPE
25133 && ALTIVEC_OR_VSX_VECTOR_MODE (TYPE_MODE (type)))
25134 nvreg--;
25136 if (nvreg > 0)
25137 return false;
25140 /* Under the AIX or ELFv2 ABIs we can't allow calls to non-local
25141 functions, because the callee may have a different TOC pointer to
25142 the caller and there's no way to ensure we restore the TOC when
25143 we return. With the secure-plt SYSV ABI we can't make non-local
25144 calls when -fpic/PIC because the plt call stubs use r30. */
25145 if (DEFAULT_ABI == ABI_DARWIN
25146 || ((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
25147 && decl
25148 && !DECL_EXTERNAL (decl)
25149 && !DECL_WEAK (decl)
25150 && (*targetm.binds_local_p) (decl))
25151 || (DEFAULT_ABI == ABI_V4
25152 && (!TARGET_SECURE_PLT
25153 || !flag_pic
25154 || (decl
25155 && (*targetm.binds_local_p) (decl)))))
25157 tree attr_list = TYPE_ATTRIBUTES (fntype);
25159 if (!lookup_attribute ("longcall", attr_list)
25160 || lookup_attribute ("shortcall", attr_list))
25161 return true;
25164 return false;
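/* [Editorial example, not part of the original source.]  Under AIX or
   ELFv2 the test above admits sibcalls only to functions that bind
   locally (so caller and callee share a TOC), and a longcall attribute
   vetoes them even then.  */
#if 0
static int bump (int x) { return x + 1; }

int
tail_ok (int x)
{
  return bump (x);                      /* local binding: may sibcall */
}

extern int far_away (int) __attribute__ ((longcall));

int
no_tail (int x)
{
  return far_away (x);                  /* longcall: no sibcall */
}
#endif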
25167 static int
25168 rs6000_ra_ever_killed (void)
25170 rtx_insn *top;
25171 rtx reg;
25172 rtx_insn *insn;
25174 if (cfun->is_thunk)
25175 return 0;
25177 if (cfun->machine->lr_save_state)
25178 return cfun->machine->lr_save_state - 1;
25180 /* regs_ever_live has LR marked as used if any sibcalls are present,
25181 but this should not force saving and restoring in the
25182 pro/epilogue. Likewise, reg_set_between_p thinks a sibcall
25183 clobbers LR, so that is inappropriate. */
25185 /* Also, the prologue can generate a store into LR that
25186 doesn't really count, like this:
25188 move LR->R0
25189 bcl to set PIC register
25190 move LR->R31
25191 move R0->LR
25193 When we're called from the epilogue, we need to avoid counting
25194 this as a store. */
25196 push_topmost_sequence ();
25197 top = get_insns ();
25198 pop_topmost_sequence ();
25199 reg = gen_rtx_REG (Pmode, LR_REGNO);
25201 for (insn = NEXT_INSN (top); insn != NULL_RTX; insn = NEXT_INSN (insn))
25203 if (INSN_P (insn))
25205 if (CALL_P (insn))
25207 if (!SIBLING_CALL_P (insn))
25208 return 1;
25210 else if (find_regno_note (insn, REG_INC, LR_REGNO))
25211 return 1;
25212 else if (set_of (reg, insn) != NULL_RTX
25213 && !prologue_epilogue_contains (insn))
25214 return 1;
25217 return 0;
25220 /* Emit instructions needed to load the TOC register.
25221 This is only needed when TARGET_TOC and TARGET_MINIMAL_TOC are set and
25222 there is a constant pool; or for SVR4 -fpic. */
25224 void
25225 rs6000_emit_load_toc_table (int fromprolog)
25227 rtx dest;
25228 dest = gen_rtx_REG (Pmode, RS6000_PIC_OFFSET_TABLE_REGNUM);
25230 if (TARGET_ELF && TARGET_SECURE_PLT && DEFAULT_ABI == ABI_V4 && flag_pic)
25232 char buf[30];
25233 rtx lab, tmp1, tmp2, got;
25235 lab = gen_label_rtx ();
25236 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (lab));
25237 lab = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (buf));
25238 if (flag_pic == 2)
25240 got = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (toc_label_name));
25241 need_toc_init = 1;
25243 else
25244 got = rs6000_got_sym ();
25245 tmp1 = tmp2 = dest;
25246 if (!fromprolog)
25248 tmp1 = gen_reg_rtx (Pmode);
25249 tmp2 = gen_reg_rtx (Pmode);
25251 emit_insn (gen_load_toc_v4_PIC_1 (lab));
25252 emit_move_insn (tmp1, gen_rtx_REG (Pmode, LR_REGNO));
25253 emit_insn (gen_load_toc_v4_PIC_3b (tmp2, tmp1, got, lab));
25254 emit_insn (gen_load_toc_v4_PIC_3c (dest, tmp2, got, lab));
25256 else if (TARGET_ELF && DEFAULT_ABI == ABI_V4 && flag_pic == 1)
25258 emit_insn (gen_load_toc_v4_pic_si ());
25259 emit_move_insn (dest, gen_rtx_REG (Pmode, LR_REGNO));
25261 else if (TARGET_ELF && DEFAULT_ABI == ABI_V4 && flag_pic == 2)
25263 char buf[30];
25264 rtx temp0 = (fromprolog
25265 ? gen_rtx_REG (Pmode, 0)
25266 : gen_reg_rtx (Pmode));
25268 if (fromprolog)
25270 rtx symF, symL;
25272 ASM_GENERATE_INTERNAL_LABEL (buf, "LCF", rs6000_pic_labelno);
25273 symF = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (buf));
25275 ASM_GENERATE_INTERNAL_LABEL (buf, "LCL", rs6000_pic_labelno);
25276 symL = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (buf));
25278 emit_insn (gen_load_toc_v4_PIC_1 (symF));
25279 emit_move_insn (dest, gen_rtx_REG (Pmode, LR_REGNO));
25280 emit_insn (gen_load_toc_v4_PIC_2 (temp0, dest, symL, symF));
25282 else
25284 rtx tocsym, lab;
25286 tocsym = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (toc_label_name));
25287 need_toc_init = 1;
25288 lab = gen_label_rtx ();
25289 emit_insn (gen_load_toc_v4_PIC_1b (tocsym, lab));
25290 emit_move_insn (dest, gen_rtx_REG (Pmode, LR_REGNO));
25291 if (TARGET_LINK_STACK)
25292 emit_insn (gen_addsi3 (dest, dest, GEN_INT (4)));
25293 emit_move_insn (temp0, gen_rtx_MEM (Pmode, dest));
25295 emit_insn (gen_addsi3 (dest, temp0, dest));
25297 else if (TARGET_ELF && !TARGET_AIX && flag_pic == 0 && TARGET_MINIMAL_TOC)
25299 /* This is for AIX code running in non-PIC ELF32. */
25300 rtx realsym = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (toc_label_name));
25302 need_toc_init = 1;
25303 emit_insn (gen_elf_high (dest, realsym));
25304 emit_insn (gen_elf_low (dest, dest, realsym));
25306 else
25308 gcc_assert (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2);
25310 if (TARGET_32BIT)
25311 emit_insn (gen_load_toc_aix_si (dest));
25312 else
25313 emit_insn (gen_load_toc_aix_di (dest));
25317 /* Emit instructions to restore the link register after determining where
25318 its value has been stored. */
25320 void
25321 rs6000_emit_eh_reg_restore (rtx source, rtx scratch)
25323 rs6000_stack_t *info = rs6000_stack_info ();
25324 rtx operands[2];
25326 operands[0] = source;
25327 operands[1] = scratch;
25329 if (info->lr_save_p)
25331 rtx frame_rtx = stack_pointer_rtx;
25332 HOST_WIDE_INT sp_offset = 0;
25333 rtx tmp;
25335 if (frame_pointer_needed
25336 || cfun->calls_alloca
25337 || info->total_size > 32767)
25339 tmp = gen_frame_mem (Pmode, frame_rtx);
25340 emit_move_insn (operands[1], tmp);
25341 frame_rtx = operands[1];
25343 else if (info->push_p)
25344 sp_offset = info->total_size;
25346 tmp = plus_constant (Pmode, frame_rtx,
25347 info->lr_save_offset + sp_offset);
25348 tmp = gen_frame_mem (Pmode, tmp);
25349 emit_move_insn (tmp, operands[0]);
25351 else
25352 emit_move_insn (gen_rtx_REG (Pmode, LR_REGNO), operands[0]);
25354 /* Freeze lr_save_p. We've just emitted rtl that depends on the
25355 state of lr_save_p so any change from here on would be a bug. In
25356 particular, stop rs6000_ra_ever_killed from considering the SET
25357 of lr we may have added just above. */
25358 cfun->machine->lr_save_state = info->lr_save_p + 1;
25361 static GTY(()) alias_set_type set = -1;
25363 alias_set_type
25364 get_TOC_alias_set (void)
25366 if (set == -1)
25367 set = new_alias_set ();
25368 return set;
25371 /* This returns nonzero if the current function uses the TOC. This is
25372 determined by the presence of (use (unspec ... UNSPEC_TOC)), which
25373 is generated by the ABI_V4 load_toc_* patterns. */
25374 #if TARGET_ELF
25375 static int
25376 uses_TOC (void)
25378 rtx_insn *insn;
25380 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
25381 if (INSN_P (insn))
25383 rtx pat = PATTERN (insn);
25384 int i;
25386 if (GET_CODE (pat) == PARALLEL)
25387 for (i = 0; i < XVECLEN (pat, 0); i++)
25389 rtx sub = XVECEXP (pat, 0, i);
25390 if (GET_CODE (sub) == USE)
25392 sub = XEXP (sub, 0);
25393 if (GET_CODE (sub) == UNSPEC
25394 && XINT (sub, 1) == UNSPEC_TOC)
25395 return 1;
25399 return 0;
25401 #endif
25403 rtx
25404 create_TOC_reference (rtx symbol, rtx largetoc_reg)
25406 rtx tocrel, tocreg, hi;
25408 if (TARGET_DEBUG_ADDR)
25410 if (GET_CODE (symbol) == SYMBOL_REF)
25411 fprintf (stderr, "\ncreate_TOC_reference, (symbol_ref %s)\n",
25412 XSTR (symbol, 0));
25413 else
25415 fprintf (stderr, "\ncreate_TOC_reference, code %s:\n",
25416 GET_RTX_NAME (GET_CODE (symbol)));
25417 debug_rtx (symbol);
25421 if (!can_create_pseudo_p ())
25422 df_set_regs_ever_live (TOC_REGISTER, true);
25424 tocreg = gen_rtx_REG (Pmode, TOC_REGISTER);
25425 tocrel = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, symbol, tocreg), UNSPEC_TOCREL);
25426 if (TARGET_CMODEL == CMODEL_SMALL || can_create_pseudo_p ())
25427 return tocrel;
25429 hi = gen_rtx_HIGH (Pmode, copy_rtx (tocrel));
25430 if (largetoc_reg != NULL)
25432 emit_move_insn (largetoc_reg, hi);
25433 hi = largetoc_reg;
25435 return gen_rtx_LO_SUM (Pmode, hi, tocrel);
25438 /* Issue assembly directives that create a reference to the given DWARF
25439 FRAME_TABLE_LABEL from the current function section. */
25440 void
25441 rs6000_aix_asm_output_dwarf_table_ref (char * frame_table_label)
25443 fprintf (asm_out_file, "\t.ref %s\n",
25444 (* targetm.strip_name_encoding) (frame_table_label));
25447 /* This ties together stack memory (MEM with an alias set of frame_alias_set)
25448 and the change to the stack pointer. */
25450 static void
25451 rs6000_emit_stack_tie (rtx fp, bool hard_frame_needed)
25453 rtvec p;
25454 int i;
25455 rtx regs[3];
25457 i = 0;
25458 regs[i++] = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
25459 if (hard_frame_needed)
25460 regs[i++] = gen_rtx_REG (Pmode, HARD_FRAME_POINTER_REGNUM);
25461 if (!(REGNO (fp) == STACK_POINTER_REGNUM
25462 || (hard_frame_needed
25463 && REGNO (fp) == HARD_FRAME_POINTER_REGNUM)))
25464 regs[i++] = fp;
25466 p = rtvec_alloc (i);
25467 while (--i >= 0)
25469 rtx mem = gen_frame_mem (BLKmode, regs[i]);
25470 RTVEC_ELT (p, i) = gen_rtx_SET (mem, const0_rtx);
25473 emit_insn (gen_stack_tie (gen_rtx_PARALLEL (VOIDmode, p)));
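/* For example, with both the stack pointer and a hard frame pointer
   live, the tie emitted above is roughly

     (parallel [(set (mem:BLK (reg 1)) (const_int 0))
                (set (mem:BLK (reg 31)) (const_int 0))])

   i.e. wild BLKmode stores in the frame alias set through each base
   register, which keeps the scheduler and alias analysis from moving
   register saves or restores across the stack pointer update.  */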
25476 /* Emit the correct code for allocating stack space, as insns.
25477 If COPY_REG, leave a copy of the old stack pointer, plus COPY_OFF, in it.
25478 The generated code may use hard register 0 as a temporary. */
25480 static rtx_insn *
25481 rs6000_emit_allocate_stack (HOST_WIDE_INT size, rtx copy_reg, int copy_off)
25483 rtx_insn *insn;
25484 rtx stack_reg = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
25485 rtx tmp_reg = gen_rtx_REG (Pmode, 0);
25486 rtx todec = gen_int_mode (-size, Pmode);
25487 rtx par, set, mem;
25489 if (INTVAL (todec) != -size)
25491 warning (0, "stack frame too large");
25492 emit_insn (gen_trap ());
25493 return 0;
25496 if (crtl->limit_stack)
25498 if (REG_P (stack_limit_rtx)
25499 && REGNO (stack_limit_rtx) > 1
25500 && REGNO (stack_limit_rtx) <= 31)
25502 rtx_insn *insn
25503 = gen_add3_insn (tmp_reg, stack_limit_rtx, GEN_INT (size));
25504 gcc_assert (insn);
25505 emit_insn (insn);
25506 emit_insn (gen_cond_trap (LTU, stack_reg, tmp_reg, const0_rtx));
25508 else if (GET_CODE (stack_limit_rtx) == SYMBOL_REF
25509 && TARGET_32BIT
25510 && DEFAULT_ABI == ABI_V4
25511 && !flag_pic)
25513 rtx toload = gen_rtx_CONST (VOIDmode,
25514 gen_rtx_PLUS (Pmode,
25515 stack_limit_rtx,
25516 GEN_INT (size)));
25518 emit_insn (gen_elf_high (tmp_reg, toload));
25519 emit_insn (gen_elf_low (tmp_reg, tmp_reg, toload));
25520 emit_insn (gen_cond_trap (LTU, stack_reg, tmp_reg,
25521 const0_rtx));
25523 else
25524 warning (0, "stack limit expression is not supported");
25527 if (copy_reg)
25529 if (copy_off != 0)
25530 emit_insn (gen_add3_insn (copy_reg, stack_reg, GEN_INT (copy_off)));
25531 else
25532 emit_move_insn (copy_reg, stack_reg);
25535 if (size > 32767)
25537 /* Need a note here so that try_split doesn't get confused. */
25538 if (get_last_insn () == NULL_RTX)
25539 emit_note (NOTE_INSN_DELETED);
25540 insn = emit_move_insn (tmp_reg, todec);
25541 try_split (PATTERN (insn), insn, 0);
25542 todec = tmp_reg;
25545 insn = emit_insn (TARGET_32BIT
25546 ? gen_movsi_update_stack (stack_reg, stack_reg,
25547 todec, stack_reg)
25548 : gen_movdi_di_update_stack (stack_reg, stack_reg,
25549 todec, stack_reg));
25550 /* Since we didn't use gen_frame_mem to generate the MEM, grab
25551 it now and set the alias set/attributes. The above gen_*_update
25552 calls will generate a PARALLEL with the MEM set being the first
25553 operation. */
25554 par = PATTERN (insn);
25555 gcc_assert (GET_CODE (par) == PARALLEL);
25556 set = XVECEXP (par, 0, 0);
25557 gcc_assert (GET_CODE (set) == SET);
25558 mem = SET_DEST (set);
25559 gcc_assert (MEM_P (mem));
25560 MEM_NOTRAP_P (mem) = 1;
25561 set_mem_alias_set (mem, get_frame_alias_set ());
25563 RTX_FRAME_RELATED_P (insn) = 1;
25564 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
25565 gen_rtx_SET (stack_reg, gen_rtx_PLUS (Pmode, stack_reg,
25566 GEN_INT (-size))));
25567 return insn;
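/* As a concrete example, allocating a 64-byte frame on 32-bit typically
   produces a single "stwu 1,-64(1)", while a frame larger than 32767
   bytes first loads the negated size into r0 and then emits something
   like "stwux 1,1,0"; either way the back chain store and the stack
   pointer update happen in one instruction, as the ABI requires.  */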
25570 #define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP)
25572 #if PROBE_INTERVAL > 32768
25573 #error Cannot use indexed addressing mode for stack probing
25574 #endif
25576 /* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE,
25577 inclusive. These are offsets from the current stack pointer. */
25579 static void
25580 rs6000_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size)
25582 /* See if we have a constant small number of probes to generate. If so,
25583 that's the easy case. */
25584 if (first + size <= 32768)
25586 HOST_WIDE_INT i;
25588 /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 1 until
25589 N * PROBE_INTERVAL exceeds SIZE. If only one probe is needed, the
25590 loop generates no code. Then probe at FIRST + SIZE. */
25591 for (i = PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
25592 emit_stack_probe (plus_constant (Pmode, stack_pointer_rtx,
25593 -(first + i)));
25595 emit_stack_probe (plus_constant (Pmode, stack_pointer_rtx,
25596 -(first + size)));
25599 /* Otherwise, do the same as above, but in a loop. Note that we must be
25600 extra careful with variables wrapping around because we might be at
25601 the very top (or the very bottom) of the address space and we have
25602 to be able to handle this case properly; in particular, we use an
25603 equality test for the loop condition. */
25604 else
25606 HOST_WIDE_INT rounded_size;
25607 rtx r12 = gen_rtx_REG (Pmode, 12);
25608 rtx r0 = gen_rtx_REG (Pmode, 0);
25610 /* Sanity check for the addressing mode we're going to use. */
25611 gcc_assert (first <= 32768);
25613 /* Step 1: round SIZE to the previous multiple of the interval. */
25615 rounded_size = ROUND_DOWN (size, PROBE_INTERVAL);
25618 /* Step 2: compute initial and final value of the loop counter. */
25620 /* TEST_ADDR = SP + FIRST. */
25621 emit_insn (gen_rtx_SET (r12, plus_constant (Pmode, stack_pointer_rtx,
25622 -first)));
25624 /* LAST_ADDR = SP + FIRST + ROUNDED_SIZE. */
25625 if (rounded_size > 32768)
25627 emit_move_insn (r0, GEN_INT (-rounded_size));
25628 emit_insn (gen_rtx_SET (r0, gen_rtx_PLUS (Pmode, r12, r0)));
25630 else
25631 emit_insn (gen_rtx_SET (r0, plus_constant (Pmode, r12,
25632 -rounded_size)));
25635 /* Step 3: the loop
25637 do
25638 {
25639 TEST_ADDR = TEST_ADDR + PROBE_INTERVAL
25640 probe at TEST_ADDR
25641 }
25642 while (TEST_ADDR != LAST_ADDR)
25644 probes at FIRST + N * PROBE_INTERVAL for values of N from 1
25645 until it is equal to ROUNDED_SIZE. */
25647 if (TARGET_64BIT)
25648 emit_insn (gen_probe_stack_rangedi (r12, r12, r0));
25649 else
25650 emit_insn (gen_probe_stack_rangesi (r12, r12, r0));
25653 /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
25654 that SIZE is equal to ROUNDED_SIZE. */
25656 if (size != rounded_size)
25657 emit_stack_probe (plus_constant (Pmode, r12, rounded_size - size));
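/* As a worked example, assuming PROBE_INTERVAL is 4096: with
   FIRST == 16384 and SIZE == 10000 we take the small case above and
   probe at offsets -20480, -24576 and finally -26384 from the
   incoming stack pointer.  */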
25661 /* Probe a range of stack addresses from REG1 to REG2 inclusive. These are
25662 absolute addresses. */
25664 const char *
25665 output_probe_stack_range (rtx reg1, rtx reg2)
25667 static int labelno = 0;
25668 char loop_lab[32];
25669 rtx xops[2];
25671 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno++);
25673 /* Loop. */
25674 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
25676 /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL. */
25677 xops[0] = reg1;
25678 xops[1] = GEN_INT (-PROBE_INTERVAL);
25679 output_asm_insn ("addi %0,%0,%1", xops);
25681 /* Probe at TEST_ADDR. */
25682 xops[1] = gen_rtx_REG (Pmode, 0);
25683 output_asm_insn ("stw %1,0(%0)", xops);
25685 /* Test if TEST_ADDR == LAST_ADDR. */
25686 xops[1] = reg2;
25687 if (TARGET_64BIT)
25688 output_asm_insn ("cmpd 0,%0,%1", xops);
25689 else
25690 output_asm_insn ("cmpw 0,%0,%1", xops);
25692 /* Branch. */
25693 fputs ("\tbne 0,", asm_out_file);
25694 assemble_name_raw (asm_out_file, loop_lab);
25695 fputc ('\n', asm_out_file);
25697 return "";
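/* On 32-bit ELF the loop above typically assembles to something like

	.LPSRL0:
		addi 12,12,-4096
		stw 0,0(12)
		cmpw 0,12,0
		bne 0,.LPSRL0

   assuming PROBE_INTERVAL is 4096 and reg1/reg2 are r12 and r0 as set
   up by rs6000_emit_probe_stack_range.  */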
25700 /* Add to 'insn' a note which is PATTERN (INSN) but with REG replaced
25701 with (plus:P (reg 1) VAL), and with REG2 replaced with REPL2 if REG2
25702 is not NULL. It would be nice if dwarf2out_frame_debug_expr could
25703 deduce these equivalences by itself so it wasn't necessary to hold
25704 its hand so much. Don't be tempted to always supply d2_f_d_e with
25705 the actual CFA register, i.e. r31 when we are using a hard frame
25706 pointer. That fails when saving regs off r1, and sched moves the
25707 r31 setup past the reg saves. */
25709 static rtx_insn *
25710 rs6000_frame_related (rtx_insn *insn, rtx reg, HOST_WIDE_INT val,
25711 rtx reg2, rtx repl2)
25713 rtx repl;
25715 if (REGNO (reg) == STACK_POINTER_REGNUM)
25717 gcc_checking_assert (val == 0);
25718 repl = NULL_RTX;
25720 else
25721 repl = gen_rtx_PLUS (Pmode, gen_rtx_REG (Pmode, STACK_POINTER_REGNUM),
25722 GEN_INT (val));
25724 rtx pat = PATTERN (insn);
25725 if (!repl && !reg2)
25727 /* No need for any replacement. Just set RTX_FRAME_RELATED_P. */
25728 if (GET_CODE (pat) == PARALLEL)
25729 for (int i = 0; i < XVECLEN (pat, 0); i++)
25730 if (GET_CODE (XVECEXP (pat, 0, i)) == SET)
25732 rtx set = XVECEXP (pat, 0, i);
25734 /* If this PARALLEL has been emitted for out-of-line
25735 register save functions, or store multiple, then omit
25736 eh_frame info for any user-defined global regs. If
25737 eh_frame info is supplied, frame unwinding will
25738 restore a user reg. */
25739 if (!REG_P (SET_SRC (set))
25740 || !fixed_reg_p (REGNO (SET_SRC (set))))
25741 RTX_FRAME_RELATED_P (set) = 1;
25743 RTX_FRAME_RELATED_P (insn) = 1;
25744 return insn;
25747 /* We expect that 'pat' is either a SET or a PARALLEL containing
25748 SETs (and possibly other stuff). In a PARALLEL, all the SETs
25749 are important so they all have to be marked RTX_FRAME_RELATED_P.
25750 Call simplify_replace_rtx on the SETs rather than the whole insn
25751 so as to leave the other stuff alone (for example USE of r12). */
25753 set_used_flags (pat);
25754 if (GET_CODE (pat) == SET)
25756 if (repl)
25757 pat = simplify_replace_rtx (pat, reg, repl);
25758 if (reg2)
25759 pat = simplify_replace_rtx (pat, reg2, repl2);
25761 else if (GET_CODE (pat) == PARALLEL)
25763 pat = shallow_copy_rtx (pat);
25764 XVEC (pat, 0) = shallow_copy_rtvec (XVEC (pat, 0));
25766 for (int i = 0; i < XVECLEN (pat, 0); i++)
25767 if (GET_CODE (XVECEXP (pat, 0, i)) == SET)
25769 rtx set = XVECEXP (pat, 0, i);
25771 if (repl)
25772 set = simplify_replace_rtx (set, reg, repl);
25773 if (reg2)
25774 set = simplify_replace_rtx (set, reg2, repl2);
25775 XVECEXP (pat, 0, i) = set;
25777 /* Omit eh_frame info for any user-defined global regs. */
25778 if (!REG_P (SET_SRC (set))
25779 || !fixed_reg_p (REGNO (SET_SRC (set))))
25780 RTX_FRAME_RELATED_P (set) = 1;
25783 else
25784 gcc_unreachable ();
25786 RTX_FRAME_RELATED_P (insn) = 1;
25787 add_reg_note (insn, REG_FRAME_RELATED_EXPR, copy_rtx_if_shared (pat));
25789 return insn;
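/* For example, if INSN stores a register at (mem (plus (reg 11) 8))
   and we are passed REG == r11 with VAL == 16, the note attached above
   describes the store as (set (mem (plus (reg 1) 24)) (reg ...)),
   i.e. purely in terms of the stack pointer, which is the form the
   dwarf2 output code can consume.  */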
25792 /* Returns an insn that has a vrsave set operation with the
25793 appropriate CLOBBERs. */
25795 static rtx
25796 generate_set_vrsave (rtx reg, rs6000_stack_t *info, int epiloguep)
25798 int nclobs, i;
25799 rtx insn, clobs[TOTAL_ALTIVEC_REGS + 1];
25800 rtx vrsave = gen_rtx_REG (SImode, VRSAVE_REGNO);
25802 clobs[0]
25803 = gen_rtx_SET (vrsave,
25804 gen_rtx_UNSPEC_VOLATILE (SImode,
25805 gen_rtvec (2, reg, vrsave),
25806 UNSPECV_SET_VRSAVE));
25808 nclobs = 1;
25810 /* We need to clobber the registers in the mask so the scheduler
25811 does not move sets to VRSAVE before sets of AltiVec registers.
25813 However, if the function receives nonlocal gotos, reload will set
25814 all call saved registers live. We will end up with:
25816 (set (reg 999) (mem))
25817 (parallel [ (set (reg vrsave) (unspec blah))
25818 (clobber (reg 999))])
25820 The clobber will cause the store into reg 999 to be dead, and
25821 flow will attempt to delete an epilogue insn. In this case, we
25822 need an unspec use/set of the register. */
25824 for (i = FIRST_ALTIVEC_REGNO; i <= LAST_ALTIVEC_REGNO; ++i)
25825 if (info->vrsave_mask & ALTIVEC_REG_BIT (i))
25827 if (!epiloguep || call_used_regs [i])
25828 clobs[nclobs++] = gen_rtx_CLOBBER (VOIDmode,
25829 gen_rtx_REG (V4SImode, i));
25830 else
25832 rtx reg = gen_rtx_REG (V4SImode, i);
25834 clobs[nclobs++]
25835 = gen_rtx_SET (reg,
25836 gen_rtx_UNSPEC (V4SImode,
25837 gen_rtvec (1, reg), 27));
25841 insn = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (nclobs));
25843 for (i = 0; i < nclobs; ++i)
25844 XVECEXP (insn, 0, i) = clobs[i];
25846 return insn;
25849 static rtx
25850 gen_frame_set (rtx reg, rtx frame_reg, int offset, bool store)
25852 rtx addr, mem;
25854 addr = gen_rtx_PLUS (Pmode, frame_reg, GEN_INT (offset));
25855 mem = gen_frame_mem (GET_MODE (reg), addr);
25856 return gen_rtx_SET (store ? mem : reg, store ? reg : mem);
25859 static rtx
25860 gen_frame_load (rtx reg, rtx frame_reg, int offset)
25862 return gen_frame_set (reg, frame_reg, offset, false);
25865 static rtx
25866 gen_frame_store (rtx reg, rtx frame_reg, int offset)
25868 return gen_frame_set (reg, frame_reg, offset, true);
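/* So, for instance, gen_frame_store (gen_rtx_REG (Pmode, 0),
   sp_reg_rtx, 16) yields (set (mem (plus (reg 1) 16)) (reg 0)) with
   the frame alias set, and gen_frame_load simply swaps the two
   operands.  */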
25871 /* Save a register into the frame, and emit RTX_FRAME_RELATED_P notes.
25872 Save REGNO into [FRAME_REG + OFFSET] in mode MODE. */
25874 static rtx_insn *
25875 emit_frame_save (rtx frame_reg, machine_mode mode,
25876 unsigned int regno, int offset, HOST_WIDE_INT frame_reg_to_sp)
25878 rtx reg;
25880 /* Some cases that need register indexed addressing. */
25881 gcc_checking_assert (!(TARGET_ALTIVEC_ABI && ALTIVEC_VECTOR_MODE (mode))
25882 || (TARGET_VSX && ALTIVEC_OR_VSX_VECTOR_MODE (mode)));
25884 reg = gen_rtx_REG (mode, regno);
25885 rtx_insn *insn = emit_insn (gen_frame_store (reg, frame_reg, offset));
25886 return rs6000_frame_related (insn, frame_reg, frame_reg_to_sp,
25887 NULL_RTX, NULL_RTX);
25890 /* Emit an offset memory reference suitable for a frame store, while
25891 converting to a valid addressing mode. */
25893 static rtx
25894 gen_frame_mem_offset (machine_mode mode, rtx reg, int offset)
25896 return gen_frame_mem (mode, gen_rtx_PLUS (Pmode, reg, GEN_INT (offset)));
25899 #ifndef TARGET_FIX_AND_CONTINUE
25900 #define TARGET_FIX_AND_CONTINUE 0
25901 #endif
25903 /* The first register handled by the out-of-line routines is GPR 13 or 14, FPR 14, or VR 20; FIRST_SAVRES_REGISTER must be the smallest of these. */
25904 #define FIRST_SAVRES_REGISTER FIRST_SAVED_GP_REGNO
25905 #define LAST_SAVRES_REGISTER 31
25906 #define N_SAVRES_REGISTERS (LAST_SAVRES_REGISTER - FIRST_SAVRES_REGISTER + 1)
25908 enum {
25909 SAVRES_LR = 0x1,
25910 SAVRES_SAVE = 0x2,
25911 SAVRES_REG = 0x0c,
25912 SAVRES_GPR = 0,
25913 SAVRES_FPR = 4,
25914 SAVRES_VR = 8
25917 static GTY(()) rtx savres_routine_syms[N_SAVRES_REGISTERS][12];
25919 /* Temporary holding space for an out-of-line register save/restore
25920 routine name. */
25921 static char savres_routine_name[30];
25923 /* Return the name for an out-of-line register save/restore routine.
25924 The SAVRES_* bits in SEL select the register class, and whether we save or restore. */
25926 static char *
25927 rs6000_savres_routine_name (int regno, int sel)
25929 const char *prefix = "";
25930 const char *suffix = "";
25932 /* Different targets are supposed to define
25933 {SAVE,RESTORE}_FP_{PREFIX,SUFFIX} with the idea that the needed
25934 routine name could be defined with:
25936 sprintf (name, "%s%d%s", SAVE_FP_PREFIX, regno, SAVE_FP_SUFFIX)
25938 This is a nice idea in theory, but in reality things are
25939 complicated in several ways:
25941 - ELF targets have save/restore routines for GPRs.
25943 - PPC64 ELF targets have routines for save/restore of GPRs that
25944 differ in what they do with the link register, so having a set
25945 prefix doesn't work. (We only use one of the save routines at
25946 the moment, though.)
25948 - PPC32 elf targets have "exit" versions of the restore routines
25949 that restore the link register and can save some extra space.
25950 These require an extra suffix. (There are also "tail" versions
25951 of the restore routines and "GOT" versions of the save routines,
25952 but we don't generate those at present. Same problems apply,
25953 though.)
25955 We deal with all this by synthesizing our own prefix/suffix and
25956 using that for the simple sprintf call shown above. */
25957 if (DEFAULT_ABI == ABI_V4)
25959 if (TARGET_64BIT)
25960 goto aix_names;
25962 if ((sel & SAVRES_REG) == SAVRES_GPR)
25963 prefix = (sel & SAVRES_SAVE) ? "_savegpr_" : "_restgpr_";
25964 else if ((sel & SAVRES_REG) == SAVRES_FPR)
25965 prefix = (sel & SAVRES_SAVE) ? "_savefpr_" : "_restfpr_";
25966 else if ((sel & SAVRES_REG) == SAVRES_VR)
25967 prefix = (sel & SAVRES_SAVE) ? "_savevr_" : "_restvr_";
25968 else
25969 abort ();
25971 if ((sel & SAVRES_LR))
25972 suffix = "_x";
25974 else if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
25976 #if !defined (POWERPC_LINUX) && !defined (POWERPC_FREEBSD)
25977 /* No out-of-line save/restore routines for GPRs on AIX. */
25978 gcc_assert (!TARGET_AIX || (sel & SAVRES_REG) != SAVRES_GPR);
25979 #endif
25981 aix_names:
25982 if ((sel & SAVRES_REG) == SAVRES_GPR)
25983 prefix = ((sel & SAVRES_SAVE)
25984 ? ((sel & SAVRES_LR) ? "_savegpr0_" : "_savegpr1_")
25985 : ((sel & SAVRES_LR) ? "_restgpr0_" : "_restgpr1_"));
25986 else if ((sel & SAVRES_REG) == SAVRES_FPR)
25988 #if defined (POWERPC_LINUX) || defined (POWERPC_FREEBSD)
25989 if ((sel & SAVRES_LR))
25990 prefix = ((sel & SAVRES_SAVE) ? "_savefpr_" : "_restfpr_");
25991 else
25992 #endif
25994 prefix = (sel & SAVRES_SAVE) ? SAVE_FP_PREFIX : RESTORE_FP_PREFIX;
25995 suffix = (sel & SAVRES_SAVE) ? SAVE_FP_SUFFIX : RESTORE_FP_SUFFIX;
25998 else if ((sel & SAVRES_REG) == SAVRES_VR)
25999 prefix = (sel & SAVRES_SAVE) ? "_savevr_" : "_restvr_";
26000 else
26001 abort ();
26004 if (DEFAULT_ABI == ABI_DARWIN)
26006 /* The Darwin approach is (slightly) different, in order to be
26007 compatible with code generated by the system toolchain. There is a
26008 single symbol for the start of save sequence, and the code here
26009 embeds an offset into that code on the basis of the first register
26010 to be saved. */
26011 prefix = (sel & SAVRES_SAVE) ? "save" : "rest" ;
26012 if ((sel & SAVRES_REG) == SAVRES_GPR)
26013 sprintf (savres_routine_name, "*%sGPR%s%s%.0d ; %s r%d-r31", prefix,
26014 ((sel & SAVRES_LR) ? "x" : ""), (regno == 13 ? "" : "+"),
26015 (regno - 13) * 4, prefix, regno);
26016 else if ((sel & SAVRES_REG) == SAVRES_FPR)
26017 sprintf (savres_routine_name, "*%sFP%s%.0d ; %s f%d-f31", prefix,
26018 (regno == 14 ? "" : "+"), (regno - 14) * 4, prefix, regno);
26019 else if ((sel & SAVRES_REG) == SAVRES_VR)
26020 sprintf (savres_routine_name, "*%sVEC%s%.0d ; %s v%d-v31", prefix,
26021 (regno == 20 ? "" : "+"), (regno - 20) * 8, prefix, regno);
26022 else
26023 abort ();
26025 else
26026 sprintf (savres_routine_name, "%s%d%s", prefix, regno, suffix);
26028 return savres_routine_name;
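/* For instance, on 32-bit SVR4, regno 29 with
   SEL == (SAVRES_SAVE | SAVRES_GPR) gives "_savegpr_29", and adding
   SAVRES_LR gives "_savegpr_29_x"; the same two requests under the
   AIX or ELFv2 ABIs give "_savegpr1_29" and "_savegpr0_29"
   respectively.  */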
26031 /* Return an RTL SYMBOL_REF for an out-of-line register save/restore routine.
26032 The SAVRES_* bits in SEL select the register class and operation. */
26034 static rtx
26035 rs6000_savres_routine_sym (rs6000_stack_t *info, int sel)
26037 int regno = ((sel & SAVRES_REG) == SAVRES_GPR
26038 ? info->first_gp_reg_save
26039 : (sel & SAVRES_REG) == SAVRES_FPR
26040 ? info->first_fp_reg_save - 32
26041 : (sel & SAVRES_REG) == SAVRES_VR
26042 ? info->first_altivec_reg_save - FIRST_ALTIVEC_REGNO
26043 : -1);
26044 rtx sym;
26045 int select = sel;
26047 /* Don't generate bogus routine names. */
26048 gcc_assert (FIRST_SAVRES_REGISTER <= regno
26049 && regno <= LAST_SAVRES_REGISTER
26050 && select >= 0 && select <= 12);
26052 sym = savres_routine_syms[regno-FIRST_SAVRES_REGISTER][select];
26054 if (sym == NULL)
26056 char *name;
26058 name = rs6000_savres_routine_name (regno, sel);
26060 sym = savres_routine_syms[regno-FIRST_SAVRES_REGISTER][select]
26061 = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
26062 SYMBOL_REF_FLAGS (sym) |= SYMBOL_FLAG_FUNCTION;
26065 return sym;
26068 /* Emit a sequence of insns, including a stack tie if needed, for
26069 resetting the stack pointer. If UPDT_REGNO is not 1, then don't
26070 reset the stack pointer, but move the base of the frame into
26071 reg UPDT_REGNO for use by out-of-line register restore routines. */
26073 static rtx
26074 rs6000_emit_stack_reset (rtx frame_reg_rtx, HOST_WIDE_INT frame_off,
26075 unsigned updt_regno)
26077 /* If there is nothing to do, don't do anything. */
26078 if (frame_off == 0 && REGNO (frame_reg_rtx) == updt_regno)
26079 return NULL_RTX;
26081 rtx updt_reg_rtx = gen_rtx_REG (Pmode, updt_regno);
26083 /* This blockage is needed so that sched doesn't decide to move
26084 the sp change before the register restores. */
26085 if (DEFAULT_ABI == ABI_V4)
26086 return emit_insn (gen_stack_restore_tie (updt_reg_rtx, frame_reg_rtx,
26087 GEN_INT (frame_off)));
26089 /* If we are restoring registers out-of-line, we will be using the
26090 "exit" variants of the restore routines, which will reset the
26091 stack for us. But we do need to point updt_reg into the
26092 right place for those routines. */
26093 if (frame_off != 0)
26094 return emit_insn (gen_add3_insn (updt_reg_rtx,
26095 frame_reg_rtx, GEN_INT (frame_off)));
26096 else
26097 return emit_move_insn (updt_reg_rtx, frame_reg_rtx);
26099 return NULL_RTX;
26102 /* Return the register number used as a pointer by out-of-line
26103 save/restore functions. */
26105 static inline unsigned
26106 ptr_regno_for_savres (int sel)
26108 if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
26109 return (sel & SAVRES_REG) == SAVRES_FPR || (sel & SAVRES_LR) ? 1 : 12;
26110 return DEFAULT_ABI == ABI_DARWIN && (sel & SAVRES_REG) == SAVRES_FPR ? 1 : 11;
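/* E.g. under AIX/ELFv2, out-of-line GPR saves that do not touch LR
   use r12 as the frame base, while FPR routines and any LR ("_x")
   variant use r1; elsewhere the pointer is r11, except for Darwin
   FPR routines, which also use r1.  */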
26113 /* Construct a parallel rtx describing the effect of a call to an
26114 out-of-line register save/restore routine, and emit the insn
26115 or jump_insn as appropriate. */
26117 static rtx_insn *
26118 rs6000_emit_savres_rtx (rs6000_stack_t *info,
26119 rtx frame_reg_rtx, int save_area_offset, int lr_offset,
26120 machine_mode reg_mode, int sel)
26122 int i;
26123 int offset, start_reg, end_reg, n_regs, use_reg;
26124 int reg_size = GET_MODE_SIZE (reg_mode);
26125 rtx sym;
26126 rtvec p;
26127 rtx par;
26128 rtx_insn *insn;
26130 offset = 0;
26131 start_reg = ((sel & SAVRES_REG) == SAVRES_GPR
26132 ? info->first_gp_reg_save
26133 : (sel & SAVRES_REG) == SAVRES_FPR
26134 ? info->first_fp_reg_save
26135 : (sel & SAVRES_REG) == SAVRES_VR
26136 ? info->first_altivec_reg_save
26137 : -1);
26138 end_reg = ((sel & SAVRES_REG) == SAVRES_GPR
26139 ? 32
26140 : (sel & SAVRES_REG) == SAVRES_FPR
26141 ? 64
26142 : (sel & SAVRES_REG) == SAVRES_VR
26143 ? LAST_ALTIVEC_REGNO + 1
26144 : -1);
26145 n_regs = end_reg - start_reg;
26146 p = rtvec_alloc (3 + ((sel & SAVRES_LR) ? 1 : 0)
26147 + ((sel & SAVRES_REG) == SAVRES_VR ? 1 : 0)
26148 + n_regs);
26150 if (!(sel & SAVRES_SAVE) && (sel & SAVRES_LR))
26151 RTVEC_ELT (p, offset++) = ret_rtx;
26153 RTVEC_ELT (p, offset++)
26154 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (Pmode, LR_REGNO));
26156 sym = rs6000_savres_routine_sym (info, sel);
26157 RTVEC_ELT (p, offset++) = gen_rtx_USE (VOIDmode, sym);
26159 use_reg = ptr_regno_for_savres (sel);
26160 if ((sel & SAVRES_REG) == SAVRES_VR)
26162 /* Vector regs are saved/restored using [reg+reg] addressing. */
26163 RTVEC_ELT (p, offset++)
26164 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (Pmode, use_reg));
26165 RTVEC_ELT (p, offset++)
26166 = gen_rtx_USE (VOIDmode, gen_rtx_REG (Pmode, 0));
26168 else
26169 RTVEC_ELT (p, offset++)
26170 = gen_rtx_USE (VOIDmode, gen_rtx_REG (Pmode, use_reg));
26172 for (i = 0; i < end_reg - start_reg; i++)
26173 RTVEC_ELT (p, i + offset)
26174 = gen_frame_set (gen_rtx_REG (reg_mode, start_reg + i),
26175 frame_reg_rtx, save_area_offset + reg_size * i,
26176 (sel & SAVRES_SAVE) != 0);
26178 if ((sel & SAVRES_SAVE) && (sel & SAVRES_LR))
26179 RTVEC_ELT (p, i + offset)
26180 = gen_frame_store (gen_rtx_REG (Pmode, 0), frame_reg_rtx, lr_offset);
26182 par = gen_rtx_PARALLEL (VOIDmode, p);
26184 if (!(sel & SAVRES_SAVE) && (sel & SAVRES_LR))
26186 insn = emit_jump_insn (par);
26187 JUMP_LABEL (insn) = ret_rtx;
26189 else
26190 insn = emit_insn (par);
26191 return insn;
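/* The PARALLEL built above for, say, an ELFv2 GPR save via
   _savegpr1_29 has roughly the shape

     (parallel [(clobber (reg LR))
		(use (symbol_ref "_savegpr1_29"))
		(use (reg 12))
		(set (mem (plus (reg 12) ...)) (reg 29))
		...
		(set (mem (plus (reg 12) ...)) (reg 31))])

   which is intended to match a call pattern in rs6000.md and emit a
   single bl to the out-of-line routine.  */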
26194 /* Emit code to store CR fields that need to be saved into REG. */
26196 static void
26197 rs6000_emit_move_from_cr (rtx reg)
26199 /* Only the ELFv2 ABI allows storing only selected fields. */
26200 if (DEFAULT_ABI == ABI_ELFv2 && TARGET_MFCRF)
26202 int i, cr_reg[8], count = 0;
26204 /* Collect CR fields that must be saved. */
26205 for (i = 0; i < 8; i++)
26206 if (save_reg_p (CR0_REGNO + i))
26207 cr_reg[count++] = i;
26209 /* If it's just a single one, use mfcrf. */
26210 if (count == 1)
26212 rtvec p = rtvec_alloc (1);
26213 rtvec r = rtvec_alloc (2);
26214 RTVEC_ELT (r, 0) = gen_rtx_REG (CCmode, CR0_REGNO + cr_reg[0]);
26215 RTVEC_ELT (r, 1) = GEN_INT (1 << (7 - cr_reg[0]));
26216 RTVEC_ELT (p, 0)
26217 = gen_rtx_SET (reg,
26218 gen_rtx_UNSPEC (SImode, r, UNSPEC_MOVESI_FROM_CR));
26220 emit_insn (gen_rtx_PARALLEL (VOIDmode, p));
26221 return;
26224 /* ??? It might be better to handle the count == 2 or 3 cases here
26225 as well, using logical operations to combine the values. */
26228 emit_insn (gen_movesi_from_cr (reg));
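/* For example, when only CR2 needs saving the mask computed above is
   1 << (7 - 2) == 0x20, which is exactly the FXM field of
   "mfcrf Rx,0x20"; with several live fields we fall back to a full
   mfcr via gen_movesi_from_cr.  */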
26231 /* Return whether the split-stack arg pointer (r12) is used. */
26233 static bool
26234 split_stack_arg_pointer_used_p (void)
26236 /* If the pseudo holding the arg pointer is no longer a pseudo,
26237 then the arg pointer is used. */
26238 if (cfun->machine->split_stack_arg_pointer != NULL_RTX
26239 && (!REG_P (cfun->machine->split_stack_arg_pointer)
26240 || (REGNO (cfun->machine->split_stack_arg_pointer)
26241 < FIRST_PSEUDO_REGISTER)))
26242 return true;
26244 /* Unfortunately we also need to do some code scanning, since
26245 r12 may have been substituted for the pseudo. */
26246 rtx_insn *insn;
26247 basic_block bb = ENTRY_BLOCK_PTR_FOR_FN (cfun)->next_bb;
26248 FOR_BB_INSNS (bb, insn)
26249 if (NONDEBUG_INSN_P (insn))
26251 /* A call destroys r12. */
26252 if (CALL_P (insn))
26253 return false;
26255 df_ref use;
26256 FOR_EACH_INSN_USE (use, insn)
26258 rtx x = DF_REF_REG (use);
26259 if (REG_P (x) && REGNO (x) == 12)
26260 return true;
26262 df_ref def;
26263 FOR_EACH_INSN_DEF (def, insn)
26265 rtx x = DF_REF_REG (def);
26266 if (REG_P (x) && REGNO (x) == 12)
26267 return false;
26270 return bitmap_bit_p (DF_LR_OUT (bb), 12);
26273 /* Return whether we need to emit an ELFv2 global entry point prologue. */
26275 static bool
26276 rs6000_global_entry_point_needed_p (void)
26278 /* Only needed for the ELFv2 ABI. */
26279 if (DEFAULT_ABI != ABI_ELFv2)
26280 return false;
26282 /* With -msingle-pic-base, we assume the whole program shares the same
26283 TOC, so no global entry point prologues are needed anywhere. */
26284 if (TARGET_SINGLE_PIC_BASE)
26285 return false;
26287 /* Ensure we have a global entry point for thunks. ??? We could
26288 avoid that if the target routine doesn't need a global entry point,
26289 but we do not know whether this is the case at this point. */
26290 if (cfun->is_thunk)
26291 return true;
26293 /* For regular functions, rs6000_emit_prologue sets this flag if the
26294 routine ever uses the TOC pointer. */
26295 return cfun->machine->r2_setup_needed;
26298 /* Implement TARGET_SHRINK_WRAP_GET_SEPARATE_COMPONENTS. */
26299 static sbitmap
26300 rs6000_get_separate_components (void)
26302 rs6000_stack_t *info = rs6000_stack_info ();
26304 if (WORLD_SAVE_P (info))
26305 return NULL;
26307 gcc_assert (!(info->savres_strategy & SAVE_MULTIPLE)
26308 && !(info->savres_strategy & REST_MULTIPLE));
26310 /* Component 0 is the save/restore of LR (done via GPR0).
26311 Components 13..31 are the save/restore of GPR13..GPR31.
26312 Components 46..63 are the save/restore of FPR14..FPR31. */
26314 cfun->machine->n_components = 64;
26316 sbitmap components = sbitmap_alloc (cfun->machine->n_components);
26317 bitmap_clear (components);
26319 int reg_size = TARGET_32BIT ? 4 : 8;
26320 int fp_reg_size = 8;
26322 /* The GPRs we need saved to the frame. */
26323 if ((info->savres_strategy & SAVE_INLINE_GPRS)
26324 && (info->savres_strategy & REST_INLINE_GPRS))
26326 int offset = info->gp_save_offset;
26327 if (info->push_p)
26328 offset += info->total_size;
26330 for (unsigned regno = info->first_gp_reg_save; regno < 32; regno++)
26332 if (IN_RANGE (offset, -0x8000, 0x7fff)
26333 && rs6000_reg_live_or_pic_offset_p (regno))
26334 bitmap_set_bit (components, regno);
26336 offset += reg_size;
26340 /* Don't mess with the hard frame pointer. */
26341 if (frame_pointer_needed)
26342 bitmap_clear_bit (components, HARD_FRAME_POINTER_REGNUM);
26344 /* Don't mess with the fixed TOC register. */
26345 if ((TARGET_TOC && TARGET_MINIMAL_TOC)
26346 || (flag_pic == 1 && DEFAULT_ABI == ABI_V4)
26347 || (flag_pic && DEFAULT_ABI == ABI_DARWIN))
26348 bitmap_clear_bit (components, RS6000_PIC_OFFSET_TABLE_REGNUM);
26350 /* The FPRs we need saved to the frame. */
26351 if ((info->savres_strategy & SAVE_INLINE_FPRS)
26352 && (info->savres_strategy & REST_INLINE_FPRS))
26354 int offset = info->fp_save_offset;
26355 if (info->push_p)
26356 offset += info->total_size;
26358 for (unsigned regno = info->first_fp_reg_save; regno < 64; regno++)
26360 if (IN_RANGE (offset, -0x8000, 0x7fff) && save_reg_p (regno))
26361 bitmap_set_bit (components, regno);
26363 offset += fp_reg_size;
26367 /* Optimize LR save and restore if we can. This is component 0. Any
26368 out-of-line register save/restore routines need LR. */
26369 if (info->lr_save_p
26370 && !(flag_pic && (DEFAULT_ABI == ABI_V4 || DEFAULT_ABI == ABI_DARWIN))
26371 && (info->savres_strategy & SAVE_INLINE_GPRS)
26372 && (info->savres_strategy & REST_INLINE_GPRS)
26373 && (info->savres_strategy & SAVE_INLINE_FPRS)
26374 && (info->savres_strategy & REST_INLINE_FPRS)
26375 && (info->savres_strategy & SAVE_INLINE_VRS)
26376 && (info->savres_strategy & REST_INLINE_VRS))
26378 int offset = info->lr_save_offset;
26379 if (info->push_p)
26380 offset += info->total_size;
26381 if (IN_RANGE (offset, -0x8000, 0x7fff))
26382 bitmap_set_bit (components, 0);
26385 return components;
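/* As an example, a function on ELFv2 that saves only LR, r30 and r31
   inline, with all three save slots within 16-bit reach of the stack
   pointer, ends up with the component set {0, 30, 31}, and each of
   those saves/restores can then be shrink-wrapped independently.  */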
26388 /* Implement TARGET_SHRINK_WRAP_COMPONENTS_FOR_BB. */
26389 static sbitmap
26390 rs6000_components_for_bb (basic_block bb)
26392 rs6000_stack_t *info = rs6000_stack_info ();
26394 bitmap in = DF_LIVE_IN (bb);
26395 bitmap gen = &DF_LIVE_BB_INFO (bb)->gen;
26396 bitmap kill = &DF_LIVE_BB_INFO (bb)->kill;
26398 sbitmap components = sbitmap_alloc (cfun->machine->n_components);
26399 bitmap_clear (components);
26401 /* A register is used in a bb if it is in the IN, GEN, or KILL sets. */
26403 /* GPRs. */
26404 for (unsigned regno = info->first_gp_reg_save; regno < 32; regno++)
26405 if (bitmap_bit_p (in, regno)
26406 || bitmap_bit_p (gen, regno)
26407 || bitmap_bit_p (kill, regno))
26408 bitmap_set_bit (components, regno);
26410 /* FPRs. */
26411 for (unsigned regno = info->first_fp_reg_save; regno < 64; regno++)
26412 if (bitmap_bit_p (in, regno)
26413 || bitmap_bit_p (gen, regno)
26414 || bitmap_bit_p (kill, regno))
26415 bitmap_set_bit (components, regno);
26417 /* The link register. */
26418 if (bitmap_bit_p (in, LR_REGNO)
26419 || bitmap_bit_p (gen, LR_REGNO)
26420 || bitmap_bit_p (kill, LR_REGNO))
26421 bitmap_set_bit (components, 0);
26423 return components;
26426 /* Implement TARGET_SHRINK_WRAP_DISQUALIFY_COMPONENTS. */
26427 static void
26428 rs6000_disqualify_components (sbitmap components, edge e,
26429 sbitmap edge_components, bool /*is_prologue*/)
26431 /* Our LR pro/epilogue code moves LR via R0, so R0 had better not be
26432 live where we want to place that code. */
26433 if (bitmap_bit_p (edge_components, 0)
26434 && bitmap_bit_p (DF_LIVE_IN (e->dest), 0))
26436 if (dump_file)
26437 fprintf (dump_file, "Disqualifying LR because GPR0 is live "
26438 "on entry to bb %d\n", e->dest->index);
26439 bitmap_clear_bit (components, 0);
26443 /* Implement TARGET_SHRINK_WRAP_EMIT_PROLOGUE_COMPONENTS. */
26444 static void
26445 rs6000_emit_prologue_components (sbitmap components)
26447 rs6000_stack_t *info = rs6000_stack_info ();
26448 rtx ptr_reg = gen_rtx_REG (Pmode, frame_pointer_needed
26449 ? HARD_FRAME_POINTER_REGNUM
26450 : STACK_POINTER_REGNUM);
26452 machine_mode reg_mode = Pmode;
26453 int reg_size = TARGET_32BIT ? 4 : 8;
26454 machine_mode fp_reg_mode = (TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT)
26455 ? DFmode : SFmode;
26456 int fp_reg_size = 8;
26458 /* Prologue for LR. */
26459 if (bitmap_bit_p (components, 0))
26461 rtx reg = gen_rtx_REG (reg_mode, 0);
26462 rtx_insn *insn = emit_move_insn (reg, gen_rtx_REG (reg_mode, LR_REGNO));
26463 RTX_FRAME_RELATED_P (insn) = 1;
26464 add_reg_note (insn, REG_CFA_REGISTER, NULL);
26466 int offset = info->lr_save_offset;
26467 if (info->push_p)
26468 offset += info->total_size;
26470 insn = emit_insn (gen_frame_store (reg, ptr_reg, offset));
26471 RTX_FRAME_RELATED_P (insn) = 1;
26472 rtx lr = gen_rtx_REG (reg_mode, LR_REGNO);
26473 rtx mem = copy_rtx (SET_DEST (single_set (insn)));
26474 add_reg_note (insn, REG_CFA_OFFSET, gen_rtx_SET (mem, lr));
26477 /* Prologue for the GPRs. */
26478 int offset = info->gp_save_offset;
26479 if (info->push_p)
26480 offset += info->total_size;
26482 for (int i = info->first_gp_reg_save; i < 32; i++)
26484 if (bitmap_bit_p (components, i))
26486 rtx reg = gen_rtx_REG (reg_mode, i);
26487 rtx_insn *insn = emit_insn (gen_frame_store (reg, ptr_reg, offset));
26488 RTX_FRAME_RELATED_P (insn) = 1;
26489 rtx set = copy_rtx (single_set (insn));
26490 add_reg_note (insn, REG_CFA_OFFSET, set);
26493 offset += reg_size;
26496 /* Prologue for the FPRs. */
26497 offset = info->fp_save_offset;
26498 if (info->push_p)
26499 offset += info->total_size;
26501 for (int i = info->first_fp_reg_save; i < 64; i++)
26503 if (bitmap_bit_p (components, i))
26505 rtx reg = gen_rtx_REG (fp_reg_mode, i);
26506 rtx_insn *insn = emit_insn (gen_frame_store (reg, ptr_reg, offset));
26507 RTX_FRAME_RELATED_P (insn) = 1;
26508 rtx set = copy_rtx (single_set (insn));
26509 add_reg_note (insn, REG_CFA_OFFSET, set);
26512 offset += fp_reg_size;
26516 /* Implement TARGET_SHRINK_WRAP_EMIT_EPILOGUE_COMPONENTS. */
26517 static void
26518 rs6000_emit_epilogue_components (sbitmap components)
26520 rs6000_stack_t *info = rs6000_stack_info ();
26521 rtx ptr_reg = gen_rtx_REG (Pmode, frame_pointer_needed
26522 ? HARD_FRAME_POINTER_REGNUM
26523 : STACK_POINTER_REGNUM);
26525 machine_mode reg_mode = Pmode;
26526 int reg_size = TARGET_32BIT ? 4 : 8;
26528 machine_mode fp_reg_mode = (TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT)
26529 ? DFmode : SFmode;
26530 int fp_reg_size = 8;
26532 /* Epilogue for the FPRs. */
26533 int offset = info->fp_save_offset;
26534 if (info->push_p)
26535 offset += info->total_size;
26537 for (int i = info->first_fp_reg_save; i < 64; i++)
26539 if (bitmap_bit_p (components, i))
26541 rtx reg = gen_rtx_REG (fp_reg_mode, i);
26542 rtx_insn *insn = emit_insn (gen_frame_load (reg, ptr_reg, offset));
26543 RTX_FRAME_RELATED_P (insn) = 1;
26544 add_reg_note (insn, REG_CFA_RESTORE, reg);
26547 offset += fp_reg_size;
26550 /* Epilogue for the GPRs. */
26551 offset = info->gp_save_offset;
26552 if (info->push_p)
26553 offset += info->total_size;
26555 for (int i = info->first_gp_reg_save; i < 32; i++)
26557 if (bitmap_bit_p (components, i))
26559 rtx reg = gen_rtx_REG (reg_mode, i);
26560 rtx_insn *insn = emit_insn (gen_frame_load (reg, ptr_reg, offset));
26561 RTX_FRAME_RELATED_P (insn) = 1;
26562 add_reg_note (insn, REG_CFA_RESTORE, reg);
26565 offset += reg_size;
26568 /* Epilogue for LR. */
26569 if (bitmap_bit_p (components, 0))
26571 int offset = info->lr_save_offset;
26572 if (info->push_p)
26573 offset += info->total_size;
26575 rtx reg = gen_rtx_REG (reg_mode, 0);
26576 rtx_insn *insn = emit_insn (gen_frame_load (reg, ptr_reg, offset));
26578 rtx lr = gen_rtx_REG (Pmode, LR_REGNO);
26579 insn = emit_move_insn (lr, reg);
26580 RTX_FRAME_RELATED_P (insn) = 1;
26581 add_reg_note (insn, REG_CFA_RESTORE, lr);
26585 /* Implement TARGET_SHRINK_WRAP_SET_HANDLED_COMPONENTS. */
26586 static void
26587 rs6000_set_handled_components (sbitmap components)
26589 rs6000_stack_t *info = rs6000_stack_info ();
26591 for (int i = info->first_gp_reg_save; i < 32; i++)
26592 if (bitmap_bit_p (components, i))
26593 cfun->machine->gpr_is_wrapped_separately[i] = true;
26595 for (int i = info->first_fp_reg_save; i < 64; i++)
26596 if (bitmap_bit_p (components, i))
26597 cfun->machine->fpr_is_wrapped_separately[i - 32] = true;
26599 if (bitmap_bit_p (components, 0))
26600 cfun->machine->lr_is_wrapped_separately = true;
26603 /* VRSAVE is a bit vector representing which AltiVec registers
26604 are used. The OS uses this to determine which vector
26605 registers to save on a context switch. We need to save
26606 VRSAVE on the stack frame, add whatever AltiVec registers we
26607 used in this function, and do the corresponding magic in the
26608 epilogue. */
26609 static void
26610 emit_vrsave_prologue (rs6000_stack_t *info, int save_regno,
26611 HOST_WIDE_INT frame_off, rtx frame_reg_rtx)
26613 /* Get VRSAVE into a GPR. */
26614 rtx reg = gen_rtx_REG (SImode, save_regno);
26615 rtx vrsave = gen_rtx_REG (SImode, VRSAVE_REGNO);
26616 if (TARGET_MACHO)
26617 emit_insn (gen_get_vrsave_internal (reg));
26618 else
26619 emit_insn (gen_rtx_SET (reg, vrsave));
26621 /* Save VRSAVE. */
26622 int offset = info->vrsave_save_offset + frame_off;
26623 emit_insn (gen_frame_store (reg, frame_reg_rtx, offset));
26625 /* Include the registers in the mask. */
26626 emit_insn (gen_iorsi3 (reg, reg, GEN_INT (info->vrsave_mask)));
26628 emit_insn (generate_set_vrsave (reg, info, 0));
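/* The mask has one bit per AltiVec register, most significant bit
   first; e.g. a function using only v20 and v21 ORs in 0xc00,
   assuming ALTIVEC_REG_BIT(r) is 0x80000000 >> (r - FIRST_ALTIVEC_REGNO)
   as in rs6000.h.  */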
26631 /* Set up the arg pointer (r12) for -fsplit-stack code. If __morestack was
26632 called, it left the arg pointer to the old stack in r29. Otherwise, the
26633 arg pointer is the top of the current frame. */
26634 static void
26635 emit_split_stack_prologue (rs6000_stack_t *info, rtx_insn *sp_adjust,
26636 HOST_WIDE_INT frame_off, rtx frame_reg_rtx)
26638 cfun->machine->split_stack_argp_used = true;
26640 if (sp_adjust)
26642 rtx r12 = gen_rtx_REG (Pmode, 12);
26643 rtx sp_reg_rtx = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
26644 rtx set_r12 = gen_rtx_SET (r12, sp_reg_rtx);
26645 emit_insn_before (set_r12, sp_adjust);
26647 else if (frame_off != 0 || REGNO (frame_reg_rtx) != 12)
26649 rtx r12 = gen_rtx_REG (Pmode, 12);
26650 if (frame_off == 0)
26651 emit_move_insn (r12, frame_reg_rtx);
26652 else
26653 emit_insn (gen_add3_insn (r12, frame_reg_rtx, GEN_INT (frame_off)));
26656 if (info->push_p)
26658 rtx r12 = gen_rtx_REG (Pmode, 12);
26659 rtx r29 = gen_rtx_REG (Pmode, 29);
26660 rtx cr7 = gen_rtx_REG (CCUNSmode, CR7_REGNO);
26661 rtx not_more = gen_label_rtx ();
26662 rtx jump;
26664 jump = gen_rtx_IF_THEN_ELSE (VOIDmode,
26665 gen_rtx_GEU (VOIDmode, cr7, const0_rtx),
26666 gen_rtx_LABEL_REF (VOIDmode, not_more),
26667 pc_rtx);
26668 jump = emit_jump_insn (gen_rtx_SET (pc_rtx, jump));
26669 JUMP_LABEL (jump) = not_more;
26670 LABEL_NUSES (not_more) += 1;
26671 emit_move_insn (r12, r29);
26672 emit_label (not_more);
26676 /* Emit function prologue as insns. */
26678 void
26679 rs6000_emit_prologue (void)
26681 rs6000_stack_t *info = rs6000_stack_info ();
26682 machine_mode reg_mode = Pmode;
26683 int reg_size = TARGET_32BIT ? 4 : 8;
26684 machine_mode fp_reg_mode = (TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT)
26685 ? DFmode : SFmode;
26686 int fp_reg_size = 8;
26687 rtx sp_reg_rtx = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
26688 rtx frame_reg_rtx = sp_reg_rtx;
26689 unsigned int cr_save_regno;
26690 rtx cr_save_rtx = NULL_RTX;
26691 rtx_insn *insn;
26692 int strategy;
26693 int using_static_chain_p = (cfun->static_chain_decl != NULL_TREE
26694 && df_regs_ever_live_p (STATIC_CHAIN_REGNUM)
26695 && call_used_regs[STATIC_CHAIN_REGNUM]);
26696 int using_split_stack = (flag_split_stack
26697 && (lookup_attribute ("no_split_stack",
26698 DECL_ATTRIBUTES (cfun->decl))
26699 == NULL));
26701 /* Offset to top of frame for frame_reg and sp respectively. */
26702 HOST_WIDE_INT frame_off = 0;
26703 HOST_WIDE_INT sp_off = 0;
26704 /* sp_adjust is the stack adjusting instruction, tracked so that the
26705 insn setting up the split-stack arg pointer can be emitted just
26706 prior to it, when r12 is not used here for other purposes. */
26707 rtx_insn *sp_adjust = 0;
26709 #if CHECKING_P
26710 /* Track and check usage of r0, r11, r12. */
26711 int reg_inuse = using_static_chain_p ? 1 << 11 : 0;
26712 #define START_USE(R) do \
26714 gcc_assert ((reg_inuse & (1 << (R))) == 0); \
26715 reg_inuse |= 1 << (R); \
26716 } while (0)
26717 #define END_USE(R) do \
26719 gcc_assert ((reg_inuse & (1 << (R))) != 0); \
26720 reg_inuse &= ~(1 << (R)); \
26721 } while (0)
26722 #define NOT_INUSE(R) do \
26724 gcc_assert ((reg_inuse & (1 << (R))) == 0); \
26725 } while (0)
26726 #else
26727 #define START_USE(R) do {} while (0)
26728 #define END_USE(R) do {} while (0)
26729 #define NOT_INUSE(R) do {} while (0)
26730 #endif
26732 if (DEFAULT_ABI == ABI_ELFv2
26733 && !TARGET_SINGLE_PIC_BASE)
26735 cfun->machine->r2_setup_needed = df_regs_ever_live_p (TOC_REGNUM);
26737 /* With -mminimal-toc we may generate an extra use of r2 below. */
26738 if (TARGET_TOC && TARGET_MINIMAL_TOC
26739 && !constant_pool_empty_p ())
26740 cfun->machine->r2_setup_needed = true;
26744 if (flag_stack_usage_info)
26745 current_function_static_stack_size = info->total_size;
26747 if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
26749 HOST_WIDE_INT size = info->total_size;
26751 if (crtl->is_leaf && !cfun->calls_alloca)
26753 if (size > PROBE_INTERVAL && size > STACK_CHECK_PROTECT)
26754 rs6000_emit_probe_stack_range (STACK_CHECK_PROTECT,
26755 size - STACK_CHECK_PROTECT);
26757 else if (size > 0)
26758 rs6000_emit_probe_stack_range (STACK_CHECK_PROTECT, size);
26761 if (TARGET_FIX_AND_CONTINUE)
26763 /* gdb on darwin arranges to forward a function from the old
26764 address by modifying the first 5 instructions of the function
26765 to branch to the overriding function. This is necessary to
26766 permit function pointers that point to the old function to
26767 actually forward to the new function. */
26768 emit_insn (gen_nop ());
26769 emit_insn (gen_nop ());
26770 emit_insn (gen_nop ());
26771 emit_insn (gen_nop ());
26772 emit_insn (gen_nop ());
26775 /* Handle world saves specially here. */
26776 if (WORLD_SAVE_P (info))
26778 int i, j, sz;
26779 rtx treg;
26780 rtvec p;
26781 rtx reg0;
26783 /* save_world expects lr in r0. */
26784 reg0 = gen_rtx_REG (Pmode, 0);
26785 if (info->lr_save_p)
26787 insn = emit_move_insn (reg0,
26788 gen_rtx_REG (Pmode, LR_REGNO));
26789 RTX_FRAME_RELATED_P (insn) = 1;
26792 /* The SAVE_WORLD and RESTORE_WORLD routines make a number of
26793 assumptions about the offsets of various bits of the stack
26794 frame. */
26795 gcc_assert (info->gp_save_offset == -220
26796 && info->fp_save_offset == -144
26797 && info->lr_save_offset == 8
26798 && info->cr_save_offset == 4
26799 && info->push_p
26800 && info->lr_save_p
26801 && (!crtl->calls_eh_return
26802 || info->ehrd_offset == -432)
26803 && info->vrsave_save_offset == -224
26804 && info->altivec_save_offset == -416);
26806 treg = gen_rtx_REG (SImode, 11);
26807 emit_move_insn (treg, GEN_INT (-info->total_size));
26809 /* SAVE_WORLD takes the caller's LR in R0 and the frame size
26810 in R11. It also clobbers R12, so beware! */
26812 /* Preserve CR2 for save_world prologues */
26813 sz = 5;
26814 sz += 32 - info->first_gp_reg_save;
26815 sz += 64 - info->first_fp_reg_save;
26816 sz += LAST_ALTIVEC_REGNO - info->first_altivec_reg_save + 1;
26817 p = rtvec_alloc (sz);
26818 j = 0;
26819 RTVEC_ELT (p, j++) = gen_rtx_CLOBBER (VOIDmode,
26820 gen_rtx_REG (SImode,
26821 LR_REGNO));
26822 RTVEC_ELT (p, j++) = gen_rtx_USE (VOIDmode,
26823 gen_rtx_SYMBOL_REF (Pmode,
26824 "*save_world"));
26825 /* We do floats first so that the instruction pattern matches
26826 properly. */
26827 for (i = 0; i < 64 - info->first_fp_reg_save; i++)
26828 RTVEC_ELT (p, j++)
26829 = gen_frame_store (gen_rtx_REG (TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT
26830 ? DFmode : SFmode,
26831 info->first_fp_reg_save + i),
26832 frame_reg_rtx,
26833 info->fp_save_offset + frame_off + 8 * i);
26834 for (i = 0; info->first_altivec_reg_save + i <= LAST_ALTIVEC_REGNO; i++)
26835 RTVEC_ELT (p, j++)
26836 = gen_frame_store (gen_rtx_REG (V4SImode,
26837 info->first_altivec_reg_save + i),
26838 frame_reg_rtx,
26839 info->altivec_save_offset + frame_off + 16 * i);
26840 for (i = 0; i < 32 - info->first_gp_reg_save; i++)
26841 RTVEC_ELT (p, j++)
26842 = gen_frame_store (gen_rtx_REG (reg_mode, info->first_gp_reg_save + i),
26843 frame_reg_rtx,
26844 info->gp_save_offset + frame_off + reg_size * i);
26846 /* CR register traditionally saved as CR2. */
26847 RTVEC_ELT (p, j++)
26848 = gen_frame_store (gen_rtx_REG (SImode, CR2_REGNO),
26849 frame_reg_rtx, info->cr_save_offset + frame_off);
26850 /* Explain the use of R0. */
26851 if (info->lr_save_p)
26852 RTVEC_ELT (p, j++)
26853 = gen_frame_store (reg0,
26854 frame_reg_rtx, info->lr_save_offset + frame_off);
26855 /* Explain what happens to the stack pointer. */
26857 rtx newval = gen_rtx_PLUS (Pmode, sp_reg_rtx, treg);
26858 RTVEC_ELT (p, j++) = gen_rtx_SET (sp_reg_rtx, newval);
26861 insn = emit_insn (gen_rtx_PARALLEL (VOIDmode, p));
26862 rs6000_frame_related (insn, frame_reg_rtx, sp_off - frame_off,
26863 treg, GEN_INT (-info->total_size));
26864 sp_off = frame_off = info->total_size;
26867 strategy = info->savres_strategy;
26869 /* For V.4, update stack before we do any saving and set back pointer. */
26870 if (! WORLD_SAVE_P (info)
26871 && info->push_p
26872 && (DEFAULT_ABI == ABI_V4
26873 || crtl->calls_eh_return))
26875 bool need_r11 = (!(strategy & SAVE_INLINE_FPRS)
26876 || !(strategy & SAVE_INLINE_GPRS)
26877 || !(strategy & SAVE_INLINE_VRS));
26878 int ptr_regno = -1;
26879 rtx ptr_reg = NULL_RTX;
26880 int ptr_off = 0;
26882 if (info->total_size < 32767)
26883 frame_off = info->total_size;
26884 else if (need_r11)
26885 ptr_regno = 11;
26886 else if (info->cr_save_p
26887 || info->lr_save_p
26888 || info->first_fp_reg_save < 64
26889 || info->first_gp_reg_save < 32
26890 || info->altivec_size != 0
26891 || info->vrsave_size != 0
26892 || crtl->calls_eh_return)
26893 ptr_regno = 12;
26894 else
26896 /* The prologue won't be saving any regs so there is no need
26897 to set up a frame register to access any frame save area.
26898 We also won't be using frame_off anywhere below, but set
26899 the correct value anyway to protect against future
26900 changes to this function. */
26901 frame_off = info->total_size;
26903 if (ptr_regno != -1)
26905 /* Set up the frame offset to that needed by the first
26906 out-of-line save function. */
26907 START_USE (ptr_regno);
26908 ptr_reg = gen_rtx_REG (Pmode, ptr_regno);
26909 frame_reg_rtx = ptr_reg;
26910 if (!(strategy & SAVE_INLINE_FPRS) && info->fp_size != 0)
26911 gcc_checking_assert (info->fp_save_offset + info->fp_size == 0);
26912 else if (!(strategy & SAVE_INLINE_GPRS) && info->first_gp_reg_save < 32)
26913 ptr_off = info->gp_save_offset + info->gp_size;
26914 else if (!(strategy & SAVE_INLINE_VRS) && info->altivec_size != 0)
26915 ptr_off = info->altivec_save_offset + info->altivec_size;
26916 frame_off = -ptr_off;
26918 sp_adjust = rs6000_emit_allocate_stack (info->total_size,
26919 ptr_reg, ptr_off);
26920 if (REGNO (frame_reg_rtx) == 12)
26921 sp_adjust = 0;
26922 sp_off = info->total_size;
26923 if (frame_reg_rtx != sp_reg_rtx)
26924 rs6000_emit_stack_tie (frame_reg_rtx, false);
26927 /* If we use the link register, get it into r0. */
26928 if (!WORLD_SAVE_P (info) && info->lr_save_p
26929 && !cfun->machine->lr_is_wrapped_separately)
26931 rtx addr, reg, mem;
26933 reg = gen_rtx_REG (Pmode, 0);
26934 START_USE (0);
26935 insn = emit_move_insn (reg, gen_rtx_REG (Pmode, LR_REGNO));
26936 RTX_FRAME_RELATED_P (insn) = 1;
26938 if (!(strategy & (SAVE_NOINLINE_GPRS_SAVES_LR
26939 | SAVE_NOINLINE_FPRS_SAVES_LR)))
26941 addr = gen_rtx_PLUS (Pmode, frame_reg_rtx,
26942 GEN_INT (info->lr_save_offset + frame_off));
26943 mem = gen_rtx_MEM (Pmode, addr);
26944 /* This should not be of rs6000_sr_alias_set, because of
26945 __builtin_return_address. */
26947 insn = emit_move_insn (mem, reg);
26948 rs6000_frame_related (insn, frame_reg_rtx, sp_off - frame_off,
26949 NULL_RTX, NULL_RTX);
26950 END_USE (0);
26954 /* If we need to save CR, put it into r12 or r11. Choose r12 except when
26955 r12 will be needed by the out-of-line gpr save. */
26956 cr_save_regno = ((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
26957 && !(strategy & (SAVE_INLINE_GPRS
26958 | SAVE_NOINLINE_GPRS_SAVES_LR))
26959 ? 11 : 12);
26960 if (!WORLD_SAVE_P (info)
26961 && info->cr_save_p
26962 && REGNO (frame_reg_rtx) != cr_save_regno
26963 && !(using_static_chain_p && cr_save_regno == 11)
26964 && !(using_split_stack && cr_save_regno == 12 && sp_adjust))
26966 cr_save_rtx = gen_rtx_REG (SImode, cr_save_regno);
26967 START_USE (cr_save_regno);
26968 rs6000_emit_move_from_cr (cr_save_rtx);
26971 /* Do any required saving of fpr's. If only one or two to save, do
26972 it ourselves. Otherwise, call function. */
26973 if (!WORLD_SAVE_P (info) && (strategy & SAVE_INLINE_FPRS))
26975 int offset = info->fp_save_offset + frame_off;
26976 for (int i = info->first_fp_reg_save; i < 64; i++)
26978 if (save_reg_p (i)
26979 && !cfun->machine->fpr_is_wrapped_separately[i - 32])
26980 emit_frame_save (frame_reg_rtx, fp_reg_mode, i, offset,
26981 sp_off - frame_off);
26983 offset += fp_reg_size;
26986 else if (!WORLD_SAVE_P (info) && info->first_fp_reg_save != 64)
26988 bool lr = (strategy & SAVE_NOINLINE_FPRS_SAVES_LR) != 0;
26989 int sel = SAVRES_SAVE | SAVRES_FPR | (lr ? SAVRES_LR : 0);
26990 unsigned ptr_regno = ptr_regno_for_savres (sel);
26991 rtx ptr_reg = frame_reg_rtx;
26993 if (REGNO (frame_reg_rtx) == ptr_regno)
26994 gcc_checking_assert (frame_off == 0);
26995 else
26997 ptr_reg = gen_rtx_REG (Pmode, ptr_regno);
26998 NOT_INUSE (ptr_regno);
26999 emit_insn (gen_add3_insn (ptr_reg,
27000 frame_reg_rtx, GEN_INT (frame_off)));
27002 insn = rs6000_emit_savres_rtx (info, ptr_reg,
27003 info->fp_save_offset,
27004 info->lr_save_offset,
27005 DFmode, sel);
27006 rs6000_frame_related (insn, ptr_reg, sp_off,
27007 NULL_RTX, NULL_RTX);
27008 if (lr)
27009 END_USE (0);
27012 /* Save GPRs. This is done as a PARALLEL if we are using
27013 the store-multiple instructions. */
27014 if (!WORLD_SAVE_P (info) && !(strategy & SAVE_INLINE_GPRS))
27016 bool lr = (strategy & SAVE_NOINLINE_GPRS_SAVES_LR) != 0;
27017 int sel = SAVRES_SAVE | SAVRES_GPR | (lr ? SAVRES_LR : 0);
27018 unsigned ptr_regno = ptr_regno_for_savres (sel);
27019 rtx ptr_reg = frame_reg_rtx;
27020 bool ptr_set_up = REGNO (ptr_reg) == ptr_regno;
27021 int end_save = info->gp_save_offset + info->gp_size;
27022 int ptr_off;
27024 if (ptr_regno == 12)
27025 sp_adjust = 0;
27026 if (!ptr_set_up)
27027 ptr_reg = gen_rtx_REG (Pmode, ptr_regno);
27029 /* Need to adjust r11 (r12) if we saved any FPRs. */
27030 if (end_save + frame_off != 0)
27032 rtx offset = GEN_INT (end_save + frame_off);
27034 if (ptr_set_up)
27035 frame_off = -end_save;
27036 else
27037 NOT_INUSE (ptr_regno);
27038 emit_insn (gen_add3_insn (ptr_reg, frame_reg_rtx, offset));
27040 else if (!ptr_set_up)
27042 NOT_INUSE (ptr_regno);
27043 emit_move_insn (ptr_reg, frame_reg_rtx);
27045 ptr_off = -end_save;
27046 insn = rs6000_emit_savres_rtx (info, ptr_reg,
27047 info->gp_save_offset + ptr_off,
27048 info->lr_save_offset + ptr_off,
27049 reg_mode, sel);
27050 rs6000_frame_related (insn, ptr_reg, sp_off - ptr_off,
27051 NULL_RTX, NULL_RTX);
27052 if (lr)
27053 END_USE (0);
27055 else if (!WORLD_SAVE_P (info) && (strategy & SAVE_MULTIPLE))
27057 rtvec p;
27058 int i;
27059 p = rtvec_alloc (32 - info->first_gp_reg_save);
27060 for (i = 0; i < 32 - info->first_gp_reg_save; i++)
27061 RTVEC_ELT (p, i)
27062 = gen_frame_store (gen_rtx_REG (reg_mode, info->first_gp_reg_save + i),
27063 frame_reg_rtx,
27064 info->gp_save_offset + frame_off + reg_size * i);
27065 insn = emit_insn (gen_rtx_PARALLEL (VOIDmode, p));
27066 rs6000_frame_related (insn, frame_reg_rtx, sp_off - frame_off,
27067 NULL_RTX, NULL_RTX);
27069 else if (!WORLD_SAVE_P (info))
27071 int offset = info->gp_save_offset + frame_off;
27072 for (int i = info->first_gp_reg_save; i < 32; i++)
27074 if (rs6000_reg_live_or_pic_offset_p (i)
27075 && !cfun->machine->gpr_is_wrapped_separately[i])
27076 emit_frame_save (frame_reg_rtx, reg_mode, i, offset,
27077 sp_off - frame_off);
27079 offset += reg_size;
27083 if (crtl->calls_eh_return)
27085 unsigned int i;
27086 rtvec p;
27088 for (i = 0; ; ++i)
27090 unsigned int regno = EH_RETURN_DATA_REGNO (i);
27091 if (regno == INVALID_REGNUM)
27092 break;
27095 p = rtvec_alloc (i);
27097 for (i = 0; ; ++i)
27099 unsigned int regno = EH_RETURN_DATA_REGNO (i);
27100 if (regno == INVALID_REGNUM)
27101 break;
27103 rtx set
27104 = gen_frame_store (gen_rtx_REG (reg_mode, regno),
27105 sp_reg_rtx,
27106 info->ehrd_offset + sp_off + reg_size * (int) i);
27107 RTVEC_ELT (p, i) = set;
27108 RTX_FRAME_RELATED_P (set) = 1;
27111 insn = emit_insn (gen_blockage ());
27112 RTX_FRAME_RELATED_P (insn) = 1;
27113 add_reg_note (insn, REG_FRAME_RELATED_EXPR, gen_rtx_PARALLEL (VOIDmode, p));
27116 /* In AIX ABI we need to make sure r2 is really saved. */
27117 if (TARGET_AIX && crtl->calls_eh_return)
27119 rtx tmp_reg, tmp_reg_si, hi, lo, compare_result, toc_save_done, jump;
27120 rtx join_insn, note;
27121 rtx_insn *save_insn;
27122 long toc_restore_insn;
27124 tmp_reg = gen_rtx_REG (Pmode, 11);
27125 tmp_reg_si = gen_rtx_REG (SImode, 11);
27126 if (using_static_chain_p)
27128 START_USE (0);
27129 emit_move_insn (gen_rtx_REG (Pmode, 0), tmp_reg);
27131 else
27132 START_USE (11);
27133 emit_move_insn (tmp_reg, gen_rtx_REG (Pmode, LR_REGNO));
27134 /* Peek at the instruction to which this function returns. If it's
27135 restoring r2, then we know we've already saved r2. We can't
27136 unconditionally save r2 because the value we have will already
27137 be updated if we arrived at this function via a plt call or
27138 toc adjusting stub. */
27139 emit_move_insn (tmp_reg_si, gen_rtx_MEM (SImode, tmp_reg));
27140 toc_restore_insn = ((TARGET_32BIT ? 0x80410000 : 0xE8410000)
27141 + RS6000_TOC_SAVE_SLOT);
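/* 0x80410000 is the image of "lwz r2,0(r1)" and 0xE8410000 that of
   "ld r2,0(r1)"; adding RS6000_TOC_SAVE_SLOT fills in the
   displacement of the TOC save slot, giving the instruction image
   this code expects to find at the return address.  */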
27142 hi = gen_int_mode (toc_restore_insn & ~0xffff, SImode);
27143 emit_insn (gen_xorsi3 (tmp_reg_si, tmp_reg_si, hi));
27144 compare_result = gen_rtx_REG (CCUNSmode, CR0_REGNO);
27145 validate_condition_mode (EQ, CCUNSmode);
27146 lo = gen_int_mode (toc_restore_insn & 0xffff, SImode);
27147 emit_insn (gen_rtx_SET (compare_result,
27148 gen_rtx_COMPARE (CCUNSmode, tmp_reg_si, lo)));
27149 toc_save_done = gen_label_rtx ();
27150 jump = gen_rtx_IF_THEN_ELSE (VOIDmode,
27151 gen_rtx_EQ (VOIDmode, compare_result,
27152 const0_rtx),
27153 gen_rtx_LABEL_REF (VOIDmode, toc_save_done),
27154 pc_rtx);
27155 jump = emit_jump_insn (gen_rtx_SET (pc_rtx, jump));
27156 JUMP_LABEL (jump) = toc_save_done;
27157 LABEL_NUSES (toc_save_done) += 1;
27159 save_insn = emit_frame_save (frame_reg_rtx, reg_mode,
27160 TOC_REGNUM, frame_off + RS6000_TOC_SAVE_SLOT,
27161 sp_off - frame_off);
27163 emit_label (toc_save_done);
27165 /* ??? If we leave SAVE_INSN as marked as saving R2, then we'll
27166 have a CFG that has different saves along different paths.
27167 Move the note to a dummy blockage insn, which describes that
27168 R2 is unconditionally saved after the label. */
27169 /* ??? An alternate representation might be a special insn pattern
27170 containing both the branch and the store. That might give the
27171 code that minimizes the number of DW_CFA_advance opcodes more
27172 freedom in placing the annotations. */
27173 note = find_reg_note (save_insn, REG_FRAME_RELATED_EXPR, NULL);
27174 if (note)
27175 remove_note (save_insn, note);
27176 else
27177 note = alloc_reg_note (REG_FRAME_RELATED_EXPR,
27178 copy_rtx (PATTERN (save_insn)), NULL_RTX);
27179 RTX_FRAME_RELATED_P (save_insn) = 0;
27181 join_insn = emit_insn (gen_blockage ());
27182 REG_NOTES (join_insn) = note;
27183 RTX_FRAME_RELATED_P (join_insn) = 1;
27185 if (using_static_chain_p)
27187 emit_move_insn (tmp_reg, gen_rtx_REG (Pmode, 0));
27188 END_USE (0);
27190 else
27191 END_USE (11);
27194 /* Save CR if we use any that must be preserved. */
27195 if (!WORLD_SAVE_P (info) && info->cr_save_p)
27197 rtx addr = gen_rtx_PLUS (Pmode, frame_reg_rtx,
27198 GEN_INT (info->cr_save_offset + frame_off));
27199 rtx mem = gen_frame_mem (SImode, addr);
27201 /* If we didn't copy cr before, do so now using r0. */
27202 if (cr_save_rtx == NULL_RTX)
27204 START_USE (0);
27205 cr_save_rtx = gen_rtx_REG (SImode, 0);
27206 rs6000_emit_move_from_cr (cr_save_rtx);
27209 /* Saving CR requires a two-instruction sequence: one instruction
27210 to move the CR to a general-purpose register, and a second
27211 instruction that stores the GPR to memory.
27213 We do not emit any DWARF CFI records for the first of these,
27214 because we cannot properly represent the fact that CR is saved in
27215 a register. One reason is that we cannot express that multiple
27216 CR fields are saved; another reason is that on 64-bit, the size
27217 of the CR register in DWARF (4 bytes) differs from the size of
27218 a general-purpose register.
27220 This means if any intervening instruction were to clobber one of
27221 the call-saved CR fields, we'd have incorrect CFI. To prevent
27222 this from happening, we mark the store to memory as a use of
27223 those CR fields, which prevents any such instruction from being
27224 scheduled in between the two instructions. */
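/* For reference, a sketch of the emitted pair (CR save slot offset
   illustrative):
	mfcr 0
	stw 0,8(1)
   The USEs built below fence the call-saved CR fields between these
   two insns.  */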
27225 rtx crsave_v[9];
27226 int n_crsave = 0;
27227 int i;
27229 crsave_v[n_crsave++] = gen_rtx_SET (mem, cr_save_rtx);
27230 for (i = 0; i < 8; i++)
27231 if (save_reg_p (CR0_REGNO + i))
27232 crsave_v[n_crsave++]
27233 = gen_rtx_USE (VOIDmode, gen_rtx_REG (CCmode, CR0_REGNO + i));
27235 insn = emit_insn (gen_rtx_PARALLEL (VOIDmode,
27236 gen_rtvec_v (n_crsave, crsave_v)));
27237 END_USE (REGNO (cr_save_rtx));
27239 /* Now, there's no way that dwarf2out_frame_debug_expr is going to
27240 understand '(unspec:SI [(reg:CC 68) ...] UNSPEC_MOVESI_FROM_CR)',
27241 so we need to construct a frame expression manually. */
27242 RTX_FRAME_RELATED_P (insn) = 1;
27244 /* Update address to be stack-pointer relative, like
27245 rs6000_frame_related would do. */
27246 addr = gen_rtx_PLUS (Pmode, gen_rtx_REG (Pmode, STACK_POINTER_REGNUM),
27247 GEN_INT (info->cr_save_offset + sp_off));
27248 mem = gen_frame_mem (SImode, addr);
27250 if (DEFAULT_ABI == ABI_ELFv2)
27252 /* In the ELFv2 ABI we generate separate CFI records for each
27253 CR field that was actually saved. They all point to the
27254 same 32-bit stack slot. */
27255 rtx crframe[8];
27256 int n_crframe = 0;
27258 for (i = 0; i < 8; i++)
27259 if (save_reg_p (CR0_REGNO + i))
27261 crframe[n_crframe]
27262 = gen_rtx_SET (mem, gen_rtx_REG (SImode, CR0_REGNO + i));
27264 RTX_FRAME_RELATED_P (crframe[n_crframe]) = 1;
27265 n_crframe++;
27268 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
27269 gen_rtx_PARALLEL (VOIDmode,
27270 gen_rtvec_v (n_crframe, crframe)));
27272 else
27274 /* In other ABIs, by convention, we use a single CR regnum to
27275 represent the fact that all call-saved CR fields are saved.
27276 We use CR2_REGNO to be compatible with gcc-2.95 on Linux. */
27277 rtx set = gen_rtx_SET (mem, gen_rtx_REG (SImode, CR2_REGNO));
27278 add_reg_note (insn, REG_FRAME_RELATED_EXPR, set);
27282 /* In the ELFv2 ABI we need to save all call-saved CR fields into
27283 *separate* slots if the routine calls __builtin_eh_return, so
27284 that they can be independently restored by the unwinder. */
27285 if (DEFAULT_ABI == ABI_ELFv2 && crtl->calls_eh_return)
27287 int i, cr_off = info->ehcr_offset;
27288 rtx crsave;
27290 /* ??? We might get better performance by using multiple mfocrf
27291 instructions. */
27292 crsave = gen_rtx_REG (SImode, 0);
27293 emit_insn (gen_movesi_from_cr (crsave));
27295 for (i = 0; i < 8; i++)
27296 if (!call_used_regs[CR0_REGNO + i])
27298 rtvec p = rtvec_alloc (2);
27299 RTVEC_ELT (p, 0)
27300 = gen_frame_store (crsave, frame_reg_rtx, cr_off + frame_off);
27301 RTVEC_ELT (p, 1)
27302 = gen_rtx_USE (VOIDmode, gen_rtx_REG (CCmode, CR0_REGNO + i));
27304 insn = emit_insn (gen_rtx_PARALLEL (VOIDmode, p));
27306 RTX_FRAME_RELATED_P (insn) = 1;
27307 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
27308 gen_frame_store (gen_rtx_REG (SImode, CR0_REGNO + i),
27309 sp_reg_rtx, cr_off + sp_off));
27311 cr_off += reg_size;
27315 /* Update stack and set back pointer unless this is V.4,
27316 for which it was done previously. */
27317 if (!WORLD_SAVE_P (info) && info->push_p
27318 && !(DEFAULT_ABI == ABI_V4 || crtl->calls_eh_return))
27320 rtx ptr_reg = NULL;
27321 int ptr_off = 0;
27323 /* If saving altivec regs we need to be able to address all save
27324 locations using a 16-bit offset. */
27325 if ((strategy & SAVE_INLINE_VRS) == 0
27326 || (info->altivec_size != 0
27327 && (info->altivec_save_offset + info->altivec_size - 16
27328 + info->total_size - frame_off) > 32767)
27329 || (info->vrsave_size != 0
27330 && (info->vrsave_save_offset
27331 + info->total_size - frame_off) > 32767))
27333 int sel = SAVRES_SAVE | SAVRES_VR;
27334 unsigned ptr_regno = ptr_regno_for_savres (sel);
27336 if (using_static_chain_p
27337 && ptr_regno == STATIC_CHAIN_REGNUM)
27338 ptr_regno = 12;
27339 if (REGNO (frame_reg_rtx) != ptr_regno)
27340 START_USE (ptr_regno);
27341 ptr_reg = gen_rtx_REG (Pmode, ptr_regno);
27342 frame_reg_rtx = ptr_reg;
27343 ptr_off = info->altivec_save_offset + info->altivec_size;
27344 frame_off = -ptr_off;
27346 else if (REGNO (frame_reg_rtx) == 1)
27347 frame_off = info->total_size;
27348 sp_adjust = rs6000_emit_allocate_stack (info->total_size,
27349 ptr_reg, ptr_off);
27350 if (REGNO (frame_reg_rtx) == 12)
27351 sp_adjust = 0;
27352 sp_off = info->total_size;
27353 if (frame_reg_rtx != sp_reg_rtx)
27354 rs6000_emit_stack_tie (frame_reg_rtx, false);
27357 /* Set frame pointer, if needed. */
27358 if (frame_pointer_needed)
27360 insn = emit_move_insn (gen_rtx_REG (Pmode, HARD_FRAME_POINTER_REGNUM),
27361 sp_reg_rtx);
27362 RTX_FRAME_RELATED_P (insn) = 1;
27365 /* Save AltiVec registers if needed. Save here because the red zone does
27366 not always include AltiVec registers. */
27367 if (!WORLD_SAVE_P (info)
27368 && info->altivec_size != 0 && (strategy & SAVE_INLINE_VRS) == 0)
27370 int end_save = info->altivec_save_offset + info->altivec_size;
27371 int ptr_off;
27372 /* Oddly, the vector save/restore functions point r0 at the end
27373 of the save area, then use r11 or r12 to load offsets for
27374 [reg+reg] addressing. */
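/* Schematically each vector is then accessed as "stvx vN,rOFF,r0",
   with rOFF holding a negative offset back from the end of the save
   area; the exact sequence lives in the out-of-line routines.  */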
27375 rtx ptr_reg = gen_rtx_REG (Pmode, 0);
27376 int scratch_regno = ptr_regno_for_savres (SAVRES_SAVE | SAVRES_VR);
27377 rtx scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
27379 gcc_checking_assert (scratch_regno == 11 || scratch_regno == 12);
27380 NOT_INUSE (0);
27381 if (scratch_regno == 12)
27382 sp_adjust = 0;
27383 if (end_save + frame_off != 0)
27385 rtx offset = GEN_INT (end_save + frame_off);
27387 emit_insn (gen_add3_insn (ptr_reg, frame_reg_rtx, offset));
27389 else
27390 emit_move_insn (ptr_reg, frame_reg_rtx);
27392 ptr_off = -end_save;
27393 insn = rs6000_emit_savres_rtx (info, scratch_reg,
27394 info->altivec_save_offset + ptr_off,
27395 0, V4SImode, SAVRES_SAVE | SAVRES_VR);
27396 rs6000_frame_related (insn, scratch_reg, sp_off - ptr_off,
27397 NULL_RTX, NULL_RTX);
27398 if (REGNO (frame_reg_rtx) == REGNO (scratch_reg))
27400 /* The oddity mentioned above clobbered our frame reg. */
27401 emit_move_insn (frame_reg_rtx, ptr_reg);
27402 frame_off = ptr_off;
27405 else if (!WORLD_SAVE_P (info)
27406 && info->altivec_size != 0)
27408 int i;
27410 for (i = info->first_altivec_reg_save; i <= LAST_ALTIVEC_REGNO; ++i)
27411 if (info->vrsave_mask & ALTIVEC_REG_BIT (i))
27413 rtx areg, savereg, mem;
27414 HOST_WIDE_INT offset;
27416 offset = (info->altivec_save_offset + frame_off
27417 + 16 * (i - info->first_altivec_reg_save));
27419 savereg = gen_rtx_REG (V4SImode, i);
27421 if (TARGET_P9_DFORM_VECTOR && quad_address_offset_p (offset))
27423 mem = gen_frame_mem (V4SImode,
27424 gen_rtx_PLUS (Pmode, frame_reg_rtx,
27425 GEN_INT (offset)));
27426 insn = emit_insn (gen_rtx_SET (mem, savereg));
27427 areg = NULL_RTX;
27429 else
27431 NOT_INUSE (0);
27432 areg = gen_rtx_REG (Pmode, 0);
27433 emit_move_insn (areg, GEN_INT (offset));
27435 /* AltiVec addressing mode is [reg+reg]. */
27436 mem = gen_frame_mem (V4SImode,
27437 gen_rtx_PLUS (Pmode, frame_reg_rtx, areg));
27439 /* Rather than emitting a generic move, force use of the stvx
27440 instruction, which we always want on ISA 2.07 (power8) systems.
27441 In particular we don't want xxpermdi/stxvd2x for little
27442 endian. */
27443 insn = emit_insn (gen_altivec_stvx_v4si_internal (mem, savereg));
27446 rs6000_frame_related (insn, frame_reg_rtx, sp_off - frame_off,
27447 areg, GEN_INT (offset));
27451 /* VRSAVE is a bit vector representing which AltiVec registers
27452 are used. The OS uses this to determine which vector
27453 registers to save on a context switch. We need to save
27454 VRSAVE on the stack frame, add whatever AltiVec registers we
27455 used in this function, and do the corresponding magic in the
27456 epilogue. */
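/* Roughly, emit_vrsave_prologue performs a read-modify-write of
   SPR 256 (VRSAVE), along the lines of:
	mfspr rN,256
	stw rN,<vrsave slot>
	oris rN,rN,<mask of vectors used>
	mtspr 256,rN
   (a sketch; the slot and mask depend on this frame).  */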
27458 if (!WORLD_SAVE_P (info) && info->vrsave_size != 0)
27460 /* Get VRSAVE into a GPR. Note that ABI_V4 and ABI_DARWIN might
27461 be using r12 as frame_reg_rtx and r11 as the static chain
27462 pointer for nested functions. */
27463 int save_regno = 12;
27464 if ((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
27465 && !using_static_chain_p)
27466 save_regno = 11;
27467 else if (using_split_stack || REGNO (frame_reg_rtx) == 12)
27469 save_regno = 11;
27470 if (using_static_chain_p)
27471 save_regno = 0;
27473 NOT_INUSE (save_regno);
27475 emit_vrsave_prologue (info, save_regno, frame_off, frame_reg_rtx);
27478 /* If we are using RS6000_PIC_OFFSET_TABLE_REGNUM, we need to set it up. */
27479 if (!TARGET_SINGLE_PIC_BASE
27480 && ((TARGET_TOC && TARGET_MINIMAL_TOC
27481 && !constant_pool_empty_p ())
27482 || (DEFAULT_ABI == ABI_V4
27483 && (flag_pic == 1 || (flag_pic && TARGET_SECURE_PLT))
27484 && df_regs_ever_live_p (RS6000_PIC_OFFSET_TABLE_REGNUM))))
27486 /* If emit_load_toc_table will use the link register, we need to save
27487 it. We use R12 for this purpose because emit_load_toc_table
27488 can use register 0. This allows us to use a plain 'blr' to return
27489 from the procedure more often. */
27490 int save_LR_around_toc_setup = (TARGET_ELF
27491 && DEFAULT_ABI == ABI_V4
27492 && flag_pic
27493 && ! info->lr_save_p
27494 && EDGE_COUNT (EXIT_BLOCK_PTR_FOR_FN (cfun)->preds) > 0);
27495 if (save_LR_around_toc_setup)
27497 rtx lr = gen_rtx_REG (Pmode, LR_REGNO);
27498 rtx tmp = gen_rtx_REG (Pmode, 12);
27500 sp_adjust = 0;
27501 insn = emit_move_insn (tmp, lr);
27502 RTX_FRAME_RELATED_P (insn) = 1;
27504 rs6000_emit_load_toc_table (TRUE);
27506 insn = emit_move_insn (lr, tmp);
27507 add_reg_note (insn, REG_CFA_RESTORE, lr);
27508 RTX_FRAME_RELATED_P (insn) = 1;
27510 else
27511 rs6000_emit_load_toc_table (TRUE);
27514 #if TARGET_MACHO
27515 if (!TARGET_SINGLE_PIC_BASE
27516 && DEFAULT_ABI == ABI_DARWIN
27517 && flag_pic && crtl->uses_pic_offset_table)
27519 rtx lr = gen_rtx_REG (Pmode, LR_REGNO);
27520 rtx src = gen_rtx_SYMBOL_REF (Pmode, MACHOPIC_FUNCTION_BASE_NAME);
27522 /* Save and restore LR locally around this call (in R0). */
27523 if (!info->lr_save_p)
27524 emit_move_insn (gen_rtx_REG (Pmode, 0), lr);
27526 emit_insn (gen_load_macho_picbase (src));
27528 emit_move_insn (gen_rtx_REG (Pmode,
27529 RS6000_PIC_OFFSET_TABLE_REGNUM),
27530 lr);
27532 if (!info->lr_save_p)
27533 emit_move_insn (lr, gen_rtx_REG (Pmode, 0));
27535 #endif
27537 /* If we need to, save the TOC register after doing the stack setup.
27538 Do not emit eh frame info for this save. The unwinder wants info,
27539 conceptually attached to instructions in this function, about
27540 register values in the caller of this function. This R2 may have
27541 already been changed from the value in the caller.
27542 We don't attempt to write accurate DWARF EH frame info for R2
27543 because code emitted by gcc for a (non-pointer) function call
27544 doesn't save and restore R2. Instead, R2 is managed out-of-line
27545 by a linker generated plt call stub when the function resides in
27546 a shared library. This behavior is costly to describe in DWARF,
27547 both in terms of the size of DWARF info and the time taken in the
27548 unwinder to interpret it. R2 changes, apart from the
27549 calls_eh_return case earlier in this function, are handled by
27550 linux-unwind.h frob_update_context. */
27551 if (rs6000_save_toc_in_prologue_p ())
27553 rtx reg = gen_rtx_REG (reg_mode, TOC_REGNUM);
27554 emit_insn (gen_frame_store (reg, sp_reg_rtx, RS6000_TOC_SAVE_SLOT));
27557 /* Set up the arg pointer (r12) for -fsplit-stack code. */
27558 if (using_split_stack && split_stack_arg_pointer_used_p ())
27559 emit_split_stack_prologue (info, sp_adjust, frame_off, frame_reg_rtx);
27562 /* Output .extern statements for the save/restore routines we use. */
27564 static void
27565 rs6000_output_savres_externs (FILE *file)
27567 rs6000_stack_t *info = rs6000_stack_info ();
27569 if (TARGET_DEBUG_STACK)
27570 debug_stack_info (info);
27572 /* Write .extern for any function we will call to save and restore
27573 fp values. */
27574 if (info->first_fp_reg_save < 64
27575 && !TARGET_MACHO
27576 && !TARGET_ELF)
27578 char *name;
27579 int regno = info->first_fp_reg_save - 32;
27581 if ((info->savres_strategy & SAVE_INLINE_FPRS) == 0)
27583 bool lr = (info->savres_strategy & SAVE_NOINLINE_FPRS_SAVES_LR) != 0;
27584 int sel = SAVRES_SAVE | SAVRES_FPR | (lr ? SAVRES_LR : 0);
27585 name = rs6000_savres_routine_name (regno, sel);
27586 fprintf (file, "\t.extern %s\n", name);
27588 if ((info->savres_strategy & REST_INLINE_FPRS) == 0)
27590 bool lr = (info->savres_strategy
27591 & REST_NOINLINE_FPRS_DOESNT_RESTORE_LR) == 0;
27592 int sel = SAVRES_FPR | (lr ? SAVRES_LR : 0);
27593 name = rs6000_savres_routine_name (regno, sel);
27594 fprintf (file, "\t.extern %s\n", name);
27599 /* Write function prologue. */
27601 static void
27602 rs6000_output_function_prologue (FILE *file,
27603 HOST_WIDE_INT size ATTRIBUTE_UNUSED)
27605 if (!cfun->is_thunk)
27606 rs6000_output_savres_externs (file);
27608 /* ELFv2 ABI r2 setup code and local entry point. This must follow
27609 immediately after the global entry point label. */
27610 if (rs6000_global_entry_point_needed_p ())
27612 const char *name = XSTR (XEXP (DECL_RTL (current_function_decl), 0), 0);
27614 (*targetm.asm_out.internal_label) (file, "LCF", rs6000_pic_labelno);
27616 if (TARGET_CMODEL != CMODEL_LARGE)
27618 /* In the small and medium code models, we assume the TOC is less
27619 than 2 GB away from the text section, so it can be computed via the
27620 following two-instruction sequence. */
27621 char buf[256];
27623 ASM_GENERATE_INTERNAL_LABEL (buf, "LCF", rs6000_pic_labelno);
27624 fprintf (file, "0:\taddis 2,12,.TOC.-");
27625 assemble_name (file, buf);
27626 fprintf (file, "@ha\n");
27627 fprintf (file, "\taddi 2,2,.TOC.-");
27628 assemble_name (file, buf);
27629 fprintf (file, "@l\n");
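/* For label number 0 the fprintfs above produce:
	0:	addis 2,12,.TOC.-.LCF0@ha
		addi 2,2,.TOC.-.LCF0@l
   computing the TOC pointer from the entry address in r12.  */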
27631 else
27633 /* In the large code model, we allow arbitrary offsets between the
27634 TOC and the text section, so we have to load the offset from
27635 memory. The data field is emitted directly before the global
27636 entry point in rs6000_elf_declare_function_name. */
27637 char buf[256];
27639 #ifdef HAVE_AS_ENTRY_MARKERS
27640 /* If supported by the linker, emit a marker relocation. If the
27641 total code size of the final executable or shared library
27642 happens to fit into 2 GB after all, the linker will replace
27643 this code sequence with the sequence for the small or medium
27644 code model. */
27645 fprintf (file, "\t.reloc .,R_PPC64_ENTRY\n");
27646 #endif
27647 fprintf (file, "\tld 2,");
27648 ASM_GENERATE_INTERNAL_LABEL (buf, "LCL", rs6000_pic_labelno);
27649 assemble_name (file, buf);
27650 fprintf (file, "-");
27651 ASM_GENERATE_INTERNAL_LABEL (buf, "LCF", rs6000_pic_labelno);
27652 assemble_name (file, buf);
27653 fprintf (file, "(12)\n");
27654 fprintf (file, "\tadd 2,2,12\n");
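/* For label number 0 this emits:
	ld 2,.LCL0-.LCF0(12)
	add 2,2,12
   loading the pre-computed TOC offset and adding the entry address.  */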
27657 fputs ("\t.localentry\t", file);
27658 assemble_name (file, name);
27659 fputs (",.-", file);
27660 assemble_name (file, name);
27661 fputs ("\n", file);
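/* I.e. ".localentry foo,.-foo", which records the distance from the
   global entry point to the local entry point in the symbol's
   st_other bits.  */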
27664 /* Output -mprofile-kernel code. This needs to be done here instead of
27665 in output_function_profile since it must go after the ELFv2 ABI
27666 local entry point. */
27667 if (TARGET_PROFILE_KERNEL && crtl->profile)
27669 gcc_assert (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2);
27670 gcc_assert (!TARGET_32BIT);
27672 asm_fprintf (file, "\tmflr %s\n", reg_names[0]);
27674 /* In the ELFv2 ABI we have no compiler stack word. It must be
27675 the responsibility of _mcount to preserve the static chain
27676 register if required. */
27677 if (DEFAULT_ABI != ABI_ELFv2
27678 && cfun->static_chain_decl != NULL)
27680 asm_fprintf (file, "\tstd %s,24(%s)\n",
27681 reg_names[STATIC_CHAIN_REGNUM], reg_names[1]);
27682 fprintf (file, "\tbl %s\n", RS6000_MCOUNT);
27683 asm_fprintf (file, "\tld %s,24(%s)\n",
27684 reg_names[STATIC_CHAIN_REGNUM], reg_names[1]);
27686 else
27687 fprintf (file, "\tbl %s\n", RS6000_MCOUNT);
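/* The sequence emitted above is roughly:
	mflr 0
	std 11,24(1)	(only when the static chain is live, non-ELFv2)
	bl <mcount>
	ld 11,24(1)
   where <mcount> is whatever RS6000_MCOUNT names on the target.  */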
27690 rs6000_pic_labelno++;
27693 /* -mprofile-kernel code calls mcount before the function prologue,
27694 so a profiled leaf function should stay a leaf function. */
27695 static bool
27696 rs6000_keep_leaf_when_profiled ()
27698 return TARGET_PROFILE_KERNEL;
27701 /* Non-zero if vmx regs are restored before the frame pop, zero if
27702 we restore after the pop when possible. */
27703 #define ALWAYS_RESTORE_ALTIVEC_BEFORE_POP 0
27705 /* Restoring cr is a two step process: loading a reg from the frame
27706 save, then moving the reg to cr. For ABI_V4 we must let the
27707 unwinder know that the stack location is no longer valid at or
27708 before the stack deallocation, but we can't emit a cfa_restore for
27709 cr at the stack deallocation like we do for other registers.
27710 The trouble is that it is possible for the move to cr to be
27711 scheduled after the stack deallocation. So say exactly where cr
27712 is located on each of the two insns. */
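/* Schematically the two steps are:
	lwz rN,<cr slot>(r1)	(load_cr_save)
	mtcrf 0xff,rN		(restore_saved_cr; possibly one
				 mtocrf per field)
   with CFI notes attached to each as described above.  */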
27714 static rtx
27715 load_cr_save (int regno, rtx frame_reg_rtx, int offset, bool exit_func)
27717 rtx mem = gen_frame_mem_offset (SImode, frame_reg_rtx, offset);
27718 rtx reg = gen_rtx_REG (SImode, regno);
27719 rtx_insn *insn = emit_move_insn (reg, mem);
27721 if (!exit_func && DEFAULT_ABI == ABI_V4)
27723 rtx cr = gen_rtx_REG (SImode, CR2_REGNO);
27724 rtx set = gen_rtx_SET (reg, cr);
27726 add_reg_note (insn, REG_CFA_REGISTER, set);
27727 RTX_FRAME_RELATED_P (insn) = 1;
27729 return reg;
27732 /* Reload CR from REG. */
27734 static void
27735 restore_saved_cr (rtx reg, int using_mfcr_multiple, bool exit_func)
27737 int count = 0;
27738 int i;
27740 if (using_mfcr_multiple)
27742 for (i = 0; i < 8; i++)
27743 if (save_reg_p (CR0_REGNO + i))
27744 count++;
27745 gcc_assert (count);
27748 if (using_mfcr_multiple && count > 1)
27750 rtx_insn *insn;
27751 rtvec p;
27752 int ndx;
27754 p = rtvec_alloc (count);
27756 ndx = 0;
27757 for (i = 0; i < 8; i++)
27758 if (save_reg_p (CR0_REGNO + i))
27760 rtvec r = rtvec_alloc (2);
27761 RTVEC_ELT (r, 0) = reg;
27762 RTVEC_ELT (r, 1) = GEN_INT (1 << (7-i));
27763 RTVEC_ELT (p, ndx) =
27764 gen_rtx_SET (gen_rtx_REG (CCmode, CR0_REGNO + i),
27765 gen_rtx_UNSPEC (CCmode, r, UNSPEC_MOVESI_TO_CR));
27766 ndx++;
27768 insn = emit_insn (gen_rtx_PARALLEL (VOIDmode, p));
27769 gcc_assert (ndx == count);
27771 /* For the ELFv2 ABI we generate a CFA_RESTORE for each
27772 CR field separately. */
27773 if (!exit_func && DEFAULT_ABI == ABI_ELFv2 && flag_shrink_wrap)
27775 for (i = 0; i < 8; i++)
27776 if (save_reg_p (CR0_REGNO + i))
27777 add_reg_note (insn, REG_CFA_RESTORE,
27778 gen_rtx_REG (SImode, CR0_REGNO + i));
27780 RTX_FRAME_RELATED_P (insn) = 1;
27783 else
27784 for (i = 0; i < 8; i++)
27785 if (save_reg_p (CR0_REGNO + i))
27787 rtx insn = emit_insn (gen_movsi_to_cr_one
27788 (gen_rtx_REG (CCmode, CR0_REGNO + i), reg));
27790 /* For the ELFv2 ABI we generate a CFA_RESTORE for each
27791 CR field separately, attached to the insn that in fact
27792 restores this particular CR field. */
27793 if (!exit_func && DEFAULT_ABI == ABI_ELFv2 && flag_shrink_wrap)
27795 add_reg_note (insn, REG_CFA_RESTORE,
27796 gen_rtx_REG (SImode, CR0_REGNO + i));
27798 RTX_FRAME_RELATED_P (insn) = 1;
27802 /* For other ABIs, we just generate a single CFA_RESTORE for CR2. */
27803 if (!exit_func && DEFAULT_ABI != ABI_ELFv2
27804 && (DEFAULT_ABI == ABI_V4 || flag_shrink_wrap))
27806 rtx_insn *insn = get_last_insn ();
27807 rtx cr = gen_rtx_REG (SImode, CR2_REGNO);
27809 add_reg_note (insn, REG_CFA_RESTORE, cr);
27810 RTX_FRAME_RELATED_P (insn) = 1;
27814 /* Like cr, the move to lr instruction can be scheduled after the
27815 stack deallocation, but unlike cr, its stack frame save is still
27816 valid. So we only need to emit the cfa_restore on the correct
27817 instruction. */
27819 static void
27820 load_lr_save (int regno, rtx frame_reg_rtx, int offset)
27822 rtx mem = gen_frame_mem_offset (Pmode, frame_reg_rtx, offset);
27823 rtx reg = gen_rtx_REG (Pmode, regno);
27825 emit_move_insn (reg, mem);
27828 static void
27829 restore_saved_lr (int regno, bool exit_func)
27831 rtx reg = gen_rtx_REG (Pmode, regno);
27832 rtx lr = gen_rtx_REG (Pmode, LR_REGNO);
27833 rtx_insn *insn = emit_move_insn (lr, reg);
27835 if (!exit_func && flag_shrink_wrap)
27837 add_reg_note (insn, REG_CFA_RESTORE, lr);
27838 RTX_FRAME_RELATED_P (insn) = 1;
27842 static rtx
27843 add_crlr_cfa_restore (const rs6000_stack_t *info, rtx cfa_restores)
27845 if (DEFAULT_ABI == ABI_ELFv2)
27847 int i;
27848 for (i = 0; i < 8; i++)
27849 if (save_reg_p (CR0_REGNO + i))
27851 rtx cr = gen_rtx_REG (SImode, CR0_REGNO + i);
27852 cfa_restores = alloc_reg_note (REG_CFA_RESTORE, cr,
27853 cfa_restores);
27856 else if (info->cr_save_p)
27857 cfa_restores = alloc_reg_note (REG_CFA_RESTORE,
27858 gen_rtx_REG (SImode, CR2_REGNO),
27859 cfa_restores);
27861 if (info->lr_save_p)
27862 cfa_restores = alloc_reg_note (REG_CFA_RESTORE,
27863 gen_rtx_REG (Pmode, LR_REGNO),
27864 cfa_restores);
27865 return cfa_restores;
27868 /* Return true if OFFSET from stack pointer can be clobbered by signals.
27869 V.4 doesn't have any stack cushion; the AIX ABIs have 220 or 288 bytes
27870 below the stack pointer that are not clobbered by signals. */
27872 static inline bool
27873 offset_below_red_zone_p (HOST_WIDE_INT offset)
27875 return offset < (DEFAULT_ABI == ABI_V4
27876 ? 0
27877 : TARGET_32BIT ? -220 : -288);
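/* E.g. on 64-bit AIX/ELFv2 an offset of -288 is still inside the
   protected red zone (false) while -296 is below it (true); under
   ABI_V4 any negative offset counts as clobberable.  */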
27880 /* Append CFA_RESTORES to any existing REG_NOTES on the last insn. */
27882 static void
27883 emit_cfa_restores (rtx cfa_restores)
27885 rtx_insn *insn = get_last_insn ();
27886 rtx *loc = &REG_NOTES (insn);
27888 while (*loc)
27889 loc = &XEXP (*loc, 1);
27890 *loc = cfa_restores;
27891 RTX_FRAME_RELATED_P (insn) = 1;
27894 /* Emit function epilogue as insns. */
27896 void
27897 rs6000_emit_epilogue (int sibcall)
27899 rs6000_stack_t *info;
27900 int restoring_GPRs_inline;
27901 int restoring_FPRs_inline;
27902 int using_load_multiple;
27903 int using_mtcr_multiple;
27904 int use_backchain_to_restore_sp;
27905 int restore_lr;
27906 int strategy;
27907 HOST_WIDE_INT frame_off = 0;
27908 rtx sp_reg_rtx = gen_rtx_REG (Pmode, 1);
27909 rtx frame_reg_rtx = sp_reg_rtx;
27910 rtx cfa_restores = NULL_RTX;
27911 rtx insn;
27912 rtx cr_save_reg = NULL_RTX;
27913 machine_mode reg_mode = Pmode;
27914 int reg_size = TARGET_32BIT ? 4 : 8;
27915 machine_mode fp_reg_mode = (TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT)
27916 ? DFmode : SFmode;
27917 int fp_reg_size = 8;
27918 int i;
27919 bool exit_func;
27920 unsigned ptr_regno;
27922 info = rs6000_stack_info ();
27924 strategy = info->savres_strategy;
27925 using_load_multiple = strategy & REST_MULTIPLE;
27926 restoring_FPRs_inline = sibcall || (strategy & REST_INLINE_FPRS);
27927 restoring_GPRs_inline = sibcall || (strategy & REST_INLINE_GPRS);
27928 using_mtcr_multiple = (rs6000_cpu == PROCESSOR_PPC601
27929 || rs6000_cpu == PROCESSOR_PPC603
27930 || rs6000_cpu == PROCESSOR_PPC750
27931 || optimize_size);
27932 /* Restore via the backchain when we have a large frame, since this
27933 is more efficient than an addis, addi pair. The second condition
27934 here will not trigger at the moment; we don't actually need a
27935 frame pointer for alloca, but the generic parts of the compiler
27936 give us one anyway. */
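/* (The backchain word at offset 0 of every frame holds the caller's
   stack pointer, so a single "ld/lwz 11,0(1)" recovers it.)  */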
27937 use_backchain_to_restore_sp = (info->total_size + (info->lr_save_p
27938 ? info->lr_save_offset
27939 : 0) > 32767
27940 || (cfun->calls_alloca
27941 && !frame_pointer_needed));
27942 restore_lr = (info->lr_save_p
27943 && (restoring_FPRs_inline
27944 || (strategy & REST_NOINLINE_FPRS_DOESNT_RESTORE_LR))
27945 && (restoring_GPRs_inline
27946 || info->first_fp_reg_save < 64)
27947 && !cfun->machine->lr_is_wrapped_separately);
27950 if (WORLD_SAVE_P (info))
27952 int i, j;
27953 char rname[30];
27954 const char *alloc_rname;
27955 rtvec p;
27957 /* eh_rest_world_r10 will return to the location saved in the LR
27958 stack slot (which is not likely to be our caller.)
27959 Input: R10 -- stack adjustment. Clobbers R0, R11, R12, R7, R8.
27960 rest_world is similar, except any R10 parameter is ignored.
27961 The exception-handling stuff that was here in 2.95 is no
27962 longer necessary. */
27964 p = rtvec_alloc (9
27965 + 32 - info->first_gp_reg_save
27966 + LAST_ALTIVEC_REGNO + 1 - info->first_altivec_reg_save
27967 + 63 + 1 - info->first_fp_reg_save);
27969 strcpy (rname, ((crtl->calls_eh_return) ?
27970 "*eh_rest_world_r10" : "*rest_world"));
27971 alloc_rname = ggc_strdup (rname);
27973 j = 0;
27974 RTVEC_ELT (p, j++) = ret_rtx;
27975 RTVEC_ELT (p, j++)
27976 = gen_rtx_USE (VOIDmode, gen_rtx_SYMBOL_REF (Pmode, alloc_rname));
27977 /* The instruction pattern requires a clobber here;
27978 it is shared with the restVEC helper. */
27979 RTVEC_ELT (p, j++)
27980 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (Pmode, 11));
27983 /* CR register traditionally saved as CR2. */
27984 rtx reg = gen_rtx_REG (SImode, CR2_REGNO);
27985 RTVEC_ELT (p, j++)
27986 = gen_frame_load (reg, frame_reg_rtx, info->cr_save_offset);
27987 if (flag_shrink_wrap)
27989 cfa_restores = alloc_reg_note (REG_CFA_RESTORE,
27990 gen_rtx_REG (Pmode, LR_REGNO),
27991 cfa_restores);
27992 cfa_restores = alloc_reg_note (REG_CFA_RESTORE, reg, cfa_restores);
27996 for (i = 0; i < 32 - info->first_gp_reg_save; i++)
27998 rtx reg = gen_rtx_REG (reg_mode, info->first_gp_reg_save + i);
27999 RTVEC_ELT (p, j++)
28000 = gen_frame_load (reg,
28001 frame_reg_rtx, info->gp_save_offset + reg_size * i);
28002 if (flag_shrink_wrap)
28003 cfa_restores = alloc_reg_note (REG_CFA_RESTORE, reg, cfa_restores);
28005 for (i = 0; info->first_altivec_reg_save + i <= LAST_ALTIVEC_REGNO; i++)
28007 rtx reg = gen_rtx_REG (V4SImode, info->first_altivec_reg_save + i);
28008 RTVEC_ELT (p, j++)
28009 = gen_frame_load (reg,
28010 frame_reg_rtx, info->altivec_save_offset + 16 * i);
28011 if (flag_shrink_wrap)
28012 cfa_restores = alloc_reg_note (REG_CFA_RESTORE, reg, cfa_restores);
28014 for (i = 0; info->first_fp_reg_save + i <= 63; i++)
28016 rtx reg = gen_rtx_REG ((TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT
28017 ? DFmode : SFmode),
28018 info->first_fp_reg_save + i);
28019 RTVEC_ELT (p, j++)
28020 = gen_frame_load (reg, frame_reg_rtx, info->fp_save_offset + 8 * i);
28021 if (flag_shrink_wrap)
28022 cfa_restores = alloc_reg_note (REG_CFA_RESTORE, reg, cfa_restores);
28024 RTVEC_ELT (p, j++)
28025 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (Pmode, 0));
28026 RTVEC_ELT (p, j++)
28027 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (SImode, 12));
28028 RTVEC_ELT (p, j++)
28029 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (SImode, 7));
28030 RTVEC_ELT (p, j++)
28031 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (SImode, 8));
28032 RTVEC_ELT (p, j++)
28033 = gen_rtx_USE (VOIDmode, gen_rtx_REG (SImode, 10));
28034 insn = emit_jump_insn (gen_rtx_PARALLEL (VOIDmode, p));
28036 if (flag_shrink_wrap)
28038 REG_NOTES (insn) = cfa_restores;
28039 add_reg_note (insn, REG_CFA_DEF_CFA, sp_reg_rtx);
28040 RTX_FRAME_RELATED_P (insn) = 1;
28042 return;
28045 /* frame_reg_rtx + frame_off points to the top of this stack frame. */
28046 if (info->push_p)
28047 frame_off = info->total_size;
28049 /* Restore AltiVec registers if we must do so before adjusting the
28050 stack. */
28051 if (info->altivec_size != 0
28052 && (ALWAYS_RESTORE_ALTIVEC_BEFORE_POP
28053 || (DEFAULT_ABI != ABI_V4
28054 && offset_below_red_zone_p (info->altivec_save_offset))))
28056 int i;
28057 int scratch_regno = ptr_regno_for_savres (SAVRES_VR);
28059 gcc_checking_assert (scratch_regno == 11 || scratch_regno == 12);
28060 if (use_backchain_to_restore_sp)
28062 int frame_regno = 11;
28064 if ((strategy & REST_INLINE_VRS) == 0)
28066 /* Of r11 and r12, select the one not clobbered by an
28067 out-of-line restore function for the frame register. */
28068 frame_regno = 11 + 12 - scratch_regno;
28070 frame_reg_rtx = gen_rtx_REG (Pmode, frame_regno);
28071 emit_move_insn (frame_reg_rtx,
28072 gen_rtx_MEM (Pmode, sp_reg_rtx));
28073 frame_off = 0;
28075 else if (frame_pointer_needed)
28076 frame_reg_rtx = hard_frame_pointer_rtx;
28078 if ((strategy & REST_INLINE_VRS) == 0)
28080 int end_save = info->altivec_save_offset + info->altivec_size;
28081 int ptr_off;
28082 rtx ptr_reg = gen_rtx_REG (Pmode, 0);
28083 rtx scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
28085 if (end_save + frame_off != 0)
28087 rtx offset = GEN_INT (end_save + frame_off);
28089 emit_insn (gen_add3_insn (ptr_reg, frame_reg_rtx, offset));
28091 else
28092 emit_move_insn (ptr_reg, frame_reg_rtx);
28094 ptr_off = -end_save;
28095 insn = rs6000_emit_savres_rtx (info, scratch_reg,
28096 info->altivec_save_offset + ptr_off,
28097 0, V4SImode, SAVRES_VR);
28099 else
28101 for (i = info->first_altivec_reg_save; i <= LAST_ALTIVEC_REGNO; ++i)
28102 if (info->vrsave_mask & ALTIVEC_REG_BIT (i))
28104 rtx addr, areg, mem, insn;
28105 rtx reg = gen_rtx_REG (V4SImode, i);
28106 HOST_WIDE_INT offset
28107 = (info->altivec_save_offset + frame_off
28108 + 16 * (i - info->first_altivec_reg_save));
28110 if (TARGET_P9_DFORM_VECTOR && quad_address_offset_p (offset))
28112 mem = gen_frame_mem (V4SImode,
28113 gen_rtx_PLUS (Pmode, frame_reg_rtx,
28114 GEN_INT (offset)));
28115 insn = gen_rtx_SET (reg, mem);
28117 else
28119 areg = gen_rtx_REG (Pmode, 0);
28120 emit_move_insn (areg, GEN_INT (offset));
28122 /* AltiVec addressing mode is [reg+reg]. */
28123 addr = gen_rtx_PLUS (Pmode, frame_reg_rtx, areg);
28124 mem = gen_frame_mem (V4SImode, addr);
28126 /* Rather than emitting a generic move, force use of the
28127 lvx instruction, which we always want. In particular we
28128 don't want lxvd2x/xxpermdi for little endian. */
28129 insn = gen_altivec_lvx_v4si_internal (reg, mem);
28132 (void) emit_insn (insn);
28136 for (i = info->first_altivec_reg_save; i <= LAST_ALTIVEC_REGNO; ++i)
28137 if (((strategy & REST_INLINE_VRS) == 0
28138 || (info->vrsave_mask & ALTIVEC_REG_BIT (i)) != 0)
28139 && (flag_shrink_wrap
28140 || (offset_below_red_zone_p
28141 (info->altivec_save_offset
28142 + 16 * (i - info->first_altivec_reg_save)))))
28144 rtx reg = gen_rtx_REG (V4SImode, i);
28145 cfa_restores = alloc_reg_note (REG_CFA_RESTORE, reg, cfa_restores);
28149 /* Restore VRSAVE if we must do so before adjusting the stack. */
28150 if (info->vrsave_size != 0
28151 && (ALWAYS_RESTORE_ALTIVEC_BEFORE_POP
28152 || (DEFAULT_ABI != ABI_V4
28153 && offset_below_red_zone_p (info->vrsave_save_offset))))
28155 rtx reg;
28157 if (frame_reg_rtx == sp_reg_rtx)
28159 if (use_backchain_to_restore_sp)
28161 frame_reg_rtx = gen_rtx_REG (Pmode, 11);
28162 emit_move_insn (frame_reg_rtx,
28163 gen_rtx_MEM (Pmode, sp_reg_rtx));
28164 frame_off = 0;
28166 else if (frame_pointer_needed)
28167 frame_reg_rtx = hard_frame_pointer_rtx;
28170 reg = gen_rtx_REG (SImode, 12);
28171 emit_insn (gen_frame_load (reg, frame_reg_rtx,
28172 info->vrsave_save_offset + frame_off));
28174 emit_insn (generate_set_vrsave (reg, info, 1));
28177 insn = NULL_RTX;
28178 /* If we have a large stack frame, restore the old stack pointer
28179 using the backchain. */
28180 if (use_backchain_to_restore_sp)
28182 if (frame_reg_rtx == sp_reg_rtx)
28184 /* Under V.4, don't reset the stack pointer until after we're done
28185 loading the saved registers. */
28186 if (DEFAULT_ABI == ABI_V4)
28187 frame_reg_rtx = gen_rtx_REG (Pmode, 11);
28189 insn = emit_move_insn (frame_reg_rtx,
28190 gen_rtx_MEM (Pmode, sp_reg_rtx));
28191 frame_off = 0;
28193 else if (ALWAYS_RESTORE_ALTIVEC_BEFORE_POP
28194 && DEFAULT_ABI == ABI_V4)
28195 /* frame_reg_rtx has been set up by the altivec restore. */
28197 else
28199 insn = emit_move_insn (sp_reg_rtx, frame_reg_rtx);
28200 frame_reg_rtx = sp_reg_rtx;
28203 /* If we have a frame pointer, we can restore the old stack pointer
28204 from it. */
28205 else if (frame_pointer_needed)
28207 frame_reg_rtx = sp_reg_rtx;
28208 if (DEFAULT_ABI == ABI_V4)
28209 frame_reg_rtx = gen_rtx_REG (Pmode, 11);
28210 /* Prevent reordering memory accesses against stack pointer restore. */
28211 else if (cfun->calls_alloca
28212 || offset_below_red_zone_p (-info->total_size))
28213 rs6000_emit_stack_tie (frame_reg_rtx, true);
28215 insn = emit_insn (gen_add3_insn (frame_reg_rtx, hard_frame_pointer_rtx,
28216 GEN_INT (info->total_size)));
28217 frame_off = 0;
28219 else if (info->push_p
28220 && DEFAULT_ABI != ABI_V4
28221 && !crtl->calls_eh_return)
28223 /* Prevent reordering memory accesses against stack pointer restore. */
28224 if (cfun->calls_alloca
28225 || offset_below_red_zone_p (-info->total_size))
28226 rs6000_emit_stack_tie (frame_reg_rtx, false);
28227 insn = emit_insn (gen_add3_insn (sp_reg_rtx, sp_reg_rtx,
28228 GEN_INT (info->total_size)));
28229 frame_off = 0;
28231 if (insn && frame_reg_rtx == sp_reg_rtx)
28233 if (cfa_restores)
28235 REG_NOTES (insn) = cfa_restores;
28236 cfa_restores = NULL_RTX;
28238 add_reg_note (insn, REG_CFA_DEF_CFA, sp_reg_rtx);
28239 RTX_FRAME_RELATED_P (insn) = 1;
28242 /* Restore AltiVec registers if we have not done so already. */
28243 if (!ALWAYS_RESTORE_ALTIVEC_BEFORE_POP
28244 && info->altivec_size != 0
28245 && (DEFAULT_ABI == ABI_V4
28246 || !offset_below_red_zone_p (info->altivec_save_offset)))
28248 int i;
28250 if ((strategy & REST_INLINE_VRS) == 0)
28252 int end_save = info->altivec_save_offset + info->altivec_size;
28253 int ptr_off;
28254 rtx ptr_reg = gen_rtx_REG (Pmode, 0);
28255 int scratch_regno = ptr_regno_for_savres (SAVRES_VR);
28256 rtx scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
28258 if (end_save + frame_off != 0)
28260 rtx offset = GEN_INT (end_save + frame_off);
28262 emit_insn (gen_add3_insn (ptr_reg, frame_reg_rtx, offset));
28264 else
28265 emit_move_insn (ptr_reg, frame_reg_rtx);
28267 ptr_off = -end_save;
28268 insn = rs6000_emit_savres_rtx (info, scratch_reg,
28269 info->altivec_save_offset + ptr_off,
28270 0, V4SImode, SAVRES_VR);
28271 if (REGNO (frame_reg_rtx) == REGNO (scratch_reg))
28273 /* Frame reg was clobbered by out-of-line save. Restore it
28274 from ptr_reg, and if we are calling out-of-line gpr or
28275 fpr restore set up the correct pointer and offset. */
28276 unsigned newptr_regno = 1;
28277 if (!restoring_GPRs_inline)
28279 bool lr = info->gp_save_offset + info->gp_size == 0;
28280 int sel = SAVRES_GPR | (lr ? SAVRES_LR : 0);
28281 newptr_regno = ptr_regno_for_savres (sel);
28282 end_save = info->gp_save_offset + info->gp_size;
28284 else if (!restoring_FPRs_inline)
28286 bool lr = !(strategy & REST_NOINLINE_FPRS_DOESNT_RESTORE_LR);
28287 int sel = SAVRES_FPR | (lr ? SAVRES_LR : 0);
28288 newptr_regno = ptr_regno_for_savres (sel);
28289 end_save = info->fp_save_offset + info->fp_size;
28292 if (newptr_regno != 1 && REGNO (frame_reg_rtx) != newptr_regno)
28293 frame_reg_rtx = gen_rtx_REG (Pmode, newptr_regno);
28295 if (end_save + ptr_off != 0)
28297 rtx offset = GEN_INT (end_save + ptr_off);
28299 frame_off = -end_save;
28300 if (TARGET_32BIT)
28301 emit_insn (gen_addsi3_carry (frame_reg_rtx,
28302 ptr_reg, offset));
28303 else
28304 emit_insn (gen_adddi3_carry (frame_reg_rtx,
28305 ptr_reg, offset));
28307 else
28309 frame_off = ptr_off;
28310 emit_move_insn (frame_reg_rtx, ptr_reg);
28314 else
28316 for (i = info->first_altivec_reg_save; i <= LAST_ALTIVEC_REGNO; ++i)
28317 if (info->vrsave_mask & ALTIVEC_REG_BIT (i))
28319 rtx addr, areg, mem, insn;
28320 rtx reg = gen_rtx_REG (V4SImode, i);
28321 HOST_WIDE_INT offset
28322 = (info->altivec_save_offset + frame_off
28323 + 16 * (i - info->first_altivec_reg_save));
28325 if (TARGET_P9_DFORM_VECTOR && quad_address_offset_p (offset))
28327 mem = gen_frame_mem (V4SImode,
28328 gen_rtx_PLUS (Pmode, frame_reg_rtx,
28329 GEN_INT (offset)));
28330 insn = gen_rtx_SET (reg, mem);
28332 else
28334 areg = gen_rtx_REG (Pmode, 0);
28335 emit_move_insn (areg, GEN_INT (offset));
28337 /* AltiVec addressing mode is [reg+reg]. */
28338 addr = gen_rtx_PLUS (Pmode, frame_reg_rtx, areg);
28339 mem = gen_frame_mem (V4SImode, addr);
28341 /* Rather than emitting a generic move, force use of the
28342 lvx instruction, which we always want. In particular we
28343 don't want lxvd2x/xxpermdi for little endian. */
28344 insn = gen_altivec_lvx_v4si_internal (reg, mem);
28347 (void) emit_insn (insn);
28351 for (i = info->first_altivec_reg_save; i <= LAST_ALTIVEC_REGNO; ++i)
28352 if (((strategy & REST_INLINE_VRS) == 0
28353 || (info->vrsave_mask & ALTIVEC_REG_BIT (i)) != 0)
28354 && (DEFAULT_ABI == ABI_V4 || flag_shrink_wrap))
28356 rtx reg = gen_rtx_REG (V4SImode, i);
28357 cfa_restores = alloc_reg_note (REG_CFA_RESTORE, reg, cfa_restores);
28361 /* Restore VRSAVE if we have not done so already. */
28362 if (!ALWAYS_RESTORE_ALTIVEC_BEFORE_POP
28363 && info->vrsave_size != 0
28364 && (DEFAULT_ABI == ABI_V4
28365 || !offset_below_red_zone_p (info->vrsave_save_offset)))
28367 rtx reg;
28369 reg = gen_rtx_REG (SImode, 12);
28370 emit_insn (gen_frame_load (reg, frame_reg_rtx,
28371 info->vrsave_save_offset + frame_off));
28373 emit_insn (generate_set_vrsave (reg, info, 1));
28376 /* If we exit by an out-of-line restore function on ABI_V4 then that
28377 function will deallocate the stack, so we don't need to worry
28378 about the unwinder restoring cr from an invalid stack frame
28379 location. */
28380 exit_func = (!restoring_FPRs_inline
28381 || (!restoring_GPRs_inline
28382 && info->first_fp_reg_save == 64));
28384 /* In the ELFv2 ABI we need to restore all call-saved CR fields from
28385 *separate* slots if the routine calls __builtin_eh_return, so
28386 that they can be independently restored by the unwinder. */
28387 if (DEFAULT_ABI == ABI_ELFv2 && crtl->calls_eh_return)
28389 int i, cr_off = info->ehcr_offset;
28391 for (i = 0; i < 8; i++)
28392 if (!call_used_regs[CR0_REGNO + i])
28394 rtx reg = gen_rtx_REG (SImode, 0);
28395 emit_insn (gen_frame_load (reg, frame_reg_rtx,
28396 cr_off + frame_off));
28398 insn = emit_insn (gen_movsi_to_cr_one
28399 (gen_rtx_REG (CCmode, CR0_REGNO + i), reg));
28401 if (!exit_func && flag_shrink_wrap)
28403 add_reg_note (insn, REG_CFA_RESTORE,
28404 gen_rtx_REG (SImode, CR0_REGNO + i));
28406 RTX_FRAME_RELATED_P (insn) = 1;
28409 cr_off += reg_size;
28413 /* Get the old lr if we saved it. If we are restoring registers
28414 out-of-line, then the out-of-line routines can do this for us. */
28415 if (restore_lr && restoring_GPRs_inline)
28416 load_lr_save (0, frame_reg_rtx, info->lr_save_offset + frame_off);
28418 /* Get the old cr if we saved it. */
28419 if (info->cr_save_p)
28421 unsigned cr_save_regno = 12;
28423 if (!restoring_GPRs_inline)
28425 /* Ensure we don't use the register used by the out-of-line
28426 gpr register restore below. */
28427 bool lr = info->gp_save_offset + info->gp_size == 0;
28428 int sel = SAVRES_GPR | (lr ? SAVRES_LR : 0);
28429 int gpr_ptr_regno = ptr_regno_for_savres (sel);
28431 if (gpr_ptr_regno == 12)
28432 cr_save_regno = 11;
28433 gcc_checking_assert (REGNO (frame_reg_rtx) != cr_save_regno);
28435 else if (REGNO (frame_reg_rtx) == 12)
28436 cr_save_regno = 11;
28438 cr_save_reg = load_cr_save (cr_save_regno, frame_reg_rtx,
28439 info->cr_save_offset + frame_off,
28440 exit_func);
28443 /* Set LR here to try to overlap restores below. */
28444 if (restore_lr && restoring_GPRs_inline)
28445 restore_saved_lr (0, exit_func);
28447 /* Load exception handler data registers, if needed. */
28448 if (crtl->calls_eh_return)
28450 unsigned int i, regno;
28452 if (TARGET_AIX)
28454 rtx reg = gen_rtx_REG (reg_mode, 2);
28455 emit_insn (gen_frame_load (reg, frame_reg_rtx,
28456 frame_off + RS6000_TOC_SAVE_SLOT));
28459 for (i = 0; ; ++i)
28461 rtx mem;
28463 regno = EH_RETURN_DATA_REGNO (i);
28464 if (regno == INVALID_REGNUM)
28465 break;
28467 mem = gen_frame_mem_offset (reg_mode, frame_reg_rtx,
28468 info->ehrd_offset + frame_off
28469 + reg_size * (int) i);
28471 emit_move_insn (gen_rtx_REG (reg_mode, regno), mem);
28475 /* Restore GPRs. This is done as a PARALLEL if we are using
28476 the load-multiple instructions. */
28477 if (!restoring_GPRs_inline)
28479 /* We are jumping to an out-of-line function. */
28480 rtx ptr_reg;
28481 int end_save = info->gp_save_offset + info->gp_size;
28482 bool can_use_exit = end_save == 0;
28483 int sel = SAVRES_GPR | (can_use_exit ? SAVRES_LR : 0);
28484 int ptr_off;
28486 /* Emit stack reset code if we need it. */
28487 ptr_regno = ptr_regno_for_savres (sel);
28488 ptr_reg = gen_rtx_REG (Pmode, ptr_regno);
28489 if (can_use_exit)
28490 rs6000_emit_stack_reset (frame_reg_rtx, frame_off, ptr_regno);
28491 else if (end_save + frame_off != 0)
28492 emit_insn (gen_add3_insn (ptr_reg, frame_reg_rtx,
28493 GEN_INT (end_save + frame_off)));
28494 else if (REGNO (frame_reg_rtx) != ptr_regno)
28495 emit_move_insn (ptr_reg, frame_reg_rtx);
28496 if (REGNO (frame_reg_rtx) == ptr_regno)
28497 frame_off = -end_save;
28499 if (can_use_exit && info->cr_save_p)
28500 restore_saved_cr (cr_save_reg, using_mtcr_multiple, true);
28502 ptr_off = -end_save;
28503 rs6000_emit_savres_rtx (info, ptr_reg,
28504 info->gp_save_offset + ptr_off,
28505 info->lr_save_offset + ptr_off,
28506 reg_mode, sel);
28508 else if (using_load_multiple)
28510 rtvec p;
28511 p = rtvec_alloc (32 - info->first_gp_reg_save);
28512 for (i = 0; i < 32 - info->first_gp_reg_save; i++)
28513 RTVEC_ELT (p, i)
28514 = gen_frame_load (gen_rtx_REG (reg_mode, info->first_gp_reg_save + i),
28515 frame_reg_rtx,
28516 info->gp_save_offset + frame_off + reg_size * i);
28517 emit_insn (gen_rtx_PARALLEL (VOIDmode, p));
28519 else
28521 int offset = info->gp_save_offset + frame_off;
28522 for (i = info->first_gp_reg_save; i < 32; i++)
28524 if (rs6000_reg_live_or_pic_offset_p (i)
28525 && !cfun->machine->gpr_is_wrapped_separately[i])
28527 rtx reg = gen_rtx_REG (reg_mode, i);
28528 emit_insn (gen_frame_load (reg, frame_reg_rtx, offset));
28531 offset += reg_size;
28535 if (DEFAULT_ABI == ABI_V4 || flag_shrink_wrap)
28537 /* If the frame pointer was used then we can't delay emitting
28538 a REG_CFA_DEF_CFA note. This must happen on the insn that
28539 restores the frame pointer, r31. We may have already emitted
28540 a REG_CFA_DEF_CFA note, but that's OK; a duplicate is
28541 discarded by dwarf2cfi.c/dwarf2out.c, and in any case would
28542 be harmless if emitted. */
28543 if (frame_pointer_needed)
28545 insn = get_last_insn ();
28546 add_reg_note (insn, REG_CFA_DEF_CFA,
28547 plus_constant (Pmode, frame_reg_rtx, frame_off));
28548 RTX_FRAME_RELATED_P (insn) = 1;
28551 /* Set up cfa_restores. We always need these when
28552 shrink-wrapping. If not shrink-wrapping then we only need
28553 the cfa_restore when the stack location is no longer valid.
28554 The cfa_restores must be emitted on or before the insn that
28555 invalidates the stack, and of course must not be emitted
28556 before the insn that actually does the restore. The latter
28557 is why it is a bad idea to emit the cfa_restores as a group
28558 on the last instruction here that actually does a restore:
28559 That insn may be reordered with respect to others doing
28560 restores. */
28561 if (flag_shrink_wrap
28562 && !restoring_GPRs_inline
28563 && info->first_fp_reg_save == 64)
28564 cfa_restores = add_crlr_cfa_restore (info, cfa_restores);
28566 for (i = info->first_gp_reg_save; i < 32; i++)
28567 if (!restoring_GPRs_inline
28568 || using_load_multiple
28569 || rs6000_reg_live_or_pic_offset_p (i))
28571 if (cfun->machine->gpr_is_wrapped_separately[i])
28572 continue;
28574 rtx reg = gen_rtx_REG (reg_mode, i);
28575 cfa_restores = alloc_reg_note (REG_CFA_RESTORE, reg, cfa_restores);
28579 if (!restoring_GPRs_inline
28580 && info->first_fp_reg_save == 64)
28582 /* We are jumping to an out-of-line function. */
28583 if (cfa_restores)
28584 emit_cfa_restores (cfa_restores);
28585 return;
28588 if (restore_lr && !restoring_GPRs_inline)
28590 load_lr_save (0, frame_reg_rtx, info->lr_save_offset + frame_off);
28591 restore_saved_lr (0, exit_func);
28594 /* Restore fpr's if we need to do it without calling a function. */
28595 if (restoring_FPRs_inline)
28597 int offset = info->fp_save_offset + frame_off;
28598 for (i = info->first_fp_reg_save; i < 64; i++)
28600 if (save_reg_p (i)
28601 && !cfun->machine->fpr_is_wrapped_separately[i - 32])
28603 rtx reg = gen_rtx_REG (fp_reg_mode, i);
28604 emit_insn (gen_frame_load (reg, frame_reg_rtx, offset));
28605 if (DEFAULT_ABI == ABI_V4 || flag_shrink_wrap)
28606 cfa_restores = alloc_reg_note (REG_CFA_RESTORE, reg,
28607 cfa_restores);
28610 offset += fp_reg_size;
28614 /* If we saved cr, restore it here. Just those that were used. */
28615 if (info->cr_save_p)
28616 restore_saved_cr (cr_save_reg, using_mtcr_multiple, exit_func);
28618 /* If this is V.4, unwind the stack pointer after all of the loads
28619 have been done, or set up r11 if we are restoring fp out of line. */
28620 ptr_regno = 1;
28621 if (!restoring_FPRs_inline)
28623 bool lr = (strategy & REST_NOINLINE_FPRS_DOESNT_RESTORE_LR) == 0;
28624 int sel = SAVRES_FPR | (lr ? SAVRES_LR : 0);
28625 ptr_regno = ptr_regno_for_savres (sel);
28628 insn = rs6000_emit_stack_reset (frame_reg_rtx, frame_off, ptr_regno);
28629 if (REGNO (frame_reg_rtx) == ptr_regno)
28630 frame_off = 0;
28632 if (insn && restoring_FPRs_inline)
28634 if (cfa_restores)
28636 REG_NOTES (insn) = cfa_restores;
28637 cfa_restores = NULL_RTX;
28639 add_reg_note (insn, REG_CFA_DEF_CFA, sp_reg_rtx);
28640 RTX_FRAME_RELATED_P (insn) = 1;
28643 if (crtl->calls_eh_return)
28645 rtx sa = EH_RETURN_STACKADJ_RTX;
28646 emit_insn (gen_add3_insn (sp_reg_rtx, sp_reg_rtx, sa));
28649 if (!sibcall && restoring_FPRs_inline)
28651 if (cfa_restores)
28653 /* We can't hang the cfa_restores off a simple return,
28654 since the shrink-wrap code sometimes uses an existing
28655 return. This means there might be a path from
28656 pre-prologue code to this return, and dwarf2cfi code
28657 wants the eh_frame unwinder state to be the same on
28658 all paths to any point. So we need to emit the
28659 cfa_restores before the return. For -m64 we really
28660 don't need epilogue cfa_restores at all, except for
28661 this irritating dwarf2cfi-with-shrink-wrap
28662 requirement; the stack red-zone means eh_frame info
28663 from the prologue telling the unwinder to restore
28664 from the stack is perfectly good right to the end of
28665 the function. */
28666 emit_insn (gen_blockage ());
28667 emit_cfa_restores (cfa_restores);
28668 cfa_restores = NULL_RTX;
28671 emit_jump_insn (targetm.gen_simple_return ());
28674 if (!sibcall && !restoring_FPRs_inline)
28676 bool lr = (strategy & REST_NOINLINE_FPRS_DOESNT_RESTORE_LR) == 0;
28677 rtvec p = rtvec_alloc (3 + !!lr + 64 - info->first_fp_reg_save);
28678 int elt = 0;
28679 RTVEC_ELT (p, elt++) = ret_rtx;
28680 if (lr)
28681 RTVEC_ELT (p, elt++)
28682 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (Pmode, LR_REGNO));
28684 /* We have to restore more than two FP registers, so branch to the
28685 restore function. It will return to our caller. */
28686 int i;
28687 int reg;
28688 rtx sym;
28690 if (flag_shrink_wrap)
28691 cfa_restores = add_crlr_cfa_restore (info, cfa_restores);
28693 sym = rs6000_savres_routine_sym (info, SAVRES_FPR | (lr ? SAVRES_LR : 0));
28694 RTVEC_ELT (p, elt++) = gen_rtx_USE (VOIDmode, sym);
28695 reg = (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2) ? 1 : 11;
28696 RTVEC_ELT (p, elt++) = gen_rtx_USE (VOIDmode, gen_rtx_REG (Pmode, reg));
28698 for (i = 0; i < 64 - info->first_fp_reg_save; i++)
28700 rtx reg = gen_rtx_REG (DFmode, info->first_fp_reg_save + i);
28702 RTVEC_ELT (p, elt++)
28703 = gen_frame_load (reg, sp_reg_rtx, info->fp_save_offset + 8 * i);
28704 if (flag_shrink_wrap)
28705 cfa_restores = alloc_reg_note (REG_CFA_RESTORE, reg, cfa_restores);
28708 emit_jump_insn (gen_rtx_PARALLEL (VOIDmode, p));
28711 if (cfa_restores)
28713 if (sibcall)
28714 /* Ensure the cfa_restores are hung off an insn that won't
28715 be reordered above other restores. */
28716 emit_insn (gen_blockage ());
28718 emit_cfa_restores (cfa_restores);
28722 /* Write function epilogue. */
28724 static void
28725 rs6000_output_function_epilogue (FILE *file,
28726 HOST_WIDE_INT size ATTRIBUTE_UNUSED)
28728 #if TARGET_MACHO
28729 macho_branch_islands ();
28732 rtx_insn *insn = get_last_insn ();
28733 rtx_insn *deleted_debug_label = NULL;
28735 /* Mach-O doesn't support labels at the end of objects, so if
28736 it looks like we might want one, take special action.
28738 First, collect any sequence of deleted debug labels. */
28739 while (insn
28740 && NOTE_P (insn)
28741 && NOTE_KIND (insn) != NOTE_INSN_DELETED_LABEL)
28743 /* Don't insert a nop for NOTE_INSN_DELETED_DEBUG_LABEL
28744 notes only, instead set their CODE_LABEL_NUMBER to -1,
28745 otherwise there would be code generation differences
28746 in between -g and -g0. */
28747 if (NOTE_P (insn) && NOTE_KIND (insn) == NOTE_INSN_DELETED_DEBUG_LABEL)
28748 deleted_debug_label = insn;
28749 insn = PREV_INSN (insn);
28752 /* Second, if we have:
28753 label:
28754 barrier
28755 then this needs to be detected, so skip past the barrier. */
28757 if (insn && BARRIER_P (insn))
28758 insn = PREV_INSN (insn);
28760 /* Up to now we've only seen notes or barriers. */
28761 if (insn)
28763 if (LABEL_P (insn)
28764 || (NOTE_P (insn)
28765 && NOTE_KIND (insn) == NOTE_INSN_DELETED_LABEL))
28766 /* Trailing label: <barrier>. */
28767 fputs ("\tnop\n", file);
28768 else
28770 /* Lastly, see if we have a completely empty function body. */
28771 while (insn && ! INSN_P (insn))
28772 insn = PREV_INSN (insn);
28773 /* If we don't find any insns, we've got an empty function body;
28774 i.e. completely empty, without a return or branch. This is
28775 taken as the case where a function body has been removed
28776 because it contains an inline __builtin_unreachable(). GCC
28777 states that reaching __builtin_unreachable() means UB so we're
28778 not obliged to do anything special; however, we want
28779 non-zero-sized function bodies. To meet this, and help the
28780 user out, let's trap the case. */
28781 if (insn == NULL)
28782 fputs ("\ttrap\n", file);
28785 else if (deleted_debug_label)
28786 for (insn = deleted_debug_label; insn; insn = NEXT_INSN (insn))
28787 if (NOTE_KIND (insn) == NOTE_INSN_DELETED_DEBUG_LABEL)
28788 CODE_LABEL_NUMBER (insn) = -1;
28790 #endif
28792 /* Output a traceback table here. See /usr/include/sys/debug.h for info
28793 on its format.
28795 We don't output a traceback table if -finhibit-size-directive was
28796 used. The documentation for -finhibit-size-directive reads
28797 ``don't output a @code{.size} assembler directive, or anything
28798 else that would cause trouble if the function is split in the
28799 middle, and the two halves are placed at locations far apart in
28800 memory.'' The traceback table has this property, since it
28801 includes the offset from the start of the function to the
28802 traceback table itself.
28804 System V.4 PowerPC targets (and the embedded ABI derived from them) use a
28805 different traceback table. */
28806 if ((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
28807 && ! flag_inhibit_size_directive
28808 && rs6000_traceback != traceback_none && !cfun->is_thunk)
28810 const char *fname = NULL;
28811 const char *language_string = lang_hooks.name;
28812 int fixed_parms = 0, float_parms = 0, parm_info = 0;
28813 int i;
28814 int optional_tbtab;
28815 rs6000_stack_t *info = rs6000_stack_info ();
28817 if (rs6000_traceback == traceback_full)
28818 optional_tbtab = 1;
28819 else if (rs6000_traceback == traceback_part)
28820 optional_tbtab = 0;
28821 else
28822 optional_tbtab = !optimize_size && !TARGET_ELF;
28824 if (optional_tbtab)
28826 fname = XSTR (XEXP (DECL_RTL (current_function_decl), 0), 0);
28827 while (*fname == '.') /* V.4 encodes . in the name */
28828 fname++;
28830 /* Need label immediately before tbtab, so we can compute
28831 its offset from the function start. */
28832 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "LT");
28833 ASM_OUTPUT_LABEL (file, fname);
28836 /* The .tbtab pseudo-op can only be used for the first eight
28837 expressions, since it can't handle the possibly variable
28838 length fields that follow. However, if you omit the optional
28839 fields, the assembler outputs zeros for all optional fields
28840 anyway, giving each variable length field its minimum length
28841 (as defined in sys/debug.h). Thus we cannot use the .tbtab
28842 pseudo-op at all. */
28844 /* An all-zero word flags the start of the tbtab, for debuggers
28845 that have to find it by searching forward from the entry
28846 point or from the current pc. */
28847 fputs ("\t.long 0\n", file);
28849 /* Tbtab format type. Use format type 0. */
28850 fputs ("\t.byte 0,", file);
28852 /* Language type. Unfortunately, there does not seem to be any
28853 official way to discover the language being compiled, so we
28854 use language_string.
28855 C is 0. Fortran is 1. Pascal is 2. Ada is 3. C++ is 9.
28856 Java is 13. Objective-C is 14. Objective-C++ isn't assigned
28857 a number, so for now use 9. LTO, Go and JIT aren't assigned numbers
28858 either, so for now use 0. */
28859 if (lang_GNU_C ()
28860 || ! strcmp (language_string, "GNU GIMPLE")
28861 || ! strcmp (language_string, "GNU Go")
28862 || ! strcmp (language_string, "libgccjit"))
28863 i = 0;
28864 else if (! strcmp (language_string, "GNU F77")
28865 || lang_GNU_Fortran ())
28866 i = 1;
28867 else if (! strcmp (language_string, "GNU Pascal"))
28868 i = 2;
28869 else if (! strcmp (language_string, "GNU Ada"))
28870 i = 3;
28871 else if (lang_GNU_CXX ()
28872 || ! strcmp (language_string, "GNU Objective-C++"))
28873 i = 9;
28874 else if (! strcmp (language_string, "GNU Java"))
28875 i = 13;
28876 else if (! strcmp (language_string, "GNU Objective-C"))
28877 i = 14;
28878 else
28879 gcc_unreachable ();
28880 fprintf (file, "%d,", i);
28882 /* 8 single bit fields: global linkage (not set for C extern linkage,
28883 apparently a PL/I convention?), out-of-line epilogue/prologue, offset
28884 from start of procedure stored in tbtab, internal function, function
28885 has controlled storage, function has no toc, function uses fp,
28886 function logs/aborts fp operations. */
28887 /* Assume that fp operations are used if any fp reg must be saved. */
28888 fprintf (file, "%d,",
28889 (optional_tbtab << 5) | ((info->first_fp_reg_save != 64) << 1));
28891 /* 6 bitfields: function is interrupt handler, name present in
28892 proc table, function calls alloca, on condition directives
28893 (controls stack walks, 3 bits), saves condition reg, saves
28894 link reg. */
28895 /* The `function calls alloca' bit seems to be set whenever reg 31 is
28896 set up as a frame pointer, even when there is no alloca call. */
28897 fprintf (file, "%d,",
28898 ((optional_tbtab << 6)
28899 | ((optional_tbtab & frame_pointer_needed) << 5)
28900 | (info->cr_save_p << 1)
28901 | (info->lr_save_p)));
28903 /* 3 bitfields: saves backchain, fixup code, number of fpr saved
28904 (6 bits). */
28905 fprintf (file, "%d,",
28906 (info->push_p << 7) | (64 - info->first_fp_reg_save));
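/* Worked example: push_p == 1 with first_fp_reg_save == 62 (f30 and
   f31 saved) emits (1 << 7) | 2 == 130.  */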
28908 /* 2 bitfields: spare bits (2 bits), number of gpr saved (6 bits). */
28909 fprintf (file, "%d,", (32 - first_reg_to_save ()));
28911 if (optional_tbtab)
28913 /* Compute the parameter info from the function decl argument
28914 list. */
28915 tree decl;
28916 int next_parm_info_bit = 31;
28918 for (decl = DECL_ARGUMENTS (current_function_decl);
28919 decl; decl = DECL_CHAIN (decl))
28921 rtx parameter = DECL_INCOMING_RTL (decl);
28922 machine_mode mode = GET_MODE (parameter);
28924 if (GET_CODE (parameter) == REG)
28926 if (SCALAR_FLOAT_MODE_P (mode))
28928 int bits;
28930 float_parms++;
28932 switch (mode)
28934 case SFmode:
28935 case SDmode:
28936 bits = 0x2;
28937 break;
28939 case DFmode:
28940 case DDmode:
28941 case TFmode:
28942 case TDmode:
28943 case IFmode:
28944 case KFmode:
28945 bits = 0x3;
28946 break;
28948 default:
28949 gcc_unreachable ();
28952 /* If only one bit will fit, don't OR in this entry. */
28953 if (next_parm_info_bit > 0)
28954 parm_info |= (bits << (next_parm_info_bit - 1));
28955 next_parm_info_bit -= 2;
28957 else
28959 fixed_parms += ((GET_MODE_SIZE (mode)
28960 + (UNITS_PER_WORD - 1))
28961 / UNITS_PER_WORD);
28962 next_parm_info_bit -= 1;
28968 /* Number of fixed point parameters. */
28969 /* This is actually the number of words of fixed point parameters;
28970 an 8-byte struct therefore counts as 2, so the maximum value is 8. */
28971 fprintf (file, "%d,", fixed_parms);
28973 /* 2 bitfields: number of floating point parameters (7 bits), parameters
28974 all on stack. */
28975 /* This is actually the number of fp registers that hold parameters;
28976 and thus the maximum value is 13. */
28977 /* Set parameters on stack bit if parameters are not in their original
28978 registers, regardless of whether they are on the stack? Xlc
28979 seems to set the bit when not optimizing. */
28980 fprintf (file, "%d\n", ((float_parms << 1) | (! optimize)));
28982 if (optional_tbtab)
28984 /* Optional fields follow. Some are variable length. */
28986 /* Parameter types, left adjusted bit fields: 0 fixed, 10 single
28987 float, 11 double float. */
28988 /* There is an entry for each parameter in a register, in the order
28989 that they occur in the parameter list. Any intervening arguments
28990 on the stack are ignored. If the list overflows a long (max
28991 possible length 34 bits) then completely leave off all elements
28992 that don't fit. */
28993 /* Only emit this long if there was at least one parameter. */
28994 if (fixed_parms || float_parms)
28995 fprintf (file, "\t.long %d\n", parm_info);
28997 /* Offset from start of code to tb table. */
28998 fputs ("\t.long ", file);
28999 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "LT");
29000 RS6000_OUTPUT_BASENAME (file, fname);
29001 putc ('-', file);
29002 rs6000_output_function_entry (file, fname);
29003 putc ('\n', file);
29005 /* Interrupt handler mask. */
29006 /* Omit this long, since we never set the interrupt handler bit
29007 above. */
29009 /* Number of CTL (controlled storage) anchors. */
29010 /* Omit this long, since the has_ctl bit is never set above. */
29012 /* Displacement into stack of each CTL anchor. */
29013 /* Omit this list of longs, because there are no CTL anchors. */
29015 /* Length of function name. */
29016 if (*fname == '*')
29017 ++fname;
29018 fprintf (file, "\t.short %d\n", (int) strlen (fname));
29020 /* Function name. */
29021 assemble_string (fname, strlen (fname));
29023 /* Register for alloca automatic storage; this is always reg 31.
29024 Only emit this if the alloca bit was set above. */
29025 if (frame_pointer_needed)
29026 fputs ("\t.byte 31\n", file);
29028 fputs ("\t.align 2\n", file);
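/* Editor's sketch, for illustration only: for a small C function
   compiled without optimization that pushes a frame, saves only the
   link register, and takes one integer argument, the mandatory part
   emitted above might read

	.long 0			# all-zero word marking the tbtab
	.byte 0,0,32,65,128,0,1,1

   i.e. format 0, language C, "offset stored in tbtab", "name present"
   plus "saves LR", "saves backchain" with no FPRs, no GPRs saved, one
   word of fixed parameters, and the "parameters on stack" bit (set
   because !optimize).  The exact values follow from the fprintf calls
   above; treat these as an example, not a specification.  */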
29032 /* Arrange to define .LCTOC1 label, if not already done. */
29033 if (need_toc_init)
29035 need_toc_init = 0;
29036 if (!toc_initialized)
29038 switch_to_section (toc_section);
29039 switch_to_section (current_function_section ());
29044 /* -fsplit-stack support. */
29046 /* A SYMBOL_REF for __morestack. */
29047 static GTY(()) rtx morestack_ref;
29049 static rtx
29050 gen_add3_const (rtx rt, rtx ra, long c)
29052 if (TARGET_64BIT)
29053 return gen_adddi3 (rt, ra, GEN_INT (c));
29054 else
29055 return gen_addsi3 (rt, ra, GEN_INT (c));
29058 /* Emit -fsplit-stack prologue, which goes before the regular function
29059 prologue (at local entry point in the case of ELFv2). */
29061 void
29062 rs6000_expand_split_stack_prologue (void)
29064 rs6000_stack_t *info = rs6000_stack_info ();
29065 unsigned HOST_WIDE_INT allocate;
29066 long alloc_hi, alloc_lo;
29067 rtx r0, r1, r12, lr, ok_label, compare, jump, call_fusage;
29068 rtx_insn *insn;
29070 gcc_assert (flag_split_stack && reload_completed);
29072 if (!info->push_p)
29073 return;
29075 if (global_regs[29])
29077 error ("-fsplit-stack uses register r29");
29078 inform (DECL_SOURCE_LOCATION (global_regs_decl[29]),
29079 "conflicts with %qD", global_regs_decl[29]);
29082 allocate = info->total_size;
29083 if (allocate > (unsigned HOST_WIDE_INT) 1 << 31)
29085 sorry ("Stack frame larger than 2G is not supported for -fsplit-stack");
29086 return;
29088 if (morestack_ref == NULL_RTX)
29090 morestack_ref = gen_rtx_SYMBOL_REF (Pmode, "__morestack");
29091 SYMBOL_REF_FLAGS (morestack_ref) |= (SYMBOL_FLAG_LOCAL
29092 | SYMBOL_FLAG_FUNCTION);
29095 r0 = gen_rtx_REG (Pmode, 0);
29096 r1 = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
29097 r12 = gen_rtx_REG (Pmode, 12);
29098 emit_insn (gen_load_split_stack_limit (r0));
29099 /* Always emit two insns here to calculate the requested stack,
29100 so that the linker can edit them when adjusting size for calling
29101 non-split-stack code. */
29102 alloc_hi = (-allocate + 0x8000) & ~0xffffL;
29103 alloc_lo = -allocate - alloc_hi;
29104 if (alloc_hi != 0)
29106 emit_insn (gen_add3_const (r12, r1, alloc_hi));
29107 if (alloc_lo != 0)
29108 emit_insn (gen_add3_const (r12, r12, alloc_lo));
29109 else
29110 emit_insn (gen_nop ());
29112 else
29114 emit_insn (gen_add3_const (r12, r1, alloc_lo));
29115 emit_insn (gen_nop ());
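  /* Illustrative sketch only: for a 70000-byte frame the pair above
     typically assembles to

	addis 12,1,-1		# r12 = r1 - 65536
	addi 12,12,-4464	# r12 -= 4464, i.e. r1 - 70000 in total

     while a frame that fits in 16 bits gets a single addi followed by
     the nop placeholder, keeping the sequence editable by the linker.  */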
29118 compare = gen_rtx_REG (CCUNSmode, CR7_REGNO);
29119 emit_insn (gen_rtx_SET (compare, gen_rtx_COMPARE (CCUNSmode, r12, r0)));
29120 ok_label = gen_label_rtx ();
29121 jump = gen_rtx_IF_THEN_ELSE (VOIDmode,
29122 gen_rtx_GEU (VOIDmode, compare, const0_rtx),
29123 gen_rtx_LABEL_REF (VOIDmode, ok_label),
29124 pc_rtx);
29125 insn = emit_jump_insn (gen_rtx_SET (pc_rtx, jump));
29126 JUMP_LABEL (insn) = ok_label;
29127 /* Mark the jump as very likely to be taken. */
29128 add_reg_br_prob_note (insn, profile_probability::very_likely ());
29130 lr = gen_rtx_REG (Pmode, LR_REGNO);
29131 insn = emit_move_insn (r0, lr);
29132 RTX_FRAME_RELATED_P (insn) = 1;
29133 insn = emit_insn (gen_frame_store (r0, r1, info->lr_save_offset));
29134 RTX_FRAME_RELATED_P (insn) = 1;
29136 insn = emit_call_insn (gen_call (gen_rtx_MEM (SImode, morestack_ref),
29137 const0_rtx, const0_rtx));
29138 call_fusage = NULL_RTX;
29139 use_reg (&call_fusage, r12);
29140 /* Say the call uses r0, even though it doesn't, to stop regrename
29141 from twiddling with the insns saving lr, trashing args for cfun.
29142 The insns restoring lr are similarly protected by making
29143 split_stack_return use r0. */
29144 use_reg (&call_fusage, r0);
29145 add_function_usage_to (insn, call_fusage);
29146 /* Indicate that this function can't jump to non-local gotos. */
29147 make_reg_eh_region_note_nothrow_nononlocal (insn);
29148 emit_insn (gen_frame_load (r0, r1, info->lr_save_offset));
29149 insn = emit_move_insn (lr, r0);
29150 add_reg_note (insn, REG_CFA_RESTORE, lr);
29151 RTX_FRAME_RELATED_P (insn) = 1;
29152 emit_insn (gen_split_stack_return ());
29154 emit_label (ok_label);
29155 LABEL_NUSES (ok_label) = 1;
29158 /* Return the internal arg pointer used for function incoming
29159 arguments. When -fsplit-stack, the arg pointer is r12 so we need
29160 to copy it to a pseudo in order for it to be preserved over calls
29161 and suchlike. We'd really like to use a pseudo here for the
29162 internal arg pointer but data-flow analysis is not prepared to
29163 accept pseudos as live at the beginning of a function. */
29165 static rtx
29166 rs6000_internal_arg_pointer (void)
29168 if (flag_split_stack
29169 && (lookup_attribute ("no_split_stack", DECL_ATTRIBUTES (cfun->decl))
29170 == NULL))
29173 if (cfun->machine->split_stack_arg_pointer == NULL_RTX)
29175 rtx pat;
29177 cfun->machine->split_stack_arg_pointer = gen_reg_rtx (Pmode);
29178 REG_POINTER (cfun->machine->split_stack_arg_pointer) = 1;
29180 /* Put the pseudo initialization right after the note at the
29181 beginning of the function. */
29182 pat = gen_rtx_SET (cfun->machine->split_stack_arg_pointer,
29183 gen_rtx_REG (Pmode, 12));
29184 push_topmost_sequence ();
29185 emit_insn_after (pat, get_insns ());
29186 pop_topmost_sequence ();
29188 return plus_constant (Pmode, cfun->machine->split_stack_arg_pointer,
29189 FIRST_PARM_OFFSET (current_function_decl));
29191 return virtual_incoming_args_rtx;
29194 /* We may have to tell the dataflow pass that the split stack prologue
29195 is initializing a register. */
29197 static void
29198 rs6000_live_on_entry (bitmap regs)
29200 if (flag_split_stack)
29201 bitmap_set_bit (regs, 12);
29204 /* Emit -fsplit-stack dynamic stack allocation space check. */
29206 void
29207 rs6000_split_stack_space_check (rtx size, rtx label)
29209 rtx sp = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
29210 rtx limit = gen_reg_rtx (Pmode);
29211 rtx requested = gen_reg_rtx (Pmode);
29212 rtx cmp = gen_reg_rtx (CCUNSmode);
29213 rtx jump;
29215 emit_insn (gen_load_split_stack_limit (limit));
29216 if (CONST_INT_P (size))
29217 emit_insn (gen_add3_insn (requested, sp, GEN_INT (-INTVAL (size))));
29218 else
29220 size = force_reg (Pmode, size);
29221 emit_move_insn (requested, gen_rtx_MINUS (Pmode, sp, size));
29223 emit_insn (gen_rtx_SET (cmp, gen_rtx_COMPARE (CCUNSmode, requested, limit)));
29224 jump = gen_rtx_IF_THEN_ELSE (VOIDmode,
29225 gen_rtx_GEU (VOIDmode, cmp, const0_rtx),
29226 gen_rtx_LABEL_REF (VOIDmode, label),
29227 pc_rtx);
29228 jump = emit_jump_insn (gen_rtx_SET (pc_rtx, jump));
29229 JUMP_LABEL (jump) = label;
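/* A rough sketch of the sequence emitted above, assuming SIZE is
   already in a register (register numbers and mnemonics illustrative;
   cmplw is used instead of cmpld on 32-bit):

	<load split-stack limit into LIMIT>
	subf REQUESTED,SIZE,1	# requested = sp - size
	cmpld REQUESTED,LIMIT
	bge LABEL		# enough room; skip the overflow path  */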
29232 /* A C compound statement that outputs the assembler code for a thunk
29233 function, used to implement C++ virtual function calls with
29234 multiple inheritance. The thunk acts as a wrapper around a virtual
29235 function, adjusting the implicit object parameter before handing
29236 control off to the real function.
29238 First, emit code to add the integer DELTA to the location that
29239 contains the incoming first argument. Assume that this argument
29240 contains a pointer, and is the one used to pass the `this' pointer
29241 in C++. This is the incoming argument *before* the function
29242 prologue, e.g. `%o0' on a sparc. The addition must preserve the
29243 values of all other incoming arguments.
29245 After the addition, emit code to jump to FUNCTION, which is a
29246 `FUNCTION_DECL'. This is a direct pure jump, not a call, and does
29247 not touch the return address. Hence returning from FUNCTION will
29248 return to whoever called the current `thunk'.
29250 The effect must be as if FUNCTION had been called directly with the
29251 adjusted first argument. This macro is responsible for emitting
29252 all of the code for a thunk function; output_function_prologue()
29253 and output_function_epilogue() are not invoked.
29255 The THUNK_FNDECL is redundant. (DELTA and FUNCTION have already
29256 been extracted from it.) It might possibly be useful on some
29257 targets, but probably not.
29259 If you do not define this macro, the target-independent code in the
29260 C++ frontend will generate a less efficient heavyweight thunk that
29261 calls FUNCTION instead of jumping to it. The generic approach does
29262 not support varargs. */
29264 static void
29265 rs6000_output_mi_thunk (FILE *file, tree thunk_fndecl ATTRIBUTE_UNUSED,
29266 HOST_WIDE_INT delta, HOST_WIDE_INT vcall_offset,
29267 tree function)
29269 rtx this_rtx, funexp;
29270 rtx_insn *insn;
29272 reload_completed = 1;
29273 epilogue_completed = 1;
29275 /* Mark the end of the (empty) prologue. */
29276 emit_note (NOTE_INSN_PROLOGUE_END);
29278 /* Find the "this" pointer. If the function returns a structure,
29279 the structure return pointer is in r3. */
29280 if (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function))
29281 this_rtx = gen_rtx_REG (Pmode, 4);
29282 else
29283 this_rtx = gen_rtx_REG (Pmode, 3);
29285 /* Apply the constant offset, if required. */
29286 if (delta)
29287 emit_insn (gen_add3_insn (this_rtx, this_rtx, GEN_INT (delta)));
29289 /* Apply the offset from the vtable, if required. */
29290 if (vcall_offset)
29292 rtx vcall_offset_rtx = GEN_INT (vcall_offset);
29293 rtx tmp = gen_rtx_REG (Pmode, 12);
29295 emit_move_insn (tmp, gen_rtx_MEM (Pmode, this_rtx));
29296 if (((unsigned HOST_WIDE_INT) vcall_offset) + 0x8000 >= 0x10000)
29298 emit_insn (gen_add3_insn (tmp, tmp, vcall_offset_rtx));
29299 emit_move_insn (tmp, gen_rtx_MEM (Pmode, tmp));
29301 else
29303 rtx loc = gen_rtx_PLUS (Pmode, tmp, vcall_offset_rtx);
29305 emit_move_insn (tmp, gen_rtx_MEM (Pmode, loc));
29307 emit_insn (gen_add3_insn (this_rtx, this_rtx, tmp));
29310 /* Generate a tail call to the target function. */
29311 if (!TREE_USED (function))
29313 assemble_external (function);
29314 TREE_USED (function) = 1;
29316 funexp = XEXP (DECL_RTL (function), 0);
29317 funexp = gen_rtx_MEM (FUNCTION_MODE, funexp);
29319 #if TARGET_MACHO
29320 if (MACHOPIC_INDIRECT)
29321 funexp = machopic_indirect_call_target (funexp);
29322 #endif
29324 /* gen_sibcall expects reload to convert scratch pseudo to LR so we must
29325 generate sibcall RTL explicitly. */
29326 insn = emit_call_insn (
29327 gen_rtx_PARALLEL (VOIDmode,
29328 gen_rtvec (3,
29329 gen_rtx_CALL (VOIDmode,
29330 funexp, const0_rtx),
29331 gen_rtx_USE (VOIDmode, const0_rtx),
29332 simple_return_rtx)));
29333 SIBLING_CALL_P (insn) = 1;
29334 emit_barrier ();
29336 /* Run just enough of rest_of_compilation to get the insns emitted.
29337 There's not really enough bulk here to make other passes such as
29338 instruction scheduling worthwhile. Note that use_thunk calls
29339 assemble_start_function and assemble_end_function. */
29340 insn = get_insns ();
29341 shorten_branches (insn);
29342 final_start_function (insn, file, 1);
29343 final (insn, file, 1);
29344 final_end_function ();
29346 reload_completed = 0;
29347 epilogue_completed = 0;
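/* Illustrative only: with DELTA == 16 and no vcall offset, the thunk
   emitted above reduces to something like

	addi 3,3,16		# adjust the incoming this pointer
	b <function>		# sibling call; LR is left untouched  */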
29350 /* A quick summary of the various types of 'constant-pool tables'
29351 under PowerPC:
29353 Target Flags Name One table per
29354 AIX (none) AIX TOC object file
29355 AIX -mfull-toc AIX TOC object file
29356 AIX -mminimal-toc AIX minimal TOC translation unit
29357 SVR4/EABI (none) SVR4 SDATA object file
29358 SVR4/EABI -fpic SVR4 pic object file
29359 SVR4/EABI -fPIC SVR4 PIC translation unit
29360 SVR4/EABI -mrelocatable EABI TOC function
29361 SVR4/EABI -maix AIX TOC object file
29362 SVR4/EABI -maix -mminimal-toc
29363 AIX minimal TOC translation unit
29365 Name Reg. Set by entries contains:
29366 made by addrs? fp? sum?
29368 AIX TOC 2 crt0 as Y option option
29369 AIX minimal TOC 30 prolog gcc Y Y option
29370 SVR4 SDATA 13 crt0 gcc N Y N
29371 SVR4 pic 30 prolog ld Y not yet N
29372 SVR4 PIC 30 prolog gcc Y option option
29373 EABI TOC 30 prolog gcc Y option option
29377 /* Hash functions for the hash table. */
29379 static unsigned
29380 rs6000_hash_constant (rtx k)
29382 enum rtx_code code = GET_CODE (k);
29383 machine_mode mode = GET_MODE (k);
29384 unsigned result = (code << 3) ^ mode;
29385 const char *format;
29386 int flen, fidx;
29388 format = GET_RTX_FORMAT (code);
29389 flen = strlen (format);
29390 fidx = 0;
29392 switch (code)
29394 case LABEL_REF:
29395 return result * 1231 + (unsigned) INSN_UID (XEXP (k, 0));
29397 case CONST_WIDE_INT:
29399 int i;
29400 flen = CONST_WIDE_INT_NUNITS (k);
29401 for (i = 0; i < flen; i++)
29402 result = result * 613 + CONST_WIDE_INT_ELT (k, i);
29403 return result;
29406 case CONST_DOUBLE:
29407 if (mode != VOIDmode)
29408 return real_hash (CONST_DOUBLE_REAL_VALUE (k)) * result;
29409 flen = 2;
29410 break;
29412 case CODE_LABEL:
29413 fidx = 3;
29414 break;
29416 default:
29417 break;
29420 for (; fidx < flen; fidx++)
29421 switch (format[fidx])
29423 case 's':
29425 unsigned i, len;
29426 const char *str = XSTR (k, fidx);
29427 len = strlen (str);
29428 result = result * 613 + len;
29429 for (i = 0; i < len; i++)
29430 result = result * 613 + (unsigned) str[i];
29431 break;
29433 case 'u':
29434 case 'e':
29435 result = result * 1231 + rs6000_hash_constant (XEXP (k, fidx));
29436 break;
29437 case 'i':
29438 case 'n':
29439 result = result * 613 + (unsigned) XINT (k, fidx);
29440 break;
29441 case 'w':
29442 if (sizeof (unsigned) >= sizeof (HOST_WIDE_INT))
29443 result = result * 613 + (unsigned) XWINT (k, fidx);
29444 else
29446 size_t i;
29447 for (i = 0; i < sizeof (HOST_WIDE_INT) / sizeof (unsigned); i++)
29448 result = result * 613 + (unsigned) (XWINT (k, fidx)
29449 >> CHAR_BIT * i);
29451 break;
29452 case '0':
29453 break;
29454 default:
29455 gcc_unreachable ();
29458 return result;
29461 hashval_t
29462 toc_hasher::hash (toc_hash_struct *thc)
29464 return rs6000_hash_constant (thc->key) ^ thc->key_mode;
29467 /* Compare H1 and H2 for equivalence. */
29469 bool
29470 toc_hasher::equal (toc_hash_struct *h1, toc_hash_struct *h2)
29472 rtx r1 = h1->key;
29473 rtx r2 = h2->key;
29475 if (h1->key_mode != h2->key_mode)
29476 return 0;
29478 return rtx_equal_p (r1, r2);
29481 /* These are the names given by the C++ front-end to vtables, and
29482 vtable-like objects. Ideally, this logic should not be here;
29483 instead, there should be some programmatic way of inquiring as
29484 to whether or not an object is a vtable. */
29486 #define VTABLE_NAME_P(NAME) \
29487 (strncmp ("_vt.", (NAME), strlen ("_vt.")) == 0 \
29488 || strncmp ("_ZTV", (NAME), strlen ("_ZTV")) == 0 \
29489 || strncmp ("_ZTT", (NAME), strlen ("_ZTT")) == 0 \
29490 || strncmp ("_ZTI", (NAME), strlen ("_ZTI")) == 0 \
29491 || strncmp ("_ZTC", (NAME), strlen ("_ZTC")) == 0)
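/* For instance, the Itanium-ABI mangled names "_ZTV4Base" (vtable),
   "_ZTT4Base" (VTT) and "_ZTI4Base" (typeinfo) all satisfy
   VTABLE_NAME_P, while an ordinary function symbol such as "_Z3foov"
   does not.  */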
29493 #ifdef NO_DOLLAR_IN_LABEL
29494 /* Return a GGC-allocated character string translating dollar signs in
29495 input NAME to underscores. Used by XCOFF ASM_OUTPUT_LABELREF. */
29497 const char *
29498 rs6000_xcoff_strip_dollar (const char *name)
29500 char *strip, *p;
29501 const char *q;
29502 size_t len;
29504 q = (const char *) strchr (name, '$');
29506 if (q == 0 || q == name)
29507 return name;
29509 len = strlen (name);
29510 strip = XALLOCAVEC (char, len + 1);
29511 strcpy (strip, name);
29512 p = strip + (q - name);
29513 while (p)
29515 *p = '_';
29516 p = strchr (p + 1, '$');
29519 return ggc_alloc_string (strip, len);
29521 #endif
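/* Example: rs6000_xcoff_strip_dollar ("w$widen") returns "w_widen";
   a name containing no '$', or one whose first character is '$', is
   returned unchanged.  */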
29523 void
29524 rs6000_output_symbol_ref (FILE *file, rtx x)
29526 const char *name = XSTR (x, 0);
29528 /* Currently C++ toc references to vtables can be emitted before it
29529 is decided whether the vtable is public or private. If this is
29530 the case, then the linker will eventually complain that there is
29531 a reference to an unknown section. Thus, for vtables only,
29532 we emit the TOC reference to reference the identifier and not the
29533 symbol. */
29534 if (VTABLE_NAME_P (name))
29536 RS6000_OUTPUT_BASENAME (file, name);
29538 else
29539 assemble_name (file, name);
29542 /* Output a TOC entry. We derive the entry name from what is being
29543 written. */
29545 void
29546 output_toc (FILE *file, rtx x, int labelno, machine_mode mode)
29548 char buf[256];
29549 const char *name = buf;
29550 rtx base = x;
29551 HOST_WIDE_INT offset = 0;
29553 gcc_assert (!TARGET_NO_TOC);
29555 /* When the linker won't eliminate them, don't output duplicate
29556 TOC entries (this happens on AIX if there is any kind of TOC,
29557 and on SVR4 under -fPIC or -mrelocatable). Don't do this for
29558 CODE_LABELs. */
29559 if (TARGET_TOC && GET_CODE (x) != LABEL_REF)
29561 struct toc_hash_struct *h;
29563 /* Create toc_hash_table. This can't be done at TARGET_OPTION_OVERRIDE
29564 time because GGC is not initialized at that point. */
29565 if (toc_hash_table == NULL)
29566 toc_hash_table = hash_table<toc_hasher>::create_ggc (1021);
29568 h = ggc_alloc<toc_hash_struct> ();
29569 h->key = x;
29570 h->key_mode = mode;
29571 h->labelno = labelno;
29573 toc_hash_struct **found = toc_hash_table->find_slot (h, INSERT);
29574 if (*found == NULL)
29575 *found = h;
29576 else /* This is indeed a duplicate.
29577 Set this label equal to that label. */
29579 fputs ("\t.set ", file);
29580 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "LC");
29581 fprintf (file, "%d,", labelno);
29582 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "LC");
29583 fprintf (file, "%d\n", ((*found)->labelno));
29585 #ifdef HAVE_AS_TLS
29586 if (TARGET_XCOFF && GET_CODE (x) == SYMBOL_REF
29587 && (SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_GLOBAL_DYNAMIC
29588 || SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC))
29590 fputs ("\t.set ", file);
29591 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "LCM");
29592 fprintf (file, "%d,", labelno);
29593 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "LCM");
29594 fprintf (file, "%d\n", ((*found)->labelno));
29596 #endif
29597 return;
29601 /* If we're going to put a double constant in the TOC, make sure it's
29602 aligned properly when strict alignment is on. */
29603 if ((CONST_DOUBLE_P (x) || CONST_WIDE_INT_P (x))
29604 && STRICT_ALIGNMENT
29605 && GET_MODE_BITSIZE (mode) >= 64
29606 && ! (TARGET_NO_FP_IN_TOC && ! TARGET_MINIMAL_TOC)) {
29607 ASM_OUTPUT_ALIGN (file, 3);
29610 (*targetm.asm_out.internal_label) (file, "LC", labelno);
29612 /* Handle FP constants specially. Note that if we have a minimal
29613 TOC, things we put here aren't actually in the TOC, so we can allow
29614 FP constants. */
29615 if (GET_CODE (x) == CONST_DOUBLE
29616 && (GET_MODE (x) == TFmode || GET_MODE (x) == TDmode
29617 || GET_MODE (x) == IFmode || GET_MODE (x) == KFmode))
29619 long k[4];
29621 if (DECIMAL_FLOAT_MODE_P (GET_MODE (x)))
29622 REAL_VALUE_TO_TARGET_DECIMAL128 (*CONST_DOUBLE_REAL_VALUE (x), k);
29623 else
29624 REAL_VALUE_TO_TARGET_LONG_DOUBLE (*CONST_DOUBLE_REAL_VALUE (x), k);
29626 if (TARGET_64BIT)
29628 if (TARGET_ELF || TARGET_MINIMAL_TOC)
29629 fputs (DOUBLE_INT_ASM_OP, file);
29630 else
29631 fprintf (file, "\t.tc FT_%lx_%lx_%lx_%lx[TC],",
29632 k[0] & 0xffffffff, k[1] & 0xffffffff,
29633 k[2] & 0xffffffff, k[3] & 0xffffffff);
29634 fprintf (file, "0x%lx%08lx,0x%lx%08lx\n",
29635 k[WORDS_BIG_ENDIAN ? 0 : 1] & 0xffffffff,
29636 k[WORDS_BIG_ENDIAN ? 1 : 0] & 0xffffffff,
29637 k[WORDS_BIG_ENDIAN ? 2 : 3] & 0xffffffff,
29638 k[WORDS_BIG_ENDIAN ? 3 : 2] & 0xffffffff);
29639 return;
29641 else
29643 if (TARGET_ELF || TARGET_MINIMAL_TOC)
29644 fputs ("\t.long ", file);
29645 else
29646 fprintf (file, "\t.tc FT_%lx_%lx_%lx_%lx[TC],",
29647 k[0] & 0xffffffff, k[1] & 0xffffffff,
29648 k[2] & 0xffffffff, k[3] & 0xffffffff);
29649 fprintf (file, "0x%lx,0x%lx,0x%lx,0x%lx\n",
29650 k[0] & 0xffffffff, k[1] & 0xffffffff,
29651 k[2] & 0xffffffff, k[3] & 0xffffffff);
29652 return;
29655 else if (GET_CODE (x) == CONST_DOUBLE
29656 && (GET_MODE (x) == DFmode || GET_MODE (x) == DDmode))
29658 long k[2];
29660 if (DECIMAL_FLOAT_MODE_P (GET_MODE (x)))
29661 REAL_VALUE_TO_TARGET_DECIMAL64 (*CONST_DOUBLE_REAL_VALUE (x), k);
29662 else
29663 REAL_VALUE_TO_TARGET_DOUBLE (*CONST_DOUBLE_REAL_VALUE (x), k);
29665 if (TARGET_64BIT)
29667 if (TARGET_ELF || TARGET_MINIMAL_TOC)
29668 fputs (DOUBLE_INT_ASM_OP, file);
29669 else
29670 fprintf (file, "\t.tc FD_%lx_%lx[TC],",
29671 k[0] & 0xffffffff, k[1] & 0xffffffff);
29672 fprintf (file, "0x%lx%08lx\n",
29673 k[WORDS_BIG_ENDIAN ? 0 : 1] & 0xffffffff,
29674 k[WORDS_BIG_ENDIAN ? 1 : 0] & 0xffffffff);
29675 return;
29677 else
29679 if (TARGET_ELF || TARGET_MINIMAL_TOC)
29680 fputs ("\t.long ", file);
29681 else
29682 fprintf (file, "\t.tc FD_%lx_%lx[TC],",
29683 k[0] & 0xffffffff, k[1] & 0xffffffff);
29684 fprintf (file, "0x%lx,0x%lx\n",
29685 k[0] & 0xffffffff, k[1] & 0xffffffff);
29686 return;
29689 else if (GET_CODE (x) == CONST_DOUBLE
29690 && (GET_MODE (x) == SFmode || GET_MODE (x) == SDmode))
29692 long l;
29694 if (DECIMAL_FLOAT_MODE_P (GET_MODE (x)))
29695 REAL_VALUE_TO_TARGET_DECIMAL32 (*CONST_DOUBLE_REAL_VALUE (x), l);
29696 else
29697 REAL_VALUE_TO_TARGET_SINGLE (*CONST_DOUBLE_REAL_VALUE (x), l);
29699 if (TARGET_64BIT)
29701 if (TARGET_ELF || TARGET_MINIMAL_TOC)
29702 fputs (DOUBLE_INT_ASM_OP, file);
29703 else
29704 fprintf (file, "\t.tc FS_%lx[TC],", l & 0xffffffff);
29705 if (WORDS_BIG_ENDIAN)
29706 fprintf (file, "0x%lx00000000\n", l & 0xffffffff);
29707 else
29708 fprintf (file, "0x%lx\n", l & 0xffffffff);
29709 return;
29711 else
29713 if (TARGET_ELF || TARGET_MINIMAL_TOC)
29714 fputs ("\t.long ", file);
29715 else
29716 fprintf (file, "\t.tc FS_%lx[TC],", l & 0xffffffff);
29717 fprintf (file, "0x%lx\n", l & 0xffffffff);
29718 return;
29721 else if (GET_MODE (x) == VOIDmode && GET_CODE (x) == CONST_INT)
29723 unsigned HOST_WIDE_INT low;
29724 HOST_WIDE_INT high;
29726 low = INTVAL (x) & 0xffffffff;
29727 high = (HOST_WIDE_INT) INTVAL (x) >> 32;
29729 /* TOC entries are always Pmode-sized, so when big-endian
29730 smaller integer constants in the TOC need to be padded.
29731 (This is still a win over putting the constants in
29732 a separate constant pool, because then we'd have
29733 to have both a TOC entry _and_ the actual constant.)
29735 For a 32-bit target, CONST_INT values are loaded and shifted
29736 entirely within `low' and can be stored in one TOC entry. */
29738 /* It would be easy to make this work, but it doesn't now. */
29739 gcc_assert (!TARGET_64BIT || POINTER_SIZE >= GET_MODE_BITSIZE (mode));
29741 if (WORDS_BIG_ENDIAN && POINTER_SIZE > GET_MODE_BITSIZE (mode))
29743 low |= high << 32;
29744 low <<= POINTER_SIZE - GET_MODE_BITSIZE (mode);
29745 high = (HOST_WIDE_INT) low >> 32;
29746 low &= 0xffffffff;
29749 if (TARGET_64BIT)
29751 if (TARGET_ELF || TARGET_MINIMAL_TOC)
29752 fputs (DOUBLE_INT_ASM_OP, file);
29753 else
29754 fprintf (file, "\t.tc ID_%lx_%lx[TC],",
29755 (long) high & 0xffffffff, (long) low & 0xffffffff);
29756 fprintf (file, "0x%lx%08lx\n",
29757 (long) high & 0xffffffff, (long) low & 0xffffffff);
29758 return;
29760 else
29762 if (POINTER_SIZE < GET_MODE_BITSIZE (mode))
29764 if (TARGET_ELF || TARGET_MINIMAL_TOC)
29765 fputs ("\t.long ", file);
29766 else
29767 fprintf (file, "\t.tc ID_%lx_%lx[TC],",
29768 (long) high & 0xffffffff, (long) low & 0xffffffff);
29769 fprintf (file, "0x%lx,0x%lx\n",
29770 (long) high & 0xffffffff, (long) low & 0xffffffff);
29772 else
29774 if (TARGET_ELF || TARGET_MINIMAL_TOC)
29775 fputs ("\t.long ", file);
29776 else
29777 fprintf (file, "\t.tc IS_%lx[TC],", (long) low & 0xffffffff);
29778 fprintf (file, "0x%lx\n", (long) low & 0xffffffff);
29780 return;
29784 if (GET_CODE (x) == CONST)
29786 gcc_assert (GET_CODE (XEXP (x, 0)) == PLUS
29787 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT);
29789 base = XEXP (XEXP (x, 0), 0);
29790 offset = INTVAL (XEXP (XEXP (x, 0), 1));
29793 switch (GET_CODE (base))
29795 case SYMBOL_REF:
29796 name = XSTR (base, 0);
29797 break;
29799 case LABEL_REF:
29800 ASM_GENERATE_INTERNAL_LABEL (buf, "L",
29801 CODE_LABEL_NUMBER (XEXP (base, 0)));
29802 break;
29804 case CODE_LABEL:
29805 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (base));
29806 break;
29808 default:
29809 gcc_unreachable ();
29812 if (TARGET_ELF || TARGET_MINIMAL_TOC)
29813 fputs (TARGET_32BIT ? "\t.long " : DOUBLE_INT_ASM_OP, file);
29814 else
29816 fputs ("\t.tc ", file);
29817 RS6000_OUTPUT_BASENAME (file, name);
29819 if (offset < 0)
29820 fprintf (file, ".N" HOST_WIDE_INT_PRINT_UNSIGNED, - offset);
29821 else if (offset)
29822 fprintf (file, ".P" HOST_WIDE_INT_PRINT_UNSIGNED, offset);
29824 /* Mark large TOC symbols on AIX with [TE] so they are mapped
29825 after other TOC symbols, reducing overflow of small TOC access
29826 to [TC] symbols. */
29827 fputs (TARGET_XCOFF && TARGET_CMODEL != CMODEL_SMALL
29828 ? "[TE]," : "[TC],", file);
29831 /* Currently C++ toc references to vtables can be emitted before it
29832 is decided whether the vtable is public or private. If this is
29833 the case, then the linker will eventually complain that there is
29834 a TOC reference to an unknown section. Thus, for vtables only,
29835 we emit the TOC reference to reference the symbol and not the
29836 section. */
29837 if (VTABLE_NAME_P (name))
29839 RS6000_OUTPUT_BASENAME (file, name);
29840 if (offset < 0)
29841 fprintf (file, HOST_WIDE_INT_PRINT_DEC, offset);
29842 else if (offset > 0)
29843 fprintf (file, "+" HOST_WIDE_INT_PRINT_DEC, offset);
29845 else
29846 output_addr_const (file, x);
29848 #if HAVE_AS_TLS
29849 if (TARGET_XCOFF && GET_CODE (base) == SYMBOL_REF)
29851 switch (SYMBOL_REF_TLS_MODEL (base))
29853 case 0:
29854 break;
29855 case TLS_MODEL_LOCAL_EXEC:
29856 fputs ("@le", file);
29857 break;
29858 case TLS_MODEL_INITIAL_EXEC:
29859 fputs ("@ie", file);
29860 break;
29861 /* Use global-dynamic for local-dynamic. */
29862 case TLS_MODEL_GLOBAL_DYNAMIC:
29863 case TLS_MODEL_LOCAL_DYNAMIC:
29864 putc ('\n', file);
29865 (*targetm.asm_out.internal_label) (file, "LCM", labelno);
29866 fputs ("\t.tc .", file);
29867 RS6000_OUTPUT_BASENAME (file, name);
29868 fputs ("[TC],", file);
29869 output_addr_const (file, x);
29870 fputs ("@m", file);
29871 break;
29872 default:
29873 gcc_unreachable ();
29876 #endif
29878 putc ('\n', file);
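/* Illustrative only (label number and exact label syntax made up):
   on 64-bit AIX with a full-size TOC, the DFmode constant 1.0 comes
   out roughly as

	LC..7:
	.tc FD_3ff00000_0[TC],0x3ff0000000000000

   whereas under -mminimal-toc or on ELF the same constant is emitted
   as a bare 64-bit word directive after the internal label.  */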
29881 /* Output an assembler pseudo-op to write an ASCII string of N characters
29882 starting at P to FILE.
29884 On the RS/6000, we have to do this using the .byte operation and
29885 write out special characters outside the quoted string.
29886 Also, the assembler is broken; very long strings are truncated,
29887 so we must artificially break them up early. */
29889 void
29890 output_ascii (FILE *file, const char *p, int n)
29892 char c;
29893 int i, count_string;
29894 const char *for_string = "\t.byte \"";
29895 const char *for_decimal = "\t.byte ";
29896 const char *to_close = NULL;
29898 count_string = 0;
29899 for (i = 0; i < n; i++)
29901 c = *p++;
29902 if (c >= ' ' && c < 0177)
29904 if (for_string)
29905 fputs (for_string, file);
29906 putc (c, file);
29908 /* Write two quotes to get one. */
29909 if (c == '"')
29911 putc (c, file);
29912 ++count_string;
29915 for_string = NULL;
29916 for_decimal = "\"\n\t.byte ";
29917 to_close = "\"\n";
29918 ++count_string;
29920 if (count_string >= 512)
29922 fputs (to_close, file);
29924 for_string = "\t.byte \"";
29925 for_decimal = "\t.byte ";
29926 to_close = NULL;
29927 count_string = 0;
29930 else
29932 if (for_decimal)
29933 fputs (for_decimal, file);
29934 fprintf (file, "%d", c);
29936 for_string = "\n\t.byte \"";
29937 for_decimal = ", ";
29938 to_close = "\n";
29939 count_string = 0;
29943 /* Now close the string if we have written one. Then end the line. */
29944 if (to_close)
29945 fputs (to_close, file);
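/* Example: output_ascii (file, "He said \"hi\"\n", 13) produces

	.byte "He said ""hi"""
	.byte 10

   printable characters are batched into quoted runs with quote
   characters doubled, and everything else falls back to decimal
   .byte values.  */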
29948 /* Generate a unique section name for FILENAME for a section type
29949 represented by SECTION_DESC. Output goes into BUF.
29951 SECTION_DESC can be any string, as long as it is different for each
29952 possible section type.
29954 We name the section in the same manner as xlc. The name begins with an
29955 underscore followed by the filename (after stripping any leading directory
29956 names) with the last period and anything after it replaced by the string
29957 SECTION_DESC; non-alphanumeric characters are dropped. If FILENAME does
29958 not contain a period, SECTION_DESC is appended to the end of the name. */
29960 void
29961 rs6000_gen_section_name (char **buf, const char *filename,
29962 const char *section_desc)
29964 const char *q, *after_last_slash, *last_period = 0;
29965 char *p;
29966 int len;
29968 after_last_slash = filename;
29969 for (q = filename; *q; q++)
29971 if (*q == '/')
29972 after_last_slash = q + 1;
29973 else if (*q == '.')
29974 last_period = q;
29977 len = strlen (after_last_slash) + strlen (section_desc) + 2;
29978 *buf = (char *) xmalloc (len);
29980 p = *buf;
29981 *p++ = '_';
29983 for (q = after_last_slash; *q; q++)
29985 if (q == last_period)
29987 strcpy (p, section_desc);
29988 p += strlen (section_desc);
29989 break;
29992 else if (ISALNUM (*q))
29993 *p++ = *q;
29996 if (last_period == 0)
29997 strcpy (p, section_desc);
29998 else
29999 *p = '\0';
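/* Example: rs6000_gen_section_name (&buf, "src/foo.c", "_bss") sets
   buf to "_foo_bss"; the directory prefix is stripped and the last
   period, together with the "c" after it, is replaced by "_bss".  */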
30002 /* Emit profile function. */
30004 void
30005 output_profile_hook (int labelno ATTRIBUTE_UNUSED)
30007 /* Non-standard profiling for kernels, which just saves LR then calls
30008 _mcount without worrying about arg saves. The idea is to change
30009 the function prologue as little as possible as it isn't easy to
30010 account for arg save/restore code added just for _mcount. */
30011 if (TARGET_PROFILE_KERNEL)
30012 return;
30014 if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
30016 #ifndef NO_PROFILE_COUNTERS
30017 # define NO_PROFILE_COUNTERS 0
30018 #endif
30019 if (NO_PROFILE_COUNTERS)
30020 emit_library_call (init_one_libfunc (RS6000_MCOUNT),
30021 LCT_NORMAL, VOIDmode, 0);
30022 else
30024 char buf[30];
30025 const char *label_name;
30026 rtx fun;
30028 ASM_GENERATE_INTERNAL_LABEL (buf, "LP", labelno);
30029 label_name = ggc_strdup ((*targetm.strip_name_encoding) (buf));
30030 fun = gen_rtx_SYMBOL_REF (Pmode, label_name);
30032 emit_library_call (init_one_libfunc (RS6000_MCOUNT),
30033 LCT_NORMAL, VOIDmode, 1, fun, Pmode);
30036 else if (DEFAULT_ABI == ABI_DARWIN)
30038 const char *mcount_name = RS6000_MCOUNT;
30039 int caller_addr_regno = LR_REGNO;
30041 /* Be conservative and always set this, at least for now. */
30042 crtl->uses_pic_offset_table = 1;
30044 #if TARGET_MACHO
30045 /* For PIC code, set up a stub and collect the caller's address
30046 from r0, which is where the prologue puts it. */
30047 if (MACHOPIC_INDIRECT
30048 && crtl->uses_pic_offset_table)
30049 caller_addr_regno = 0;
30050 #endif
30051 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, mcount_name),
30052 LCT_NORMAL, VOIDmode, 1,
30053 gen_rtx_REG (Pmode, caller_addr_regno), Pmode);
30057 /* Write function profiler code. */
30059 void
30060 output_function_profiler (FILE *file, int labelno)
30062 char buf[100];
30064 switch (DEFAULT_ABI)
30066 default:
30067 gcc_unreachable ();
30069 case ABI_V4:
30070 if (!TARGET_32BIT)
30072 warning (0, "no profiling of 64-bit code for this ABI");
30073 return;
30075 ASM_GENERATE_INTERNAL_LABEL (buf, "LP", labelno);
30076 fprintf (file, "\tmflr %s\n", reg_names[0]);
30077 if (NO_PROFILE_COUNTERS)
30079 asm_fprintf (file, "\tstw %s,4(%s)\n",
30080 reg_names[0], reg_names[1]);
30082 else if (TARGET_SECURE_PLT && flag_pic)
30084 if (TARGET_LINK_STACK)
30086 char name[32];
30087 get_ppc476_thunk_name (name);
30088 asm_fprintf (file, "\tbl %s\n", name);
30090 else
30091 asm_fprintf (file, "\tbcl 20,31,1f\n1:\n");
30092 asm_fprintf (file, "\tstw %s,4(%s)\n",
30093 reg_names[0], reg_names[1]);
30094 asm_fprintf (file, "\tmflr %s\n", reg_names[12]);
30095 asm_fprintf (file, "\taddis %s,%s,",
30096 reg_names[12], reg_names[12]);
30097 assemble_name (file, buf);
30098 asm_fprintf (file, "-1b@ha\n\tla %s,", reg_names[0]);
30099 assemble_name (file, buf);
30100 asm_fprintf (file, "-1b@l(%s)\n", reg_names[12]);
30102 else if (flag_pic == 1)
30104 fputs ("\tbl _GLOBAL_OFFSET_TABLE_@local-4\n", file);
30105 asm_fprintf (file, "\tstw %s,4(%s)\n",
30106 reg_names[0], reg_names[1]);
30107 asm_fprintf (file, "\tmflr %s\n", reg_names[12]);
30108 asm_fprintf (file, "\tlwz %s,", reg_names[0]);
30109 assemble_name (file, buf);
30110 asm_fprintf (file, "@got(%s)\n", reg_names[12]);
30112 else if (flag_pic > 1)
30114 asm_fprintf (file, "\tstw %s,4(%s)\n",
30115 reg_names[0], reg_names[1]);
30116 /* Now, we need to get the address of the label. */
30117 if (TARGET_LINK_STACK)
30119 char name[32];
30120 get_ppc476_thunk_name (name);
30121 asm_fprintf (file, "\tbl %s\n\tb 1f\n\t.long ", name);
30122 assemble_name (file, buf);
30123 fputs ("-.\n1:", file);
30124 asm_fprintf (file, "\tmflr %s\n", reg_names[11]);
30125 asm_fprintf (file, "\taddi %s,%s,4\n",
30126 reg_names[11], reg_names[11]);
30128 else
30130 fputs ("\tbcl 20,31,1f\n\t.long ", file);
30131 assemble_name (file, buf);
30132 fputs ("-.\n1:", file);
30133 asm_fprintf (file, "\tmflr %s\n", reg_names[11]);
30135 asm_fprintf (file, "\tlwz %s,0(%s)\n",
30136 reg_names[0], reg_names[11]);
30137 asm_fprintf (file, "\tadd %s,%s,%s\n",
30138 reg_names[0], reg_names[0], reg_names[11]);
30140 else
30142 asm_fprintf (file, "\tlis %s,", reg_names[12]);
30143 assemble_name (file, buf);
30144 fputs ("@ha\n", file);
30145 asm_fprintf (file, "\tstw %s,4(%s)\n",
30146 reg_names[0], reg_names[1]);
30147 asm_fprintf (file, "\tla %s,", reg_names[0]);
30148 assemble_name (file, buf);
30149 asm_fprintf (file, "@l(%s)\n", reg_names[12]);
30152 /* ABI_V4 saves the static chain reg with ASM_OUTPUT_REG_PUSH. */
30153 fprintf (file, "\tbl %s%s\n",
30154 RS6000_MCOUNT, flag_pic ? "@plt" : "");
30155 break;
30157 case ABI_AIX:
30158 case ABI_ELFv2:
30159 case ABI_DARWIN:
30160 /* Don't do anything, done in output_profile_hook (). */
30161 break;
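/* Illustrative only (internal label name depends on the target's
   label syntax): for ABI_V4, 32-bit, non-PIC, with profile counters
   enabled, the code emitted above comes out roughly as

	mflr 0
	lis 12,.LP0@ha
	stw 0,4(1)
	la 0,.LP0@l(12)
	bl _mcount  */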
30167 /* The following variable value is the last issued insn. */
30169 static rtx_insn *last_scheduled_insn;
30171 /* The following variable helps to balance issuing of load and
30172 store instructions. */
30174 static int load_store_pendulum;
30176 /* The following variable helps pair divide insns during scheduling. */
30177 static int divide_cnt;
30178 /* The following variable helps pair and alternate vector and vector load
30179 insns during scheduling. */
30180 static int vec_pairing;
30183 /* Power4 load update and store update instructions are cracked into a
30184 load or store and an integer insn which are executed in the same cycle.
30185 Branches have their own dispatch slot which does not count against the
30186 GCC issue rate, but it changes the program flow so there are no other
30187 instructions to issue in this cycle. */
30189 static int
30190 rs6000_variable_issue_1 (rtx_insn *insn, int more)
30192 last_scheduled_insn = insn;
30193 if (GET_CODE (PATTERN (insn)) == USE
30194 || GET_CODE (PATTERN (insn)) == CLOBBER)
30196 cached_can_issue_more = more;
30197 return cached_can_issue_more;
30200 if (insn_terminates_group_p (insn, current_group))
30202 cached_can_issue_more = 0;
30203 return cached_can_issue_more;
30206 /* If the insn has no reservation but we reach here anyway, do not
30207 charge it against the issue rate. */
30207 if (recog_memoized (insn) < 0)
30208 return more;
30210 if (rs6000_sched_groups)
30212 if (is_microcoded_insn (insn))
30213 cached_can_issue_more = 0;
30214 else if (is_cracked_insn (insn))
30215 cached_can_issue_more = more > 2 ? more - 2 : 0;
30216 else
30217 cached_can_issue_more = more - 1;
30219 return cached_can_issue_more;
30222 if (rs6000_cpu_attr == CPU_CELL && is_nonpipeline_insn (insn))
30223 return 0;
30225 cached_can_issue_more = more - 1;
30226 return cached_can_issue_more;
30229 static int
30230 rs6000_variable_issue (FILE *stream, int verbose, rtx_insn *insn, int more)
30232 int r = rs6000_variable_issue_1 (insn, more);
30233 if (verbose)
30234 fprintf (stream, "// rs6000_variable_issue (more = %d) = %d\n", more, r);
30235 return r;
30238 /* Adjust the cost of a scheduling dependency. Return the new cost of
30239 a dependency LINK or INSN on DEP_INSN. COST is the current cost. */
30241 static int
30242 rs6000_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep_insn, int cost,
30243 unsigned int)
30245 enum attr_type attr_type;
30247 if (recog_memoized (insn) < 0 || recog_memoized (dep_insn) < 0)
30248 return cost;
30250 switch (dep_type)
30252 case REG_DEP_TRUE:
30254 /* Data dependency; DEP_INSN writes a register that INSN reads
30255 some cycles later. */
30257 /* Separate a load from a narrower, dependent store. */
30258 if ((rs6000_sched_groups || rs6000_cpu_attr == CPU_POWER9)
30259 && GET_CODE (PATTERN (insn)) == SET
30260 && GET_CODE (PATTERN (dep_insn)) == SET
30261 && GET_CODE (XEXP (PATTERN (insn), 1)) == MEM
30262 && GET_CODE (XEXP (PATTERN (dep_insn), 0)) == MEM
30263 && (GET_MODE_SIZE (GET_MODE (XEXP (PATTERN (insn), 1)))
30264 > GET_MODE_SIZE (GET_MODE (XEXP (PATTERN (dep_insn), 0)))))
30265 return cost + 14;
30267 attr_type = get_attr_type (insn);
30269 switch (attr_type)
30271 case TYPE_JMPREG:
30272 /* Tell the first scheduling pass about the latency between
30273 a mtctr and bctr (and mtlr and br/blr). The first
30274 scheduling pass will not know about this latency since
30275 the mtctr instruction, which has the latency associated
30276 to it, will be generated by reload. */
30277 return 4;
30278 case TYPE_BRANCH:
30279 /* Leave some extra cycles between a compare and its
30280 dependent branch, to inhibit expensive mispredicts. */
30281 if ((rs6000_cpu_attr == CPU_PPC603
30282 || rs6000_cpu_attr == CPU_PPC604
30283 || rs6000_cpu_attr == CPU_PPC604E
30284 || rs6000_cpu_attr == CPU_PPC620
30285 || rs6000_cpu_attr == CPU_PPC630
30286 || rs6000_cpu_attr == CPU_PPC750
30287 || rs6000_cpu_attr == CPU_PPC7400
30288 || rs6000_cpu_attr == CPU_PPC7450
30289 || rs6000_cpu_attr == CPU_PPCE5500
30290 || rs6000_cpu_attr == CPU_PPCE6500
30291 || rs6000_cpu_attr == CPU_POWER4
30292 || rs6000_cpu_attr == CPU_POWER5
30293 || rs6000_cpu_attr == CPU_POWER7
30294 || rs6000_cpu_attr == CPU_POWER8
30295 || rs6000_cpu_attr == CPU_POWER9
30296 || rs6000_cpu_attr == CPU_CELL)
30297 && recog_memoized (dep_insn)
30298 && (INSN_CODE (dep_insn) >= 0))
30300 switch (get_attr_type (dep_insn))
30302 case TYPE_CMP:
30303 case TYPE_FPCOMPARE:
30304 case TYPE_CR_LOGICAL:
30305 case TYPE_DELAYED_CR:
30306 return cost + 2;
30307 case TYPE_EXTS:
30308 case TYPE_MUL:
30309 if (get_attr_dot (dep_insn) == DOT_YES)
30310 return cost + 2;
30311 else
30312 break;
30313 case TYPE_SHIFT:
30314 if (get_attr_dot (dep_insn) == DOT_YES
30315 && get_attr_var_shift (dep_insn) == VAR_SHIFT_NO)
30316 return cost + 2;
30317 else
30318 break;
30319 default:
30320 break;
30322 break;
30324 case TYPE_STORE:
30325 case TYPE_FPSTORE:
30326 if ((rs6000_cpu == PROCESSOR_POWER6)
30327 && recog_memoized (dep_insn)
30328 && (INSN_CODE (dep_insn) >= 0))
30331 if (GET_CODE (PATTERN (insn)) != SET)
30332 /* If this happens, we have to extend this to schedule
30333 optimally. Return default for now. */
30334 return cost;
30336 /* Adjust the cost for the case where the value written
30337 by a fixed point operation is used as the address
30338 gen value on a store. */
30339 switch (get_attr_type (dep_insn))
30341 case TYPE_LOAD:
30342 case TYPE_CNTLZ:
30344 if (! rs6000_store_data_bypass_p (dep_insn, insn))
30345 return get_attr_sign_extend (dep_insn)
30346 == SIGN_EXTEND_YES ? 6 : 4;
30347 break;
30349 case TYPE_SHIFT:
30351 if (! rs6000_store_data_bypass_p (dep_insn, insn))
30352 return get_attr_var_shift (dep_insn) == VAR_SHIFT_YES ?
30353 6 : 3;
30354 break;
30356 case TYPE_INTEGER:
30357 case TYPE_ADD:
30358 case TYPE_LOGICAL:
30359 case TYPE_EXTS:
30360 case TYPE_INSERT:
30362 if (! rs6000_store_data_bypass_p (dep_insn, insn))
30363 return 3;
30364 break;
30366 case TYPE_STORE:
30367 case TYPE_FPLOAD:
30368 case TYPE_FPSTORE:
30370 if (get_attr_update (dep_insn) == UPDATE_YES
30371 && ! rs6000_store_data_bypass_p (dep_insn, insn))
30372 return 3;
30373 break;
30375 case TYPE_MUL:
30377 if (! rs6000_store_data_bypass_p (dep_insn, insn))
30378 return 17;
30379 break;
30381 case TYPE_DIV:
30383 if (! rs6000_store_data_bypass_p (dep_insn, insn))
30384 return get_attr_size (dep_insn) == SIZE_32 ? 45 : 57;
30385 break;
30387 default:
30388 break;
30391 break;
30393 case TYPE_LOAD:
30394 if ((rs6000_cpu == PROCESSOR_POWER6)
30395 && recog_memoized (dep_insn)
30396 && (INSN_CODE (dep_insn) >= 0))
30399 /* Adjust the cost for the case where the value written
30400 by a fixed point instruction is used within the address
30401 gen portion of a subsequent load(u)(x). */
30402 switch (get_attr_type (dep_insn))
30404 case TYPE_LOAD:
30405 case TYPE_CNTLZ:
30407 if (set_to_load_agen (dep_insn, insn))
30408 return get_attr_sign_extend (dep_insn)
30409 == SIGN_EXTEND_YES ? 6 : 4;
30410 break;
30412 case TYPE_SHIFT:
30414 if (set_to_load_agen (dep_insn, insn))
30415 return get_attr_var_shift (dep_insn) == VAR_SHIFT_YES ?
30416 6 : 3;
30417 break;
30419 case TYPE_INTEGER:
30420 case TYPE_ADD:
30421 case TYPE_LOGICAL:
30422 case TYPE_EXTS:
30423 case TYPE_INSERT:
30425 if (set_to_load_agen (dep_insn, insn))
30426 return 3;
30427 break;
30429 case TYPE_STORE:
30430 case TYPE_FPLOAD:
30431 case TYPE_FPSTORE:
30433 if (get_attr_update (dep_insn) == UPDATE_YES
30434 && set_to_load_agen (dep_insn, insn))
30435 return 3;
30436 break;
30438 case TYPE_MUL:
30440 if (set_to_load_agen (dep_insn, insn))
30441 return 17;
30442 break;
30444 case TYPE_DIV:
30446 if (set_to_load_agen (dep_insn, insn))
30447 return get_attr_size (dep_insn) == SIZE_32 ? 45 : 57;
30448 break;
30450 default:
30451 break;
30454 break;
30456 case TYPE_FPLOAD:
30457 if ((rs6000_cpu == PROCESSOR_POWER6)
30458 && get_attr_update (insn) == UPDATE_NO
30459 && recog_memoized (dep_insn)
30460 && (INSN_CODE (dep_insn) >= 0)
30461 && (get_attr_type (dep_insn) == TYPE_MFFGPR))
30462 return 2;
30464 default:
30465 break;
30468 /* Fall out to return default cost. */
30470 break;
30472 case REG_DEP_OUTPUT:
30473 /* Output dependency; DEP_INSN writes a register that INSN writes some
30474 cycles later. */
30475 if ((rs6000_cpu == PROCESSOR_POWER6)
30476 && recog_memoized (dep_insn)
30477 && (INSN_CODE (dep_insn) >= 0))
30479 attr_type = get_attr_type (insn);
30481 switch (attr_type)
30483 case TYPE_FP:
30484 case TYPE_FPSIMPLE:
30485 if (get_attr_type (dep_insn) == TYPE_FP
30486 || get_attr_type (dep_insn) == TYPE_FPSIMPLE)
30487 return 1;
30488 break;
30489 case TYPE_FPLOAD:
30490 if (get_attr_update (insn) == UPDATE_NO
30491 && get_attr_type (dep_insn) == TYPE_MFFGPR)
30492 return 2;
30493 break;
30494 default:
30495 break;
30498 /* Fall through, no cost for output dependency. */
30499 /* FALLTHRU */
30501 case REG_DEP_ANTI:
30502 /* Anti dependency; DEP_INSN reads a register that INSN writes some
30503 cycles later. */
30504 return 0;
30506 default:
30507 gcc_unreachable ();
30510 return cost;
30513 /* Debug version of rs6000_adjust_cost. */
30515 static int
30516 rs6000_debug_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep_insn,
30517 int cost, unsigned int dw)
30519 int ret = rs6000_adjust_cost (insn, dep_type, dep_insn, cost, dw);
30521 if (ret != cost)
30523 const char *dep;
30525 switch (dep_type)
30527 default: dep = "unknown dependency"; break;
30528 case REG_DEP_TRUE: dep = "data dependency"; break;
30529 case REG_DEP_OUTPUT: dep = "output dependency"; break;
30530 case REG_DEP_ANTI: dep = "anti dependency"; break;
30533 fprintf (stderr,
30534 "\nrs6000_adjust_cost, final cost = %d, orig cost = %d, "
30535 "%s, insn:\n", ret, cost, dep);
30537 debug_rtx (insn);
30540 return ret;
30543 /* The function returns true if INSN is microcoded.
30544 Return false otherwise. */
30546 static bool
30547 is_microcoded_insn (rtx_insn *insn)
30549 if (!insn || !NONDEBUG_INSN_P (insn)
30550 || GET_CODE (PATTERN (insn)) == USE
30551 || GET_CODE (PATTERN (insn)) == CLOBBER)
30552 return false;
30554 if (rs6000_cpu_attr == CPU_CELL)
30555 return get_attr_cell_micro (insn) == CELL_MICRO_ALWAYS;
30557 if (rs6000_sched_groups
30558 && (rs6000_cpu == PROCESSOR_POWER4 || rs6000_cpu == PROCESSOR_POWER5))
30560 enum attr_type type = get_attr_type (insn);
30561 if ((type == TYPE_LOAD
30562 && get_attr_update (insn) == UPDATE_YES
30563 && get_attr_sign_extend (insn) == SIGN_EXTEND_YES)
30564 || ((type == TYPE_LOAD || type == TYPE_STORE)
30565 && get_attr_update (insn) == UPDATE_YES
30566 && get_attr_indexed (insn) == INDEXED_YES)
30567 || type == TYPE_MFCR)
30568 return true;
30571 return false;
30574 /* The function returns true if INSN is cracked into 2 instructions
30575 by the processor (and therefore occupies 2 issue slots). */
30577 static bool
30578 is_cracked_insn (rtx_insn *insn)
30580 if (!insn || !NONDEBUG_INSN_P (insn)
30581 || GET_CODE (PATTERN (insn)) == USE
30582 || GET_CODE (PATTERN (insn)) == CLOBBER)
30583 return false;
30585 if (rs6000_sched_groups
30586 && (rs6000_cpu == PROCESSOR_POWER4 || rs6000_cpu == PROCESSOR_POWER5))
30588 enum attr_type type = get_attr_type (insn);
30589 if ((type == TYPE_LOAD
30590 && get_attr_sign_extend (insn) == SIGN_EXTEND_YES
30591 && get_attr_update (insn) == UPDATE_NO)
30592 || (type == TYPE_LOAD
30593 && get_attr_sign_extend (insn) == SIGN_EXTEND_NO
30594 && get_attr_update (insn) == UPDATE_YES
30595 && get_attr_indexed (insn) == INDEXED_NO)
30596 || (type == TYPE_STORE
30597 && get_attr_update (insn) == UPDATE_YES
30598 && get_attr_indexed (insn) == INDEXED_NO)
30599 || ((type == TYPE_FPLOAD || type == TYPE_FPSTORE)
30600 && get_attr_update (insn) == UPDATE_YES)
30601 || type == TYPE_DELAYED_CR
30602 || (type == TYPE_EXTS
30603 && get_attr_dot (insn) == DOT_YES)
30604 || (type == TYPE_SHIFT
30605 && get_attr_dot (insn) == DOT_YES
30606 && get_attr_var_shift (insn) == VAR_SHIFT_NO)
30607 || (type == TYPE_MUL
30608 && get_attr_dot (insn) == DOT_YES)
30609 || type == TYPE_DIV
30610 || (type == TYPE_INSERT
30611 && get_attr_size (insn) == SIZE_32))
30612 return true;
30615 return false;
30618 /* The function returns true if INSN can be issued only from
30619 the branch slot. */
30621 static bool
30622 is_branch_slot_insn (rtx_insn *insn)
30624 if (!insn || !NONDEBUG_INSN_P (insn)
30625 || GET_CODE (PATTERN (insn)) == USE
30626 || GET_CODE (PATTERN (insn)) == CLOBBER)
30627 return false;
30629 if (rs6000_sched_groups)
30631 enum attr_type type = get_attr_type (insn);
30632 if (type == TYPE_BRANCH || type == TYPE_JMPREG)
30633 return true;
30634 return false;
30637 return false;
30640 /* The function returns true if OUT_INSN sets a value that is
30641 used in the address generation computation of IN_INSN. */
30642 static bool
30643 set_to_load_agen (rtx_insn *out_insn, rtx_insn *in_insn)
30645 rtx out_set, in_set;
30647 /* For performance reasons, only handle the simple case where
30648 both insns are a single_set. */
30649 out_set = single_set (out_insn);
30650 if (out_set)
30652 in_set = single_set (in_insn);
30653 if (in_set)
30654 return reg_mentioned_p (SET_DEST (out_set), SET_SRC (in_set));
30657 return false;
30660 /* Try to determine base/offset/size parts of the given MEM.
30661 Return true if successful, false if any of the values cannot
30662 be determined.
30664 This function only looks for REG or REG+CONST address forms.
30665 The REG+REG address form will return false. */
30667 static bool
30668 get_memref_parts (rtx mem, rtx *base, HOST_WIDE_INT *offset,
30669 HOST_WIDE_INT *size)
30671 rtx addr_rtx;
30672 if (MEM_SIZE_KNOWN_P (mem))
30673 *size = MEM_SIZE (mem);
30674 else
30675 return false;
30677 addr_rtx = (XEXP (mem, 0));
30678 if (GET_CODE (addr_rtx) == PRE_MODIFY)
30679 addr_rtx = XEXP (addr_rtx, 1);
30681 *offset = 0;
30682 while (GET_CODE (addr_rtx) == PLUS
30683 && CONST_INT_P (XEXP (addr_rtx, 1)))
30685 *offset += INTVAL (XEXP (addr_rtx, 1));
30686 addr_rtx = XEXP (addr_rtx, 0);
30688 if (!REG_P (addr_rtx))
30689 return false;
30691 *base = addr_rtx;
30692 return true;
30695 /* Return true if the target storage location of MEM1 is adjacent
30696 to the target storage location of MEM2. */
30699 static bool
30700 adjacent_mem_locations (rtx mem1, rtx mem2)
30702 rtx reg1, reg2;
30703 HOST_WIDE_INT off1, size1, off2, size2;
30705 if (get_memref_parts (mem1, &reg1, &off1, &size1)
30706 && get_memref_parts (mem2, &reg2, &off2, &size2))
30707 return ((REGNO (reg1) == REGNO (reg2))
30708 && ((off1 + size1 == off2)
30709 || (off2 + size2 == off1)));
30711 return false;
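/* Example: two MEMs with base register r9, offsets 0 and 8, and
   sizes 8 and 4 are adjacent (the first ends exactly where the
   second begins); the test is symmetric in MEM1 and MEM2.  */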
30714 /* This function returns true if it can be determined that the two MEM
30715 locations overlap by at least 1 byte based on base reg/offset/size. */
30717 static bool
30718 mem_locations_overlap (rtx mem1, rtx mem2)
30720 rtx reg1, reg2;
30721 HOST_WIDE_INT off1, size1, off2, size2;
30723 if (get_memref_parts (mem1, &reg1, &off1, &size1)
30724 && get_memref_parts (mem2, &reg2, &off2, &size2))
30725 return ((REGNO (reg1) == REGNO (reg2))
30726 && (((off1 <= off2) && (off1 + size1 > off2))
30727 || ((off2 <= off1) && (off2 + size2 > off1))));
30729 return false;
30732 /* A C statement (sans semicolon) to update the integer scheduling
30733 priority INSN_PRIORITY (INSN). Increase the priority to execute the
30734 INSN earlier, reduce the priority to execute INSN later. Do not
30735 define this macro if you do not need to adjust the scheduling
30736 priorities of insns. */
30738 static int
30739 rs6000_adjust_priority (rtx_insn *insn ATTRIBUTE_UNUSED, int priority)
30741 rtx load_mem, str_mem;
30742 /* On machines (like the 750) which have asymmetric integer units,
30743 where one integer unit can do multiply and divides and the other
30744 can't, reduce the priority of multiply/divide insns so that other
30745 integer operations are scheduled ahead of them. */
30747 #if 0
30748 if (! INSN_P (insn))
30749 return priority;
30751 if (GET_CODE (PATTERN (insn)) == USE)
30752 return priority;
30754 switch (rs6000_cpu_attr) {
30755 case CPU_PPC750:
30756 switch (get_attr_type (insn))
30758 default:
30759 break;
30761 case TYPE_MUL:
30762 case TYPE_DIV:
30763 fprintf (stderr, "priority was %#x (%d) before adjustment\n",
30764 priority, priority);
30765 if (priority >= 0 && priority < 0x01000000)
30766 priority >>= 3;
30767 break;
30770 #endif
30772 if (insn_must_be_first_in_group (insn)
30773 && reload_completed
30774 && current_sched_info->sched_max_insns_priority
30775 && rs6000_sched_restricted_insns_priority)
30778 /* Prioritize insns that can be dispatched only in the first
30779 dispatch slot. */
30780 if (rs6000_sched_restricted_insns_priority == 1)
30781 /* Attach highest priority to insn. This means that in
30782 haifa-sched.c:ready_sort(), dispatch-slot restriction considerations
30783 precede 'priority' (critical path) considerations. */
30784 return current_sched_info->sched_max_insns_priority;
30785 else if (rs6000_sched_restricted_insns_priority == 2)
30786 /* Increase priority of insn by a minimal amount. This means that in
30787 haifa-sched.c:ready_sort(), only 'priority' (critical path)
30788 considerations precede dispatch-slot restriction considerations. */
30789 return (priority + 1);
30792 if (rs6000_cpu == PROCESSOR_POWER6
30793 && ((load_store_pendulum == -2 && is_load_insn (insn, &load_mem))
30794 || (load_store_pendulum == 2 && is_store_insn (insn, &str_mem))))
30795 /* Attach highest priority to insn if the scheduler has just issued two
30796 stores and this instruction is a load, or two loads and this instruction
30797 is a store. Power6 wants loads and stores scheduled alternately
30798 when possible. */
30799 return current_sched_info->sched_max_insns_priority;
30801 return priority;
30804 /* Return true if the instruction is nonpipelined on the Cell. */
30805 static bool
30806 is_nonpipeline_insn (rtx_insn *insn)
30808 enum attr_type type;
30809 if (!insn || !NONDEBUG_INSN_P (insn)
30810 || GET_CODE (PATTERN (insn)) == USE
30811 || GET_CODE (PATTERN (insn)) == CLOBBER)
30812 return false;
30814 type = get_attr_type (insn);
30815 if (type == TYPE_MUL
30816 || type == TYPE_DIV
30817 || type == TYPE_SDIV
30818 || type == TYPE_DDIV
30819 || type == TYPE_SSQRT
30820 || type == TYPE_DSQRT
30821 || type == TYPE_MFCR
30822 || type == TYPE_MFCRF
30823 || type == TYPE_MFJMPR)
30825 return true;
30827 return false;
30831 /* Return how many instructions the machine can issue per cycle. */
30833 static int
30834 rs6000_issue_rate (void)
30836 /* Unless scheduling for register pressure, use issue rate of 1 for
30837 first scheduling pass to decrease degradation. */
30838 if (!reload_completed && !flag_sched_pressure)
30839 return 1;
30841 switch (rs6000_cpu_attr) {
30842 case CPU_RS64A:
30843 case CPU_PPC601: /* ? */
30844 case CPU_PPC7450:
30845 return 3;
30846 case CPU_PPC440:
30847 case CPU_PPC603:
30848 case CPU_PPC750:
30849 case CPU_PPC7400:
30850 case CPU_PPC8540:
30851 case CPU_PPC8548:
30852 case CPU_CELL:
30853 case CPU_PPCE300C2:
30854 case CPU_PPCE300C3:
30855 case CPU_PPCE500MC:
30856 case CPU_PPCE500MC64:
30857 case CPU_PPCE5500:
30858 case CPU_PPCE6500:
30859 case CPU_TITAN:
30860 return 2;
30861 case CPU_PPC476:
30862 case CPU_PPC604:
30863 case CPU_PPC604E:
30864 case CPU_PPC620:
30865 case CPU_PPC630:
30866 return 4;
30867 case CPU_POWER4:
30868 case CPU_POWER5:
30869 case CPU_POWER6:
30870 case CPU_POWER7:
30871 return 5;
30872 case CPU_POWER8:
30873 return 7;
30874 case CPU_POWER9:
30875 return 6;
30876 default:
30877 return 1;
30881 /* Return how many instructions to look ahead for better insn
30882 scheduling. */
30884 static int
30885 rs6000_use_sched_lookahead (void)
30887 switch (rs6000_cpu_attr)
30889 case CPU_PPC8540:
30890 case CPU_PPC8548:
30891 return 4;
30893 case CPU_CELL:
30894 return (reload_completed ? 8 : 0);
30896 default:
30897 return 0;
30901 /* We are choosing an insn from the ready queue. Return zero if INSN
30902 can be chosen. */
30903 static int
30904 rs6000_use_sched_lookahead_guard (rtx_insn *insn, int ready_index)
30906 if (ready_index == 0)
30907 return 0;
30909 if (rs6000_cpu_attr != CPU_CELL)
30910 return 0;
30912 gcc_assert (insn != NULL_RTX && INSN_P (insn));
30914 if (!reload_completed
30915 || is_nonpipeline_insn (insn)
30916 || is_microcoded_insn (insn))
30917 return 1;
30919 return 0;
30922 /* Determine if PAT refers to memory. If so, set MEM_REF to the MEM rtx
30923 and return true. */
30925 static bool
30926 find_mem_ref (rtx pat, rtx *mem_ref)
30928 const char * fmt;
30929 int i, j;
30931 /* stack_tie does not produce any real memory traffic. */
30932 if (tie_operand (pat, VOIDmode))
30933 return false;
30935 if (GET_CODE (pat) == MEM)
30937 *mem_ref = pat;
30938 return true;
30941 /* Recursively process the pattern. */
30942 fmt = GET_RTX_FORMAT (GET_CODE (pat));
30944 for (i = GET_RTX_LENGTH (GET_CODE (pat)) - 1; i >= 0; i--)
30946 if (fmt[i] == 'e')
30948 if (find_mem_ref (XEXP (pat, i), mem_ref))
30949 return true;
30951 else if (fmt[i] == 'E')
30952 for (j = XVECLEN (pat, i) - 1; j >= 0; j--)
30954 if (find_mem_ref (XVECEXP (pat, i, j), mem_ref))
30955 return true;
30959 return false;
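/* For comparison only: a sketch of the same walk written with the
   rtl-iter.h iterators (assuming that header is included and that
   FOR_EACH_SUBRTX_VAR is available in this tree).  The explicit
   recursion above is what is actually used.  */

static bool
find_mem_ref_alt (rtx pat, rtx *mem_ref)
{
  /* stack_tie does not produce any real memory traffic.  */
  if (tie_operand (pat, VOIDmode))
    return false;

  subrtx_var_iterator::array_type array;
  FOR_EACH_SUBRTX_VAR (iter, array, pat, ALL)
    if (MEM_P (*iter))
      {
        *mem_ref = *iter;
        return true;
      }
  return false;
}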
30962 /* Determine if PAT is a PATTERN of a load insn. */
30964 static bool
30965 is_load_insn1 (rtx pat, rtx *load_mem)
30967 if (!pat)
30968 return false;
30970 if (GET_CODE (pat) == SET)
30971 return find_mem_ref (SET_SRC (pat), load_mem);
30973 if (GET_CODE (pat) == PARALLEL)
30975 int i;
30977 for (i = 0; i < XVECLEN (pat, 0); i++)
30978 if (is_load_insn1 (XVECEXP (pat, 0, i), load_mem))
30979 return true;
30982 return false;
30985 /* Determine if INSN loads from memory. */
30987 static bool
30988 is_load_insn (rtx insn, rtx *load_mem)
30990 if (!insn || !INSN_P (insn))
30991 return false;
30993 if (CALL_P (insn))
30994 return false;
30996 return is_load_insn1 (PATTERN (insn), load_mem);
30999 /* Determine if PAT is a PATTERN of a store insn. */
31001 static bool
31002 is_store_insn1 (rtx pat, rtx *str_mem)
31004 if (!pat)
31005 return false;
31007 if (GET_CODE (pat) == SET)
31008 return find_mem_ref (SET_DEST (pat), str_mem);
31010 if (GET_CODE (pat) == PARALLEL)
31012 int i;
31014 for (i = 0; i < XVECLEN (pat, 0); i++)
31015 if (is_store_insn1 (XVECEXP (pat, 0, i), str_mem))
31016 return true;
31019 return false;
31022 /* Determine if INSN stores to memory. */
31024 static bool
31025 is_store_insn (rtx insn, rtx *str_mem)
31027 if (!insn || !INSN_P (insn))
31028 return false;
31030 return is_store_insn1 (PATTERN (insn), str_mem);
31033 /* Return whether TYPE is a Power9 pairable vector instruction type. */
31035 static bool
31036 is_power9_pairable_vec_type (enum attr_type type)
31038 switch (type)
31040 case TYPE_VECSIMPLE:
31041 case TYPE_VECCOMPLEX:
31042 case TYPE_VECDIV:
31043 case TYPE_VECCMP:
31044 case TYPE_VECPERM:
31045 case TYPE_VECFLOAT:
31046 case TYPE_VECFDIV:
31047 case TYPE_VECDOUBLE:
31048 return true;
31049 default:
31050 break;
31052 return false;
31055 /* Returns whether the dependence between INSN and NEXT is considered
31056 costly by the given target. */
31058 static bool
31059 rs6000_is_costly_dependence (dep_t dep, int cost, int distance)
31061 rtx insn;
31062 rtx next;
31063 rtx load_mem, str_mem;
31065 /* If the flag is not enabled - no dependence is considered costly;
31066 allow all dependent insns in the same group.
31067 This is the most aggressive option. */
31068 if (rs6000_sched_costly_dep == no_dep_costly)
31069 return false;
31071 /* If the flag is set to 1 - a dependence is always considered costly;
31072 do not allow dependent instructions in the same group.
31073 This is the most conservative option. */
31074 if (rs6000_sched_costly_dep == all_deps_costly)
31075 return true;
31077 insn = DEP_PRO (dep);
31078 next = DEP_CON (dep);
31080 if (rs6000_sched_costly_dep == store_to_load_dep_costly
31081 && is_load_insn (next, &load_mem)
31082 && is_store_insn (insn, &str_mem))
31083 /* Prevent load after store in the same group. */
31084 return true;
31086 if (rs6000_sched_costly_dep == true_store_to_load_dep_costly
31087 && is_load_insn (next, &load_mem)
31088 && is_store_insn (insn, &str_mem)
31089 && DEP_TYPE (dep) == REG_DEP_TRUE
31090 && mem_locations_overlap(str_mem, load_mem))
31091 /* Prevent load after store in the same group if it is a true
31092 dependence. */
31093 return true;
31095 /* The flag is set to X; dependences with latency >= X are considered costly,
31096 and will not be scheduled in the same group. */
31097 if (rs6000_sched_costly_dep <= max_dep_latency
31098 && ((cost - distance) >= (int)rs6000_sched_costly_dep))
31099 return true;
31101 return false;
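/* Sketch (the option-string spellings are assumptions) of how a
   -msched-costly-dep= argument would map onto the enum values tested
   above; the real parsing happens at option-override time, not here.  */

static enum rs6000_dependence_cost
costly_dep_model (const char *str)
{
  if (! strcmp (str, "no"))
    return no_dep_costly;
  if (! strcmp (str, "all"))
    return all_deps_costly;
  if (! strcmp (str, "true_store_to_load"))
    return true_store_to_load_dep_costly;
  if (! strcmp (str, "store_to_load"))
    return store_to_load_dep_costly;
  /* Otherwise a number: the latency threshold compared against
     cost - distance above.  */
  return (enum rs6000_dependence_cost) atoi (str);
}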
31104 /* Return the next insn after INSN that is found before TAIL is reached,
31105 skipping any "non-active" insns - insns that will not actually occupy
31106 an issue slot. Return NULL_RTX if such an insn is not found. */
31108 static rtx_insn *
31109 get_next_active_insn (rtx_insn *insn, rtx_insn *tail)
31111 if (insn == NULL_RTX || insn == tail)
31112 return NULL;
31114 while (1)
31116 insn = NEXT_INSN (insn);
31117 if (insn == NULL_RTX || insn == tail)
31118 return NULL;
31120 if (CALL_P (insn)
31121 || JUMP_P (insn) || JUMP_TABLE_DATA_P (insn)
31122 || (NONJUMP_INSN_P (insn)
31123 && GET_CODE (PATTERN (insn)) != USE
31124 && GET_CODE (PATTERN (insn)) != CLOBBER
31125 && INSN_CODE (insn) != CODE_FOR_stack_tie))
31126 break;
31128 return insn;
31131 /* Do Power9 specific sched_reorder2 reordering of ready list. */
31133 static int
31134 power9_sched_reorder2 (rtx_insn **ready, int lastpos)
31136 int pos;
31137 int i;
31138 rtx_insn *tmp;
31139 enum attr_type type, type2;
31141 type = get_attr_type (last_scheduled_insn);
31143 /* Try to issue fixed point divides back-to-back in pairs so they will be
31144 routed to separate execution units and execute in parallel. */
31145 if (type == TYPE_DIV && divide_cnt == 0)
31147 /* First divide has been scheduled. */
31148 divide_cnt = 1;
31150 /* Scan the ready list looking for another divide, if found move it
31151 to the end of the list so it is chosen next. */
31152 pos = lastpos;
31153 while (pos >= 0)
31155 if (recog_memoized (ready[pos]) >= 0
31156 && get_attr_type (ready[pos]) == TYPE_DIV)
31158 tmp = ready[pos];
31159 for (i = pos; i < lastpos; i++)
31160 ready[i] = ready[i + 1];
31161 ready[lastpos] = tmp;
31162 break;
31164 pos--;
31167 else
31169 /* Last insn was the 2nd divide or not a divide, reset the counter. */
31170 divide_cnt = 0;
31172 /* The best dispatch throughput for vector and vector load insns can be
31173 achieved by interleaving a vector and vector load such that they'll
31174 dispatch to the same superslice. If this pairing cannot be achieved
31175 then it is best to pair vector insns together and vector load insns
31176 together.
31178 To aid in this pairing, vec_pairing maintains the current state with
31179 the following values:
31181 0 : Initial state, no vecload/vector pairing has been started.
31183 1 : A vecload or vector insn has been issued and a candidate for
31184 pairing has been found and moved to the end of the ready
31185 list. */
31186 if (type == TYPE_VECLOAD)
31188 /* Issued a vecload. */
31189 if (vec_pairing == 0)
31191 int vecload_pos = -1;
31192 /* We issued a single vecload, look for a vector insn to pair it
31193 with. If one isn't found, try to pair another vecload. */
31194 pos = lastpos;
31195 while (pos >= 0)
31197 if (recog_memoized (ready[pos]) >= 0)
31199 type2 = get_attr_type (ready[pos]);
31200 if (is_power9_pairable_vec_type (type2))
31202 /* Found a vector insn to pair with, move it to the
31203 end of the ready list so it is scheduled next. */
31204 tmp = ready[pos];
31205 for (i = pos; i < lastpos; i++)
31206 ready[i] = ready[i + 1];
31207 ready[lastpos] = tmp;
31208 vec_pairing = 1;
31209 return cached_can_issue_more;
31211 else if (type2 == TYPE_VECLOAD && vecload_pos == -1)
31212 /* Remember position of first vecload seen. */
31213 vecload_pos = pos;
31215 pos--;
31217 if (vecload_pos >= 0)
31219 /* Didn't find a vector to pair with but did find a vecload,
31220 move it to the end of the ready list. */
31221 tmp = ready[vecload_pos];
31222 for (i = vecload_pos; i < lastpos; i++)
31223 ready[i] = ready[i + 1];
31224 ready[lastpos] = tmp;
31225 vec_pairing = 1;
31226 return cached_can_issue_more;
31230 else if (is_power9_pairable_vec_type (type))
31232 /* Issued a vector operation. */
31233 if (vec_pairing == 0)
31235 int vec_pos = -1;
31236 /* We issued a single vector insn, look for a vecload to pair it
31237 with. If one isn't found, try to pair another vector. */
31238 pos = lastpos;
31239 while (pos >= 0)
31241 if (recog_memoized (ready[pos]) >= 0)
31243 type2 = get_attr_type (ready[pos]);
31244 if (type2 == TYPE_VECLOAD)
31246 /* Found a vecload insn to pair with, move it to the
31247 end of the ready list so it is scheduled next. */
31248 tmp = ready[pos];
31249 for (i = pos; i < lastpos; i++)
31250 ready[i] = ready[i + 1];
31251 ready[lastpos] = tmp;
31252 vec_pairing = 1;
31253 return cached_can_issue_more;
31255 else if (is_power9_pairable_vec_type (type2)
31256 && vec_pos == -1)
31257 /* Remember position of first vector insn seen. */
31258 vec_pos = pos;
31260 pos--;
31262 if (vec_pos >= 0)
31264 /* Didn't find a vecload to pair with but did find a vector
31265 insn, move it to the end of the ready list. */
31266 tmp = ready[vec_pos];
31267 for (i = vec_pos; i < lastpos; i++)
31268 ready[i] = ready[i + 1];
31269 ready[lastpos] = tmp;
31270 vec_pairing = 1;
31271 return cached_can_issue_more;
31276 /* We've either finished a vec/vecload pair, couldn't find an insn to
31277 continue the current pair, or the last insn had nothing to do with
31278 with pairing. In any case, reset the state. */
31279 vec_pairing = 0;
31282 return cached_can_issue_more;
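/* Hypothetical helper, not used above: move READY[POS] to READY[LASTPOS]
   (the next slot to be issued), shifting the intervening entries down by
   one.  Each of the four scan-and-move loops above is an inlined copy of
   this rotation.  */

static void
move_ready_insn_to_end (rtx_insn **ready, int pos, int lastpos)
{
  rtx_insn *tmp = ready[pos];
  for (int i = pos; i < lastpos; i++)
    ready[i] = ready[i + 1];
  ready[lastpos] = tmp;
}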
31285 /* We are about to begin issuing insns for this clock cycle. */
31287 static int
31288 rs6000_sched_reorder (FILE *dump ATTRIBUTE_UNUSED, int sched_verbose,
31289 rtx_insn **ready ATTRIBUTE_UNUSED,
31290 int *pn_ready ATTRIBUTE_UNUSED,
31291 int clock_var ATTRIBUTE_UNUSED)
31293 int n_ready = *pn_ready;
31295 if (sched_verbose)
31296 fprintf (dump, "// rs6000_sched_reorder :\n");
31298 /* Reorder the ready list, if the highest priority ready insn
31299 (the last list element) is a nonpipelined insn. */
31300 if (rs6000_cpu_attr == CPU_CELL && n_ready > 1)
31302 if (is_nonpipeline_insn (ready[n_ready - 1])
31303 && (recog_memoized (ready[n_ready - 2]) > 0))
31304 /* Simply swap first two insns. */
31305 std::swap (ready[n_ready - 1], ready[n_ready - 2]);
31308 if (rs6000_cpu == PROCESSOR_POWER6)
31309 load_store_pendulum = 0;
31311 return rs6000_issue_rate ();
31314 /* Like rs6000_sched_reorder, but called after issuing each insn. */
31316 static int
31317 rs6000_sched_reorder2 (FILE *dump, int sched_verbose, rtx_insn **ready,
31318 int *pn_ready, int clock_var ATTRIBUTE_UNUSED)
31320 if (sched_verbose)
31321 fprintf (dump, "// rs6000_sched_reorder2 :\n");
31323 /* For Power6, we need to handle some special cases to try and keep the
31324 store queue from overflowing and triggering expensive flushes.
31326 This code monitors how load and store instructions are being issued
31327 and skews the ready list one way or the other to increase the likelihood
31328 that a desired instruction is issued at the proper time.
31330 A couple of things are done. First, we maintain a "load_store_pendulum"
31331 to track the current state of load/store issue.
31333 - If the pendulum is at zero, then no loads or stores have been
31334 issued in the current cycle so we do nothing.
31336 - If the pendulum is 1, then a single load has been issued in this
31337 cycle and we attempt to locate another load in the ready list to
31338 issue with it.
31340 - If the pendulum is -2, then two stores have already been
31341 issued in this cycle, so we increase the priority of the first load
31342 in the ready list to increase its likelihood of being chosen first
31343 in the next cycle.
31345 - If the pendulum is -1, then a single store has been issued in this
31346 cycle and we attempt to locate another store in the ready list to
31347 issue with it, preferring a store to an adjacent memory location to
31348 facilitate store pairing in the store queue.
31350 - If the pendulum is 2, then two loads have already been
31351 issued in this cycle, so we increase the priority of the first store
31352 in the ready list to increase its likelihood of being chosen first
31353 in the next cycle.
31355 - If the pendulum < -2 or > 2, then do nothing.
31357 Note: This code covers the most common scenarios. There exist
31358 non-load/store instructions which make use of the LSU and which
31359 would need to be accounted for to strictly model the behavior
31360 of the machine. Those instructions are currently unaccounted
31361 for to help minimize the compile-time overhead of this code. */
31363 if (rs6000_cpu == PROCESSOR_POWER6 && last_scheduled_insn)
31365 int pos;
31366 int i;
31367 rtx_insn *tmp;
31368 rtx load_mem, str_mem;
31370 if (is_store_insn (last_scheduled_insn, &str_mem))
31371 /* Issuing a store, swing the load_store_pendulum to the left */
31372 load_store_pendulum--;
31373 else if (is_load_insn (last_scheduled_insn, &load_mem))
31374 /* Issuing a load, swing the load_store_pendulum to the right */
31375 load_store_pendulum++;
31376 else
31377 return cached_can_issue_more;
31379 /* If the pendulum is balanced, or there is only one instruction on
31380 the ready list, then all is well, so return. */
31381 if ((load_store_pendulum == 0) || (*pn_ready <= 1))
31382 return cached_can_issue_more;
31384 if (load_store_pendulum == 1)
31386 /* A load has been issued in this cycle. Scan the ready list
31387 for another load to issue with it */
31388 pos = *pn_ready - 1;
31390 while (pos >= 0)
31392 if (is_load_insn (ready[pos], &load_mem))
31394 /* Found a load. Move it to the head of the ready list,
31395 and adjust its priority so that it is more likely to
31396 stay there */
31397 tmp = ready[pos];
31398 for (i = pos; i < *pn_ready - 1; i++)
31399 ready[i] = ready[i + 1];
31400 ready[*pn_ready - 1] = tmp;
31402 if (!sel_sched_p () && INSN_PRIORITY_KNOWN (tmp))
31403 INSN_PRIORITY (tmp)++;
31404 break;
31406 pos--;
31409 else if (load_store_pendulum == -2)
31411 /* Two stores have been issued in this cycle. Increase the
31412 priority of the first load in the ready list to favor it for
31413 issuing in the next cycle. */
31414 pos = *pn_ready - 1;
31416 while (pos >= 0)
31418 if (is_load_insn (ready[pos], &load_mem)
31419 && !sel_sched_p ()
31420 && INSN_PRIORITY_KNOWN (ready[pos]))
31422 INSN_PRIORITY (ready[pos])++;
31424 /* Adjust the pendulum to account for the fact that a load
31425 was found and increased in priority. This is to prevent
31426 increasing the priority of multiple loads */
31427 load_store_pendulum--;
31429 break;
31431 pos--;
31434 else if (load_store_pendulum == -1)
31436 /* A store has been issued in this cycle. Scan the ready list for
31437 another store to issue with it, preferring a store to an adjacent
31438 memory location */
31439 int first_store_pos = -1;
31441 pos = *pn_ready - 1;
31443 while (pos >= 0)
31445 if (is_store_insn (ready[pos], &str_mem))
31447 rtx str_mem2;
31448 /* Maintain the index of the first store found on the
31449 list */
31450 if (first_store_pos == -1)
31451 first_store_pos = pos;
31453 if (is_store_insn (last_scheduled_insn, &str_mem2)
31454 && adjacent_mem_locations (str_mem, str_mem2))
31456 /* Found an adjacent store. Move it to the head of the
31457 ready list, and adjust its priority so that it is
31458 more likely to stay there */
31459 tmp = ready[pos];
31460 for (i = pos; i < *pn_ready - 1; i++)
31461 ready[i] = ready[i + 1];
31462 ready[*pn_ready - 1] = tmp;
31464 if (!sel_sched_p () && INSN_PRIORITY_KNOWN (tmp))
31465 INSN_PRIORITY (tmp)++;
31467 first_store_pos = -1;
31469 break;
31472 pos--;
31475 if (first_store_pos >= 0)
31477 /* An adjacent store wasn't found, but a non-adjacent store was,
31478 so move the non-adjacent store to the front of the ready
31479 list, and adjust its priority so that it is more likely to
31480 stay there. */
31481 tmp = ready[first_store_pos];
31482 for (i = first_store_pos; i < *pn_ready - 1; i++)
31483 ready[i] = ready[i + 1];
31484 ready[*pn_ready - 1] = tmp;
31485 if (!sel_sched_p () && INSN_PRIORITY_KNOWN (tmp))
31486 INSN_PRIORITY (tmp)++;
31489 else if (load_store_pendulum == 2)
31491 /* Two loads have been issued in this cycle. Increase the priority
31492 of the first store in the ready list to favor it for issuing in
31493 the next cycle. */
31494 pos = *pn_ready - 1;
31496 while (pos >= 0)
31498 if (is_store_insn (ready[pos], &str_mem)
31499 && !sel_sched_p ()
31500 && INSN_PRIORITY_KNOWN (ready[pos]))
31502 INSN_PRIORITY (ready[pos])++;
31504 /* Adjust the pendulum to account for the fact that a store
31505 was found and increased in priority. This is to prevent
31506 increasing the priority of multiple stores */
31507 load_store_pendulum++;
31509 break;
31511 pos--;
31516 /* Do Power9 dependent reordering if necessary. */
31517 if (rs6000_cpu == PROCESSOR_POWER9 && last_scheduled_insn
31518 && recog_memoized (last_scheduled_insn) >= 0)
31519 return power9_sched_reorder2 (ready, *pn_ready - 1);
31521 return cached_can_issue_more;
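/* Illustrative restatement of the pendulum update performed above (not
   called anywhere): stores swing the value negative, loads swing it
   positive, and the magnitude counts memory ops issued this cycle.  */

static int
pendulum_model_step (int pendulum, bool store_p, bool load_p)
{
  if (store_p)
    return pendulum - 1;  /* two stores this cycle gives -2 */
  if (load_p)
    return pendulum + 1;  /* two loads this cycle gives +2 */
  return pendulum;        /* other insns leave it untouched */
}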
31524 /* Return whether the presence of INSN causes a dispatch group termination
31525 of group WHICH_GROUP.
31527 If WHICH_GROUP == current_group, this function will return true if INSN
31528 causes the termination of the current group (i.e., the dispatch group to
31529 which INSN belongs). This means that INSN will be the last insn in the
31530 group it belongs to.
31532 If WHICH_GROUP == previous_group, this function will return true if INSN
31533 causes the termination of the previous group (i.e., the dispatch group that
31534 precedes the group to which INSN belongs). This means that INSN will be
31535 the first insn in the group it belongs to. */
31537 static bool
31538 insn_terminates_group_p (rtx_insn *insn, enum group_termination which_group)
31540 bool first, last;
31542 if (! insn)
31543 return false;
31545 first = insn_must_be_first_in_group (insn);
31546 last = insn_must_be_last_in_group (insn);
31548 if (first && last)
31549 return true;
31551 if (which_group == current_group)
31552 return last;
31553 else if (which_group == previous_group)
31554 return first;
31556 return false;
31560 static bool
31561 insn_must_be_first_in_group (rtx_insn *insn)
31563 enum attr_type type;
31565 if (!insn
31566 || NOTE_P (insn)
31567 || DEBUG_INSN_P (insn)
31568 || GET_CODE (PATTERN (insn)) == USE
31569 || GET_CODE (PATTERN (insn)) == CLOBBER)
31570 return false;
31572 switch (rs6000_cpu)
31574 case PROCESSOR_POWER5:
31575 if (is_cracked_insn (insn))
31576 return true;
31577 /* FALLTHRU */
31578 case PROCESSOR_POWER4:
31579 if (is_microcoded_insn (insn))
31580 return true;
31582 if (!rs6000_sched_groups)
31583 return false;
31585 type = get_attr_type (insn);
31587 switch (type)
31589 case TYPE_MFCR:
31590 case TYPE_MFCRF:
31591 case TYPE_MTCR:
31592 case TYPE_DELAYED_CR:
31593 case TYPE_CR_LOGICAL:
31594 case TYPE_MTJMPR:
31595 case TYPE_MFJMPR:
31596 case TYPE_DIV:
31597 case TYPE_LOAD_L:
31598 case TYPE_STORE_C:
31599 case TYPE_ISYNC:
31600 case TYPE_SYNC:
31601 return true;
31602 default:
31603 break;
31605 break;
31606 case PROCESSOR_POWER6:
31607 type = get_attr_type (insn);
31609 switch (type)
31611 case TYPE_EXTS:
31612 case TYPE_CNTLZ:
31613 case TYPE_TRAP:
31614 case TYPE_MUL:
31615 case TYPE_INSERT:
31616 case TYPE_FPCOMPARE:
31617 case TYPE_MFCR:
31618 case TYPE_MTCR:
31619 case TYPE_MFJMPR:
31620 case TYPE_MTJMPR:
31621 case TYPE_ISYNC:
31622 case TYPE_SYNC:
31623 case TYPE_LOAD_L:
31624 case TYPE_STORE_C:
31625 return true;
31626 case TYPE_SHIFT:
31627 if (get_attr_dot (insn) == DOT_NO
31628 || get_attr_var_shift (insn) == VAR_SHIFT_NO)
31629 return true;
31630 else
31631 break;
31632 case TYPE_DIV:
31633 if (get_attr_size (insn) == SIZE_32)
31634 return true;
31635 else
31636 break;
31637 case TYPE_LOAD:
31638 case TYPE_STORE:
31639 case TYPE_FPLOAD:
31640 case TYPE_FPSTORE:
31641 if (get_attr_update (insn) == UPDATE_YES)
31642 return true;
31643 else
31644 break;
31645 default:
31646 break;
31648 break;
31649 case PROCESSOR_POWER7:
31650 type = get_attr_type (insn);
31652 switch (type)
31654 case TYPE_CR_LOGICAL:
31655 case TYPE_MFCR:
31656 case TYPE_MFCRF:
31657 case TYPE_MTCR:
31658 case TYPE_DIV:
31659 case TYPE_ISYNC:
31660 case TYPE_LOAD_L:
31661 case TYPE_STORE_C:
31662 case TYPE_MFJMPR:
31663 case TYPE_MTJMPR:
31664 return true;
31665 case TYPE_MUL:
31666 case TYPE_SHIFT:
31667 case TYPE_EXTS:
31668 if (get_attr_dot (insn) == DOT_YES)
31669 return true;
31670 else
31671 break;
31672 case TYPE_LOAD:
31673 if (get_attr_sign_extend (insn) == SIGN_EXTEND_YES
31674 || get_attr_update (insn) == UPDATE_YES)
31675 return true;
31676 else
31677 break;
31678 case TYPE_STORE:
31679 case TYPE_FPLOAD:
31680 case TYPE_FPSTORE:
31681 if (get_attr_update (insn) == UPDATE_YES)
31682 return true;
31683 else
31684 break;
31685 default:
31686 break;
31688 break;
31689 case PROCESSOR_POWER8:
31690 type = get_attr_type (insn);
31692 switch (type)
31694 case TYPE_CR_LOGICAL:
31695 case TYPE_DELAYED_CR:
31696 case TYPE_MFCR:
31697 case TYPE_MFCRF:
31698 case TYPE_MTCR:
31699 case TYPE_SYNC:
31700 case TYPE_ISYNC:
31701 case TYPE_LOAD_L:
31702 case TYPE_STORE_C:
31703 case TYPE_VECSTORE:
31704 case TYPE_MFJMPR:
31705 case TYPE_MTJMPR:
31706 return true;
31707 case TYPE_SHIFT:
31708 case TYPE_EXTS:
31709 case TYPE_MUL:
31710 if (get_attr_dot (insn) == DOT_YES)
31711 return true;
31712 else
31713 break;
31714 case TYPE_LOAD:
31715 if (get_attr_sign_extend (insn) == SIGN_EXTEND_YES
31716 || get_attr_update (insn) == UPDATE_YES)
31717 return true;
31718 else
31719 break;
31720 case TYPE_STORE:
31721 if (get_attr_update (insn) == UPDATE_YES
31722 && get_attr_indexed (insn) == INDEXED_YES)
31723 return true;
31724 else
31725 break;
31726 default:
31727 break;
31729 break;
31730 default:
31731 break;
31734 return false;
31737 static bool
31738 insn_must_be_last_in_group (rtx_insn *insn)
31740 enum attr_type type;
31742 if (!insn
31743 || NOTE_P (insn)
31744 || DEBUG_INSN_P (insn)
31745 || GET_CODE (PATTERN (insn)) == USE
31746 || GET_CODE (PATTERN (insn)) == CLOBBER)
31747 return false;
31749 switch (rs6000_cpu) {
31750 case PROCESSOR_POWER4:
31751 case PROCESSOR_POWER5:
31752 if (is_microcoded_insn (insn))
31753 return true;
31755 if (is_branch_slot_insn (insn))
31756 return true;
31758 break;
31759 case PROCESSOR_POWER6:
31760 type = get_attr_type (insn);
31762 switch (type)
31764 case TYPE_EXTS:
31765 case TYPE_CNTLZ:
31766 case TYPE_TRAP:
31767 case TYPE_MUL:
31768 case TYPE_FPCOMPARE:
31769 case TYPE_MFCR:
31770 case TYPE_MTCR:
31771 case TYPE_MFJMPR:
31772 case TYPE_MTJMPR:
31773 case TYPE_ISYNC:
31774 case TYPE_SYNC:
31775 case TYPE_LOAD_L:
31776 case TYPE_STORE_C:
31777 return true;
31778 case TYPE_SHIFT:
31779 if (get_attr_dot (insn) == DOT_NO
31780 || get_attr_var_shift (insn) == VAR_SHIFT_NO)
31781 return true;
31782 else
31783 break;
31784 case TYPE_DIV:
31785 if (get_attr_size (insn) == SIZE_32)
31786 return true;
31787 else
31788 break;
31789 default:
31790 break;
31792 break;
31793 case PROCESSOR_POWER7:
31794 type = get_attr_type (insn);
31796 switch (type)
31798 case TYPE_ISYNC:
31799 case TYPE_SYNC:
31800 case TYPE_LOAD_L:
31801 case TYPE_STORE_C:
31802 return true;
31803 case TYPE_LOAD:
31804 if (get_attr_sign_extend (insn) == SIGN_EXTEND_YES
31805 && get_attr_update (insn) == UPDATE_YES)
31806 return true;
31807 else
31808 break;
31809 case TYPE_STORE:
31810 if (get_attr_update (insn) == UPDATE_YES
31811 && get_attr_indexed (insn) == INDEXED_YES)
31812 return true;
31813 else
31814 break;
31815 default:
31816 break;
31818 break;
31819 case PROCESSOR_POWER8:
31820 type = get_attr_type (insn);
31822 switch (type)
31824 case TYPE_MFCR:
31825 case TYPE_MTCR:
31826 case TYPE_ISYNC:
31827 case TYPE_SYNC:
31828 case TYPE_LOAD_L:
31829 case TYPE_STORE_C:
31830 return true;
31831 case TYPE_LOAD:
31832 if (get_attr_sign_extend (insn) == SIGN_EXTEND_YES
31833 && get_attr_update (insn) == UPDATE_YES)
31834 return true;
31835 else
31836 break;
31837 case TYPE_STORE:
31838 if (get_attr_update (insn) == UPDATE_YES
31839 && get_attr_indexed (insn) == INDEXED_YES)
31840 return true;
31841 else
31842 break;
31843 default:
31844 break;
31846 break;
31847 default:
31848 break;
31851 return false;
31854 /* Return true if it is recommended to keep NEXT_INSN "far" (in a separate
31855 dispatch group) from the insns in GROUP_INSNS. Return false otherwise. */
31857 static bool
31858 is_costly_group (rtx *group_insns, rtx next_insn)
31860 int i;
31861 int issue_rate = rs6000_issue_rate ();
31863 for (i = 0; i < issue_rate; i++)
31865 sd_iterator_def sd_it;
31866 dep_t dep;
31867 rtx insn = group_insns[i];
31869 if (!insn)
31870 continue;
31872 FOR_EACH_DEP (insn, SD_LIST_RES_FORW, sd_it, dep)
31874 rtx next = DEP_CON (dep);
31876 if (next == next_insn
31877 && rs6000_is_costly_dependence (dep, dep_cost (dep), 0))
31878 return true;
31882 return false;
31885 /* Utility of the function redefine_groups.
31886 Check if it is too costly to schedule NEXT_INSN together with GROUP_INSNS
31887 in the same dispatch group. If so, insert nops before NEXT_INSN, in order
31888 to keep it "far" (in a separate group) from GROUP_INSNS, following
31889 one of the following schemes, depending on the value of the flag
31890 -minsert-sched-nops = X:
31891 (1) X == sched_finish_regroup_exact: insert exactly as many nops as needed
31892 in order to force NEXT_INSN into a separate group.
31893 (2) X < sched_finish_regroup_exact: insert exactly X nops.
31894 GROUP_END, CAN_ISSUE_MORE and GROUP_COUNT record the state after nop
31895 insertion (has a group just ended, how many vacant issue slots remain in the
31896 last group, and how many dispatch groups were encountered so far). */
31898 static int
31899 force_new_group (int sched_verbose, FILE *dump, rtx *group_insns,
31900 rtx_insn *next_insn, bool *group_end, int can_issue_more,
31901 int *group_count)
31903 rtx nop;
31904 bool force;
31905 int issue_rate = rs6000_issue_rate ();
31906 bool end = *group_end;
31907 int i;
31909 if (next_insn == NULL_RTX || DEBUG_INSN_P (next_insn))
31910 return can_issue_more;
31912 if (rs6000_sched_insert_nops > sched_finish_regroup_exact)
31913 return can_issue_more;
31915 force = is_costly_group (group_insns, next_insn);
31916 if (!force)
31917 return can_issue_more;
31919 if (sched_verbose > 6)
31920 fprintf (dump,"force: group count = %d, can_issue_more = %d\n",
31921 *group_count ,can_issue_more);
31923 if (rs6000_sched_insert_nops == sched_finish_regroup_exact)
31925 if (*group_end)
31926 can_issue_more = 0;
31928 /* Since only a branch can be issued in the last issue_slot, it is
31929 sufficient to insert 'can_issue_more - 1' nops if next_insn is not
31930 a branch. If next_insn is a branch, we insert 'can_issue_more' nops;
31931 in this case the last nop will start a new group and the branch
31932 will be forced to the new group. */
31933 if (can_issue_more && !is_branch_slot_insn (next_insn))
31934 can_issue_more--;
31936 /* Do we have a special group ending nop? */
31937 if (rs6000_cpu_attr == CPU_POWER6 || rs6000_cpu_attr == CPU_POWER7
31938 || rs6000_cpu_attr == CPU_POWER8)
31940 nop = gen_group_ending_nop ();
31941 emit_insn_before (nop, next_insn);
31942 can_issue_more = 0;
31944 else
31945 while (can_issue_more > 0)
31947 nop = gen_nop ();
31948 emit_insn_before (nop, next_insn);
31949 can_issue_more--;
31952 *group_end = true;
31953 return 0;
31956 if (rs6000_sched_insert_nops < sched_finish_regroup_exact)
31958 int n_nops = rs6000_sched_insert_nops;
31960 /* Nops can't be issued from the branch slot, so the effective
31961 issue_rate for nops is 'issue_rate - 1'. */
31962 if (can_issue_more == 0)
31963 can_issue_more = issue_rate;
31964 can_issue_more--;
31965 if (can_issue_more == 0)
31967 can_issue_more = issue_rate - 1;
31968 (*group_count)++;
31969 end = true;
31970 for (i = 0; i < issue_rate; i++)
31972 group_insns[i] = 0;
31976 while (n_nops > 0)
31978 nop = gen_nop ();
31979 emit_insn_before (nop, next_insn);
31980 if (can_issue_more == issue_rate - 1) /* new group begins */
31981 end = false;
31982 can_issue_more--;
31983 if (can_issue_more == 0)
31985 can_issue_more = issue_rate - 1;
31986 (*group_count)++;
31987 end = true;
31988 for (i = 0; i < issue_rate; i++)
31990 group_insns[i] = 0;
31993 n_nops--;
31996 /* Scale back relative to 'issue_rate' (instead of 'issue_rate - 1'). */
31997 can_issue_more++;
31999 /* Is next_insn going to start a new group? */
32000 *group_end
32001 = (end
32002 || (can_issue_more == 1 && !is_branch_slot_insn (next_insn))
32003 || (can_issue_more <= 2 && is_cracked_insn (next_insn))
32004 || (can_issue_more < issue_rate &&
32005 insn_terminates_group_p (next_insn, previous_group)));
32006 if (*group_end && end)
32007 (*group_count)--;
32009 if (sched_verbose > 6)
32010 fprintf (dump, "done force: group count = %d, can_issue_more = %d\n",
32011 *group_count, can_issue_more);
32012 return can_issue_more;
32015 return can_issue_more;
32018 /* This function tries to synchronize the dispatch groups that the compiler "sees"
32019 with the dispatch groups that the processor dispatcher is expected to
32020 form in practice. It tries to achieve this synchronization by forcing the
32021 estimated processor grouping on the compiler (as opposed to the function
32022 'pad_groups', which tries to force the scheduler's grouping on the processor).
32024 The function scans the insn sequence between PREV_HEAD_INSN and TAIL and
32025 examines the (estimated) dispatch groups that will be formed by the processor
32026 dispatcher. It marks these group boundaries to reflect the estimated
32027 processor grouping, overriding the grouping that the scheduler had marked.
32028 Depending on the value of the flag '-minsert-sched-nops' this function can
32029 force certain insns into separate groups or force a certain distance between
32030 them by inserting nops, for example, if there exists a "costly dependence"
32031 between the insns.
32033 The function estimates the group boundaries that the processor will form as
32034 follows: It keeps track of how many vacant issue slots are available after
32035 each insn. A subsequent insn will start a new group if one of the following
32036 4 cases applies:
32037 - no more vacant issue slots remain in the current dispatch group.
32038 - only the last issue slot, which is the branch slot, is vacant, but the next
32039 insn is not a branch.
32040 - only the last 2 or fewer issue slots, including the branch slot, are vacant,
32041 which means that a cracked insn (which occupies two issue slots) can't be
32042 issued in this group.
32043 - fewer than 'issue_rate' slots are vacant, and the next insn always needs to
32044 start a new group. */
32046 static int
32047 redefine_groups (FILE *dump, int sched_verbose, rtx_insn *prev_head_insn,
32048 rtx_insn *tail)
32050 rtx_insn *insn, *next_insn;
32051 int issue_rate;
32052 int can_issue_more;
32053 int slot, i;
32054 bool group_end;
32055 int group_count = 0;
32056 rtx *group_insns;
32058 /* Initialize. */
32059 issue_rate = rs6000_issue_rate ();
32060 group_insns = XALLOCAVEC (rtx, issue_rate);
32061 for (i = 0; i < issue_rate; i++)
32063 group_insns[i] = 0;
32065 can_issue_more = issue_rate;
32066 slot = 0;
32067 insn = get_next_active_insn (prev_head_insn, tail);
32068 group_end = false;
32070 while (insn != NULL_RTX)
32072 slot = (issue_rate - can_issue_more);
32073 group_insns[slot] = insn;
32074 can_issue_more =
32075 rs6000_variable_issue (dump, sched_verbose, insn, can_issue_more);
32076 if (insn_terminates_group_p (insn, current_group))
32077 can_issue_more = 0;
32079 next_insn = get_next_active_insn (insn, tail);
32080 if (next_insn == NULL_RTX)
32081 return group_count + 1;
32083 /* Is next_insn going to start a new group? */
32084 group_end
32085 = (can_issue_more == 0
32086 || (can_issue_more == 1 && !is_branch_slot_insn (next_insn))
32087 || (can_issue_more <= 2 && is_cracked_insn (next_insn))
32088 || (can_issue_more < issue_rate &&
32089 insn_terminates_group_p (next_insn, previous_group)));
32091 can_issue_more = force_new_group (sched_verbose, dump, group_insns,
32092 next_insn, &group_end, can_issue_more,
32093 &group_count);
32095 if (group_end)
32097 group_count++;
32098 can_issue_more = 0;
32099 for (i = 0; i < issue_rate; i++)
32101 group_insns[i] = 0;
32105 if (GET_MODE (next_insn) == TImode && can_issue_more)
32106 PUT_MODE (next_insn, VOIDmode);
32107 else if (!can_issue_more && GET_MODE (next_insn) != TImode)
32108 PUT_MODE (next_insn, TImode);
32110 insn = next_insn;
32111 if (can_issue_more == 0)
32112 can_issue_more = issue_rate;
32113 } /* while */
32115 return group_count;
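/* Hypothetical helper capturing the group_end predicate that is written
   out twice above (in force_new_group and redefine_groups); shown to make
   the four boundary cases explicit.  */

static bool
starts_new_group_p (rtx_insn *next_insn, int can_issue_more, int issue_rate)
{
  return (can_issue_more == 0
          || (can_issue_more == 1 && !is_branch_slot_insn (next_insn))
          || (can_issue_more <= 2 && is_cracked_insn (next_insn))
          || (can_issue_more < issue_rate
              && insn_terminates_group_p (next_insn, previous_group)));
}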
32118 /* Scan the insn sequence between PREV_HEAD_INSN and TAIL and examine the
32119 dispatch group boundaries that the scheduler had marked. Pad with nops
32120 any dispatch groups which have vacant issue slots, in order to force the
32121 scheduler's grouping on the processor dispatcher. The function
32122 returns the number of dispatch groups found. */
32124 static int
32125 pad_groups (FILE *dump, int sched_verbose, rtx_insn *prev_head_insn,
32126 rtx_insn *tail)
32128 rtx_insn *insn, *next_insn;
32129 rtx nop;
32130 int issue_rate;
32131 int can_issue_more;
32132 int group_end;
32133 int group_count = 0;
32135 /* Initialize issue_rate. */
32136 issue_rate = rs6000_issue_rate ();
32137 can_issue_more = issue_rate;
32139 insn = get_next_active_insn (prev_head_insn, tail);
32140 next_insn = get_next_active_insn (insn, tail);
32142 while (insn != NULL_RTX)
32144 can_issue_more =
32145 rs6000_variable_issue (dump, sched_verbose, insn, can_issue_more);
32147 group_end = (next_insn == NULL_RTX || GET_MODE (next_insn) == TImode);
32149 if (next_insn == NULL_RTX)
32150 break;
32152 if (group_end)
32154 /* If the scheduler had marked group termination at this location
32155 (between insn and next_insn), and neither insn nor next_insn will
32156 force group termination, pad the group with nops to force group
32157 termination. */
32158 if (can_issue_more
32159 && (rs6000_sched_insert_nops == sched_finish_pad_groups)
32160 && !insn_terminates_group_p (insn, current_group)
32161 && !insn_terminates_group_p (next_insn, previous_group))
32163 if (!is_branch_slot_insn (next_insn))
32164 can_issue_more--;
32166 while (can_issue_more)
32168 nop = gen_nop ();
32169 emit_insn_before (nop, next_insn);
32170 can_issue_more--;
32174 can_issue_more = issue_rate;
32175 group_count++;
32178 insn = next_insn;
32179 next_insn = get_next_active_insn (insn, tail);
32182 return group_count;
32185 /* We're beginning a new block. Initialize data structures as necessary. */
32187 static void
32188 rs6000_sched_init (FILE *dump ATTRIBUTE_UNUSED,
32189 int sched_verbose ATTRIBUTE_UNUSED,
32190 int max_ready ATTRIBUTE_UNUSED)
32192 last_scheduled_insn = NULL;
32193 load_store_pendulum = 0;
32194 divide_cnt = 0;
32195 vec_pairing = 0;
32198 /* The following function is called at the end of scheduling BB.
32199 After reload, it inserts nops to enforce insn group bundling.
32201 static void
32202 rs6000_sched_finish (FILE *dump, int sched_verbose)
32204 int n_groups;
32206 if (sched_verbose)
32207 fprintf (dump, "=== Finishing schedule.\n");
32209 if (reload_completed && rs6000_sched_groups)
32211 /* Do not run sched_finish hook when selective scheduling enabled. */
32212 if (sel_sched_p ())
32213 return;
32215 if (rs6000_sched_insert_nops == sched_finish_none)
32216 return;
32218 if (rs6000_sched_insert_nops == sched_finish_pad_groups)
32219 n_groups = pad_groups (dump, sched_verbose,
32220 current_sched_info->prev_head,
32221 current_sched_info->next_tail);
32222 else
32223 n_groups = redefine_groups (dump, sched_verbose,
32224 current_sched_info->prev_head,
32225 current_sched_info->next_tail);
32227 if (sched_verbose >= 6)
32229 fprintf (dump, "ngroups = %d\n", n_groups);
32230 print_rtl (dump, current_sched_info->prev_head);
32231 fprintf (dump, "Done finish_sched\n");
32236 struct rs6000_sched_context
32238 short cached_can_issue_more;
32239 rtx_insn *last_scheduled_insn;
32240 int load_store_pendulum;
32241 int divide_cnt;
32242 int vec_pairing;
32245 typedef struct rs6000_sched_context rs6000_sched_context_def;
32246 typedef rs6000_sched_context_def *rs6000_sched_context_t;
32248 /* Allocate store for new scheduling context. */
32249 static void *
32250 rs6000_alloc_sched_context (void)
32252 return xmalloc (sizeof (rs6000_sched_context_def));
32255 /* If CLEAN_P is true, initialize _SC with clean data;
32256 otherwise initialize it from the global context. */
32257 static void
32258 rs6000_init_sched_context (void *_sc, bool clean_p)
32260 rs6000_sched_context_t sc = (rs6000_sched_context_t) _sc;
32262 if (clean_p)
32264 sc->cached_can_issue_more = 0;
32265 sc->last_scheduled_insn = NULL;
32266 sc->load_store_pendulum = 0;
32267 sc->divide_cnt = 0;
32268 sc->vec_pairing = 0;
32270 else
32272 sc->cached_can_issue_more = cached_can_issue_more;
32273 sc->last_scheduled_insn = last_scheduled_insn;
32274 sc->load_store_pendulum = load_store_pendulum;
32275 sc->divide_cnt = divide_cnt;
32276 sc->vec_pairing = vec_pairing;
32280 /* Sets the global scheduling context to the one pointed to by _SC. */
32281 static void
32282 rs6000_set_sched_context (void *_sc)
32284 rs6000_sched_context_t sc = (rs6000_sched_context_t) _sc;
32286 gcc_assert (sc != NULL);
32288 cached_can_issue_more = sc->cached_can_issue_more;
32289 last_scheduled_insn = sc->last_scheduled_insn;
32290 load_store_pendulum = sc->load_store_pendulum;
32291 divide_cnt = sc->divide_cnt;
32292 vec_pairing = sc->vec_pairing;
32295 /* Free _SC. */
32296 static void
32297 rs6000_free_sched_context (void *_sc)
32299 gcc_assert (_sc != NULL);
32301 free (_sc);
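/* Illustrative calling sequence for the four hooks above, as a selective
   scheduling client might drive them (assumed shape, not actual scheduler
   code).  */

static void
sched_context_example (void)
{
  void *ctx = rs6000_alloc_sched_context ();
  /* Snapshot the current global scheduling state.  */
  rs6000_init_sched_context (ctx, false);
  /* ... schedule some other region, clobbering the globals ...  */
  /* Restore the snapshot, then release it.  */
  rs6000_set_sched_context (ctx);
  rs6000_free_sched_context (ctx);
}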
32304 static bool
32305 rs6000_sched_can_speculate_insn (rtx_insn *insn)
32307 switch (get_attr_type (insn))
32309 case TYPE_DIV:
32310 case TYPE_SDIV:
32311 case TYPE_DDIV:
32312 case TYPE_VECDIV:
32313 case TYPE_SSQRT:
32314 case TYPE_DSQRT:
32315 return false;
32317 default:
32318 return true;
32322 /* Length, in units (bytes), of the trampoline for entering a nested function. */
32325 rs6000_trampoline_size (void)
32327 int ret = 0;
32329 switch (DEFAULT_ABI)
32331 default:
32332 gcc_unreachable ();
32334 case ABI_AIX:
32335 ret = (TARGET_32BIT) ? 12 : 24;
32336 break;
32338 case ABI_ELFv2:
32339 gcc_assert (!TARGET_32BIT);
32340 ret = 32;
32341 break;
32343 case ABI_DARWIN:
32344 case ABI_V4:
32345 ret = (TARGET_32BIT) ? 40 : 48;
32346 break;
32349 return ret;
32352 /* Emit RTL insns to initialize the variable parts of a trampoline.
32353 FNADDR is an RTX for the address of the function's pure code.
32354 CXT is an RTX for the static chain value for the function. */
32356 static void
32357 rs6000_trampoline_init (rtx m_tramp, tree fndecl, rtx cxt)
32359 int regsize = (TARGET_32BIT) ? 4 : 8;
32360 rtx fnaddr = XEXP (DECL_RTL (fndecl), 0);
32361 rtx ctx_reg = force_reg (Pmode, cxt);
32362 rtx addr = force_reg (Pmode, XEXP (m_tramp, 0));
32364 switch (DEFAULT_ABI)
32366 default:
32367 gcc_unreachable ();
32369 /* Under AIX, just build the 3 word function descriptor */
32370 case ABI_AIX:
32372 rtx fnmem, fn_reg, toc_reg;
32374 if (!TARGET_POINTERS_TO_NESTED_FUNCTIONS)
32375 error ("You cannot take the address of a nested function if you use "
32376 "the -mno-pointers-to-nested-functions option.");
32378 fnmem = gen_const_mem (Pmode, force_reg (Pmode, fnaddr));
32379 fn_reg = gen_reg_rtx (Pmode);
32380 toc_reg = gen_reg_rtx (Pmode);
32382 /* Macro to shorten the code expansions below. */
32383 # define MEM_PLUS(MEM, OFFSET) adjust_address (MEM, Pmode, OFFSET)
32385 m_tramp = replace_equiv_address (m_tramp, addr);
32387 emit_move_insn (fn_reg, MEM_PLUS (fnmem, 0));
32388 emit_move_insn (toc_reg, MEM_PLUS (fnmem, regsize));
32389 emit_move_insn (MEM_PLUS (m_tramp, 0), fn_reg);
32390 emit_move_insn (MEM_PLUS (m_tramp, regsize), toc_reg);
32391 emit_move_insn (MEM_PLUS (m_tramp, 2*regsize), ctx_reg);
32393 # undef MEM_PLUS
32395 break;
32397 /* Under V.4/eabi/darwin, __trampoline_setup does the real work. */
32398 case ABI_ELFv2:
32399 case ABI_DARWIN:
32400 case ABI_V4:
32401 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__trampoline_setup"),
32402 LCT_NORMAL, VOIDmode, 4,
32403 addr, Pmode,
32404 GEN_INT (rs6000_trampoline_size ()), SImode,
32405 fnaddr, Pmode,
32406 ctx_reg, Pmode);
32407 break;
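/* Assumed shape of what the ABI_AIX arm above writes into the trampoline:
   a three-word function descriptor, each word regsize bytes (4 or 8).  */

struct aix_descriptor_model
{
  void *entry;         /* code address, copied from FNADDR's descriptor */
  void *toc;           /* TOC pointer, copied from offset regsize */
  void *static_chain;  /* CXT, stored at offset 2 * regsize */
};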
32412 /* Returns TRUE iff the target attribute indicated by ATTR_ID takes a plain
32413 identifier as an argument, so the front end shouldn't look it up. */
32415 static bool
32416 rs6000_attribute_takes_identifier_p (const_tree attr_id)
32418 return is_attribute_p ("altivec", attr_id);
32421 /* Handle the "altivec" attribute. The attribute may have
32422 arguments as follows:
32424 __attribute__((altivec(vector__)))
32425 __attribute__((altivec(pixel__))) (always followed by 'unsigned short')
32426 __attribute__((altivec(bool__))) (always followed by 'unsigned')
32428 and may appear more than once (e.g., 'vector bool char') in a
32429 given declaration. */
32431 static tree
32432 rs6000_handle_altivec_attribute (tree *node,
32433 tree name ATTRIBUTE_UNUSED,
32434 tree args,
32435 int flags ATTRIBUTE_UNUSED,
32436 bool *no_add_attrs)
32438 tree type = *node, result = NULL_TREE;
32439 machine_mode mode;
32440 int unsigned_p;
32441 char altivec_type
32442 = ((args && TREE_CODE (args) == TREE_LIST && TREE_VALUE (args)
32443 && TREE_CODE (TREE_VALUE (args)) == IDENTIFIER_NODE)
32444 ? *IDENTIFIER_POINTER (TREE_VALUE (args))
32445 : '?');
32447 while (POINTER_TYPE_P (type)
32448 || TREE_CODE (type) == FUNCTION_TYPE
32449 || TREE_CODE (type) == METHOD_TYPE
32450 || TREE_CODE (type) == ARRAY_TYPE)
32451 type = TREE_TYPE (type);
32453 mode = TYPE_MODE (type);
32455 /* Check for invalid AltiVec type qualifiers. */
32456 if (type == long_double_type_node)
32457 error ("use of %<long double%> in AltiVec types is invalid");
32458 else if (type == boolean_type_node)
32459 error ("use of boolean types in AltiVec types is invalid");
32460 else if (TREE_CODE (type) == COMPLEX_TYPE)
32461 error ("use of %<complex%> in AltiVec types is invalid");
32462 else if (DECIMAL_FLOAT_MODE_P (mode))
32463 error ("use of decimal floating point types in AltiVec types is invalid");
32464 else if (!TARGET_VSX)
32466 if (type == long_unsigned_type_node || type == long_integer_type_node)
32468 if (TARGET_64BIT)
32469 error ("use of %<long%> in AltiVec types is invalid for "
32470 "64-bit code without -mvsx");
32471 else if (rs6000_warn_altivec_long)
32472 warning (0, "use of %<long%> in AltiVec types is deprecated; "
32473 "use %<int%>");
32475 else if (type == long_long_unsigned_type_node
32476 || type == long_long_integer_type_node)
32477 error ("use of %<long long%> in AltiVec types is invalid without "
32478 "-mvsx");
32479 else if (type == double_type_node)
32480 error ("use of %<double%> in AltiVec types is invalid without -mvsx");
32483 switch (altivec_type)
32485 case 'v':
32486 unsigned_p = TYPE_UNSIGNED (type);
32487 switch (mode)
32489 case TImode:
32490 result = (unsigned_p ? unsigned_V1TI_type_node : V1TI_type_node);
32491 break;
32492 case DImode:
32493 result = (unsigned_p ? unsigned_V2DI_type_node : V2DI_type_node);
32494 break;
32495 case SImode:
32496 result = (unsigned_p ? unsigned_V4SI_type_node : V4SI_type_node);
32497 break;
32498 case HImode:
32499 result = (unsigned_p ? unsigned_V8HI_type_node : V8HI_type_node);
32500 break;
32501 case QImode:
32502 result = (unsigned_p ? unsigned_V16QI_type_node : V16QI_type_node);
32503 break;
32504 case SFmode: result = V4SF_type_node; break;
32505 case DFmode: result = V2DF_type_node; break;
32506 /* If the user says 'vector int bool', we may be handed the 'bool'
32507 attribute _before_ the 'vector' attribute, and so select the
32508 proper type in the 'b' case below. */
32509 case V4SImode: case V8HImode: case V16QImode: case V4SFmode:
32510 case V2DImode: case V2DFmode:
32511 result = type;
32512 default: break;
32514 break;
32515 case 'b':
32516 switch (mode)
32518 case DImode: case V2DImode: result = bool_V2DI_type_node; break;
32519 case SImode: case V4SImode: result = bool_V4SI_type_node; break;
32520 case HImode: case V8HImode: result = bool_V8HI_type_node; break;
32521 case QImode: case V16QImode: result = bool_V16QI_type_node;
32522 default: break;
32524 break;
32525 case 'p':
32526 switch (mode)
32528 case V8HImode: result = pixel_V8HI_type_node;
32529 default: break;
32531 default: break;
32534 /* Propagate qualifiers attached to the element type
32535 onto the vector type. */
32536 if (result && result != type && TYPE_QUALS (type))
32537 result = build_qualified_type (result, TYPE_QUALS (type));
32539 *no_add_attrs = true; /* No need to hang on to the attribute. */
32541 if (result)
32542 *node = lang_hooks.types.reconstruct_complex_type (*node, result);
32544 return NULL_TREE;
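/* Example declarations (assuming -maltivec; the "vector", "vector bool"
   and "vector pixel" keywords expand to these attribute spellings, per
   the comment above):  */

typedef int vec_int_t __attribute__ ((altivec (vector__)));             /* V4SI */
typedef unsigned int vec_bool_t __attribute__ ((altivec (bool__)));     /* bool_V4SI */
typedef unsigned short vec_pixel_t __attribute__ ((altivec (pixel__))); /* pixel_V8HI */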
32547 /* AltiVec defines four built-in scalar types that serve as vector
32548 elements; we must teach the compiler how to mangle them. */
32550 static const char *
32551 rs6000_mangle_type (const_tree type)
32553 type = TYPE_MAIN_VARIANT (type);
32555 if (TREE_CODE (type) != VOID_TYPE && TREE_CODE (type) != BOOLEAN_TYPE
32556 && TREE_CODE (type) != INTEGER_TYPE && TREE_CODE (type) != REAL_TYPE)
32557 return NULL;
32559 if (type == bool_char_type_node) return "U6__boolc";
32560 if (type == bool_short_type_node) return "U6__bools";
32561 if (type == pixel_type_node) return "u7__pixel";
32562 if (type == bool_int_type_node) return "U6__booli";
32563 if (type == bool_long_type_node) return "U6__booll";
32565 /* Use a unique name for __float128 rather than trying to use "e" or "g". Use
32566 "g" for IBM extended double, no matter whether it is long double (using
32567 -mabi=ibmlongdouble) or the distinct __ibm128 type. */
32568 if (TARGET_FLOAT128_TYPE)
32570 if (type == ieee128_float_type_node)
32571 return "U10__float128";
32573 if (type == ibm128_float_type_node)
32574 return "g";
32576 if (type == long_double_type_node && TARGET_LONG_DOUBLE_128)
32577 return (TARGET_IEEEQUAD) ? "U10__float128" : "g";
32580 /* Mangle IBM extended float long double as `g' (__float128) on
32581 powerpc*-linux where long-double-64 previously was the default. */
32582 if (TYPE_MAIN_VARIANT (type) == long_double_type_node
32583 && TARGET_ELF
32584 && TARGET_LONG_DOUBLE_128
32585 && !TARGET_IEEEQUAD)
32586 return "g";
32588 /* For all other types, use normal C++ mangling. */
32589 return NULL;
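/* Worked example (reading assumed from the Itanium C++ ABI): "u7__pixel"
   is a vendor-extended type, so void f (__pixel) mangles as
   _Z1fu7__pixel, while the capital-U "U6__bool*" spellings are
   vendor-extended qualifiers applied to the element type.  */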
32592 /* Handle a "longcall" or "shortcall" attribute; arguments as in
32593 struct attribute_spec.handler. */
32595 static tree
32596 rs6000_handle_longcall_attribute (tree *node, tree name,
32597 tree args ATTRIBUTE_UNUSED,
32598 int flags ATTRIBUTE_UNUSED,
32599 bool *no_add_attrs)
32601 if (TREE_CODE (*node) != FUNCTION_TYPE
32602 && TREE_CODE (*node) != FIELD_DECL
32603 && TREE_CODE (*node) != TYPE_DECL)
32605 warning (OPT_Wattributes, "%qE attribute only applies to functions",
32606 name);
32607 *no_add_attrs = true;
32610 return NULL_TREE;
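/* Example usage (assuming the documented semantics: a longcall is made
   through a register so it can reach any address):  */

extern void far_helper (void) __attribute__ ((longcall));   /* via CTR */
extern void near_helper (void) __attribute__ ((shortcall)); /* direct bl */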
32613 /* Set longcall attributes on all functions declared when
32614 rs6000_default_long_calls is true. */
32615 static void
32616 rs6000_set_default_type_attributes (tree type)
32618 if (rs6000_default_long_calls
32619 && (TREE_CODE (type) == FUNCTION_TYPE
32620 || TREE_CODE (type) == METHOD_TYPE))
32621 TYPE_ATTRIBUTES (type) = tree_cons (get_identifier ("longcall"),
32622 NULL_TREE,
32623 TYPE_ATTRIBUTES (type));
32625 #if TARGET_MACHO
32626 darwin_set_default_type_attributes (type);
32627 #endif
32630 /* Return a reference suitable for calling a function with the
32631 longcall attribute. */
32634 rs6000_longcall_ref (rtx call_ref)
32636 const char *call_name;
32637 tree node;
32639 if (GET_CODE (call_ref) != SYMBOL_REF)
32640 return call_ref;
32642 /* System V adds '.' to the internal name, so skip them. */
32643 call_name = XSTR (call_ref, 0);
32644 if (*call_name == '.')
32646 while (*call_name == '.')
32647 call_name++;
32649 node = get_identifier (call_name);
32650 call_ref = gen_rtx_SYMBOL_REF (VOIDmode, IDENTIFIER_POINTER (node));
32653 return force_reg (Pmode, call_ref);
32656 #ifndef TARGET_USE_MS_BITFIELD_LAYOUT
32657 #define TARGET_USE_MS_BITFIELD_LAYOUT 0
32658 #endif
32660 /* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
32661 struct attribute_spec.handler. */
32662 static tree
32663 rs6000_handle_struct_attribute (tree *node, tree name,
32664 tree args ATTRIBUTE_UNUSED,
32665 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
32667 tree *type = NULL;
32668 if (DECL_P (*node))
32670 if (TREE_CODE (*node) == TYPE_DECL)
32671 type = &TREE_TYPE (*node);
32673 else
32674 type = node;
32676 if (!(type && (TREE_CODE (*type) == RECORD_TYPE
32677 || TREE_CODE (*type) == UNION_TYPE)))
32679 warning (OPT_Wattributes, "%qE attribute ignored", name);
32680 *no_add_attrs = true;
32683 else if ((is_attribute_p ("ms_struct", name)
32684 && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type)))
32685 || ((is_attribute_p ("gcc_struct", name)
32686 && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type)))))
32688 warning (OPT_Wattributes, "%qE incompatible attribute ignored",
32689 name);
32690 *no_add_attrs = true;
32693 return NULL_TREE;
32696 static bool
32697 rs6000_ms_bitfield_layout_p (const_tree record_type)
32699 return (TARGET_USE_MS_BITFIELD_LAYOUT &&
32700 !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
32701 || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type));
32704 #ifdef USING_ELFOS_H
32706 /* A get_unnamed_section callback, used for switching to toc_section. */
32708 static void
32709 rs6000_elf_output_toc_section_asm_op (const void *data ATTRIBUTE_UNUSED)
32711 if ((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
32712 && TARGET_MINIMAL_TOC)
32714 if (!toc_initialized)
32716 fprintf (asm_out_file, "%s\n", TOC_SECTION_ASM_OP);
32717 ASM_OUTPUT_ALIGN (asm_out_file, TARGET_64BIT ? 3 : 2);
32718 (*targetm.asm_out.internal_label) (asm_out_file, "LCTOC", 0);
32719 fprintf (asm_out_file, "\t.tc ");
32720 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (asm_out_file, "LCTOC1[TC],");
32721 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (asm_out_file, "LCTOC1");
32722 fprintf (asm_out_file, "\n");
32724 fprintf (asm_out_file, "%s\n", MINIMAL_TOC_SECTION_ASM_OP);
32725 ASM_OUTPUT_ALIGN (asm_out_file, TARGET_64BIT ? 3 : 2);
32726 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (asm_out_file, "LCTOC1");
32727 fprintf (asm_out_file, " = .+32768\n");
32728 toc_initialized = 1;
32730 else
32731 fprintf (asm_out_file, "%s\n", MINIMAL_TOC_SECTION_ASM_OP);
32733 else if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
32735 fprintf (asm_out_file, "%s\n", TOC_SECTION_ASM_OP);
32736 if (!toc_initialized)
32738 ASM_OUTPUT_ALIGN (asm_out_file, TARGET_64BIT ? 3 : 2);
32739 toc_initialized = 1;
32742 else
32744 fprintf (asm_out_file, "%s\n", MINIMAL_TOC_SECTION_ASM_OP);
32745 if (!toc_initialized)
32747 ASM_OUTPUT_ALIGN (asm_out_file, TARGET_64BIT ? 3 : 2);
32748 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (asm_out_file, "LCTOC1");
32749 fprintf (asm_out_file, " = .+32768\n");
32750 toc_initialized = 1;
32755 /* Implement TARGET_ASM_INIT_SECTIONS. */
32757 static void
32758 rs6000_elf_asm_init_sections (void)
32760 toc_section
32761 = get_unnamed_section (0, rs6000_elf_output_toc_section_asm_op, NULL);
32763 sdata2_section
32764 = get_unnamed_section (SECTION_WRITE, output_section_asm_op,
32765 SDATA2_SECTION_ASM_OP);
32768 /* Implement TARGET_SELECT_RTX_SECTION. */
32770 static section *
32771 rs6000_elf_select_rtx_section (machine_mode mode, rtx x,
32772 unsigned HOST_WIDE_INT align)
32774 if (ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (x, mode))
32775 return toc_section;
32776 else
32777 return default_elf_select_rtx_section (mode, x, align);
32780 /* For a SYMBOL_REF, set generic flags and then perform some
32781 target-specific processing.
32783 When the AIX ABI is requested on a non-AIX system, replace the
32784 function name with the real name (with a leading .) rather than the
32785 function descriptor name. This saves a lot of overriding code to
32786 read the prefixes. */
32788 static void rs6000_elf_encode_section_info (tree, rtx, int) ATTRIBUTE_UNUSED;
32789 static void
32790 rs6000_elf_encode_section_info (tree decl, rtx rtl, int first)
32792 default_encode_section_info (decl, rtl, first);
32794 if (first
32795 && TREE_CODE (decl) == FUNCTION_DECL
32796 && !TARGET_AIX
32797 && DEFAULT_ABI == ABI_AIX)
32799 rtx sym_ref = XEXP (rtl, 0);
32800 size_t len = strlen (XSTR (sym_ref, 0));
32801 char *str = XALLOCAVEC (char, len + 2);
32802 str[0] = '.';
32803 memcpy (str + 1, XSTR (sym_ref, 0), len + 1);
32804 XSTR (sym_ref, 0) = ggc_alloc_string (str, len + 1);
32808 static inline bool
32809 compare_section_name (const char *section, const char *templ)
32811 int len;
32813 len = strlen (templ);
32814 return (strncmp (section, templ, len) == 0
32815 && (section[len] == 0 || section[len] == '.'));
32818 bool
32819 rs6000_elf_in_small_data_p (const_tree decl)
32821 if (rs6000_sdata == SDATA_NONE)
32822 return false;
32824 /* We want to merge strings, so we never consider them small data. */
32825 if (TREE_CODE (decl) == STRING_CST)
32826 return false;
32828 /* Functions are never in the small data area. */
32829 if (TREE_CODE (decl) == FUNCTION_DECL)
32830 return false;
32832 if (TREE_CODE (decl) == VAR_DECL && DECL_SECTION_NAME (decl))
32834 const char *section = DECL_SECTION_NAME (decl);
32835 if (compare_section_name (section, ".sdata")
32836 || compare_section_name (section, ".sdata2")
32837 || compare_section_name (section, ".gnu.linkonce.s")
32838 || compare_section_name (section, ".sbss")
32839 || compare_section_name (section, ".sbss2")
32840 || compare_section_name (section, ".gnu.linkonce.sb")
32841 || strcmp (section, ".PPC.EMB.sdata0") == 0
32842 || strcmp (section, ".PPC.EMB.sbss0") == 0)
32843 return true;
32845 else
32847 HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (decl));
32849 if (size > 0
32850 && size <= g_switch_value
32851 /* If it's not public, and we're not going to reference it there,
32852 there's no need to put it in the small data section. */
32853 && (rs6000_sdata != SDATA_DATA || TREE_PUBLIC (decl)))
32854 return true;
32857 return false;
32860 #endif /* USING_ELFOS_H */
32862 /* Implement TARGET_USE_BLOCKS_FOR_CONSTANT_P. */
32864 static bool
32865 rs6000_use_blocks_for_constant_p (machine_mode mode, const_rtx x)
32867 return !ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (x, mode);
32870 /* Do not place thread-local symbols refs in the object blocks. */
32872 static bool
32873 rs6000_use_blocks_for_decl_p (const_tree decl)
32875 return !DECL_THREAD_LOCAL_P (decl);
32878 /* Return a REG that occurs in ADDR with coefficient 1.
32879 ADDR can be effectively incremented by incrementing REG.
32881 r0 is special and we must not select it as an address
32882 register by this routine since our caller will try to
32883 increment the returned register via an "la" instruction. */
32886 find_addr_reg (rtx addr)
32888 while (GET_CODE (addr) == PLUS)
32890 if (GET_CODE (XEXP (addr, 0)) == REG
32891 && REGNO (XEXP (addr, 0)) != 0)
32892 addr = XEXP (addr, 0);
32893 else if (GET_CODE (XEXP (addr, 1)) == REG
32894 && REGNO (XEXP (addr, 1)) != 0)
32895 addr = XEXP (addr, 1);
32896 else if (CONSTANT_P (XEXP (addr, 0)))
32897 addr = XEXP (addr, 1);
32898 else if (CONSTANT_P (XEXP (addr, 1)))
32899 addr = XEXP (addr, 0);
32900 else
32901 gcc_unreachable ();
32903 gcc_assert (GET_CODE (addr) == REG && REGNO (addr) != 0);
32904 return addr;
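/* Example: for ADDR == (plus (reg 9) (const_int 16)) the loop above peels
   off the constant and returns (reg 9).  r0 must be rejected because in
   an "la" (addi) instruction a base of r0 denotes the literal value 0,
   so the caller could not increment through it.  */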
32907 void
32908 rs6000_fatal_bad_address (rtx op)
32910 fatal_insn ("bad address", op);
32913 #if TARGET_MACHO
32915 typedef struct branch_island_d {
32916 tree function_name;
32917 tree label_name;
32918 int line_number;
32919 } branch_island;
32922 static vec<branch_island, va_gc> *branch_islands;
32924 /* Remember to generate a branch island for far calls to the given
32925 function. */
32927 static void
32928 add_compiler_branch_island (tree label_name, tree function_name,
32929 int line_number)
32931 branch_island bi = {function_name, label_name, line_number};
32932 vec_safe_push (branch_islands, bi);
32935 /* Generate far-jump branch islands for everything recorded in
32936 branch_islands. Invoked immediately after the last instruction of
32937 the epilogue has been emitted; the branch islands must be appended
32938 to, and contiguous with, the function body. Mach-O stubs are
32939 generated in machopic_output_stub(). */
32941 static void
32942 macho_branch_islands (void)
32944 char tmp_buf[512];
32946 while (!vec_safe_is_empty (branch_islands))
32948 branch_island *bi = &branch_islands->last ();
32949 const char *label = IDENTIFIER_POINTER (bi->label_name);
32950 const char *name = IDENTIFIER_POINTER (bi->function_name);
32951 char name_buf[512];
32952 /* Cheap copy of the details from the Darwin ASM_OUTPUT_LABELREF(). */
32953 if (name[0] == '*' || name[0] == '&')
32954 strcpy (name_buf, name+1);
32955 else
32957 name_buf[0] = '_';
32958 strcpy (name_buf+1, name);
32960 strcpy (tmp_buf, "\n");
32961 strcat (tmp_buf, label);
32962 #if defined (DBX_DEBUGGING_INFO) || defined (XCOFF_DEBUGGING_INFO)
32963 if (write_symbols == DBX_DEBUG || write_symbols == XCOFF_DEBUG)
32964 dbxout_stabd (N_SLINE, bi->line_number);
32965 #endif /* DBX_DEBUGGING_INFO || XCOFF_DEBUGGING_INFO */
32966 if (flag_pic)
32968 if (TARGET_LINK_STACK)
32970 char name[32];
32971 get_ppc476_thunk_name (name);
32972 strcat (tmp_buf, ":\n\tmflr r0\n\tbl ");
32973 strcat (tmp_buf, name);
32974 strcat (tmp_buf, "\n");
32975 strcat (tmp_buf, label);
32976 strcat (tmp_buf, "_pic:\n\tmflr r11\n");
32978 else
32980 strcat (tmp_buf, ":\n\tmflr r0\n\tbcl 20,31,");
32981 strcat (tmp_buf, label);
32982 strcat (tmp_buf, "_pic\n");
32983 strcat (tmp_buf, label);
32984 strcat (tmp_buf, "_pic:\n\tmflr r11\n");
32987 strcat (tmp_buf, "\taddis r11,r11,ha16(");
32988 strcat (tmp_buf, name_buf);
32989 strcat (tmp_buf, " - ");
32990 strcat (tmp_buf, label);
32991 strcat (tmp_buf, "_pic)\n");
32993 strcat (tmp_buf, "\tmtlr r0\n");
32995 strcat (tmp_buf, "\taddi r12,r11,lo16(");
32996 strcat (tmp_buf, name_buf);
32997 strcat (tmp_buf, " - ");
32998 strcat (tmp_buf, label);
32999 strcat (tmp_buf, "_pic)\n");
33001 strcat (tmp_buf, "\tmtctr r12\n\tbctr\n");
33003 else
33005 strcat (tmp_buf, ":\nlis r12,hi16(");
33006 strcat (tmp_buf, name_buf);
33007 strcat (tmp_buf, ")\n\tori r12,r12,lo16(");
33008 strcat (tmp_buf, name_buf);
33009 strcat (tmp_buf, ")\n\tmtctr r12\n\tbctr");
33011 output_asm_insn (tmp_buf, 0);
33012 #if defined (DBX_DEBUGGING_INFO) || defined (XCOFF_DEBUGGING_INFO)
33013 if (write_symbols == DBX_DEBUG || write_symbols == XCOFF_DEBUG)
33014 dbxout_stabd (N_SLINE, bi->line_number);
33015 #endif /* DBX_DEBUGGING_INFO || XCOFF_DEBUGGING_INFO */
33016 branch_islands->pop ();
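/* For reference, a sketch of the non-PIC island emitted above: for a
   far call to foo recorded with label L42, the code appended after the
   function body looks roughly like

     L42:
     lis r12,hi16(_foo)
     ori r12,r12,lo16(_foo)
     mtctr r12
     bctr

   i.e. the full 32-bit address of the callee is built in r12 and the
   jump goes through the count register.  */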
33020 /* NO_PREVIOUS_DEF checks whether the function name is already in the
33021 linked list of branch islands. */
33023 static int
33024 no_previous_def (tree function_name)
33026 branch_island *bi;
33027 unsigned ix;
33029 FOR_EACH_VEC_SAFE_ELT (branch_islands, ix, bi)
33030 if (function_name == bi->function_name)
33031 return 0;
33032 return 1;
33035 /* GET_PREV_LABEL gets the label name from the previous definition of
33036 the function. */
33038 static tree
33039 get_prev_label (tree function_name)
33041 branch_island *bi;
33042 unsigned ix;
33044 FOR_EACH_VEC_SAFE_ELT (branch_islands, ix, bi)
33045 if (function_name == bi->function_name)
33046 return bi->label_name;
33047 return NULL_TREE;
33050 /* INSN is either a function call or a millicode call. It may have an
33051 unconditional jump in its delay slot.
33053 CALL_DEST is the routine we are calling. */
33055 char *
33056 output_call (rtx_insn *insn, rtx *operands, int dest_operand_number,
33057 int cookie_operand_number)
33059 static char buf[256];
33060 if (darwin_emit_branch_islands
33061 && GET_CODE (operands[dest_operand_number]) == SYMBOL_REF
33062 && (INTVAL (operands[cookie_operand_number]) & CALL_LONG))
33064 tree labelname;
33065 tree funname = get_identifier (XSTR (operands[dest_operand_number], 0));
33067 if (no_previous_def (funname))
33069 rtx label_rtx = gen_label_rtx ();
33070 char *label_buf, temp_buf[256];
33071 ASM_GENERATE_INTERNAL_LABEL (temp_buf, "L",
33072 CODE_LABEL_NUMBER (label_rtx));
33073 label_buf = temp_buf[0] == '*' ? temp_buf + 1 : temp_buf;
33074 labelname = get_identifier (label_buf);
33075 add_compiler_branch_island (labelname, funname, insn_line (insn));
33077 else
33078 labelname = get_prev_label (funname);
33080 /* "jbsr foo, L42" is Mach-O for "Link as 'bl foo' if a 'bl'
33081 instruction will reach 'foo', otherwise link as 'bl L42'".
33082 "L42" should be a 'branch island', that will do a far jump to
33083 'foo'. Branch islands are generated in
33084 macho_branch_islands(). */
33085 sprintf (buf, "jbsr %%z%d,%.246s",
33086 dest_operand_number, IDENTIFIER_POINTER (labelname));
33088 else
33089 sprintf (buf, "bl %%z%d", dest_operand_number);
33090 return buf;
33093 /* Generate PIC and indirect symbol stubs. */
33095 void
33096 machopic_output_stub (FILE *file, const char *symb, const char *stub)
33098 unsigned int length;
33099 char *symbol_name, *lazy_ptr_name;
33100 char *local_label_0;
33101 static int label = 0;
33103 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
33104 symb = (*targetm.strip_name_encoding) (symb);
33107 length = strlen (symb);
33108 symbol_name = XALLOCAVEC (char, length + 32);
33109 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);
33111 lazy_ptr_name = XALLOCAVEC (char, length + 32);
33112 GEN_LAZY_PTR_NAME_FOR_SYMBOL (lazy_ptr_name, symb, length);
33114 if (flag_pic == 2)
33115 switch_to_section (darwin_sections[machopic_picsymbol_stub1_section]);
33116 else
33117 switch_to_section (darwin_sections[machopic_symbol_stub1_section]);
33119 if (flag_pic == 2)
33121 fprintf (file, "\t.align 5\n");
33123 fprintf (file, "%s:\n", stub);
33124 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
33126 label++;
33127 local_label_0 = XALLOCAVEC (char, sizeof ("\"L00000000000$spb\""));
33128 sprintf (local_label_0, "\"L%011d$spb\"", label);
33130 fprintf (file, "\tmflr r0\n");
33131 if (TARGET_LINK_STACK)
33133 char name[32];
33134 get_ppc476_thunk_name (name);
33135 fprintf (file, "\tbl %s\n", name);
33136 fprintf (file, "%s:\n\tmflr r11\n", local_label_0);
33138 else
33140 fprintf (file, "\tbcl 20,31,%s\n", local_label_0);
33141 fprintf (file, "%s:\n\tmflr r11\n", local_label_0);
33143 fprintf (file, "\taddis r11,r11,ha16(%s-%s)\n",
33144 lazy_ptr_name, local_label_0);
33145 fprintf (file, "\tmtlr r0\n");
33146 fprintf (file, "\t%s r12,lo16(%s-%s)(r11)\n",
33147 (TARGET_64BIT ? "ldu" : "lwzu"),
33148 lazy_ptr_name, local_label_0);
33149 fprintf (file, "\tmtctr r12\n");
33150 fprintf (file, "\tbctr\n");
33152 else
33154 fprintf (file, "\t.align 4\n");
33156 fprintf (file, "%s:\n", stub);
33157 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
33159 fprintf (file, "\tlis r11,ha16(%s)\n", lazy_ptr_name);
33160 fprintf (file, "\t%s r12,lo16(%s)(r11)\n",
33161 (TARGET_64BIT ? "ldu" : "lwzu"),
33162 lazy_ptr_name);
33163 fprintf (file, "\tmtctr r12\n");
33164 fprintf (file, "\tbctr\n");
33167 switch_to_section (darwin_sections[machopic_lazy_symbol_ptr_section]);
33168 fprintf (file, "%s:\n", lazy_ptr_name);
33169 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
33170 fprintf (file, "%sdyld_stub_binding_helper\n",
33171 (TARGET_64BIT ? DOUBLE_INT_ASM_OP : "\t.long\t"));
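/* Editorial sketch of the non-PIC (flag_pic == 0) output above, for a
   32-bit target and a symbol foo; the stub label is the STUB argument
   and the lazy pointer name comes from GEN_LAZY_PTR_NAME_FOR_SYMBOL,
   so the exact spellings here are assumptions:

     _foo$stub:
       .indirect_symbol _foo
       lis r11,ha16(_foo$lazy_ptr)
       lwzu r12,lo16(_foo$lazy_ptr)(r11)
       mtctr r12
       bctr
     _foo$lazy_ptr:
       .indirect_symbol _foo
       .long dyld_stub_binding_helper  */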
33174 /* Legitimize PIC addresses. If the address is already
33175 position-independent, we return ORIG. Newly generated
33176 position-independent addresses go into a reg. This is REG if
33177 nonzero, otherwise we allocate register(s) as necessary. */
33179 #define SMALL_INT(X) ((UINTVAL (X) + 0x8000) < 0x10000)
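/* SMALL_INT accepts exactly the signed 16-bit range: for X = 0x7fff,
   0x7fff + 0x8000 == 0xffff < 0x10000, so it passes; for X = 0x8000 the
   sum is exactly 0x10000 and it fails.  Likewise -0x8000 maps to 0 and
   passes, while -0x8001 wraps to a huge unsigned value and fails.  */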
33181 rtx
33182 rs6000_machopic_legitimize_pic_address (rtx orig, machine_mode mode,
33183 rtx reg)
33185 rtx base, offset;
33187 if (reg == NULL && ! reload_in_progress && ! reload_completed)
33188 reg = gen_reg_rtx (Pmode);
33190 if (GET_CODE (orig) == CONST)
33192 rtx reg_temp;
33194 if (GET_CODE (XEXP (orig, 0)) == PLUS
33195 && XEXP (XEXP (orig, 0), 0) == pic_offset_table_rtx)
33196 return orig;
33198 gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS);
33200 /* Use a different reg for the intermediate value, as
33201 it will be marked UNCHANGING. */
33202 reg_temp = !can_create_pseudo_p () ? reg : gen_reg_rtx (Pmode);
33203 base = rs6000_machopic_legitimize_pic_address (XEXP (XEXP (orig, 0), 0),
33204 Pmode, reg_temp);
33205 offset =
33206 rs6000_machopic_legitimize_pic_address (XEXP (XEXP (orig, 0), 1),
33207 Pmode, reg);
33209 if (GET_CODE (offset) == CONST_INT)
33211 if (SMALL_INT (offset))
33212 return plus_constant (Pmode, base, INTVAL (offset));
33213 else if (! reload_in_progress && ! reload_completed)
33214 offset = force_reg (Pmode, offset);
33215 else
33217 rtx mem = force_const_mem (Pmode, orig);
33218 return machopic_legitimize_pic_address (mem, Pmode, reg);
33221 return gen_rtx_PLUS (Pmode, base, offset);
33224 /* Fall back on generic machopic code. */
33225 return machopic_legitimize_pic_address (orig, mode, reg);
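/* Usage sketch (editorial): legitimizing
   (const (plus (symbol_ref "x") (const_int 4))) first legitimizes the
   symbol into a register via the picbase; since 4 satisfies SMALL_INT,
   the offset is then folded back in with plus_constant, yielding
   (plus (reg) (const_int 4)).  A large offset is instead forced into a
   register, or spilled to the constant pool during reload.  */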
33228 /* Output a .machine directive for the Darwin assembler, and call
33229 the generic start_file routine. */
33231 static void
33232 rs6000_darwin_file_start (void)
33234 static const struct
33236 const char *arg;
33237 const char *name;
33238 HOST_WIDE_INT if_set;
33239 } mapping[] = {
33240 { "ppc64", "ppc64", MASK_64BIT },
33241 { "970", "ppc970", MASK_PPC_GPOPT | MASK_MFCRF | MASK_POWERPC64 },
33242 { "power4", "ppc970", 0 },
33243 { "G5", "ppc970", 0 },
33244 { "7450", "ppc7450", 0 },
33245 { "7400", "ppc7400", MASK_ALTIVEC },
33246 { "G4", "ppc7400", 0 },
33247 { "750", "ppc750", 0 },
33248 { "740", "ppc750", 0 },
33249 { "G3", "ppc750", 0 },
33250 { "604e", "ppc604e", 0 },
33251 { "604", "ppc604", 0 },
33252 { "603e", "ppc603", 0 },
33253 { "603", "ppc603", 0 },
33254 { "601", "ppc601", 0 },
33255 { NULL, "ppc", 0 } };
33256 const char *cpu_id = "";
33257 size_t i;
33259 rs6000_file_start ();
33260 darwin_file_start ();
33262 /* Determine the argument to -mcpu=. Default to G3 if not specified. */
33264 if (rs6000_default_cpu != 0 && rs6000_default_cpu[0] != '\0')
33265 cpu_id = rs6000_default_cpu;
33267 if (global_options_set.x_rs6000_cpu_index)
33268 cpu_id = processor_target_table[rs6000_cpu_index].name;
33270 /* Look through the mapping array. Pick the first entry that either
33271 matches the argument, has a bit set in IF_SET that is also set
33272 in the target flags, or has a NULL ARG (the final, default entry). */
33274 i = 0;
33275 while (mapping[i].arg != NULL
33276 && strcmp (mapping[i].arg, cpu_id) != 0
33277 && (mapping[i].if_set & rs6000_isa_flags) == 0)
33278 i++;
33280 fprintf (asm_out_file, "\t.machine %s\n", mapping[i].name);
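/* For example: a 32-bit -mcpu=970 compile matches the "970" entry and
   emits "\t.machine ppc970"; -mcpu=G4 reaches a "ppc7400" entry (via the
   name match, or earlier via MASK_ALTIVEC); any 64-bit compile stops at
   the first entry and emits "\t.machine ppc64"; an unrecognized CPU
   falls through to the terminating entry and emits "\t.machine ppc".  */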
33283 #endif /* TARGET_MACHO */
33285 #if TARGET_ELF
33286 static int
33287 rs6000_elf_reloc_rw_mask (void)
33289 if (flag_pic)
33290 return 3;
33291 else if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
33292 return 2;
33293 else
33294 return 0;
33297 /* Record an element in the table of global constructors. SYMBOL is
33298 a SYMBOL_REF of the function to be called; PRIORITY is a number
33299 between 0 and MAX_INIT_PRIORITY.
33301 This differs from default_named_section_asm_out_constructor in
33302 that we have special handling for -mrelocatable. */
33304 static void rs6000_elf_asm_out_constructor (rtx, int) ATTRIBUTE_UNUSED;
33305 static void
33306 rs6000_elf_asm_out_constructor (rtx symbol, int priority)
33308 const char *section = ".ctors";
33309 char buf[18];
33311 if (priority != DEFAULT_INIT_PRIORITY)
33313 sprintf (buf, ".ctors.%.5u",
33314 /* Invert the numbering so the linker puts us in the proper
33315 order; constructors are run from right to left, and the
33316 linker sorts in increasing order. */
33317 MAX_INIT_PRIORITY - priority);
33318 section = buf;
33321 switch_to_section (get_section (section, SECTION_WRITE, NULL));
33322 assemble_align (POINTER_SIZE);
33324 if (DEFAULT_ABI == ABI_V4
33325 && (TARGET_RELOCATABLE || flag_pic > 1))
33327 fputs ("\t.long (", asm_out_file);
33328 output_addr_const (asm_out_file, symbol);
33329 fputs (")@fixup\n", asm_out_file);
33331 else
33332 assemble_integer (symbol, POINTER_SIZE / BITS_PER_UNIT, POINTER_SIZE, 1);
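/* Worked example: MAX_INIT_PRIORITY is 65535, so a constructor with
   priority 200 lands in ".ctors.65335" (65535 - 200, printed as a
   zero-padded 5-digit field), while priority 65000 gives ".ctors.00535".
   Increasing-order section sorting plus right-to-left execution then
   runs the priority-200 constructor first, as intended.  */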
33335 static void rs6000_elf_asm_out_destructor (rtx, int) ATTRIBUTE_UNUSED;
33336 static void
33337 rs6000_elf_asm_out_destructor (rtx symbol, int priority)
33339 const char *section = ".dtors";
33340 char buf[18];
33342 if (priority != DEFAULT_INIT_PRIORITY)
33344 sprintf (buf, ".dtors.%.5u",
33345 /* Invert the numbering so the linker puts us in the proper
33346 order; constructors are run from right to left, and the
33347 linker sorts in increasing order. */
33348 MAX_INIT_PRIORITY - priority);
33349 section = buf;
33352 switch_to_section (get_section (section, SECTION_WRITE, NULL));
33353 assemble_align (POINTER_SIZE);
33355 if (DEFAULT_ABI == ABI_V4
33356 && (TARGET_RELOCATABLE || flag_pic > 1))
33358 fputs ("\t.long (", asm_out_file);
33359 output_addr_const (asm_out_file, symbol);
33360 fputs (")@fixup\n", asm_out_file);
33362 else
33363 assemble_integer (symbol, POINTER_SIZE / BITS_PER_UNIT, POINTER_SIZE, 1);
33366 void
33367 rs6000_elf_declare_function_name (FILE *file, const char *name, tree decl)
33369 if (TARGET_64BIT && DEFAULT_ABI != ABI_ELFv2)
33371 fputs ("\t.section\t\".opd\",\"aw\"\n\t.align 3\n", file);
33372 ASM_OUTPUT_LABEL (file, name);
33373 fputs (DOUBLE_INT_ASM_OP, file);
33374 rs6000_output_function_entry (file, name);
33375 fputs (",.TOC.@tocbase,0\n\t.previous\n", file);
33376 if (DOT_SYMBOLS)
33378 fputs ("\t.size\t", file);
33379 assemble_name (file, name);
33380 fputs (",24\n\t.type\t.", file);
33381 assemble_name (file, name);
33382 fputs (",@function\n", file);
33383 if (TREE_PUBLIC (decl) && ! DECL_WEAK (decl))
33385 fputs ("\t.globl\t.", file);
33386 assemble_name (file, name);
33387 putc ('\n', file);
33390 else
33391 ASM_OUTPUT_TYPE_DIRECTIVE (file, name, "function");
33392 ASM_DECLARE_RESULT (file, DECL_RESULT (decl));
33393 rs6000_output_function_entry (file, name);
33394 fputs (":\n", file);
33395 return;
33398 if (DEFAULT_ABI == ABI_V4
33399 && (TARGET_RELOCATABLE || flag_pic > 1)
33400 && !TARGET_SECURE_PLT
33401 && (!constant_pool_empty_p () || crtl->profile)
33402 && uses_TOC ())
33404 char buf[256];
33406 (*targetm.asm_out.internal_label) (file, "LCL", rs6000_pic_labelno);
33408 fprintf (file, "\t.long ");
33409 assemble_name (file, toc_label_name);
33410 need_toc_init = 1;
33411 putc ('-', file);
33412 ASM_GENERATE_INTERNAL_LABEL (buf, "LCF", rs6000_pic_labelno);
33413 assemble_name (file, buf);
33414 putc ('\n', file);
33417 ASM_OUTPUT_TYPE_DIRECTIVE (file, name, "function");
33418 ASM_DECLARE_RESULT (file, DECL_RESULT (decl));
33420 if (TARGET_CMODEL == CMODEL_LARGE && rs6000_global_entry_point_needed_p ())
33422 char buf[256];
33424 (*targetm.asm_out.internal_label) (file, "LCL", rs6000_pic_labelno);
33426 fprintf (file, "\t.quad .TOC.-");
33427 ASM_GENERATE_INTERNAL_LABEL (buf, "LCF", rs6000_pic_labelno);
33428 assemble_name (file, buf);
33429 putc ('\n', file);
33432 if (DEFAULT_ABI == ABI_AIX)
33434 const char *desc_name, *orig_name;
33436 orig_name = (*targetm.strip_name_encoding) (name);
33437 desc_name = orig_name;
33438 while (*desc_name == '.')
33439 desc_name++;
33441 if (TREE_PUBLIC (decl))
33442 fprintf (file, "\t.globl %s\n", desc_name);
33444 fprintf (file, "%s\n", MINIMAL_TOC_SECTION_ASM_OP);
33445 fprintf (file, "%s:\n", desc_name);
33446 fprintf (file, "\t.long %s\n", orig_name);
33447 fputs ("\t.long _GLOBAL_OFFSET_TABLE_\n", file);
33448 fputs ("\t.long 0\n", file);
33449 fprintf (file, "\t.previous\n");
33451 ASM_OUTPUT_LABEL (file, name);
33454 static void rs6000_elf_file_end (void) ATTRIBUTE_UNUSED;
33455 static void
33456 rs6000_elf_file_end (void)
33458 #ifdef HAVE_AS_GNU_ATTRIBUTE
33459 /* ??? The value emitted depends on options active at file end.
33460 Assume anyone using #pragma or attributes that might change
33461 options knows what they are doing. */
33462 if ((TARGET_64BIT || DEFAULT_ABI == ABI_V4)
33463 && rs6000_passes_float)
33465 int fp;
33467 if (TARGET_DF_FPR)
33468 fp = 1;
33469 else if (TARGET_SF_FPR)
33470 fp = 3;
33471 else
33472 fp = 2;
33473 if (rs6000_passes_long_double)
33475 if (!TARGET_LONG_DOUBLE_128)
33476 fp |= 2 * 4;
33477 else if (TARGET_IEEEQUAD)
33478 fp |= 3 * 4;
33479 else
33480 fp |= 1 * 4;
33482 fprintf (asm_out_file, "\t.gnu_attribute 4, %d\n", fp);
33484 if (TARGET_32BIT && DEFAULT_ABI == ABI_V4)
33486 if (rs6000_passes_vector)
33487 fprintf (asm_out_file, "\t.gnu_attribute 8, %d\n",
33488 (TARGET_ALTIVEC_ABI ? 2 : 1));
33489 if (rs6000_returns_struct)
33490 fprintf (asm_out_file, "\t.gnu_attribute 12, %d\n",
33491 aix_struct_return ? 2 : 1);
33493 #endif
33494 #if defined (POWERPC_LINUX) || defined (POWERPC_FREEBSD)
33495 if (TARGET_32BIT || DEFAULT_ABI == ABI_ELFv2)
33496 file_end_indicate_exec_stack ();
33497 #endif
33499 if (flag_split_stack)
33500 file_end_indicate_split_stack ();
33502 if (cpu_builtin_p)
33504 /* We have expanded a CPU builtin, so we need to emit a reference to
33505 the special symbol that LIBC uses to declare it supports the
33506 AT_PLATFORM and AT_HWCAP/AT_HWCAP2 in the TCB feature. */
33507 switch_to_section (data_section);
33508 fprintf (asm_out_file, "\t.align %u\n", TARGET_32BIT ? 2 : 3);
33509 fprintf (asm_out_file, "\t%s %s\n",
33510 TARGET_32BIT ? ".long" : ".quad", tcb_verification_symbol);
33513 #endif
33515 #if TARGET_XCOFF
33517 #ifndef HAVE_XCOFF_DWARF_EXTRAS
33518 #define HAVE_XCOFF_DWARF_EXTRAS 0
33519 #endif
33521 static enum unwind_info_type
33522 rs6000_xcoff_debug_unwind_info (void)
33524 return UI_NONE;
33527 static void
33528 rs6000_xcoff_asm_output_anchor (rtx symbol)
33530 char buffer[100];
33532 sprintf (buffer, "$ + " HOST_WIDE_INT_PRINT_DEC,
33533 SYMBOL_REF_BLOCK_OFFSET (symbol));
33534 fprintf (asm_out_file, "%s", SET_ASM_OP);
33535 RS6000_OUTPUT_BASENAME (asm_out_file, XSTR (symbol, 0));
33536 fprintf (asm_out_file, ",");
33537 RS6000_OUTPUT_BASENAME (asm_out_file, buffer);
33538 fprintf (asm_out_file, "\n");
33541 static void
33542 rs6000_xcoff_asm_globalize_label (FILE *stream, const char *name)
33544 fputs (GLOBAL_ASM_OP, stream);
33545 RS6000_OUTPUT_BASENAME (stream, name);
33546 putc ('\n', stream);
33549 /* A get_unnamed_decl callback, used for read-only sections. DIRECTIVE
33550 points to the section string variable. */
33552 static void
33553 rs6000_xcoff_output_readonly_section_asm_op (const void *directive)
33555 fprintf (asm_out_file, "\t.csect %s[RO],%s\n",
33556 *(const char *const *) directive,
33557 XCOFF_CSECT_DEFAULT_ALIGNMENT_STR);
33560 /* Likewise for read-write sections. */
33562 static void
33563 rs6000_xcoff_output_readwrite_section_asm_op (const void *directive)
33565 fprintf (asm_out_file, "\t.csect %s[RW],%s\n",
33566 *(const char *const *) directive,
33567 XCOFF_CSECT_DEFAULT_ALIGNMENT_STR);
33570 static void
33571 rs6000_xcoff_output_tls_section_asm_op (const void *directive)
33573 fprintf (asm_out_file, "\t.csect %s[TL],%s\n",
33574 *(const char *const *) directive,
33575 XCOFF_CSECT_DEFAULT_ALIGNMENT_STR);
33578 /* A get_unnamed_section callback, used for switching to toc_section. */
33580 static void
33581 rs6000_xcoff_output_toc_section_asm_op (const void *data ATTRIBUTE_UNUSED)
33583 if (TARGET_MINIMAL_TOC)
33585 /* toc_section is always selected at least once from
33586 rs6000_xcoff_file_start, so this is guaranteed to
33587 always be defined once and only once in each file. */
33588 if (!toc_initialized)
33590 fputs ("\t.toc\nLCTOC..1:\n", asm_out_file);
33591 fputs ("\t.tc toc_table[TC],toc_table[RW]\n", asm_out_file);
33592 toc_initialized = 1;
33594 fprintf (asm_out_file, "\t.csect toc_table[RW]%s\n",
33595 (TARGET_32BIT ? "" : ",3"));
33597 else
33598 fputs ("\t.toc\n", asm_out_file);
33601 /* Implement TARGET_ASM_INIT_SECTIONS. */
33603 static void
33604 rs6000_xcoff_asm_init_sections (void)
33606 read_only_data_section
33607 = get_unnamed_section (0, rs6000_xcoff_output_readonly_section_asm_op,
33608 &xcoff_read_only_section_name);
33610 private_data_section
33611 = get_unnamed_section (SECTION_WRITE,
33612 rs6000_xcoff_output_readwrite_section_asm_op,
33613 &xcoff_private_data_section_name);
33615 tls_data_section
33616 = get_unnamed_section (SECTION_TLS,
33617 rs6000_xcoff_output_tls_section_asm_op,
33618 &xcoff_tls_data_section_name);
33620 tls_private_data_section
33621 = get_unnamed_section (SECTION_TLS,
33622 rs6000_xcoff_output_tls_section_asm_op,
33623 &xcoff_private_data_section_name);
33625 read_only_private_data_section
33626 = get_unnamed_section (0, rs6000_xcoff_output_readonly_section_asm_op,
33627 &xcoff_private_data_section_name);
33629 toc_section
33630 = get_unnamed_section (0, rs6000_xcoff_output_toc_section_asm_op, NULL);
33632 readonly_data_section = read_only_data_section;
33635 static int
33636 rs6000_xcoff_reloc_rw_mask (void)
33638 return 3;
33641 static void
33642 rs6000_xcoff_asm_named_section (const char *name, unsigned int flags,
33643 tree decl ATTRIBUTE_UNUSED)
33645 int smclass;
33646 static const char * const suffix[5] = { "PR", "RO", "RW", "TL", "XO" };
33648 if (flags & SECTION_EXCLUDE)
33649 smclass = 4;
33650 else if (flags & SECTION_DEBUG)
33652 fprintf (asm_out_file, "\t.dwsect %s\n", name);
33653 return;
33655 else if (flags & SECTION_CODE)
33656 smclass = 0;
33657 else if (flags & SECTION_TLS)
33658 smclass = 3;
33659 else if (flags & SECTION_WRITE)
33660 smclass = 2;
33661 else
33662 smclass = 1;
33664 fprintf (asm_out_file, "\t.csect %s%s[%s],%u\n",
33665 (flags & SECTION_CODE) ? "." : "",
33666 name, suffix[smclass], flags & SECTION_ENTSIZE);
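/* For example, a named code section "hot" is emitted as
   "\t.csect .hot[PR],<align>" (note the "." prefix for code), and a
   writable section as "\t.csect hot[RW],<align>", where <align> is the
   log2 alignment encoded into the SECTION_ENTSIZE bits by
   rs6000_xcoff_section_type_flags.  */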
33669 #define IN_NAMED_SECTION(DECL) \
33670 ((TREE_CODE (DECL) == FUNCTION_DECL || TREE_CODE (DECL) == VAR_DECL) \
33671 && DECL_SECTION_NAME (DECL) != NULL)
33673 static section *
33674 rs6000_xcoff_select_section (tree decl, int reloc,
33675 unsigned HOST_WIDE_INT align)
33677 /* Place variables with alignment stricter than BIGGEST_ALIGNMENT into
33678 a named section. */
33679 if (align > BIGGEST_ALIGNMENT)
33681 resolve_unique_section (decl, reloc, true);
33682 if (IN_NAMED_SECTION (decl))
33683 return get_named_section (decl, NULL, reloc);
33686 if (decl_readonly_section (decl, reloc))
33688 if (TREE_PUBLIC (decl))
33689 return read_only_data_section;
33690 else
33691 return read_only_private_data_section;
33693 else
33695 #if HAVE_AS_TLS
33696 if (TREE_CODE (decl) == VAR_DECL && DECL_THREAD_LOCAL_P (decl))
33698 if (TREE_PUBLIC (decl))
33699 return tls_data_section;
33700 else if (bss_initializer_p (decl))
33702 /* Convert to COMMON to emit in BSS. */
33703 DECL_COMMON (decl) = 1;
33704 return tls_comm_section;
33706 else
33707 return tls_private_data_section;
33709 else
33710 #endif
33711 if (TREE_PUBLIC (decl))
33712 return data_section;
33713 else
33714 return private_data_section;
33718 static void
33719 rs6000_xcoff_unique_section (tree decl, int reloc ATTRIBUTE_UNUSED)
33721 const char *name;
33723 /* Use select_section for private data and uninitialized data with
33724 alignment <= BIGGEST_ALIGNMENT. */
33725 if (!TREE_PUBLIC (decl)
33726 || DECL_COMMON (decl)
33727 || (DECL_INITIAL (decl) == NULL_TREE
33728 && DECL_ALIGN (decl) <= BIGGEST_ALIGNMENT)
33729 || DECL_INITIAL (decl) == error_mark_node
33730 || (flag_zero_initialized_in_bss
33731 && initializer_zerop (DECL_INITIAL (decl))))
33732 return;
33734 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
33735 name = (*targetm.strip_name_encoding) (name);
33736 set_decl_section_name (decl, name);
33739 /* Select section for constant in constant pool.
33741 On RS/6000, all constants are in the private read-only data area.
33742 However, if this is being placed in the TOC it must be output as a
33743 toc entry. */
33745 static section *
33746 rs6000_xcoff_select_rtx_section (machine_mode mode, rtx x,
33747 unsigned HOST_WIDE_INT align ATTRIBUTE_UNUSED)
33749 if (ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (x, mode))
33750 return toc_section;
33751 else
33752 return read_only_private_data_section;
33755 /* Remove any trailing [DS] or the like from the symbol name. */
33757 static const char *
33758 rs6000_xcoff_strip_name_encoding (const char *name)
33760 size_t len;
33761 if (*name == '*')
33762 name++;
33763 len = strlen (name);
33764 if (name[len - 1] == ']')
33765 return ggc_alloc_string (name, len - 4);
33766 else
33767 return name;
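/* Worked example: "*_foo[DS]" first drops the leading '*'; then, since
   the name ends in ']', the last four characters are dropped, giving
   "_foo".  The fixed len - 4 assumes the mapping class is always two
   letters, e.g. [DS], [RO] or [RW].  */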
33770 /* Section attributes. AIX is always PIC. */
33772 static unsigned int
33773 rs6000_xcoff_section_type_flags (tree decl, const char *name, int reloc)
33775 unsigned int align;
33776 unsigned int flags = default_section_type_flags (decl, name, reloc);
33778 /* Align to at least UNIT size. */
33779 if ((flags & SECTION_CODE) != 0 || !decl || !DECL_P (decl))
33780 align = MIN_UNITS_PER_WORD;
33781 else
33782 /* Increase alignment of large objects if not already stricter. */
33783 align = MAX ((DECL_ALIGN (decl) / BITS_PER_UNIT),
33784 int_size_in_bytes (TREE_TYPE (decl)) > MIN_UNITS_PER_WORD
33785 ? UNITS_PER_FP_WORD : MIN_UNITS_PER_WORD);
33787 return flags | (exact_log2 (align) & SECTION_ENTSIZE);
33790 /* Output at beginning of assembler file.
33792 Initialize the section names for the RS/6000 at this point.
33794 Specify filename, including full path, to assembler.
33796 We want to go into the TOC section so at least one .toc will be emitted.
33797 Also, in order to output proper .bs/.es pairs, we need at least one static
33798 [RW] section emitted.
33800 Finally, declare mcount when profiling to make the assembler happy. */
33802 static void
33803 rs6000_xcoff_file_start (void)
33805 rs6000_gen_section_name (&xcoff_bss_section_name,
33806 main_input_filename, ".bss_");
33807 rs6000_gen_section_name (&xcoff_private_data_section_name,
33808 main_input_filename, ".rw_");
33809 rs6000_gen_section_name (&xcoff_read_only_section_name,
33810 main_input_filename, ".ro_");
33811 rs6000_gen_section_name (&xcoff_tls_data_section_name,
33812 main_input_filename, ".tls_");
33813 rs6000_gen_section_name (&xcoff_tbss_section_name,
33814 main_input_filename, ".tbss_[UL]");
33816 fputs ("\t.file\t", asm_out_file);
33817 output_quoted_string (asm_out_file, main_input_filename);
33818 fputc ('\n', asm_out_file);
33819 if (write_symbols != NO_DEBUG)
33820 switch_to_section (private_data_section);
33821 switch_to_section (toc_section);
33822 switch_to_section (text_section);
33823 if (profile_flag)
33824 fprintf (asm_out_file, "\t.extern %s\n", RS6000_MCOUNT);
33825 rs6000_file_start ();
33828 /* Output at end of assembler file.
33829 On the RS/6000, referencing data should automatically pull in text. */
33831 static void
33832 rs6000_xcoff_file_end (void)
33834 switch_to_section (text_section);
33835 fputs ("_section_.text:\n", asm_out_file);
33836 switch_to_section (data_section);
33837 fputs (TARGET_32BIT
33838 ? "\t.long _section_.text\n" : "\t.llong _section_.text\n",
33839 asm_out_file);
33842 struct declare_alias_data
33844 FILE *file;
33845 bool function_descriptor;
33848 /* Declare alias N. A helper function for for_node_and_aliases. */
33850 static bool
33851 rs6000_declare_alias (struct symtab_node *n, void *d)
33853 struct declare_alias_data *data = (struct declare_alias_data *)d;
33854 /* Main symbol is output specially, because varasm machinery does part of
33855 the job for us - we do not need to declare .globl/lglobs and such. */
33856 if (!n->alias || n->weakref)
33857 return false;
33859 if (lookup_attribute ("ifunc", DECL_ATTRIBUTES (n->decl)))
33860 return false;
33862 /* Prevent assemble_alias from trying to use the .set pseudo-op, which
33863 does not behave as the middle-end expects. */
33864 TREE_ASM_WRITTEN (n->decl) = true;
33866 const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (n->decl));
33867 char *buffer = (char *) alloca (strlen (name) + 2);
33868 char *p;
33869 int dollar_inside = 0;
33871 strcpy (buffer, name);
33872 p = strchr (buffer, '$');
33873 while (p) {
33874 *p = '_';
33875 dollar_inside++;
33876 p = strchr (p + 1, '$');
33878 if (TREE_PUBLIC (n->decl))
33880 if (!RS6000_WEAK || !DECL_WEAK (n->decl))
33882 if (dollar_inside) {
33883 if (data->function_descriptor)
33884 fprintf(data->file, "\t.rename .%s,\".%s\"\n", buffer, name);
33885 fprintf(data->file, "\t.rename %s,\"%s\"\n", buffer, name);
33887 if (data->function_descriptor)
33889 fputs ("\t.globl .", data->file);
33890 RS6000_OUTPUT_BASENAME (data->file, buffer);
33891 putc ('\n', data->file);
33893 fputs ("\t.globl ", data->file);
33894 RS6000_OUTPUT_BASENAME (data->file, buffer);
33895 putc ('\n', data->file);
33897 #ifdef ASM_WEAKEN_DECL
33898 else if (DECL_WEAK (n->decl) && !data->function_descriptor)
33899 ASM_WEAKEN_DECL (data->file, n->decl, name, NULL);
33900 #endif
33902 else
33904 if (dollar_inside)
33906 if (data->function_descriptor)
33907 fprintf(data->file, "\t.rename .%s,\".%s\"\n", buffer, name);
33908 fprintf(data->file, "\t.rename %s,\"%s\"\n", buffer, name);
33910 if (data->function_descriptor)
33912 fputs ("\t.lglobl .", data->file);
33913 RS6000_OUTPUT_BASENAME (data->file, buffer);
33914 putc ('\n', data->file);
33916 fputs ("\t.lglobl ", data->file);
33917 RS6000_OUTPUT_BASENAME (data->file, buffer);
33918 putc ('\n', data->file);
33920 if (data->function_descriptor)
33921 fputs (".", data->file);
33922 RS6000_OUTPUT_BASENAME (data->file, buffer);
33923 fputs (":\n", data->file);
33924 return false;
33928 #ifdef HAVE_GAS_HIDDEN
33929 /* Helper function to calculate visibility of a DECL
33930 and return the value as a const string. */
33932 static const char *
33933 rs6000_xcoff_visibility (tree decl)
33935 static const char * const visibility_types[] = {
33936 "", ",protected", ",hidden", ",internal"
33939 enum symbol_visibility vis = DECL_VISIBILITY (decl);
33941 if (TREE_CODE (decl) == FUNCTION_DECL
33942 && cgraph_node::get (decl)
33943 && cgraph_node::get (decl)->instrumentation_clone
33944 && cgraph_node::get (decl)->instrumented_version)
33945 vis = DECL_VISIBILITY (cgraph_node::get (decl)->instrumented_version->decl);
33947 return visibility_types[vis];
33949 #endif
33952 /* This macro produces the initial definition of a function name.
33953 On the RS/6000, we need to place an extra '.' in the function name and
33954 output the function descriptor.
33955 Dollar signs are converted to underscores.
33957 The csect for the function will have already been created when
33958 text_section was selected. We do have to go back to that csect, however.
33960 The third and fourth parameters to the .function pseudo-op (16 and 044)
33961 are placeholders which no longer have any use.
33963 Because AIX assembler's .set command has unexpected semantics, we output
33964 all aliases as alternative labels in front of the definition. */
33966 void
33967 rs6000_xcoff_declare_function_name (FILE *file, const char *name, tree decl)
33969 char *buffer = (char *) alloca (strlen (name) + 1);
33970 char *p;
33971 int dollar_inside = 0;
33972 struct declare_alias_data data = {file, false};
33974 strcpy (buffer, name);
33975 p = strchr (buffer, '$');
33976 while (p) {
33977 *p = '_';
33978 dollar_inside++;
33979 p = strchr (p + 1, '$');
33981 if (TREE_PUBLIC (decl))
33983 if (!RS6000_WEAK || !DECL_WEAK (decl))
33985 if (dollar_inside) {
33986 fprintf(file, "\t.rename .%s,\".%s\"\n", buffer, name);
33987 fprintf(file, "\t.rename %s,\"%s\"\n", buffer, name);
33989 fputs ("\t.globl .", file);
33990 RS6000_OUTPUT_BASENAME (file, buffer);
33991 #ifdef HAVE_GAS_HIDDEN
33992 fputs (rs6000_xcoff_visibility (decl), file);
33993 #endif
33994 putc ('\n', file);
33997 else
33999 if (dollar_inside) {
34000 fprintf(file, "\t.rename .%s,\".%s\"\n", buffer, name);
34001 fprintf(file, "\t.rename %s,\"%s\"\n", buffer, name);
34003 fputs ("\t.lglobl .", file);
34004 RS6000_OUTPUT_BASENAME (file, buffer);
34005 putc ('\n', file);
34007 fputs ("\t.csect ", file);
34008 RS6000_OUTPUT_BASENAME (file, buffer);
34009 fputs (TARGET_32BIT ? "[DS]\n" : "[DS],3\n", file);
34010 RS6000_OUTPUT_BASENAME (file, buffer);
34011 fputs (":\n", file);
34012 symtab_node::get (decl)->call_for_symbol_and_aliases (rs6000_declare_alias,
34013 &data, true);
34014 fputs (TARGET_32BIT ? "\t.long ." : "\t.llong .", file);
34015 RS6000_OUTPUT_BASENAME (file, buffer);
34016 fputs (", TOC[tc0], 0\n", file);
34017 in_section = NULL;
34018 switch_to_section (function_section (decl));
34019 putc ('.', file);
34020 RS6000_OUTPUT_BASENAME (file, buffer);
34021 fputs (":\n", file);
34022 data.function_descriptor = true;
34023 symtab_node::get (decl)->call_for_symbol_and_aliases (rs6000_declare_alias,
34024 &data, true);
34025 if (!DECL_IGNORED_P (decl))
34027 if (write_symbols == DBX_DEBUG || write_symbols == XCOFF_DEBUG)
34028 xcoffout_declare_function (file, decl, buffer);
34029 else if (write_symbols == DWARF2_DEBUG)
34031 name = (*targetm.strip_name_encoding) (name);
34032 fprintf (file, "\t.function .%s,.%s,2,0\n", name, name);
34035 return;
34039 /* Output assembly language to globalize a symbol from a DECL,
34040 possibly with visibility. */
34042 void
34043 rs6000_xcoff_asm_globalize_decl_name (FILE *stream, tree decl)
34045 const char *name = XSTR (XEXP (DECL_RTL (decl), 0), 0);
34046 fputs (GLOBAL_ASM_OP, stream);
34047 RS6000_OUTPUT_BASENAME (stream, name);
34048 #ifdef HAVE_GAS_HIDDEN
34049 fputs (rs6000_xcoff_visibility (decl), stream);
34050 #endif
34051 putc ('\n', stream);
34054 /* Output assembly language to define a symbol as COMMON from a DECL,
34055 possibly with visibility. */
34057 void
34058 rs6000_xcoff_asm_output_aligned_decl_common (FILE *stream,
34059 tree decl ATTRIBUTE_UNUSED,
34060 const char *name,
34061 unsigned HOST_WIDE_INT size,
34062 unsigned HOST_WIDE_INT align)
34064 unsigned HOST_WIDE_INT align2 = 2;
34066 if (align > 32)
34067 align2 = floor_log2 (align / BITS_PER_UNIT);
34068 else if (size > 4)
34069 align2 = 3;
34071 fputs (COMMON_ASM_OP, stream);
34072 RS6000_OUTPUT_BASENAME (stream, name);
34074 fprintf (stream,
34075 "," HOST_WIDE_INT_PRINT_UNSIGNED "," HOST_WIDE_INT_PRINT_UNSIGNED,
34076 size, align2);
34078 #ifdef HAVE_GAS_HIDDEN
34079 fputs (rs6000_xcoff_visibility (decl), stream);
34080 #endif
34081 putc ('\n', stream);
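/* Worked example: a 16-byte object with 64-bit alignment has
   align == 64 > 32, so align2 = floor_log2 (64 / 8) = 3 and the output
   is ".comm name,16,3"; a 4-byte word-aligned object keeps the default
   align2 of 2 (a 4-byte boundary), and any object larger than 4 bytes
   is raised to an 8-byte boundary.  */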
34084 /* This macro produces the initial definition of an object (variable) name.
34085 Because AIX assembler's .set command has unexpected semantics, we output
34086 all aliases as alternative labels in front of the definition. */
34088 void
34089 rs6000_xcoff_declare_object_name (FILE *file, const char *name, tree decl)
34091 struct declare_alias_data data = {file, false};
34092 RS6000_OUTPUT_BASENAME (file, name);
34093 fputs (":\n", file);
34094 symtab_node::get_create (decl)->call_for_symbol_and_aliases (rs6000_declare_alias,
34095 &data, true);
34098 /* Override the default 'SYMBOL-.' syntax with the AIX-compatible 'SYMBOL-$'. */
34100 void
34101 rs6000_asm_output_dwarf_pcrel (FILE *file, int size, const char *label)
34103 fputs (integer_asm_op (size, FALSE), file);
34104 assemble_name (file, label);
34105 fputs ("-$", file);
34108 /* Output a symbol offset relative to the dbase for the current object.
34109 We use __gcc_unwind_dbase as an arbitrary base for dbase and assume
34110 signed offsets.
34112 __gcc_unwind_dbase is embedded in all executables/libraries through
34113 libgcc/config/rs6000/crtdbase.S. */
34115 void
34116 rs6000_asm_output_dwarf_datarel (FILE *file, int size, const char *label)
34118 fputs (integer_asm_op (size, FALSE), file);
34119 assemble_name (file, label);
34120 fputs("-__gcc_unwind_dbase", file);
34123 #ifdef HAVE_AS_TLS
34124 static void
34125 rs6000_xcoff_encode_section_info (tree decl, rtx rtl, int first)
34127 rtx symbol;
34128 int flags;
34129 const char *symname;
34131 default_encode_section_info (decl, rtl, first);
34133 /* Careful not to prod global register variables. */
34134 if (!MEM_P (rtl))
34135 return;
34136 symbol = XEXP (rtl, 0);
34137 if (GET_CODE (symbol) != SYMBOL_REF)
34138 return;
34140 flags = SYMBOL_REF_FLAGS (symbol);
34142 if (TREE_CODE (decl) == VAR_DECL && DECL_THREAD_LOCAL_P (decl))
34143 flags &= ~SYMBOL_FLAG_HAS_BLOCK_INFO;
34145 SYMBOL_REF_FLAGS (symbol) = flags;
34147 /* Append mapping class to extern decls. */
34148 symname = XSTR (symbol, 0);
34149 if (decl /* sync condition with assemble_external () */
34150 && DECL_P (decl) && DECL_EXTERNAL (decl) && TREE_PUBLIC (decl)
34151 && ((TREE_CODE (decl) == VAR_DECL && !DECL_THREAD_LOCAL_P (decl))
34152 || TREE_CODE (decl) == FUNCTION_DECL)
34153 && symname[strlen (symname) - 1] != ']')
34155 char *newname = (char *) alloca (strlen (symname) + 5);
34156 strcpy (newname, symname);
34157 strcat (newname, (TREE_CODE (decl) == FUNCTION_DECL
34158 ? "[DS]" : "[UA]"));
34159 XSTR (symbol, 0) = ggc_strdup (newname);
34162 #endif /* HAVE_AS_TLS */
34163 #endif /* TARGET_XCOFF */
34165 void
34166 rs6000_asm_weaken_decl (FILE *stream, tree decl,
34167 const char *name, const char *val)
34169 fputs ("\t.weak\t", stream);
34170 RS6000_OUTPUT_BASENAME (stream, name);
34171 if (decl && TREE_CODE (decl) == FUNCTION_DECL
34172 && DEFAULT_ABI == ABI_AIX && DOT_SYMBOLS)
34174 if (TARGET_XCOFF)
34175 fputs ("[DS]", stream);
34176 #if TARGET_XCOFF && HAVE_GAS_HIDDEN
34177 if (TARGET_XCOFF)
34178 fputs (rs6000_xcoff_visibility (decl), stream);
34179 #endif
34180 fputs ("\n\t.weak\t.", stream);
34181 RS6000_OUTPUT_BASENAME (stream, name);
34183 #if TARGET_XCOFF && HAVE_GAS_HIDDEN
34184 if (TARGET_XCOFF)
34185 fputs (rs6000_xcoff_visibility (decl), stream);
34186 #endif
34187 fputc ('\n', stream);
34188 if (val)
34190 #ifdef ASM_OUTPUT_DEF
34191 ASM_OUTPUT_DEF (stream, name, val);
34192 #endif
34193 if (decl && TREE_CODE (decl) == FUNCTION_DECL
34194 && DEFAULT_ABI == ABI_AIX && DOT_SYMBOLS)
34196 fputs ("\t.set\t.", stream);
34197 RS6000_OUTPUT_BASENAME (stream, name);
34198 fputs (",.", stream);
34199 RS6000_OUTPUT_BASENAME (stream, val);
34200 fputc ('\n', stream);
34206 /* Return true if INSN should not be copied. */
34208 static bool
34209 rs6000_cannot_copy_insn_p (rtx_insn *insn)
34211 return recog_memoized (insn) >= 0
34212 && get_attr_cannot_copy (insn);
34215 /* Compute a (partial) cost for rtx X. Return true if the complete
34216 cost has been computed, and false if subexpressions should be
34217 scanned. In either case, *TOTAL contains the cost result. */
34219 static bool
34220 rs6000_rtx_costs (rtx x, machine_mode mode, int outer_code,
34221 int opno ATTRIBUTE_UNUSED, int *total, bool speed)
34223 int code = GET_CODE (x);
34225 switch (code)
34227 /* On the RS/6000, if it is valid in the insn, it is free. */
34228 case CONST_INT:
34229 if (((outer_code == SET
34230 || outer_code == PLUS
34231 || outer_code == MINUS)
34232 && (satisfies_constraint_I (x)
34233 || satisfies_constraint_L (x)))
34234 || (outer_code == AND
34235 && (satisfies_constraint_K (x)
34236 || (mode == SImode
34237 ? satisfies_constraint_L (x)
34238 : satisfies_constraint_J (x))))
34239 || ((outer_code == IOR || outer_code == XOR)
34240 && (satisfies_constraint_K (x)
34241 || (mode == SImode
34242 ? satisfies_constraint_L (x)
34243 : satisfies_constraint_J (x))))
34244 || outer_code == ASHIFT
34245 || outer_code == ASHIFTRT
34246 || outer_code == LSHIFTRT
34247 || outer_code == ROTATE
34248 || outer_code == ROTATERT
34249 || outer_code == ZERO_EXTRACT
34250 || (outer_code == MULT
34251 && satisfies_constraint_I (x))
34252 || ((outer_code == DIV || outer_code == UDIV
34253 || outer_code == MOD || outer_code == UMOD)
34254 && exact_log2 (INTVAL (x)) >= 0)
34255 || (outer_code == COMPARE
34256 && (satisfies_constraint_I (x)
34257 || satisfies_constraint_K (x)))
34258 || ((outer_code == EQ || outer_code == NE)
34259 && (satisfies_constraint_I (x)
34260 || satisfies_constraint_K (x)
34261 || (mode == SImode
34262 ? satisfies_constraint_L (x)
34263 : satisfies_constraint_J (x))))
34264 || (outer_code == GTU
34265 && satisfies_constraint_I (x))
34266 || (outer_code == LTU
34267 && satisfies_constraint_P (x)))
34269 *total = 0;
34270 return true;
34272 else if ((outer_code == PLUS
34273 && reg_or_add_cint_operand (x, VOIDmode))
34274 || (outer_code == MINUS
34275 && reg_or_sub_cint_operand (x, VOIDmode))
34276 || ((outer_code == SET
34277 || outer_code == IOR
34278 || outer_code == XOR)
34279 && (INTVAL (x)
34280 & ~ (unsigned HOST_WIDE_INT) 0xffffffff) == 0))
34282 *total = COSTS_N_INSNS (1);
34283 return true;
34285 /* FALLTHRU */
34287 case CONST_DOUBLE:
34288 case CONST_WIDE_INT:
34289 case CONST:
34290 case HIGH:
34291 case SYMBOL_REF:
34292 *total = !speed ? COSTS_N_INSNS (1) + 1 : COSTS_N_INSNS (2);
34293 return true;
34295 case MEM:
34296 /* When optimizing for size, MEM should be slightly more expensive
34297 than generating the address, e.g., (plus (reg) (const)).
34298 L1 cache latency is about two instructions. */
34299 *total = !speed ? COSTS_N_INSNS (1) + 1 : COSTS_N_INSNS (2);
34300 if (SLOW_UNALIGNED_ACCESS (mode, MEM_ALIGN (x)))
34301 *total += COSTS_N_INSNS (100);
34302 return true;
34304 case LABEL_REF:
34305 *total = 0;
34306 return true;
34308 case PLUS:
34309 case MINUS:
34310 if (FLOAT_MODE_P (mode))
34311 *total = rs6000_cost->fp;
34312 else
34313 *total = COSTS_N_INSNS (1);
34314 return false;
34316 case MULT:
34317 if (GET_CODE (XEXP (x, 1)) == CONST_INT
34318 && satisfies_constraint_I (XEXP (x, 1)))
34320 if (INTVAL (XEXP (x, 1)) >= -256
34321 && INTVAL (XEXP (x, 1)) <= 255)
34322 *total = rs6000_cost->mulsi_const9;
34323 else
34324 *total = rs6000_cost->mulsi_const;
34326 else if (mode == SFmode)
34327 *total = rs6000_cost->fp;
34328 else if (FLOAT_MODE_P (mode))
34329 *total = rs6000_cost->dmul;
34330 else if (mode == DImode)
34331 *total = rs6000_cost->muldi;
34332 else
34333 *total = rs6000_cost->mulsi;
34334 return false;
34336 case FMA:
34337 if (mode == SFmode)
34338 *total = rs6000_cost->fp;
34339 else
34340 *total = rs6000_cost->dmul;
34341 break;
34343 case DIV:
34344 case MOD:
34345 if (FLOAT_MODE_P (mode))
34347 *total = mode == DFmode ? rs6000_cost->ddiv
34348 : rs6000_cost->sdiv;
34349 return false;
34351 /* FALLTHRU */
34353 case UDIV:
34354 case UMOD:
34355 if (GET_CODE (XEXP (x, 1)) == CONST_INT
34356 && exact_log2 (INTVAL (XEXP (x, 1))) >= 0)
34358 if (code == DIV || code == MOD)
34359 /* Shift, addze */
34360 *total = COSTS_N_INSNS (2);
34361 else
34362 /* Shift */
34363 *total = COSTS_N_INSNS (1);
34365 else
34367 if (GET_MODE (XEXP (x, 1)) == DImode)
34368 *total = rs6000_cost->divdi;
34369 else
34370 *total = rs6000_cost->divsi;
34372 /* Add in shift and subtract for MOD unless we have a mod instruction. */
34373 if (!TARGET_MODULO && (code == MOD || code == UMOD))
34374 *total += COSTS_N_INSNS (2);
34375 return false;
34377 case CTZ:
34378 *total = COSTS_N_INSNS (TARGET_CTZ ? 1 : 4);
34379 return false;
34381 case FFS:
34382 *total = COSTS_N_INSNS (4);
34383 return false;
34385 case POPCOUNT:
34386 *total = COSTS_N_INSNS (TARGET_POPCNTD ? 1 : 6);
34387 return false;
34389 case PARITY:
34390 *total = COSTS_N_INSNS (TARGET_CMPB ? 2 : 6);
34391 return false;
34393 case NOT:
34394 if (outer_code == AND || outer_code == IOR || outer_code == XOR)
34395 *total = 0;
34396 else
34397 *total = COSTS_N_INSNS (1);
34398 return false;
34400 case AND:
34401 if (CONST_INT_P (XEXP (x, 1)))
34403 rtx left = XEXP (x, 0);
34404 rtx_code left_code = GET_CODE (left);
34406 /* rotate-and-mask: 1 insn. */
34407 if ((left_code == ROTATE
34408 || left_code == ASHIFT
34409 || left_code == LSHIFTRT)
34410 && rs6000_is_valid_shift_mask (XEXP (x, 1), left, mode))
34412 *total = rtx_cost (XEXP (left, 0), mode, left_code, 0, speed);
34413 if (!CONST_INT_P (XEXP (left, 1)))
34414 *total += rtx_cost (XEXP (left, 1), SImode, left_code, 1, speed);
34415 *total += COSTS_N_INSNS (1);
34416 return true;
34419 /* rotate-and-mask (no rotate), andi., andis.: 1 insn. */
34420 HOST_WIDE_INT val = INTVAL (XEXP (x, 1));
34421 if (rs6000_is_valid_and_mask (XEXP (x, 1), mode)
34422 || (val & 0xffff) == val
34423 || (val & 0xffff0000) == val
34424 || ((val & 0xffff) == 0 && mode == SImode))
34426 *total = rtx_cost (left, mode, AND, 0, speed);
34427 *total += COSTS_N_INSNS (1);
34428 return true;
34431 /* 2 insns. */
34432 if (rs6000_is_valid_2insn_and (XEXP (x, 1), mode))
34434 *total = rtx_cost (left, mode, AND, 0, speed);
34435 *total += COSTS_N_INSNS (2);
34436 return true;
34440 *total = COSTS_N_INSNS (1);
34441 return false;
34443 case IOR:
34444 /* FIXME */
34445 *total = COSTS_N_INSNS (1);
34446 return true;
34448 case CLZ:
34449 case XOR:
34450 case ZERO_EXTRACT:
34451 *total = COSTS_N_INSNS (1);
34452 return false;
34454 case ASHIFT:
34455 /* The EXTSWSLI instruction is a combined instruction, so don't count
34456 the sign extend and the shift separately within the insn. */
34457 if (TARGET_EXTSWSLI && mode == DImode
34458 && GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
34459 && GET_MODE (XEXP (XEXP (x, 0), 0)) == SImode)
34461 *total = 0;
34462 return false;
34464 /* fall through */
34466 case ASHIFTRT:
34467 case LSHIFTRT:
34468 case ROTATE:
34469 case ROTATERT:
34470 /* Handle mul_highpart. */
34471 if (outer_code == TRUNCATE
34472 && GET_CODE (XEXP (x, 0)) == MULT)
34474 if (mode == DImode)
34475 *total = rs6000_cost->muldi;
34476 else
34477 *total = rs6000_cost->mulsi;
34478 return true;
34480 else if (outer_code == AND)
34481 *total = 0;
34482 else
34483 *total = COSTS_N_INSNS (1);
34484 return false;
34486 case SIGN_EXTEND:
34487 case ZERO_EXTEND:
34488 if (GET_CODE (XEXP (x, 0)) == MEM)
34489 *total = 0;
34490 else
34491 *total = COSTS_N_INSNS (1);
34492 return false;
34494 case COMPARE:
34495 case NEG:
34496 case ABS:
34497 if (!FLOAT_MODE_P (mode))
34499 *total = COSTS_N_INSNS (1);
34500 return false;
34502 /* FALLTHRU */
34504 case FLOAT:
34505 case UNSIGNED_FLOAT:
34506 case FIX:
34507 case UNSIGNED_FIX:
34508 case FLOAT_TRUNCATE:
34509 *total = rs6000_cost->fp;
34510 return false;
34512 case FLOAT_EXTEND:
34513 if (mode == DFmode)
34514 *total = rs6000_cost->sfdf_convert;
34515 else
34516 *total = rs6000_cost->fp;
34517 return false;
34519 case UNSPEC:
34520 switch (XINT (x, 1))
34522 case UNSPEC_FRSP:
34523 *total = rs6000_cost->fp;
34524 return true;
34526 default:
34527 break;
34529 break;
34531 case CALL:
34532 case IF_THEN_ELSE:
34533 if (!speed)
34535 *total = COSTS_N_INSNS (1);
34536 return true;
34538 else if (FLOAT_MODE_P (mode) && TARGET_PPC_GFXOPT && TARGET_HARD_FLOAT)
34540 *total = rs6000_cost->fp;
34541 return false;
34543 break;
34545 case NE:
34546 case EQ:
34547 case GTU:
34548 case LTU:
34549 /* Carry bit requires mode == Pmode.
34550 NEG or PLUS already counted so only add one. */
34551 if (mode == Pmode
34552 && (outer_code == NEG || outer_code == PLUS))
34554 *total = COSTS_N_INSNS (1);
34555 return true;
34557 if (outer_code == SET)
34559 if (XEXP (x, 1) == const0_rtx)
34561 if (TARGET_ISEL && !TARGET_MFCRF)
34562 *total = COSTS_N_INSNS (8);
34563 else
34564 *total = COSTS_N_INSNS (2);
34565 return true;
34567 else
34569 *total = COSTS_N_INSNS (3);
34570 return false;
34573 /* FALLTHRU */
34575 case GT:
34576 case LT:
34577 case UNORDERED:
34578 if (outer_code == SET && (XEXP (x, 1) == const0_rtx))
34580 if (TARGET_ISEL && !TARGET_MFCRF)
34581 *total = COSTS_N_INSNS (8);
34582 else
34583 *total = COSTS_N_INSNS (2);
34584 return true;
34586 /* CC COMPARE. */
34587 if (outer_code == COMPARE)
34589 *total = 0;
34590 return true;
34592 break;
34594 default:
34595 break;
34598 return false;
34601 /* Debug form of rs6000_rtx_costs that is selected if -mdebug=cost. */
34603 static bool
34604 rs6000_debug_rtx_costs (rtx x, machine_mode mode, int outer_code,
34605 int opno, int *total, bool speed)
34607 bool ret = rs6000_rtx_costs (x, mode, outer_code, opno, total, speed);
34609 fprintf (stderr,
34610 "\nrs6000_rtx_costs, return = %s, mode = %s, outer_code = %s, "
34611 "opno = %d, total = %d, speed = %s, x:\n",
34612 ret ? "complete" : "scan inner",
34613 GET_MODE_NAME (mode),
34614 GET_RTX_NAME (outer_code),
34615 opno,
34616 *total,
34617 speed ? "true" : "false");
34619 debug_rtx (x);
34621 return ret;
34624 /* Debug form of ADDRESS_COST that is selected if -mdebug=cost. */
34626 static int
34627 rs6000_debug_address_cost (rtx x, machine_mode mode,
34628 addr_space_t as, bool speed)
34630 int ret = TARGET_ADDRESS_COST (x, mode, as, speed);
34632 fprintf (stderr, "\nrs6000_address_cost, return = %d, speed = %s, x:\n",
34633 ret, speed ? "true" : "false");
34634 debug_rtx (x);
34636 return ret;
34640 /* A C expression returning the cost of moving data from a register of class
34641 FROM to one of class TO. */
34643 static int
34644 rs6000_register_move_cost (machine_mode mode,
34645 reg_class_t from, reg_class_t to)
34647 int ret;
34649 if (TARGET_DEBUG_COST)
34650 dbg_cost_ctrl++;
34652 /* Moves from/to GENERAL_REGS. */
34653 if (reg_classes_intersect_p (to, GENERAL_REGS)
34654 || reg_classes_intersect_p (from, GENERAL_REGS))
34656 reg_class_t rclass = from;
34658 if (! reg_classes_intersect_p (to, GENERAL_REGS))
34659 rclass = to;
34661 if (rclass == FLOAT_REGS || rclass == ALTIVEC_REGS || rclass == VSX_REGS)
34662 ret = (rs6000_memory_move_cost (mode, rclass, false)
34663 + rs6000_memory_move_cost (mode, GENERAL_REGS, false));
34665 /* It's more expensive to move CR_REGS than CR0_REGS because of the
34666 shift. */
34667 else if (rclass == CR_REGS)
34668 ret = 4;
34670 /* For those processors that have slow LR/CTR moves, make them more
34671 expensive than memory in order to bias spills to memory. */
34672 else if ((rs6000_cpu == PROCESSOR_POWER6
34673 || rs6000_cpu == PROCESSOR_POWER7
34674 || rs6000_cpu == PROCESSOR_POWER8
34675 || rs6000_cpu == PROCESSOR_POWER9)
34676 && reg_classes_intersect_p (rclass, LINK_OR_CTR_REGS))
34677 ret = 6 * hard_regno_nregs[0][mode];
34679 else
34680 /* A move will cost one instruction per GPR moved. */
34681 ret = 2 * hard_regno_nregs[0][mode];
34684 /* If we have VSX, we can easily move between FPR or Altivec registers. */
34685 else if (VECTOR_MEM_VSX_P (mode)
34686 && reg_classes_intersect_p (to, VSX_REGS)
34687 && reg_classes_intersect_p (from, VSX_REGS))
34688 ret = 2 * hard_regno_nregs[FIRST_FPR_REGNO][mode];
34690 /* Moving between two similar registers is just one instruction. */
34691 else if (reg_classes_intersect_p (to, from))
34692 ret = (FLOAT128_2REG_P (mode)) ? 4 : 2;
34694 /* Everything else has to go through GENERAL_REGS. */
34695 else
34696 ret = (rs6000_register_move_cost (mode, GENERAL_REGS, to)
34697 + rs6000_register_move_cost (mode, from, GENERAL_REGS));
34699 if (TARGET_DEBUG_COST)
34701 if (dbg_cost_ctrl == 1)
34702 fprintf (stderr,
34703 "rs6000_register_move_cost:, ret=%d, mode=%s, from=%s, to=%s\n",
34704 ret, GET_MODE_NAME (mode), reg_class_names[from],
34705 reg_class_names[to]);
34706 dbg_cost_ctrl--;
34709 return ret;
34712 /* A C expression returning the cost of moving data of MODE from a register
34713 to or from memory. */
34715 static int
34716 rs6000_memory_move_cost (machine_mode mode, reg_class_t rclass,
34717 bool in ATTRIBUTE_UNUSED)
34719 int ret;
34721 if (TARGET_DEBUG_COST)
34722 dbg_cost_ctrl++;
34724 if (reg_classes_intersect_p (rclass, GENERAL_REGS))
34725 ret = 4 * hard_regno_nregs[0][mode];
34726 else if ((reg_classes_intersect_p (rclass, FLOAT_REGS)
34727 || reg_classes_intersect_p (rclass, VSX_REGS)))
34728 ret = 4 * hard_regno_nregs[32][mode];
34729 else if (reg_classes_intersect_p (rclass, ALTIVEC_REGS))
34730 ret = 4 * hard_regno_nregs[FIRST_ALTIVEC_REGNO][mode];
34731 else
34732 ret = 4 + rs6000_register_move_cost (mode, rclass, GENERAL_REGS);
34734 if (TARGET_DEBUG_COST)
34736 if (dbg_cost_ctrl == 1)
34737 fprintf (stderr,
34738 "rs6000_memory_move_cost: ret=%d, mode=%s, rclass=%s, in=%d\n",
34739 ret, GET_MODE_NAME (mode), reg_class_names[rclass], in);
34740 dbg_cost_ctrl--;
34743 return ret;
34746 /* Returns a code for a target-specific builtin that implements the
34747 reciprocal of the function, or NULL_TREE if not available. */
34749 static tree
34750 rs6000_builtin_reciprocal (tree fndecl)
34752 switch (DECL_FUNCTION_CODE (fndecl))
34754 case VSX_BUILTIN_XVSQRTDP:
34755 if (!RS6000_RECIP_AUTO_RSQRTE_P (V2DFmode))
34756 return NULL_TREE;
34758 return rs6000_builtin_decls[VSX_BUILTIN_RSQRT_2DF];
34760 case VSX_BUILTIN_XVSQRTSP:
34761 if (!RS6000_RECIP_AUTO_RSQRTE_P (V4SFmode))
34762 return NULL_TREE;
34764 return rs6000_builtin_decls[VSX_BUILTIN_RSQRT_4SF];
34766 default:
34767 return NULL_TREE;
34771 /* Load up a constant. If the mode is a vector mode, splat the value across
34772 all of the vector elements. */
34774 static rtx
34775 rs6000_load_constant_and_splat (machine_mode mode, REAL_VALUE_TYPE dconst)
34777 rtx reg;
34779 if (mode == SFmode || mode == DFmode)
34781 rtx d = const_double_from_real_value (dconst, mode);
34782 reg = force_reg (mode, d);
34784 else if (mode == V4SFmode)
34786 rtx d = const_double_from_real_value (dconst, SFmode);
34787 rtvec v = gen_rtvec (4, d, d, d, d);
34788 reg = gen_reg_rtx (mode);
34789 rs6000_expand_vector_init (reg, gen_rtx_PARALLEL (mode, v));
34791 else if (mode == V2DFmode)
34793 rtx d = const_double_from_real_value (dconst, DFmode);
34794 rtvec v = gen_rtvec (2, d, d);
34795 reg = gen_reg_rtx (mode);
34796 rs6000_expand_vector_init (reg, gen_rtx_PARALLEL (mode, v));
34798 else
34799 gcc_unreachable ();
34801 return reg;
34804 /* Generate an FMA instruction. */
34806 static void
34807 rs6000_emit_madd (rtx target, rtx m1, rtx m2, rtx a)
34809 machine_mode mode = GET_MODE (target);
34810 rtx dst;
34812 dst = expand_ternary_op (mode, fma_optab, m1, m2, a, target, 0);
34813 gcc_assert (dst != NULL);
34815 if (dst != target)
34816 emit_move_insn (target, dst);
34819 /* Generate a FNMSUB instruction: dst = -fma(m1, m2, -a). */
34821 static void
34822 rs6000_emit_nmsub (rtx dst, rtx m1, rtx m2, rtx a)
34824 machine_mode mode = GET_MODE (dst);
34825 rtx r;
34827 /* This is a tad more complicated, since the fnma_optab is for
34828 a different expression: fma(-m1, m2, a), which is the same
34829 thing except in the case of signed zeros.
34831 Fortunately we know that if FMA is supported, then FNMSUB is
34832 also supported in the ISA. Just expand it directly. */
34834 gcc_assert (optab_handler (fma_optab, mode) != CODE_FOR_nothing);
34836 r = gen_rtx_NEG (mode, a);
34837 r = gen_rtx_FMA (mode, m1, m2, r);
34838 r = gen_rtx_NEG (mode, r);
34839 emit_insn (gen_rtx_SET (dst, r));
34842 /* Newton-Raphson approximation of floating point divide DST = N/D. If NOTE_P,
34843 add a reg_note saying that this was a division. Support both scalar and
34844 vector divide. Assumes no trapping math and finite arguments. */
34846 void
34847 rs6000_emit_swdiv (rtx dst, rtx n, rtx d, bool note_p)
34849 machine_mode mode = GET_MODE (dst);
34850 rtx one, x0, e0, x1, xprev, eprev, xnext, enext, u, v;
34851 int i;
34853 /* Low precision estimates guarantee 5 bits of accuracy. High
34854 precision estimates guarantee 14 bits of accuracy. SFmode
34855 requires 23 bits of accuracy. DFmode requires 52 bits of
34856 accuracy. Each pass at least doubles the accuracy, leading
34857 to the following. */
34858 int passes = (TARGET_RECIP_PRECISION) ? 1 : 3;
34859 if (mode == DFmode || mode == V2DFmode)
34860 passes++;
34862 enum insn_code code = optab_handler (smul_optab, mode);
34863 insn_gen_fn gen_mul = GEN_FCN (code);
34865 gcc_assert (code != CODE_FOR_nothing);
34867 one = rs6000_load_constant_and_splat (mode, dconst1);
34869 /* x0 = 1./d estimate */
34870 x0 = gen_reg_rtx (mode);
34871 emit_insn (gen_rtx_SET (x0, gen_rtx_UNSPEC (mode, gen_rtvec (1, d),
34872 UNSPEC_FRES)));
34874 /* Each iteration but the last calculates x_(i+1) = x_i * (2 - d * x_i). */
34875 if (passes > 1) {
34877 /* e0 = 1. - d * x0 */
34878 e0 = gen_reg_rtx (mode);
34879 rs6000_emit_nmsub (e0, d, x0, one);
34881 /* x1 = x0 + e0 * x0 */
34882 x1 = gen_reg_rtx (mode);
34883 rs6000_emit_madd (x1, e0, x0, x0);
34885 for (i = 0, xprev = x1, eprev = e0; i < passes - 2;
34886 ++i, xprev = xnext, eprev = enext) {
34888 /* enext = eprev * eprev */
34889 enext = gen_reg_rtx (mode);
34890 emit_insn (gen_mul (enext, eprev, eprev));
34892 /* xnext = xprev + enext * xprev */
34893 xnext = gen_reg_rtx (mode);
34894 rs6000_emit_madd (xnext, enext, xprev, xprev);
34897 } else
34898 xprev = x0;
34900 /* The last iteration calculates x_(i+1) = n * x_i * (2 - d * x_i). */
34902 /* u = n * xprev */
34903 u = gen_reg_rtx (mode);
34904 emit_insn (gen_mul (u, n, xprev));
34906 /* v = n - (d * u) */
34907 v = gen_reg_rtx (mode);
34908 rs6000_emit_nmsub (v, d, u, n);
34910 /* dst = (v * xprev) + u */
34911 rs6000_emit_madd (dst, v, xprev, u);
34913 if (note_p)
34914 add_reg_note (get_last_insn (), REG_EQUAL, gen_rtx_DIV (mode, n, d));
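/* Worked numbers for the accuracy comment above: with high-precision
   estimates (TARGET_RECIP_PRECISION, 14 bits) and DFmode, passes == 2:

     x0 ~ 1/d estimate                       (14 bits)
     e0 = 1 - d*x0;  x1 = x0 + e0*x0         (~28 bits)
     u = n*x1;  v = n - d*u;  dst = v*x1 + u (~56 bits >= 52 needed)

   Every step after the estimate is a multiply or FMA-class operation,
   so the expansion is branch-free straight-line code.  */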
34917 /* Goldschmidt's Algorithm for single/double-precision floating point
34918 sqrt and rsqrt. Assumes no trapping math and finite arguments. */
34920 void
34921 rs6000_emit_swsqrt (rtx dst, rtx src, bool recip)
34923 machine_mode mode = GET_MODE (src);
34924 rtx e = gen_reg_rtx (mode);
34925 rtx g = gen_reg_rtx (mode);
34926 rtx h = gen_reg_rtx (mode);
34928 /* Low precision estimates guarantee 5 bits of accuracy. High
34929 precision estimates guarantee 14 bits of accuracy. SFmode
34930 requires 23 bits of accuracy. DFmode requires 52 bits of
34931 accuracy. Each pass at least doubles the accuracy, leading
34932 to the following. */
34933 int passes = (TARGET_RECIP_PRECISION) ? 1 : 3;
34934 if (mode == DFmode || mode == V2DFmode)
34935 passes++;
34937 int i;
34938 rtx mhalf;
34939 enum insn_code code = optab_handler (smul_optab, mode);
34940 insn_gen_fn gen_mul = GEN_FCN (code);
34942 gcc_assert (code != CODE_FOR_nothing);
34944 mhalf = rs6000_load_constant_and_splat (mode, dconsthalf);
34946 /* e = rsqrt estimate */
34947 emit_insn (gen_rtx_SET (e, gen_rtx_UNSPEC (mode, gen_rtvec (1, src),
34948 UNSPEC_RSQRT)));
34950 /* If (src == 0.0) filter infinity to prevent NaN for sqrt(0.0). */
34951 if (!recip)
34953 rtx zero = force_reg (mode, CONST0_RTX (mode));
34955 if (mode == SFmode)
34957 rtx target = emit_conditional_move (e, GT, src, zero, mode,
34958 e, zero, mode, 0);
34959 if (target != e)
34960 emit_move_insn (e, target);
34962 else
34964 rtx cond = gen_rtx_GT (VOIDmode, e, zero);
34965 rs6000_emit_vector_cond_expr (e, e, zero, cond, src, zero);
34969 /* g = sqrt estimate. */
34970 emit_insn (gen_mul (g, e, src));
34971 /* h = 1/(2*sqrt) estimate. */
34972 emit_insn (gen_mul (h, e, mhalf));
34974 if (recip)
34976 if (passes == 1)
34978 rtx t = gen_reg_rtx (mode);
34979 rs6000_emit_nmsub (t, g, h, mhalf);
34980 /* Apply the correction directly to the rsqrt estimate. */
34981 rs6000_emit_madd (dst, e, t, e);
34983 else
34985 for (i = 0; i < passes; i++)
34987 rtx t1 = gen_reg_rtx (mode);
34988 rtx g1 = gen_reg_rtx (mode);
34989 rtx h1 = gen_reg_rtx (mode);
34991 rs6000_emit_nmsub (t1, g, h, mhalf);
34992 rs6000_emit_madd (g1, g, t1, g);
34993 rs6000_emit_madd (h1, h, t1, h);
34995 g = g1;
34996 h = h1;
34998 /* h converges to 1/(2*sqrt); dst = 2*h gives the rsqrt. */
34999 emit_insn (gen_add3_insn (dst, h, h));
35002 else
35004 rtx t = gen_reg_rtx (mode);
35005 rs6000_emit_nmsub (t, g, h, mhalf);
35006 rs6000_emit_madd (dst, g, t, g);
35009 return;
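/* A minimal scalar sketch of the Goldschmidt iteration above, assuming a
   finite nonnegative input; the helper name and the math.h-based initial
   estimate are hypothetical (the real code uses UNSPEC_RSQRT, the pass
   count computed above, and a slightly different final pass).  */
#if 0
#include <math.h>

static double
swsqrt_model (double src, int recip, int passes)
{
  double e = 1.0 / sqrt (src);	/* stands in for the rsqrt estimate */
  double g = e * src;		/* ~ sqrt (src) */
  double h = e * 0.5;		/* ~ 1 / (2 * sqrt (src)) */
  for (int i = 0; i < passes; i++)
    {
      double t = 0.5 - g * h;	/* residual, via rs6000_emit_nmsub */
      g = g + t * g;		/* refine the sqrt estimate */
      h = h + t * h;		/* refine the 1/(2*sqrt) estimate */
    }
  return recip ? h + h : g;	/* 2*h ~ rsqrt; g ~ sqrt */
}
#endif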
35012 /* Emit popcount intrinsic on TARGET_POPCNTB (Power5) and TARGET_POPCNTD
35013 (Power7) targets. DST is the target, and SRC is the argument operand. */
35015 void
35016 rs6000_emit_popcount (rtx dst, rtx src)
35018 machine_mode mode = GET_MODE (dst);
35019 rtx tmp1, tmp2;
35021 /* Use the PPC ISA 2.06 popcnt{w,d} instruction if we can. */
35022 if (TARGET_POPCNTD)
35024 if (mode == SImode)
35025 emit_insn (gen_popcntdsi2 (dst, src));
35026 else
35027 emit_insn (gen_popcntddi2 (dst, src));
35028 return;
35031 tmp1 = gen_reg_rtx (mode);
35033 if (mode == SImode)
35035 emit_insn (gen_popcntbsi2 (tmp1, src));
35036 tmp2 = expand_mult (SImode, tmp1, GEN_INT (0x01010101),
35037 NULL_RTX, 0);
35038 tmp2 = force_reg (SImode, tmp2);
35039 emit_insn (gen_lshrsi3 (dst, tmp2, GEN_INT (24)));
35041 else
35043 emit_insn (gen_popcntbdi2 (tmp1, src));
35044 tmp2 = expand_mult (DImode, tmp1,
35045 GEN_INT ((HOST_WIDE_INT)
35046 0x01010101 << 32 | 0x01010101),
35047 NULL_RTX, 0);
35048 tmp2 = force_reg (DImode, tmp2);
35049 emit_insn (gen_lshrdi3 (dst, tmp2, GEN_INT (56)));
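/* A minimal scalar sketch of the SImode fallback above; the helper name is
   hypothetical and __builtin_popcount on a byte stands in for popcntb.
   Each byte count is at most 8, so the multiply by 0x01010101 accumulates
   all four counts into the most significant byte without carry-out.  */
#if 0
static unsigned int
popcount_model (unsigned int src)
{
  unsigned int bytes = 0;
  for (int i = 0; i < 4; i++)	/* models popcntbsi2 */
    bytes |= (unsigned int) __builtin_popcount ((src >> (8 * i)) & 0xff)
	     << (8 * i);
  return (bytes * 0x01010101u) >> 24;	/* byte sum lands in the top byte */
}
#endif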
35054 /* Emit parity intrinsic on TARGET_POPCNTB targets. DST is the
35055 target, and SRC is the argument operand. */
35057 void
35058 rs6000_emit_parity (rtx dst, rtx src)
35060 machine_mode mode = GET_MODE (dst);
35061 rtx tmp;
35063 tmp = gen_reg_rtx (mode);
35065 /* Use the PPC ISA 2.05 prtyw/prtyd instruction if we can. */
35066 if (TARGET_CMPB)
35068 if (mode == SImode)
35070 emit_insn (gen_popcntbsi2 (tmp, src));
35071 emit_insn (gen_paritysi2_cmpb (dst, tmp));
35073 else
35075 emit_insn (gen_popcntbdi2 (tmp, src));
35076 emit_insn (gen_paritydi2_cmpb (dst, tmp));
35078 return;
35081 if (mode == SImode)
35083 /* Is mult+shift >= shift+xor+shift+xor? */
35084 if (rs6000_cost->mulsi_const >= COSTS_N_INSNS (3))
35086 rtx tmp1, tmp2, tmp3, tmp4;
35088 tmp1 = gen_reg_rtx (SImode);
35089 emit_insn (gen_popcntbsi2 (tmp1, src));
35091 tmp2 = gen_reg_rtx (SImode);
35092 emit_insn (gen_lshrsi3 (tmp2, tmp1, GEN_INT (16)));
35093 tmp3 = gen_reg_rtx (SImode);
35094 emit_insn (gen_xorsi3 (tmp3, tmp1, tmp2));
35096 tmp4 = gen_reg_rtx (SImode);
35097 emit_insn (gen_lshrsi3 (tmp4, tmp3, GEN_INT (8)));
35098 emit_insn (gen_xorsi3 (tmp, tmp3, tmp4));
35100 else
35101 rs6000_emit_popcount (tmp, src);
35102 emit_insn (gen_andsi3 (dst, tmp, const1_rtx));
35104 else
35106 /* Is mult+shift >= shift+xor+shift+xor+shift+xor? */
35107 if (rs6000_cost->muldi >= COSTS_N_INSNS (5))
35109 rtx tmp1, tmp2, tmp3, tmp4, tmp5, tmp6;
35111 tmp1 = gen_reg_rtx (DImode);
35112 emit_insn (gen_popcntbdi2 (tmp1, src));
35114 tmp2 = gen_reg_rtx (DImode);
35115 emit_insn (gen_lshrdi3 (tmp2, tmp1, GEN_INT (32)));
35116 tmp3 = gen_reg_rtx (DImode);
35117 emit_insn (gen_xordi3 (tmp3, tmp1, tmp2));
35119 tmp4 = gen_reg_rtx (DImode);
35120 emit_insn (gen_lshrdi3 (tmp4, tmp3, GEN_INT (16)));
35121 tmp5 = gen_reg_rtx (DImode);
35122 emit_insn (gen_xordi3 (tmp5, tmp3, tmp4));
35124 tmp6 = gen_reg_rtx (DImode);
35125 emit_insn (gen_lshrdi3 (tmp6, tmp5, GEN_INT (8)));
35126 emit_insn (gen_xordi3 (tmp, tmp5, tmp6));
35128 else
35129 rs6000_emit_popcount (tmp, src);
35130 emit_insn (gen_anddi3 (dst, tmp, const1_rtx));
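/* A minimal scalar sketch of the mult-free SImode path above; the helper
   name is hypothetical.  The parity of a sum equals the xor of the
   addends' low bits, so xor-folding the per-byte counts and keeping bit 0
   yields the parity of the whole word.  */
#if 0
static unsigned int
parity_model (unsigned int src)
{
  unsigned int t = 0;
  for (int i = 0; i < 4; i++)	/* models popcntbsi2 */
    t |= (unsigned int) __builtin_popcount ((src >> (8 * i)) & 0xff)
	 << (8 * i);
  t ^= t >> 16;			/* fold halfwords */
  t ^= t >> 8;			/* fold bytes */
  return t & 1;
}
#endif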
35134 /* Expand an Altivec constant permutation for little endian mode.
35135 There are two issues: First, the two input operands must be
35136 swapped so that together they form a double-wide array in LE
35137 order. Second, the vperm instruction has surprising behavior
35138 in LE mode: it interprets the elements of the source vectors
35139 in BE mode ("left to right") and interprets the elements of
35140 the destination vector in LE mode ("right to left"). To
35141 correct for this, we must subtract each element of the permute
35142 control vector from 31.
35144 For example, suppose we want to concatenate vr10 = {0, 1, 2, 3}
35145 with vr11 = {4, 5, 6, 7} and extract {0, 2, 4, 6} using a vperm.
35146 We place {0,1,2,3,8,9,10,11,16,17,18,19,24,25,26,27} in vr12 to
35147 serve as the permute control vector. Then, in BE mode,
35149 vperm 9,10,11,12
35151 places the desired result in vr9. However, in LE mode the
35152 vector contents will be
35154 vr10 = 00000003 00000002 00000001 00000000
35155 vr11 = 00000007 00000006 00000005 00000004
35157 The result of the vperm using the same permute control vector is
35159 vr9 = 05000000 07000000 01000000 03000000
35161 That is, the leftmost 4 bytes of vr10 are interpreted as the
35162 source for the rightmost 4 bytes of vr9, and so on.
35164 If we change the permute control vector to
35166 vr12 = {31,30,29,28,23,22,21,20,15,14,13,12,7,6,5,4}
35168 and issue
35170 vperm 9,11,10,12
35172 we get the desired
35174 vr9 = 00000006 00000004 00000002 00000000. */
35176 void
35177 altivec_expand_vec_perm_const_le (rtx operands[4])
35179 unsigned int i;
35180 rtx perm[16];
35181 rtx constv, unspec;
35182 rtx target = operands[0];
35183 rtx op0 = operands[1];
35184 rtx op1 = operands[2];
35185 rtx sel = operands[3];
35187 /* Unpack and adjust the constant selector. */
35188 for (i = 0; i < 16; ++i)
35190 rtx e = XVECEXP (sel, 0, i);
35191 unsigned int elt = 31 - (INTVAL (e) & 31);
35192 perm[i] = GEN_INT (elt);
35195 /* Expand to a permute, swapping the inputs and using the
35196 adjusted selector. */
35197 if (!REG_P (op0))
35198 op0 = force_reg (V16QImode, op0);
35199 if (!REG_P (op1))
35200 op1 = force_reg (V16QImode, op1);
35202 constv = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, perm));
35203 constv = force_reg (V16QImode, constv);
35204 unspec = gen_rtx_UNSPEC (V16QImode, gen_rtvec (3, op1, op0, constv),
35205 UNSPEC_VPERM);
35206 if (!REG_P (target))
35208 rtx tmp = gen_reg_rtx (V16QImode);
35209 emit_move_insn (tmp, unspec);
35210 unspec = tmp;
35213 emit_move_insn (target, unspec);
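/* A minimal sketch of the selector adjustment above; the helper name is
   hypothetical.  With the inputs swapped, byte I of the little-endian
   result comes from element 31 - (sel[I] & 31) of the swapped double-wide
   input.  */
#if 0
static void
adjust_selector_le (const unsigned char sel[16], unsigned char out[16])
{
  for (int i = 0; i < 16; i++)
    out[i] = 31 - (sel[i] & 31);
}
#endif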
35216 /* Similarly to altivec_expand_vec_perm_const_le, we must adjust the
35217 permute control vector.  But here it's not a constant, so we must
35218 generate a vector NAND or NOR to do the adjustment: vperm reads only
the low five bits of each selector byte, so ~b equals 31 - b (mod 32) and
a bitwise NOT performs the subtraction.  */
35220 void
35221 altivec_expand_vec_perm_le (rtx operands[4])
35223 rtx notx, iorx, unspec;
35224 rtx target = operands[0];
35225 rtx op0 = operands[1];
35226 rtx op1 = operands[2];
35227 rtx sel = operands[3];
35228 rtx tmp = target;
35229 rtx norreg = gen_reg_rtx (V16QImode);
35230 machine_mode mode = GET_MODE (target);
35232 /* Get everything in regs so the pattern matches. */
35233 if (!REG_P (op0))
35234 op0 = force_reg (mode, op0);
35235 if (!REG_P (op1))
35236 op1 = force_reg (mode, op1);
35237 if (!REG_P (sel))
35238 sel = force_reg (V16QImode, sel);
35239 if (!REG_P (target))
35240 tmp = gen_reg_rtx (mode);
35242 if (TARGET_P9_VECTOR)
35244 unspec = gen_rtx_UNSPEC (mode, gen_rtvec (3, op0, op1, sel),
35245 UNSPEC_VPERMR);
35247 else
35249 /* Invert the selector with a VNAND if available, else a VNOR.
35250 The VNAND is preferred for future fusion opportunities. */
35251 notx = gen_rtx_NOT (V16QImode, sel);
35252 iorx = (TARGET_P8_VECTOR
35253 ? gen_rtx_IOR (V16QImode, notx, notx)
35254 : gen_rtx_AND (V16QImode, notx, notx));
35255 emit_insn (gen_rtx_SET (norreg, iorx));
35257 /* Permute with operands reversed and adjusted selector. */
35258 unspec = gen_rtx_UNSPEC (mode, gen_rtvec (3, op1, op0, norreg),
35259 UNSPEC_VPERM);
35262 /* Copy into target, possibly by way of a register. */
35263 if (!REG_P (target))
35265 emit_move_insn (tmp, unspec);
35266 unspec = tmp;
35269 emit_move_insn (target, unspec);
35272 /* Expand an Altivec constant permutation. Return true if we match
35273 an efficient implementation; false to fall back to VPERM. */
35275 bool
35276 altivec_expand_vec_perm_const (rtx operands[4])
35278 struct altivec_perm_insn {
35279 HOST_WIDE_INT mask;
35280 enum insn_code impl;
35281 unsigned char perm[16];
35283 static const struct altivec_perm_insn patterns[] = {
35284 { OPTION_MASK_ALTIVEC, CODE_FOR_altivec_vpkuhum_direct,
35285 { 1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31 } },
35286 { OPTION_MASK_ALTIVEC, CODE_FOR_altivec_vpkuwum_direct,
35287 { 2, 3, 6, 7, 10, 11, 14, 15, 18, 19, 22, 23, 26, 27, 30, 31 } },
35288 { OPTION_MASK_ALTIVEC,
35289 (BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrghb_direct
35290 : CODE_FOR_altivec_vmrglb_direct),
35291 { 0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23 } },
35292 { OPTION_MASK_ALTIVEC,
35293 (BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrghh_direct
35294 : CODE_FOR_altivec_vmrglh_direct),
35295 { 0, 1, 16, 17, 2, 3, 18, 19, 4, 5, 20, 21, 6, 7, 22, 23 } },
35296 { OPTION_MASK_ALTIVEC,
35297 (BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrghw_direct
35298 : CODE_FOR_altivec_vmrglw_direct),
35299 { 0, 1, 2, 3, 16, 17, 18, 19, 4, 5, 6, 7, 20, 21, 22, 23 } },
35300 { OPTION_MASK_ALTIVEC,
35301 (BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrglb_direct
35302 : CODE_FOR_altivec_vmrghb_direct),
35303 { 8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31 } },
35304 { OPTION_MASK_ALTIVEC,
35305 (BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrglh_direct
35306 : CODE_FOR_altivec_vmrghh_direct),
35307 { 8, 9, 24, 25, 10, 11, 26, 27, 12, 13, 28, 29, 14, 15, 30, 31 } },
35308 { OPTION_MASK_ALTIVEC,
35309 (BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrglw_direct
35310 : CODE_FOR_altivec_vmrghw_direct),
35311 { 8, 9, 10, 11, 24, 25, 26, 27, 12, 13, 14, 15, 28, 29, 30, 31 } },
35312 { OPTION_MASK_P8_VECTOR, CODE_FOR_p8_vmrgew_v4si,
35313 { 0, 1, 2, 3, 16, 17, 18, 19, 8, 9, 10, 11, 24, 25, 26, 27 } },
35314 { OPTION_MASK_P8_VECTOR, CODE_FOR_p8_vmrgow,
35315 { 4, 5, 6, 7, 20, 21, 22, 23, 12, 13, 14, 15, 28, 29, 30, 31 } }
35318 unsigned int i, j, elt, which;
35319 unsigned char perm[16];
35320 rtx target, op0, op1, sel, x;
35321 bool one_vec;
35323 target = operands[0];
35324 op0 = operands[1];
35325 op1 = operands[2];
35326 sel = operands[3];
35328 /* Unpack the constant selector. */
35329 for (i = which = 0; i < 16; ++i)
35331 rtx e = XVECEXP (sel, 0, i);
35332 elt = INTVAL (e) & 31;
35333 which |= (elt < 16 ? 1 : 2);
35334 perm[i] = elt;
35337 /* Simplify the constant selector based on operands. */
35338 switch (which)
35340 default:
35341 gcc_unreachable ();
35343 case 3:
35344 one_vec = false;
35345 if (!rtx_equal_p (op0, op1))
35346 break;
35347 /* FALLTHRU */
35349 case 2:
35350 for (i = 0; i < 16; ++i)
35351 perm[i] &= 15;
35352 op0 = op1;
35353 one_vec = true;
35354 break;
35356 case 1:
35357 op1 = op0;
35358 one_vec = true;
35359 break;
35362 /* Look for splat patterns. */
35363 if (one_vec)
35365 elt = perm[0];
35367 for (i = 0; i < 16; ++i)
35368 if (perm[i] != elt)
35369 break;
35370 if (i == 16)
35372 if (!BYTES_BIG_ENDIAN)
35373 elt = 15 - elt;
35374 emit_insn (gen_altivec_vspltb_direct (target, op0, GEN_INT (elt)));
35375 return true;
35378 if (elt % 2 == 0)
35380 for (i = 0; i < 16; i += 2)
35381 if (perm[i] != elt || perm[i + 1] != elt + 1)
35382 break;
35383 if (i == 16)
35385 int field = BYTES_BIG_ENDIAN ? elt / 2 : 7 - elt / 2;
35386 x = gen_reg_rtx (V8HImode);
35387 emit_insn (gen_altivec_vsplth_direct (x, gen_lowpart (V8HImode, op0),
35388 GEN_INT (field)));
35389 emit_move_insn (target, gen_lowpart (V16QImode, x));
35390 return true;
35394 if (elt % 4 == 0)
35396 for (i = 0; i < 16; i += 4)
35397 if (perm[i] != elt
35398 || perm[i + 1] != elt + 1
35399 || perm[i + 2] != elt + 2
35400 || perm[i + 3] != elt + 3)
35401 break;
35402 if (i == 16)
35404 int field = BYTES_BIG_ENDIAN ? elt / 4 : 3 - elt / 4;
35405 x = gen_reg_rtx (V4SImode);
35406 emit_insn (gen_altivec_vspltw_direct (x, gen_lowpart (V4SImode, op0),
35407 GEN_INT (field)));
35408 emit_move_insn (target, gen_lowpart (V16QImode, x));
35409 return true;
35414 /* Look for merge and pack patterns. */
35415 for (j = 0; j < ARRAY_SIZE (patterns); ++j)
35417 bool swapped;
35419 if ((patterns[j].mask & rs6000_isa_flags) == 0)
35420 continue;
35422 elt = patterns[j].perm[0];
35423 if (perm[0] == elt)
35424 swapped = false;
35425 else if (perm[0] == elt + 16)
35426 swapped = true;
35427 else
35428 continue;
35429 for (i = 1; i < 16; ++i)
35431 elt = patterns[j].perm[i];
35432 if (swapped)
35433 elt = (elt >= 16 ? elt - 16 : elt + 16);
35434 else if (one_vec && elt >= 16)
35435 elt -= 16;
35436 if (perm[i] != elt)
35437 break;
35439 if (i == 16)
35441 enum insn_code icode = patterns[j].impl;
35442 machine_mode omode = insn_data[icode].operand[0].mode;
35443 machine_mode imode = insn_data[icode].operand[1].mode;
35445 /* For little-endian, don't use vpkuwum and vpkuhum if the
35446 underlying vector type is not V4SI or V8HI, respectively.
35447 For example, using vpkuwum with a V8HI picks up the even
35448 halfwords (BE numbering) when the even halfwords (LE
35449 numbering) are what we need. */
35450 if (!BYTES_BIG_ENDIAN
35451 && icode == CODE_FOR_altivec_vpkuwum_direct
35452 && ((GET_CODE (op0) == REG
35453 && GET_MODE (op0) != V4SImode)
35454 || (GET_CODE (op0) == SUBREG
35455 && GET_MODE (XEXP (op0, 0)) != V4SImode)))
35456 continue;
35457 if (!BYTES_BIG_ENDIAN
35458 && icode == CODE_FOR_altivec_vpkuhum_direct
35459 && ((GET_CODE (op0) == REG
35460 && GET_MODE (op0) != V8HImode)
35461 || (GET_CODE (op0) == SUBREG
35462 && GET_MODE (XEXP (op0, 0)) != V8HImode)))
35463 continue;
35465 /* For little-endian, the two input operands must be swapped
35466 (or swapped back) to ensure proper right-to-left numbering
35467 from 0 to 2N-1. */
35468 if (swapped ^ !BYTES_BIG_ENDIAN)
35469 std::swap (op0, op1);
35470 if (imode != V16QImode)
35472 op0 = gen_lowpart (imode, op0);
35473 op1 = gen_lowpart (imode, op1);
35475 if (omode == V16QImode)
35476 x = target;
35477 else
35478 x = gen_reg_rtx (omode);
35479 emit_insn (GEN_FCN (icode) (x, op0, op1));
35480 if (omode != V16QImode)
35481 emit_move_insn (target, gen_lowpart (V16QImode, x));
35482 return true;
35486 if (!BYTES_BIG_ENDIAN)
35488 altivec_expand_vec_perm_const_le (operands);
35489 return true;
35492 return false;
35495 /* Expand a Paired Single or VSX Permute Doubleword constant permutation.
35496 Return true if we match an efficient implementation. */
35498 static bool
35499 rs6000_expand_vec_perm_const_1 (rtx target, rtx op0, rtx op1,
35500 unsigned char perm0, unsigned char perm1)
35502 rtx x;
35504 /* If both selectors come from the same operand, fold to single op. */
35505 if ((perm0 & 2) == (perm1 & 2))
35507 if (perm0 & 2)
35508 op0 = op1;
35509 else
35510 op1 = op0;
35512 /* If both operands are equal, fold to simpler permutation. */
35513 if (rtx_equal_p (op0, op1))
35515 perm0 = perm0 & 1;
35516 perm1 = (perm1 & 1) + 2;
35518 /* If the first selector comes from the second operand, swap. */
35519 else if (perm0 & 2)
35521 if (perm1 & 2)
35522 return false;
35523 perm0 -= 2;
35524 perm1 += 2;
35525 std::swap (op0, op1);
35527 /* If the second selector does not come from the second operand, fail. */
35528 else if ((perm1 & 2) == 0)
35529 return false;
35531 /* Success! */
35532 if (target != NULL)
35534 machine_mode vmode, dmode;
35535 rtvec v;
35537 vmode = GET_MODE (target);
35538 gcc_assert (GET_MODE_NUNITS (vmode) == 2);
35539 dmode = mode_for_vector (GET_MODE_INNER (vmode), 4);
35540 x = gen_rtx_VEC_CONCAT (dmode, op0, op1);
35541 v = gen_rtvec (2, GEN_INT (perm0), GEN_INT (perm1));
35542 x = gen_rtx_VEC_SELECT (vmode, x, gen_rtx_PARALLEL (VOIDmode, v));
35543 emit_insn (gen_rtx_SET (target, x));
35545 return true;
35548 bool
35549 rs6000_expand_vec_perm_const (rtx operands[4])
35551 rtx target, op0, op1, sel;
35552 unsigned char perm0, perm1;
35554 target = operands[0];
35555 op0 = operands[1];
35556 op1 = operands[2];
35557 sel = operands[3];
35559 /* Unpack the constant selector. */
35560 perm0 = INTVAL (XVECEXP (sel, 0, 0)) & 3;
35561 perm1 = INTVAL (XVECEXP (sel, 0, 1)) & 3;
35563 return rs6000_expand_vec_perm_const_1 (target, op0, op1, perm0, perm1);
35566 /* Test whether a constant permutation is supported. */
35568 static bool
35569 rs6000_vectorize_vec_perm_const_ok (machine_mode vmode,
35570 const unsigned char *sel)
35572 /* AltiVec (and thus VSX) can handle arbitrary permutations. */
35573 if (TARGET_ALTIVEC)
35574 return true;
35576 /* Check for ps_merge* or evmerge* insns. */
35577 if (TARGET_PAIRED_FLOAT && vmode == V2SFmode)
35579 rtx op0 = gen_raw_REG (vmode, LAST_VIRTUAL_REGISTER + 1);
35580 rtx op1 = gen_raw_REG (vmode, LAST_VIRTUAL_REGISTER + 2);
35581 return rs6000_expand_vec_perm_const_1 (NULL, op0, op1, sel[0], sel[1]);
35584 return false;
35587 /* A subroutine for rs6000_expand_extract_even & rs6000_expand_interleave. */
35589 static void
35590 rs6000_do_expand_vec_perm (rtx target, rtx op0, rtx op1,
35591 machine_mode vmode, unsigned nelt, rtx perm[])
35593 machine_mode imode;
35594 rtx x;
35596 imode = vmode;
35597 if (GET_MODE_CLASS (vmode) != MODE_VECTOR_INT)
35599 imode = mode_for_size (GET_MODE_UNIT_BITSIZE (vmode), MODE_INT, 0);
35600 imode = mode_for_vector (imode, nelt);
35603 x = gen_rtx_CONST_VECTOR (imode, gen_rtvec_v (nelt, perm));
35604 x = expand_vec_perm (vmode, op0, op1, x, target);
35605 if (x != target)
35606 emit_move_insn (target, x);
35609 /* Expand an extract even operation. */
35611 void
35612 rs6000_expand_extract_even (rtx target, rtx op0, rtx op1)
35614 machine_mode vmode = GET_MODE (target);
35615 unsigned i, nelt = GET_MODE_NUNITS (vmode);
35616 rtx perm[16];
35618 for (i = 0; i < nelt; i++)
35619 perm[i] = GEN_INT (i * 2);
35621 rs6000_do_expand_vec_perm (target, op0, op1, vmode, nelt, perm);
35624 /* Expand a vector interleave operation. */
35626 void
35627 rs6000_expand_interleave (rtx target, rtx op0, rtx op1, bool highp)
35629 machine_mode vmode = GET_MODE (target);
35630 unsigned i, high, nelt = GET_MODE_NUNITS (vmode);
35631 rtx perm[16];
35633 high = (highp ? 0 : nelt / 2);
35634 for (i = 0; i < nelt / 2; i++)
35636 perm[i * 2] = GEN_INT (i + high);
35637 perm[i * 2 + 1] = GEN_INT (i + nelt + high);
35640 rs6000_do_expand_vec_perm (target, op0, op1, vmode, nelt, perm);
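/* A minimal sketch of the selectors built by the two expanders above, for
   a double-wide input numbering op0 as 0..NELT-1 and op1 as
   NELT..2*NELT-1; the helper names are hypothetical.  */
#if 0
static void
extract_even_sel (unsigned nelt, unsigned sel[])
{
  for (unsigned i = 0; i < nelt; i++)
    sel[i] = 2 * i;			/* even elements of the concat */
}

static void
interleave_sel (unsigned nelt, int highp, unsigned sel[])
{
  unsigned base = highp ? 0 : nelt / 2;
  for (unsigned i = 0; i < nelt / 2; i++)
    {
      sel[2 * i] = base + i;		/* element of op0 */
      sel[2 * i + 1] = base + i + nelt;	/* matching element of op1 */
    }
}
#endif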
35643 /* Scale a V2DF vector SRC by two to the SCALE and place in TGT. */
35644 void
35645 rs6000_scale_v2df (rtx tgt, rtx src, int scale)
35647 HOST_WIDE_INT hwi_scale (scale);
35648 REAL_VALUE_TYPE r_pow;
35649 rtvec v = rtvec_alloc (2);
35650 rtx elt;
35651 rtx scale_vec = gen_reg_rtx (V2DFmode);
35652 (void)real_powi (&r_pow, DFmode, &dconst2, hwi_scale);
35653 elt = const_double_from_real_value (r_pow, DFmode);
35654 RTVEC_ELT (v, 0) = elt;
35655 RTVEC_ELT (v, 1) = elt;
35656 rs6000_expand_vector_init (scale_vec, gen_rtx_PARALLEL (V2DFmode, v));
35657 emit_insn (gen_mulv2df3 (tgt, src, scale_vec));
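/* A minimal scalar sketch of the scaling above, assuming <math.h>; the
   helper name is hypothetical.  ldexp computes the same 2**scale factor
   that real_powi materializes here as a constant splat.  */
#if 0
#include <math.h>

static void
scale_v2df_model (double v[2], int scale)
{
  double factor = ldexp (1.0, scale);	/* 2**scale */
  v[0] *= factor;
  v[1] *= factor;
}
#endif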
35660 /* Return an RTX representing where to find the function value of a
35661 function returning MODE. */
35662 static rtx
35663 rs6000_complex_function_value (machine_mode mode)
35665 unsigned int regno;
35666 rtx r1, r2;
35667 machine_mode inner = GET_MODE_INNER (mode);
35668 unsigned int inner_bytes = GET_MODE_UNIT_SIZE (mode);
35670 if (TARGET_FLOAT128_TYPE
35671 && (mode == KCmode
35672 || (mode == TCmode && TARGET_IEEEQUAD)))
35673 regno = ALTIVEC_ARG_RETURN;
35675 else if (FLOAT_MODE_P (mode) && TARGET_HARD_FLOAT)
35676 regno = FP_ARG_RETURN;
35678 else
35680 regno = GP_ARG_RETURN;
35682 /* 32-bit is OK since it'll go in r3/r4. */
35683 if (TARGET_32BIT && inner_bytes >= 4)
35684 return gen_rtx_REG (mode, regno);
35687 if (inner_bytes >= 8)
35688 return gen_rtx_REG (mode, regno);
35690 r1 = gen_rtx_EXPR_LIST (inner, gen_rtx_REG (inner, regno),
35691 const0_rtx);
35692 r2 = gen_rtx_EXPR_LIST (inner, gen_rtx_REG (inner, regno + 1),
35693 GEN_INT (inner_bytes));
35694 return gen_rtx_PARALLEL (mode, gen_rtvec (2, r1, r2));
35697 /* Return an rtx describing a return value of MODE as a PARALLEL
35698 in N_ELTS registers, each of mode ELT_MODE, starting at REGNO,
35699 stride REG_STRIDE. */
35701 static rtx
35702 rs6000_parallel_return (machine_mode mode,
35703 int n_elts, machine_mode elt_mode,
35704 unsigned int regno, unsigned int reg_stride)
35706 rtx par = gen_rtx_PARALLEL (mode, rtvec_alloc (n_elts));
35708 int i;
35709 for (i = 0; i < n_elts; i++)
35711 rtx r = gen_rtx_REG (elt_mode, regno);
35712 rtx off = GEN_INT (i * GET_MODE_SIZE (elt_mode));
35713 XVECEXP (par, 0, i) = gen_rtx_EXPR_LIST (VOIDmode, r, off);
35714 regno += reg_stride;
35717 return par;
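/* For example (illustrative, not printed verbatim), a DImode value split
   for -m32 -mpowerpc64 becomes roughly

     (parallel:DI [(expr_list (reg:SI 3) (const_int 0))
		   (expr_list (reg:SI 4) (const_int 4))])

   i.e. two SImode pieces in r3/r4 at byte offsets 0 and 4.  */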
35720 /* Target hook for TARGET_FUNCTION_VALUE.
35722 An integer value is in r3 and a floating-point value is in fp1,
35723 unless -msoft-float. */
35725 static rtx
35726 rs6000_function_value (const_tree valtype,
35727 const_tree fn_decl_or_type ATTRIBUTE_UNUSED,
35728 bool outgoing ATTRIBUTE_UNUSED)
35730 machine_mode mode;
35731 unsigned int regno;
35732 machine_mode elt_mode;
35733 int n_elts;
35735 /* Special handling for structs in darwin64. */
35736 if (TARGET_MACHO
35737 && rs6000_darwin64_struct_check_p (TYPE_MODE (valtype), valtype))
35739 CUMULATIVE_ARGS valcum;
35740 rtx valret;
35742 valcum.words = 0;
35743 valcum.fregno = FP_ARG_MIN_REG;
35744 valcum.vregno = ALTIVEC_ARG_MIN_REG;
35745 /* Do a trial code generation as if this were going to be passed as
35746 an argument; if any part goes in memory, we return NULL. */
35747 valret = rs6000_darwin64_record_arg (&valcum, valtype, true, /* retval= */ true);
35748 if (valret)
35749 return valret;
35750 /* Otherwise fall through to standard ABI rules. */
35753 mode = TYPE_MODE (valtype);
35755 /* The ELFv2 ABI returns homogeneous VFP aggregates in registers. */
35756 if (rs6000_discover_homogeneous_aggregate (mode, valtype, &elt_mode, &n_elts))
35758 int first_reg, n_regs;
35760 if (SCALAR_FLOAT_MODE_NOT_VECTOR_P (elt_mode))
35762 /* _Decimal128 must use even/odd register pairs. */
35763 first_reg = (elt_mode == TDmode) ? FP_ARG_RETURN + 1 : FP_ARG_RETURN;
35764 n_regs = (GET_MODE_SIZE (elt_mode) + 7) >> 3;
35766 else
35768 first_reg = ALTIVEC_ARG_RETURN;
35769 n_regs = 1;
35772 return rs6000_parallel_return (mode, n_elts, elt_mode, first_reg, n_regs);
35775 /* Some return value types need to be split in the 32-bit ABI with -mpowerpc64. */
35776 if (TARGET_32BIT && TARGET_POWERPC64)
35777 switch (mode)
35779 default:
35780 break;
35781 case DImode:
35782 case SCmode:
35783 case DCmode:
35784 case TCmode:
35785 int count = GET_MODE_SIZE (mode) / 4;
35786 return rs6000_parallel_return (mode, count, SImode, GP_ARG_RETURN, 1);
35789 if ((INTEGRAL_TYPE_P (valtype)
35790 && GET_MODE_BITSIZE (mode) < (TARGET_32BIT ? 32 : 64))
35791 || POINTER_TYPE_P (valtype))
35792 mode = TARGET_32BIT ? SImode : DImode;
35794 if (DECIMAL_FLOAT_MODE_P (mode) && TARGET_HARD_FLOAT)
35795 /* _Decimal128 must use an even/odd register pair. */
35796 regno = (mode == TDmode) ? FP_ARG_RETURN + 1 : FP_ARG_RETURN;
35797 else if (SCALAR_FLOAT_TYPE_P (valtype) && TARGET_HARD_FLOAT
35798 && !FLOAT128_VECTOR_P (mode)
35799 && ((TARGET_SINGLE_FLOAT && (mode == SFmode)) || TARGET_DOUBLE_FLOAT))
35800 regno = FP_ARG_RETURN;
35801 else if (TREE_CODE (valtype) == COMPLEX_TYPE
35802 && targetm.calls.split_complex_arg)
35803 return rs6000_complex_function_value (mode);
35804 /* VSX is a superset of Altivec and adds V2DImode/V2DFmode. Since the same
35805 return register is used in both cases, and we won't see V2DImode/V2DFmode
35806 for pure altivec, combine the two cases. */
35807 else if ((TREE_CODE (valtype) == VECTOR_TYPE || FLOAT128_VECTOR_P (mode))
35808 && TARGET_ALTIVEC && TARGET_ALTIVEC_ABI
35809 && ALTIVEC_OR_VSX_VECTOR_MODE (mode))
35810 regno = ALTIVEC_ARG_RETURN;
35811 else
35812 regno = GP_ARG_RETURN;
35814 return gen_rtx_REG (mode, regno);
35817 /* Define how to find the value returned by a library function
35818 assuming the value has mode MODE. */
35819 rtx
35820 rs6000_libcall_value (machine_mode mode)
35822 unsigned int regno;
35824 /* A long long return value needs to be split in the 32-bit ABI with -mpowerpc64. */
35825 if (TARGET_32BIT && TARGET_POWERPC64 && mode == DImode)
35826 return rs6000_parallel_return (mode, 2, SImode, GP_ARG_RETURN, 1);
35828 if (DECIMAL_FLOAT_MODE_P (mode) && TARGET_HARD_FLOAT)
35829 /* _Decimal128 must use an even/odd register pair. */
35830 regno = (mode == TDmode) ? FP_ARG_RETURN + 1 : FP_ARG_RETURN;
35831 else if (SCALAR_FLOAT_MODE_NOT_VECTOR_P (mode)
35832 && TARGET_HARD_FLOAT
35833 && ((TARGET_SINGLE_FLOAT && mode == SFmode) || TARGET_DOUBLE_FLOAT))
35834 regno = FP_ARG_RETURN;
35835 /* VSX is a superset of Altivec and adds V2DImode/V2DFmode. Since the same
35836 return register is used in both cases, and we won't see V2DImode/V2DFmode
35837 for pure altivec, combine the two cases. */
35838 else if (ALTIVEC_OR_VSX_VECTOR_MODE (mode)
35839 && TARGET_ALTIVEC && TARGET_ALTIVEC_ABI)
35840 regno = ALTIVEC_ARG_RETURN;
35841 else if (COMPLEX_MODE_P (mode) && targetm.calls.split_complex_arg)
35842 return rs6000_complex_function_value (mode);
35843 else
35844 regno = GP_ARG_RETURN;
35846 return gen_rtx_REG (mode, regno);
35849 /* Compute register pressure classes. We implement the target hook to avoid
35850 IRA picking something like NON_SPECIAL_REGS as a pressure class, which can
35851 lead to incorrect estimates of the number of available registers and therefore
35852 increased register pressure/spill. */
35853 static int
35854 rs6000_compute_pressure_classes (enum reg_class *pressure_classes)
35856 int n;
35858 n = 0;
35859 pressure_classes[n++] = GENERAL_REGS;
35860 if (TARGET_VSX)
35861 pressure_classes[n++] = VSX_REGS;
35862 else
35864 if (TARGET_ALTIVEC)
35865 pressure_classes[n++] = ALTIVEC_REGS;
35866 if (TARGET_HARD_FLOAT)
35867 pressure_classes[n++] = FLOAT_REGS;
35869 pressure_classes[n++] = CR_REGS;
35870 pressure_classes[n++] = SPECIAL_REGS;
35872 return n;
35875 /* Given FROM and TO register numbers, say whether this elimination is allowed.
35876 Frame pointer elimination is automatically handled.
35878 For the RS/6000, if frame pointer elimination is being done, we would like
35879 to convert ap into fp, not sp.
35881 We need r30 if -mminimal-toc was specified, and there are constant pool
35882 references. */
35884 static bool
35885 rs6000_can_eliminate (const int from, const int to)
35887 return (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM
35888 ? ! frame_pointer_needed
35889 : from == RS6000_PIC_OFFSET_TABLE_REGNUM
35890 ? ! TARGET_MINIMAL_TOC || TARGET_NO_TOC
35891 || constant_pool_empty_p ()
35892 : true);
35895 /* Define the offset between two registers, FROM to be eliminated and its
35896 replacement TO, at the start of a routine. */
35897 HOST_WIDE_INT
35898 rs6000_initial_elimination_offset (int from, int to)
35900 rs6000_stack_t *info = rs6000_stack_info ();
35901 HOST_WIDE_INT offset;
35903 if (from == HARD_FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
35904 offset = info->push_p ? 0 : -info->total_size;
35905 else if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
35907 offset = info->push_p ? 0 : -info->total_size;
35908 if (FRAME_GROWS_DOWNWARD)
35909 offset += info->fixed_size + info->vars_size + info->parm_size;
35911 else if (from == FRAME_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
35912 offset = FRAME_GROWS_DOWNWARD
35913 ? info->fixed_size + info->vars_size + info->parm_size
35914 : 0;
35915 else if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
35916 offset = info->total_size;
35917 else if (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
35918 offset = info->push_p ? info->total_size : 0;
35919 else if (from == RS6000_PIC_OFFSET_TABLE_REGNUM)
35920 offset = 0;
35921 else
35922 gcc_unreachable ();
35924 return offset;
35927 /* Fill in sizes of registers used by unwinder. */
35929 static void
35930 rs6000_init_dwarf_reg_sizes_extra (tree address)
35932 if (TARGET_MACHO && ! TARGET_ALTIVEC)
35934 int i;
35935 machine_mode mode = TYPE_MODE (char_type_node);
35936 rtx addr = expand_expr (address, NULL_RTX, VOIDmode, EXPAND_NORMAL);
35937 rtx mem = gen_rtx_MEM (BLKmode, addr);
35938 rtx value = gen_int_mode (16, mode);
35940 /* On Darwin, libgcc may be built to run on both G3 and G4/5.
35941 The unwinder still needs to know the size of Altivec registers. */
35943 for (i = FIRST_ALTIVEC_REGNO; i < LAST_ALTIVEC_REGNO+1; i++)
35945 int column = DWARF_REG_TO_UNWIND_COLUMN
35946 (DWARF2_FRAME_REG_OUT (DWARF_FRAME_REGNUM (i), true));
35947 HOST_WIDE_INT offset = column * GET_MODE_SIZE (mode);
35949 emit_move_insn (adjust_address (mem, mode, offset), value);
35954 /* Map internal gcc register numbers to debug format register numbers.
35955 FORMAT specifies the type of debug register number to use:
35956 0 -- debug information, except for frame-related sections
35957 1 -- DWARF .debug_frame section
35958 2 -- DWARF .eh_frame section */
35960 unsigned int
35961 rs6000_dbx_register_number (unsigned int regno, unsigned int format)
35963 /* Except for the above, we use the internal number for non-DWARF
35964 debug information, and also for .eh_frame. */
35965 if ((format == 0 && write_symbols != DWARF2_DEBUG) || format == 2)
35966 return regno;
35968 /* On some platforms, we use the standard DWARF register
35969 numbering for .debug_info and .debug_frame. */
35970 #ifdef RS6000_USE_DWARF_NUMBERING
35971 if (regno <= 63)
35972 return regno;
35973 if (regno == LR_REGNO)
35974 return 108;
35975 if (regno == CTR_REGNO)
35976 return 109;
35977 /* Special handling for CR for .debug_frame: rs6000_emit_prologue has
35978 translated any combination of CR2, CR3, CR4 saves to a save of CR2.
35979 The actual code emitted saves the whole of CR, so we map CR2_REGNO
35980 to the DWARF reg for CR. */
35981 if (format == 1 && regno == CR2_REGNO)
35982 return 64;
35983 if (CR_REGNO_P (regno))
35984 return regno - CR0_REGNO + 86;
35985 if (regno == CA_REGNO)
35986 return 101; /* XER */
35987 if (ALTIVEC_REGNO_P (regno))
35988 return regno - FIRST_ALTIVEC_REGNO + 1124;
35989 if (regno == VRSAVE_REGNO)
35990 return 356;
35991 if (regno == VSCR_REGNO)
35992 return 67;
35993 #endif
35994 return regno;
35997 /* target hook eh_return_filter_mode */
35998 static machine_mode
35999 rs6000_eh_return_filter_mode (void)
36001 return TARGET_32BIT ? SImode : word_mode;
36004 /* Target hook for scalar_mode_supported_p. */
36005 static bool
36006 rs6000_scalar_mode_supported_p (machine_mode mode)
36008 /* -m32 does not support TImode. This is the default, from
36009 default_scalar_mode_supported_p. For -m32 -mpowerpc64 we want the
36010 same ABI as for -m32. But default_scalar_mode_supported_p allows
36011 integer modes of precision 2 * BITS_PER_WORD, which matches TImode
36012 for -mpowerpc64. */
36013 if (TARGET_32BIT && mode == TImode)
36014 return false;
36016 if (DECIMAL_FLOAT_MODE_P (mode))
36017 return default_decimal_float_supported_p ();
36018 else if (TARGET_FLOAT128_TYPE && (mode == KFmode || mode == IFmode))
36019 return true;
36020 else
36021 return default_scalar_mode_supported_p (mode);
36024 /* Target hook for vector_mode_supported_p. */
36025 static bool
36026 rs6000_vector_mode_supported_p (machine_mode mode)
36029 if (TARGET_PAIRED_FLOAT && PAIRED_VECTOR_MODE (mode))
36030 return true;
36032 /* There is no vector form for IEEE 128-bit. If we return true for IEEE
36033 128-bit, the compiler might try to widen IEEE 128-bit to IBM
36034 double-double. */
36035 else if (VECTOR_MEM_ALTIVEC_OR_VSX_P (mode) && !FLOAT128_IEEE_P (mode))
36036 return true;
36038 else
36039 return false;
36042 /* Target hook for floatn_mode. */
36043 static machine_mode
36044 rs6000_floatn_mode (int n, bool extended)
36046 if (extended)
36048 switch (n)
36050 case 32:
36051 return DFmode;
36053 case 64:
36054 if (TARGET_FLOAT128_KEYWORD)
36055 return (FLOAT128_IEEE_P (TFmode)) ? TFmode : KFmode;
36056 else
36057 return VOIDmode;
36059 case 128:
36060 return VOIDmode;
36062 default:
36063 /* Those are the only valid _FloatNx types. */
36064 gcc_unreachable ();
36067 else
36069 switch (n)
36071 case 32:
36072 return SFmode;
36074 case 64:
36075 return DFmode;
36077 case 128:
36078 if (TARGET_FLOAT128_KEYWORD)
36079 return (FLOAT128_IEEE_P (TFmode)) ? TFmode : KFmode;
36080 else
36081 return VOIDmode;
36083 default:
36084 return VOIDmode;
36090 /* Target hook for c_mode_for_suffix. */
36091 static machine_mode
36092 rs6000_c_mode_for_suffix (char suffix)
36094 if (TARGET_FLOAT128_TYPE)
36096 if (suffix == 'q' || suffix == 'Q')
36097 return (FLOAT128_IEEE_P (TFmode)) ? TFmode : KFmode;
36099 /* At the moment, we are not defining a suffix for IBM extended double.
36100 If/when the default for -mabi=ieeelongdouble is changed, and we want
36101 to support __ibm128 constants in legacy library code, we may need to
36102 re-evaluate this decision.  Currently, c-lex.c only supports 'w' and
36103 'q' as machine-dependent suffixes.  The x86_64 port uses 'w' for
36104 __float80 constants. */
36107 return VOIDmode;
36110 /* Target hook for invalid_arg_for_unprototyped_fn. */
36111 static const char *
36112 invalid_arg_for_unprototyped_fn (const_tree typelist, const_tree funcdecl, const_tree val)
36114 return (!rs6000_darwin64_abi
36115 && typelist == 0
36116 && TREE_CODE (TREE_TYPE (val)) == VECTOR_TYPE
36117 && (funcdecl == NULL_TREE
36118 || (TREE_CODE (funcdecl) == FUNCTION_DECL
36119 && DECL_BUILT_IN_CLASS (funcdecl) != BUILT_IN_MD)))
36120 ? N_("AltiVec argument passed to unprototyped function")
36121 : NULL;
36124 /* For TARGET_SECURE_PLT 32-bit PIC code we can save PIC register
36125 setup by using __stack_chk_fail_local hidden function instead of
36126 calling __stack_chk_fail directly. Otherwise it is better to call
36127 __stack_chk_fail directly. */
36129 static tree ATTRIBUTE_UNUSED
36130 rs6000_stack_protect_fail (void)
36132 return (DEFAULT_ABI == ABI_V4 && TARGET_SECURE_PLT && flag_pic)
36133 ? default_hidden_stack_protect_fail ()
36134 : default_external_stack_protect_fail ();
36137 /* Implement the TARGET_ASAN_SHADOW_OFFSET hook. */
36139 #if TARGET_ELF
36140 static unsigned HOST_WIDE_INT
36141 rs6000_asan_shadow_offset (void)
36143 return (unsigned HOST_WIDE_INT) 1 << (TARGET_64BIT ? 41 : 29);
36145 #endif
36147 /* Mask options that we want to support inside of attribute((target)) and
36148 #pragma GCC target operations. Note, we do not include things like
36149 64/32-bit, endianness, hard/soft floating point, etc. that would have
36150 different calling sequences. */
36152 struct rs6000_opt_mask {
36153 const char *name; /* option name */
36154 HOST_WIDE_INT mask; /* mask to set */
36155 bool invert; /* invert sense of mask */
36156 bool valid_target; /* option is a target option */
36159 static struct rs6000_opt_mask const rs6000_opt_masks[] =
36161 { "altivec", OPTION_MASK_ALTIVEC, false, true },
36162 { "cmpb", OPTION_MASK_CMPB, false, true },
36163 { "crypto", OPTION_MASK_CRYPTO, false, true },
36164 { "direct-move", OPTION_MASK_DIRECT_MOVE, false, true },
36165 { "dlmzb", OPTION_MASK_DLMZB, false, true },
36166 { "efficient-unaligned-vsx", OPTION_MASK_EFFICIENT_UNALIGNED_VSX,
36167 false, true },
36168 { "float128", OPTION_MASK_FLOAT128_KEYWORD, false, false },
36169 { "float128-type", OPTION_MASK_FLOAT128_TYPE, false, false },
36170 { "float128-hardware", OPTION_MASK_FLOAT128_HW, false, false },
36171 { "fprnd", OPTION_MASK_FPRND, false, true },
36172 { "hard-dfp", OPTION_MASK_DFP, false, true },
36173 { "htm", OPTION_MASK_HTM, false, true },
36174 { "isel", OPTION_MASK_ISEL, false, true },
36175 { "mfcrf", OPTION_MASK_MFCRF, false, true },
36176 { "mfpgpr", OPTION_MASK_MFPGPR, false, true },
36177 { "modulo", OPTION_MASK_MODULO, false, true },
36178 { "mulhw", OPTION_MASK_MULHW, false, true },
36179 { "multiple", OPTION_MASK_MULTIPLE, false, true },
36180 { "popcntb", OPTION_MASK_POPCNTB, false, true },
36181 { "popcntd", OPTION_MASK_POPCNTD, false, true },
36182 { "power8-fusion", OPTION_MASK_P8_FUSION, false, true },
36183 { "power8-fusion-sign", OPTION_MASK_P8_FUSION_SIGN, false, true },
36184 { "power8-vector", OPTION_MASK_P8_VECTOR, false, true },
36185 { "power9-dform-scalar", OPTION_MASK_P9_DFORM_SCALAR, false, true },
36186 { "power9-dform-vector", OPTION_MASK_P9_DFORM_VECTOR, false, true },
36187 { "power9-fusion", OPTION_MASK_P9_FUSION, false, true },
36188 { "power9-minmax", OPTION_MASK_P9_MINMAX, false, true },
36189 { "power9-misc", OPTION_MASK_P9_MISC, false, true },
36190 { "power9-vector", OPTION_MASK_P9_VECTOR, false, true },
36191 { "powerpc-gfxopt", OPTION_MASK_PPC_GFXOPT, false, true },
36192 { "powerpc-gpopt", OPTION_MASK_PPC_GPOPT, false, true },
36193 { "quad-memory", OPTION_MASK_QUAD_MEMORY, false, true },
36194 { "quad-memory-atomic", OPTION_MASK_QUAD_MEMORY_ATOMIC, false, true },
36195 { "recip-precision", OPTION_MASK_RECIP_PRECISION, false, true },
36196 { "save-toc-indirect", OPTION_MASK_SAVE_TOC_INDIRECT, false, true },
36197 { "string", OPTION_MASK_STRING, false, true },
36198 { "toc-fusion", OPTION_MASK_TOC_FUSION, false, true },
36199 { "update", OPTION_MASK_NO_UPDATE, true , true },
36200 { "vsx", OPTION_MASK_VSX, false, true },
36201 { "vsx-timode", OPTION_MASK_VSX_TIMODE, false, true },
36202 #ifdef OPTION_MASK_64BIT
36203 #if TARGET_AIX_OS
36204 { "aix64", OPTION_MASK_64BIT, false, false },
36205 { "aix32", OPTION_MASK_64BIT, true, false },
36206 #else
36207 { "64", OPTION_MASK_64BIT, false, false },
36208 { "32", OPTION_MASK_64BIT, true, false },
36209 #endif
36210 #endif
36211 #ifdef OPTION_MASK_EABI
36212 { "eabi", OPTION_MASK_EABI, false, false },
36213 #endif
36214 #ifdef OPTION_MASK_LITTLE_ENDIAN
36215 { "little", OPTION_MASK_LITTLE_ENDIAN, false, false },
36216 { "big", OPTION_MASK_LITTLE_ENDIAN, true, false },
36217 #endif
36218 #ifdef OPTION_MASK_RELOCATABLE
36219 { "relocatable", OPTION_MASK_RELOCATABLE, false, false },
36220 #endif
36221 #ifdef OPTION_MASK_STRICT_ALIGN
36222 { "strict-align", OPTION_MASK_STRICT_ALIGN, false, false },
36223 #endif
36224 { "soft-float", OPTION_MASK_SOFT_FLOAT, false, false },
36225 { "string", OPTION_MASK_STRING, false, false },
36228 /* Builtin mask mapping for printing the flags. */
36229 static struct rs6000_opt_mask const rs6000_builtin_mask_names[] =
36231 { "altivec", RS6000_BTM_ALTIVEC, false, false },
36232 { "vsx", RS6000_BTM_VSX, false, false },
36233 { "paired", RS6000_BTM_PAIRED, false, false },
36234 { "fre", RS6000_BTM_FRE, false, false },
36235 { "fres", RS6000_BTM_FRES, false, false },
36236 { "frsqrte", RS6000_BTM_FRSQRTE, false, false },
36237 { "frsqrtes", RS6000_BTM_FRSQRTES, false, false },
36238 { "popcntd", RS6000_BTM_POPCNTD, false, false },
36239 { "cell", RS6000_BTM_CELL, false, false },
36240 { "power8-vector", RS6000_BTM_P8_VECTOR, false, false },
36241 { "power9-vector", RS6000_BTM_P9_VECTOR, false, false },
36242 { "power9-misc", RS6000_BTM_P9_MISC, false, false },
36243 { "crypto", RS6000_BTM_CRYPTO, false, false },
36244 { "htm", RS6000_BTM_HTM, false, false },
36245 { "hard-dfp", RS6000_BTM_DFP, false, false },
36246 { "hard-float", RS6000_BTM_HARD_FLOAT, false, false },
36247 { "long-double-128", RS6000_BTM_LDBL128, false, false },
36248 { "float128", RS6000_BTM_FLOAT128, false, false },
36251 /* Option variables that we want to support inside attribute((target)) and
36252 #pragma GCC target operations. */
36254 struct rs6000_opt_var {
36255 const char *name; /* option name */
36256 size_t global_offset; /* offset of the option in global_options. */
36257 size_t target_offset; /* offset of the option in target options. */
36260 static struct rs6000_opt_var const rs6000_opt_vars[] =
36262 { "friz",
36263 offsetof (struct gcc_options, x_TARGET_FRIZ),
36264 offsetof (struct cl_target_option, x_TARGET_FRIZ), },
36265 { "avoid-indexed-addresses",
36266 offsetof (struct gcc_options, x_TARGET_AVOID_XFORM),
36267 offsetof (struct cl_target_option, x_TARGET_AVOID_XFORM) },
36268 { "paired",
36269 offsetof (struct gcc_options, x_rs6000_paired_float),
36270 offsetof (struct cl_target_option, x_rs6000_paired_float), },
36271 { "longcall",
36272 offsetof (struct gcc_options, x_rs6000_default_long_calls),
36273 offsetof (struct cl_target_option, x_rs6000_default_long_calls), },
36274 { "optimize-swaps",
36275 offsetof (struct gcc_options, x_rs6000_optimize_swaps),
36276 offsetof (struct cl_target_option, x_rs6000_optimize_swaps), },
36277 { "allow-movmisalign",
36278 offsetof (struct gcc_options, x_TARGET_ALLOW_MOVMISALIGN),
36279 offsetof (struct cl_target_option, x_TARGET_ALLOW_MOVMISALIGN), },
36280 { "allow-df-permute",
36281 offsetof (struct gcc_options, x_TARGET_ALLOW_DF_PERMUTE),
36282 offsetof (struct cl_target_option, x_TARGET_ALLOW_DF_PERMUTE), },
36283 { "sched-groups",
36284 offsetof (struct gcc_options, x_TARGET_SCHED_GROUPS),
36285 offsetof (struct cl_target_option, x_TARGET_SCHED_GROUPS), },
36286 { "always-hint",
36287 offsetof (struct gcc_options, x_TARGET_ALWAYS_HINT),
36288 offsetof (struct cl_target_option, x_TARGET_ALWAYS_HINT), },
36289 { "align-branch-targets",
36290 offsetof (struct gcc_options, x_TARGET_ALIGN_BRANCH_TARGETS),
36291 offsetof (struct cl_target_option, x_TARGET_ALIGN_BRANCH_TARGETS), },
36292 { "vectorize-builtins",
36293 offsetof (struct gcc_options, x_TARGET_VECTORIZE_BUILTINS),
36294 offsetof (struct cl_target_option, x_TARGET_VECTORIZE_BUILTINS), },
36295 { "tls-markers",
36296 offsetof (struct gcc_options, x_tls_markers),
36297 offsetof (struct cl_target_option, x_tls_markers), },
36298 { "sched-prolog",
36299 offsetof (struct gcc_options, x_TARGET_SCHED_PROLOG),
36300 offsetof (struct cl_target_option, x_TARGET_SCHED_PROLOG), },
36301 { "sched-epilog",
36302 offsetof (struct gcc_options, x_TARGET_SCHED_PROLOG),
36303 offsetof (struct cl_target_option, x_TARGET_SCHED_PROLOG), },
36306 /* Inner function to handle attribute((target("..."))) and #pragma GCC target
36307 parsing. Return true if there were no errors. */
36309 static bool
36310 rs6000_inner_target_options (tree args, bool attr_p)
36312 bool ret = true;
36314 if (args == NULL_TREE)
36317 else if (TREE_CODE (args) == STRING_CST)
36319 char *p = ASTRDUP (TREE_STRING_POINTER (args));
36320 char *q;
36322 while ((q = strtok (p, ",")) != NULL)
36324 bool error_p = false;
36325 bool not_valid_p = false;
36326 const char *cpu_opt = NULL;
36328 p = NULL;
36329 if (strncmp (q, "cpu=", 4) == 0)
36331 int cpu_index = rs6000_cpu_name_lookup (q+4);
36332 if (cpu_index >= 0)
36333 rs6000_cpu_index = cpu_index;
36334 else
36336 error_p = true;
36337 cpu_opt = q+4;
36340 else if (strncmp (q, "tune=", 5) == 0)
36342 int tune_index = rs6000_cpu_name_lookup (q+5);
36343 if (tune_index >= 0)
36344 rs6000_tune_index = tune_index;
36345 else
36347 error_p = true;
36348 cpu_opt = q+5;
36351 else
36353 size_t i;
36354 bool invert = false;
36355 char *r = q;
36357 error_p = true;
36358 if (strncmp (r, "no-", 3) == 0)
36360 invert = true;
36361 r += 3;
36364 for (i = 0; i < ARRAY_SIZE (rs6000_opt_masks); i++)
36365 if (strcmp (r, rs6000_opt_masks[i].name) == 0)
36367 HOST_WIDE_INT mask = rs6000_opt_masks[i].mask;
36369 if (!rs6000_opt_masks[i].valid_target)
36370 not_valid_p = true;
36371 else
36373 error_p = false;
36374 rs6000_isa_flags_explicit |= mask;
36376 /* VSX needs altivec, so -mvsx automagically sets
36377 altivec and disables -mavoid-indexed-addresses. */
36378 if (!invert)
36380 if (mask == OPTION_MASK_VSX)
36382 mask |= OPTION_MASK_ALTIVEC;
36383 TARGET_AVOID_XFORM = 0;
36387 if (rs6000_opt_masks[i].invert)
36388 invert = !invert;
36390 if (invert)
36391 rs6000_isa_flags &= ~mask;
36392 else
36393 rs6000_isa_flags |= mask;
36395 break;
36398 if (error_p && !not_valid_p)
36400 for (i = 0; i < ARRAY_SIZE (rs6000_opt_vars); i++)
36401 if (strcmp (r, rs6000_opt_vars[i].name) == 0)
36403 size_t j = rs6000_opt_vars[i].global_offset;
36404 *((int *) ((char *)&global_options + j)) = !invert;
36405 error_p = false;
36406 not_valid_p = false;
36407 break;
36412 if (error_p)
36414 const char *eprefix, *esuffix;
36416 ret = false;
36417 if (attr_p)
36419 eprefix = "__attribute__((__target__(";
36420 esuffix = ")))";
36422 else
36424 eprefix = "#pragma GCC target ";
36425 esuffix = "";
36428 if (cpu_opt)
36429 error ("invalid cpu \"%s\" for %s\"%s\"%s", cpu_opt, eprefix,
36430 q, esuffix);
36431 else if (not_valid_p)
36432 error ("%s\"%s\"%s is not allowed", eprefix, q, esuffix);
36433 else
36434 error ("%s\"%s\"%s is invalid", eprefix, q, esuffix);
36439 else if (TREE_CODE (args) == TREE_LIST)
36443 tree value = TREE_VALUE (args);
36444 if (value)
36446 bool ret2 = rs6000_inner_target_options (value, attr_p);
36447 if (!ret2)
36448 ret = false;
36450 args = TREE_CHAIN (args);
36452 while (args != NULL_TREE);
36455 else
36457 error ("attribute %<target%> argument not a string");
36458 return false;
36461 return ret;
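/* Hypothetical user-level usage of the strings parsed above (the option
   names come from rs6000_opt_masks and rs6000_opt_vars):

     #pragma GCC target ("cpu=power8,vsx,no-multiple")

     __attribute__((__target__("no-vsx,friz")))
     void narrow (void);

   Each comma-separated token is handled by one trip through the strtok
   loop above.  */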
36464 /* Print out the target options as a list for -mdebug=target. */
36466 static void
36467 rs6000_debug_target_options (tree args, const char *prefix)
36469 if (args == NULL_TREE)
36470 fprintf (stderr, "%s<NULL>", prefix);
36472 else if (TREE_CODE (args) == STRING_CST)
36474 char *p = ASTRDUP (TREE_STRING_POINTER (args));
36475 char *q;
36477 while ((q = strtok (p, ",")) != NULL)
36479 p = NULL;
36480 fprintf (stderr, "%s\"%s\"", prefix, q);
36481 prefix = ", ";
36485 else if (TREE_CODE (args) == TREE_LIST)
36489 tree value = TREE_VALUE (args);
36490 if (value)
36492 rs6000_debug_target_options (value, prefix);
36493 prefix = ", ";
36495 args = TREE_CHAIN (args);
36497 while (args != NULL_TREE);
36500 else
36501 gcc_unreachable ();
36503 return;
36507 /* Hook to validate attribute((target("..."))). */
36509 static bool
36510 rs6000_valid_attribute_p (tree fndecl,
36511 tree ARG_UNUSED (name),
36512 tree args,
36513 int flags)
36515 struct cl_target_option cur_target;
36516 bool ret;
36517 tree old_optimize = build_optimization_node (&global_options);
36518 tree new_target, new_optimize;
36519 tree func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl);
36521 gcc_assert ((fndecl != NULL_TREE) && (args != NULL_TREE));
36523 if (TARGET_DEBUG_TARGET)
36525 tree tname = DECL_NAME (fndecl);
36526 fprintf (stderr, "\n==================== rs6000_valid_attribute_p:\n");
36527 if (tname)
36528 fprintf (stderr, "function: %.*s\n",
36529 (int) IDENTIFIER_LENGTH (tname),
36530 IDENTIFIER_POINTER (tname));
36531 else
36532 fprintf (stderr, "function: unknown\n");
36534 fprintf (stderr, "args:");
36535 rs6000_debug_target_options (args, " ");
36536 fprintf (stderr, "\n");
36538 if (flags)
36539 fprintf (stderr, "flags: 0x%x\n", flags);
36541 fprintf (stderr, "--------------------\n");
36544 /* attribute((target("default"))) does nothing, beyond
36545 affecting multi-versioning. */
36546 if (TREE_VALUE (args)
36547 && TREE_CODE (TREE_VALUE (args)) == STRING_CST
36548 && TREE_CHAIN (args) == NULL_TREE
36549 && strcmp (TREE_STRING_POINTER (TREE_VALUE (args)), "default") == 0)
36550 return true;
36552 old_optimize = build_optimization_node (&global_options);
36553 func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl);
36555 /* If the function changed the optimization levels as well as setting target
36556 options, start with the optimizations specified. */
36557 if (func_optimize && func_optimize != old_optimize)
36558 cl_optimization_restore (&global_options,
36559 TREE_OPTIMIZATION (func_optimize));
36561 /* The target attributes may also change some optimization flags, so update
36562 the optimization options if necessary. */
36563 cl_target_option_save (&cur_target, &global_options);
36564 rs6000_cpu_index = rs6000_tune_index = -1;
36565 ret = rs6000_inner_target_options (args, true);
36567 /* Set up any additional state. */
36568 if (ret)
36570 ret = rs6000_option_override_internal (false);
36571 new_target = build_target_option_node (&global_options);
36573 else
36574 new_target = NULL;
36576 new_optimize = build_optimization_node (&global_options);
36578 if (!new_target)
36579 ret = false;
36581 else if (fndecl)
36583 DECL_FUNCTION_SPECIFIC_TARGET (fndecl) = new_target;
36585 if (old_optimize != new_optimize)
36586 DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl) = new_optimize;
36589 cl_target_option_restore (&global_options, &cur_target);
36591 if (old_optimize != new_optimize)
36592 cl_optimization_restore (&global_options,
36593 TREE_OPTIMIZATION (old_optimize));
36595 return ret;
36599 /* Hook to validate the current #pragma GCC target and set the state, and
36600 update the macros based on what was changed. If ARGS is NULL, then
36601 POP_TARGET is used to reset the options. */
36603 bool
36604 rs6000_pragma_target_parse (tree args, tree pop_target)
36606 tree prev_tree = build_target_option_node (&global_options);
36607 tree cur_tree;
36608 struct cl_target_option *prev_opt, *cur_opt;
36609 HOST_WIDE_INT prev_flags, cur_flags, diff_flags;
36610 HOST_WIDE_INT prev_bumask, cur_bumask, diff_bumask;
36612 if (TARGET_DEBUG_TARGET)
36614 fprintf (stderr, "\n==================== rs6000_pragma_target_parse\n");
36615 fprintf (stderr, "args:");
36616 rs6000_debug_target_options (args, " ");
36617 fprintf (stderr, "\n");
36619 if (pop_target)
36621 fprintf (stderr, "pop_target:\n");
36622 debug_tree (pop_target);
36624 else
36625 fprintf (stderr, "pop_target: <NULL>\n");
36627 fprintf (stderr, "--------------------\n");
36630 if (! args)
36632 cur_tree = ((pop_target)
36633 ? pop_target
36634 : target_option_default_node);
36635 cl_target_option_restore (&global_options,
36636 TREE_TARGET_OPTION (cur_tree));
36638 else
36640 rs6000_cpu_index = rs6000_tune_index = -1;
36641 if (!rs6000_inner_target_options (args, false)
36642 || !rs6000_option_override_internal (false)
36643 || (cur_tree = build_target_option_node (&global_options))
36644 == NULL_TREE)
36646 if (TARGET_DEBUG_BUILTIN || TARGET_DEBUG_TARGET)
36647 fprintf (stderr, "invalid pragma\n");
36649 return false;
36653 target_option_current_node = cur_tree;
36655 /* If we have the preprocessor linked in (i.e. C or C++ languages), possibly
36656 change the macros that are defined. */
36657 if (rs6000_target_modify_macros_ptr)
36659 prev_opt = TREE_TARGET_OPTION (prev_tree);
36660 prev_bumask = prev_opt->x_rs6000_builtin_mask;
36661 prev_flags = prev_opt->x_rs6000_isa_flags;
36663 cur_opt = TREE_TARGET_OPTION (cur_tree);
36664 cur_flags = cur_opt->x_rs6000_isa_flags;
36665 cur_bumask = cur_opt->x_rs6000_builtin_mask;
36667 diff_bumask = (prev_bumask ^ cur_bumask);
36668 diff_flags = (prev_flags ^ cur_flags);
36670 if ((diff_flags != 0) || (diff_bumask != 0))
36672 /* Delete old macros. */
36673 rs6000_target_modify_macros_ptr (false,
36674 prev_flags & diff_flags,
36675 prev_bumask & diff_bumask);
36677 /* Define new macros. */
36678 rs6000_target_modify_macros_ptr (true,
36679 cur_flags & diff_flags,
36680 cur_bumask & diff_bumask);
36684 return true;
36688 /* Remember the last target of rs6000_set_current_function. */
36689 static GTY(()) tree rs6000_previous_fndecl;
36691 /* Establish appropriate back-end context for processing the function
36692 FNDECL. The argument might be NULL to indicate processing at top
36693 level, outside of any function scope. */
36694 static void
36695 rs6000_set_current_function (tree fndecl)
36697 tree old_tree = (rs6000_previous_fndecl
36698 ? DECL_FUNCTION_SPECIFIC_TARGET (rs6000_previous_fndecl)
36699 : NULL_TREE);
36701 tree new_tree = (fndecl
36702 ? DECL_FUNCTION_SPECIFIC_TARGET (fndecl)
36703 : NULL_TREE);
36705 if (TARGET_DEBUG_TARGET)
36707 bool print_final = false;
36708 fprintf (stderr, "\n==================== rs6000_set_current_function");
36710 if (fndecl)
36711 fprintf (stderr, ", fndecl %s (%p)",
36712 (DECL_NAME (fndecl)
36713 ? IDENTIFIER_POINTER (DECL_NAME (fndecl))
36714 : "<unknown>"), (void *)fndecl);
36716 if (rs6000_previous_fndecl)
36717 fprintf (stderr, ", prev_fndecl (%p)", (void *)rs6000_previous_fndecl);
36719 fprintf (stderr, "\n");
36720 if (new_tree)
36722 fprintf (stderr, "\nnew fndecl target specific options:\n");
36723 debug_tree (new_tree);
36724 print_final = true;
36727 if (old_tree)
36729 fprintf (stderr, "\nold fndecl target specific options:\n");
36730 debug_tree (old_tree);
36731 print_final = true;
36734 if (print_final)
36735 fprintf (stderr, "--------------------\n");
36738 /* Only change the context if the function changes. This hook is called
36739 several times in the course of compiling a function, and we don't want to
36740 slow things down too much or call target_reinit when it isn't safe. */
36741 if (fndecl && fndecl != rs6000_previous_fndecl)
36743 rs6000_previous_fndecl = fndecl;
36744 if (old_tree == new_tree)
36747 else if (new_tree && new_tree != target_option_default_node)
36749 cl_target_option_restore (&global_options,
36750 TREE_TARGET_OPTION (new_tree));
36751 if (TREE_TARGET_GLOBALS (new_tree))
36752 restore_target_globals (TREE_TARGET_GLOBALS (new_tree));
36753 else
36754 TREE_TARGET_GLOBALS (new_tree)
36755 = save_target_globals_default_opts ();
36758 else if (old_tree && old_tree != target_option_default_node)
36760 new_tree = target_option_current_node;
36761 cl_target_option_restore (&global_options,
36762 TREE_TARGET_OPTION (new_tree));
36763 if (TREE_TARGET_GLOBALS (new_tree))
36764 restore_target_globals (TREE_TARGET_GLOBALS (new_tree));
36765 else if (new_tree == target_option_default_node)
36766 restore_target_globals (&default_target_globals);
36767 else
36768 TREE_TARGET_GLOBALS (new_tree)
36769 = save_target_globals_default_opts ();
36775 /* Save the current options */
36777 static void
36778 rs6000_function_specific_save (struct cl_target_option *ptr,
36779 struct gcc_options *opts)
36781 ptr->x_rs6000_isa_flags = opts->x_rs6000_isa_flags;
36782 ptr->x_rs6000_isa_flags_explicit = opts->x_rs6000_isa_flags_explicit;
36785 /* Restore the current options */
36787 static void
36788 rs6000_function_specific_restore (struct gcc_options *opts,
36789 struct cl_target_option *ptr)
36792 opts->x_rs6000_isa_flags = ptr->x_rs6000_isa_flags;
36793 opts->x_rs6000_isa_flags_explicit = ptr->x_rs6000_isa_flags_explicit;
36794 (void) rs6000_option_override_internal (false);
36797 /* Print the current options */
36799 static void
36800 rs6000_function_specific_print (FILE *file, int indent,
36801 struct cl_target_option *ptr)
36803 rs6000_print_isa_options (file, indent, "Isa options set",
36804 ptr->x_rs6000_isa_flags);
36806 rs6000_print_isa_options (file, indent, "Isa options explicit",
36807 ptr->x_rs6000_isa_flags_explicit);
36810 /* Helper function to print the current isa or misc options on a line. */
36812 static void
36813 rs6000_print_options_internal (FILE *file,
36814 int indent,
36815 const char *string,
36816 HOST_WIDE_INT flags,
36817 const char *prefix,
36818 const struct rs6000_opt_mask *opts,
36819 size_t num_elements)
36821 size_t i;
36822 size_t start_column = 0;
36823 size_t cur_column;
36824 size_t max_column = 120;
36825 size_t prefix_len = strlen (prefix);
36826 size_t comma_len = 0;
36827 const char *comma = "";
36829 if (indent)
36830 start_column += fprintf (file, "%*s", indent, "");
36832 if (!flags)
36834 fprintf (stderr, DEBUG_FMT_S, string, "<none>");
36835 return;
36838 start_column += fprintf (stderr, DEBUG_FMT_WX, string, flags);
36840 /* Print the various mask options. */
36841 cur_column = start_column;
36842 for (i = 0; i < num_elements; i++)
36844 bool invert = opts[i].invert;
36845 const char *name = opts[i].name;
36846 const char *no_str = "";
36847 HOST_WIDE_INT mask = opts[i].mask;
36848 size_t len = comma_len + prefix_len + strlen (name);
36850 if (!invert)
36852 if ((flags & mask) == 0)
36854 no_str = "no-";
36855 len += sizeof ("no-") - 1;
36858 flags &= ~mask;
36861 else
36863 if ((flags & mask) != 0)
36865 no_str = "no-";
36866 len += sizeof ("no-") - 1;
36869 flags |= mask;
36872 cur_column += len;
36873 if (cur_column > max_column)
36875 fprintf (stderr, ", \\\n%*s", (int)start_column, "");
36876 cur_column = start_column + len;
36877 comma = "";
36880 fprintf (file, "%s%s%s%s", comma, prefix, no_str, name);
36881 comma = ", ";
36882 comma_len = sizeof (", ") - 1;
36885 fputs ("\n", file);
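/* Example of the output this helper produces (flag names and the hex value
   are illustrative only):

     Isa options set = 0x0000000500000041 -maltivec, -mvsx, \
         -mno-power9-vector, -mno-htm

   Each mask prints with the caller-supplied prefix (e.g. "-m"), a "no-" is
   inserted when the bit state is opposite the mask's sense, and output
   wraps with a trailing backslash once a line passes 120 columns.  */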
36888 /* Helper function to print the current isa options on a line. */
36890 static void
36891 rs6000_print_isa_options (FILE *file, int indent, const char *string,
36892 HOST_WIDE_INT flags)
36894 rs6000_print_options_internal (file, indent, string, flags, "-m",
36895 &rs6000_opt_masks[0],
36896 ARRAY_SIZE (rs6000_opt_masks));
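/* Helper function to print the current builtin mask options on a line.  */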
36899 static void
36900 rs6000_print_builtin_options (FILE *file, int indent, const char *string,
36901 HOST_WIDE_INT flags)
36903 rs6000_print_options_internal (file, indent, string, flags, "",
36904 &rs6000_builtin_mask_names[0],
36905 ARRAY_SIZE (rs6000_builtin_mask_names));
36908 /* If the user used -mno-vsx, we need to turn off all of the implicit ISA 2.06,
36909 2.07, and 3.0 options that relate to the vector unit (-mdirect-move,
36910 -mvsx-timode, -mupper-regs-df).
36912 If the user used -mno-power8-vector, we need to turn off all of the implicit
36913 ISA 2.07 and 3.0 options that relate to the vector unit.
36915 If the user used -mno-power9-vector, we need to turn off all of the implicit
36916 ISA 3.0 options that relate to the vector unit.
36918 This function does not handle explicit options such as the user specifying
36919 -mdirect-move. These are handled in rs6000_option_override_internal, and
36920 the appropriate error is given if needed.
36922 We return a mask of all of the implicit options that should not be enabled
36923 by default. */
36925 static HOST_WIDE_INT
36926 rs6000_disable_incompatible_switches (void)
36928 HOST_WIDE_INT ignore_masks = rs6000_isa_flags_explicit;
36929 size_t i, j;
36931 static const struct {
36932 const HOST_WIDE_INT no_flag; /* flag explicitly turned off. */
36933 const HOST_WIDE_INT dep_flags; /* flags that depend on this option. */
36934 const char *const name; /* name of the switch. */
36935 } flags[] = {
36936 { OPTION_MASK_P9_VECTOR, OTHER_P9_VECTOR_MASKS, "power9-vector" },
36937 { OPTION_MASK_P8_VECTOR, OTHER_P8_VECTOR_MASKS, "power8-vector" },
36938 { OPTION_MASK_VSX, OTHER_VSX_VECTOR_MASKS, "vsx" },
36941 for (i = 0; i < ARRAY_SIZE (flags); i++)
36943 HOST_WIDE_INT no_flag = flags[i].no_flag;
36945 if ((rs6000_isa_flags & no_flag) == 0
36946 && (rs6000_isa_flags_explicit & no_flag) != 0)
36948 HOST_WIDE_INT dep_flags = flags[i].dep_flags;
36949 HOST_WIDE_INT set_flags = (rs6000_isa_flags_explicit
36950 & rs6000_isa_flags
36951 & dep_flags);
36953 if (set_flags)
36955 for (j = 0; j < ARRAY_SIZE (rs6000_opt_masks); j++)
36956 if ((set_flags & rs6000_opt_masks[j].mask) != 0)
36958 set_flags &= ~rs6000_opt_masks[j].mask;
36959 error ("-mno-%s turns off -m%s",
36960 flags[i].name,
36961 rs6000_opt_masks[j].name);
36964 gcc_assert (!set_flags);
36967 rs6000_isa_flags &= ~dep_flags;
36968 ignore_masks |= no_flag | dep_flags;
36972 if (!TARGET_P9_VECTOR
36973 && (rs6000_isa_flags_explicit & OPTION_MASK_P9_VECTOR) != 0
36974 && TARGET_P9_DFORM_BOTH > 0)
36976 error ("-mno-power9-vector turns off -mpower9-dform");
36977 TARGET_P9_DFORM_BOTH = 0;
36980 return ignore_masks;
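/* Illustrative example, not from the original source: mixing an explicit
   disable with an explicit dependent enable, e.g.

     gcc -mcpu=power9 -mno-vsx -mpower9-vector test.c

   can make this function report

     error: -mno-vsx turns off -mpower9-vector

   while dependents that were only enabled implicitly (here via
   -mcpu=power9) are dropped silently through ignore_masks.  The exact
   dependent sets come from OTHER_VSX_VECTOR_MASKS and friends.  */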
36984 /* Helper function for printing the function name when debugging. */
36986 static const char *
36987 get_decl_name (tree fn)
36989 tree name;
36991 if (!fn)
36992 return "<null>";
36994 name = DECL_NAME (fn);
36995 if (!name)
36996 return "<no-name>";
36998 return IDENTIFIER_POINTER (name);
37001 /* Return the clone id of the target we are compiling code for in a target
37002 clone. The clone id is ordered from 0 (default) to CLONE_MAX-1 and gives
37003 the priority list for the target clones (ordered from lowest to
37004 highest). */
37006 static int
37007 rs6000_clone_priority (tree fndecl)
37009 tree fn_opts = DECL_FUNCTION_SPECIFIC_TARGET (fndecl);
37010 HOST_WIDE_INT isa_masks;
37011 int ret = CLONE_DEFAULT;
37012 tree attrs = lookup_attribute ("target", DECL_ATTRIBUTES (fndecl));
37013 const char *attrs_str = NULL;
37015 attrs = TREE_VALUE (TREE_VALUE (attrs));
37016 attrs_str = TREE_STRING_POINTER (attrs);
37018 /* Return priority zero for default function. Return the ISA needed for the
37019 function if it is not the default. */
37020 if (strcmp (attrs_str, "default") != 0)
37022 if (fn_opts == NULL_TREE)
37023 fn_opts = target_option_default_node;
37025 if (!fn_opts || !TREE_TARGET_OPTION (fn_opts))
37026 isa_masks = rs6000_isa_flags;
37027 else
37028 isa_masks = TREE_TARGET_OPTION (fn_opts)->x_rs6000_isa_flags;
37030 for (ret = CLONE_MAX - 1; ret != 0; ret--)
37031 if ((rs6000_clone_map[ret].isa_mask & isa_masks) != 0)
37032 break;
37035 if (TARGET_DEBUG_TARGET)
37036 fprintf (stderr, "rs6000_clone_priority (%s) => %d\n",
37037 get_decl_name (fndecl), ret);
37039 return ret;
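/* Illustrative sketch, not part of the original source: a function whose
   clones this priority routine ranks.  The function name is hypothetical;
   a "default" version is always required.

     __attribute__ ((target_clones ("cpu=power9", "cpu=power8", "default")))
     long sum (const long *p, long n)
     {
       long s = 0;
       for (long i = 0; i < n; i++)
         s += p[i];
       return s;
     }

   The "default" clone gets priority CLONE_DEFAULT (0); the others map to
   higher indexes through rs6000_clone_map.  */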
37042 /* This compares the priority of target features in function DECL1 and DECL2.
37043 It returns positive value if DECL1 is higher priority, negative value if
37044 DECL2 is higher priority and 0 if they are the same. Note, priorities are
37045 ordered from lowest (CLONE_DEFAULT) to highest (currently CLONE_ISA_3_0). */
37047 static int
37048 rs6000_compare_version_priority (tree decl1, tree decl2)
37050 int priority1 = rs6000_clone_priority (decl1);
37051 int priority2 = rs6000_clone_priority (decl2);
37052 int ret = priority1 - priority2;
37054 if (TARGET_DEBUG_TARGET)
37055 fprintf (stderr, "rs6000_compare_version_priority (%s, %s) => %d\n",
37056 get_decl_name (decl1), get_decl_name (decl2), ret);
37058 return ret;
37061 /* Make a dispatcher declaration for the multi-versioned function DECL.
37062 Calls to DECL function will be replaced with calls to the dispatcher
37063 by the front-end. Returns the decl of the dispatcher function. */
37065 static tree
37066 rs6000_get_function_versions_dispatcher (void *decl)
37068 tree fn = (tree) decl;
37069 struct cgraph_node *node = NULL;
37070 struct cgraph_node *default_node = NULL;
37071 struct cgraph_function_version_info *node_v = NULL;
37072 struct cgraph_function_version_info *first_v = NULL;
37074 tree dispatch_decl = NULL;
37076 struct cgraph_function_version_info *default_version_info = NULL;
37077 gcc_assert (fn != NULL && DECL_FUNCTION_VERSIONED (fn));
37079 if (TARGET_DEBUG_TARGET)
37080 fprintf (stderr, "rs6000_get_function_versions_dispatcher (%s)\n",
37081 get_decl_name (fn));
37083 node = cgraph_node::get (fn);
37084 gcc_assert (node != NULL);
37086 node_v = node->function_version ();
37087 gcc_assert (node_v != NULL);
37089 if (node_v->dispatcher_resolver != NULL)
37090 return node_v->dispatcher_resolver;
37092 /* Find the default version and make it the first node. */
37093 first_v = node_v;
37094 /* Go to the beginning of the chain. */
37095 while (first_v->prev != NULL)
37096 first_v = first_v->prev;
37098 default_version_info = first_v;
37099 while (default_version_info != NULL)
37101 const tree decl2 = default_version_info->this_node->decl;
37102 if (is_function_default_version (decl2))
37103 break;
37104 default_version_info = default_version_info->next;
37107 /* If there is no default node, just return NULL. */
37108 if (default_version_info == NULL)
37109 return NULL;
37111 /* Make default info the first node. */
37112 if (first_v != default_version_info)
37114 default_version_info->prev->next = default_version_info->next;
37115 if (default_version_info->next)
37116 default_version_info->next->prev = default_version_info->prev;
37117 first_v->prev = default_version_info;
37118 default_version_info->next = first_v;
37119 default_version_info->prev = NULL;
37122 default_node = default_version_info->this_node;
37124 #ifndef TARGET_LIBC_PROVIDES_HWCAP_IN_TCB
37125 error_at (DECL_SOURCE_LOCATION (default_node->decl),
37126 "target_clones attribute needs GLIBC (2.23 and newer) that "
37127 "exports hardware capability bits");
37128 #else
37130 if (targetm.has_ifunc_p ())
37132 struct cgraph_function_version_info *it_v = NULL;
37133 struct cgraph_node *dispatcher_node = NULL;
37134 struct cgraph_function_version_info *dispatcher_version_info = NULL;
37136 /* Right now, the dispatching is done via ifunc. */
37137 dispatch_decl = make_dispatcher_decl (default_node->decl);
37139 dispatcher_node = cgraph_node::get_create (dispatch_decl);
37140 gcc_assert (dispatcher_node != NULL);
37141 dispatcher_node->dispatcher_function = 1;
37142 dispatcher_version_info
37143 = dispatcher_node->insert_new_function_version ();
37144 dispatcher_version_info->next = default_version_info;
37145 dispatcher_node->definition = 1;
37147 /* Set the dispatcher for all the versions. */
37148 it_v = default_version_info;
37149 while (it_v != NULL)
37151 it_v->dispatcher_resolver = dispatch_decl;
37152 it_v = it_v->next;
37155 else
37157 error_at (DECL_SOURCE_LOCATION (default_node->decl),
37158 "multiversioning needs ifunc which is not supported "
37159 "on this target");
37161 #endif
37163 return dispatch_decl;
37166 /* Make the resolver function decl to dispatch the versions of a multi-
37167 versioned function, DEFAULT_DECL. Create an empty basic block in the
37168 resolver and store the pointer in EMPTY_BB. Return the decl of the resolver
37169 function. */
37171 static tree
37172 make_resolver_func (const tree default_decl,
37173 const tree dispatch_decl,
37174 basic_block *empty_bb)
37176 /* Make the resolver function static. The resolver function returns
37177 void *. */
37178 tree decl_name = clone_function_name (default_decl, "resolver");
37179 const char *resolver_name = IDENTIFIER_POINTER (decl_name);
37180 tree type = build_function_type_list (ptr_type_node, NULL_TREE);
37181 tree decl = build_fn_decl (resolver_name, type);
37182 SET_DECL_ASSEMBLER_NAME (decl, decl_name);
37184 DECL_NAME (decl) = decl_name;
37185 TREE_USED (decl) = 1;
37186 DECL_ARTIFICIAL (decl) = 1;
37187 DECL_IGNORED_P (decl) = 0;
37188 TREE_PUBLIC (decl) = 0;
37189 DECL_UNINLINABLE (decl) = 1;
37191 /* Resolver is not external, body is generated. */
37192 DECL_EXTERNAL (decl) = 0;
37193 DECL_EXTERNAL (dispatch_decl) = 0;
37195 DECL_CONTEXT (decl) = NULL_TREE;
37196 DECL_INITIAL (decl) = make_node (BLOCK);
37197 DECL_STATIC_CONSTRUCTOR (decl) = 0;
37199 /* Build result decl and add to function_decl. */
37200 tree t = build_decl (UNKNOWN_LOCATION, RESULT_DECL, NULL_TREE, ptr_type_node);
37201 DECL_ARTIFICIAL (t) = 1;
37202 DECL_IGNORED_P (t) = 1;
37203 DECL_RESULT (decl) = t;
37205 gimplify_function_tree (decl);
37206 push_cfun (DECL_STRUCT_FUNCTION (decl));
37207 *empty_bb = init_lowered_empty_function (decl, false,
37208 profile_count::uninitialized ());
37210 cgraph_node::add_new_function (decl, true);
37211 symtab->call_cgraph_insertion_hooks (cgraph_node::get_create (decl));
37213 pop_cfun ();
37215 /* Mark dispatch_decl as "ifunc" with resolver as resolver_name. */
37216 DECL_ATTRIBUTES (dispatch_decl)
37217 = make_attribute ("ifunc", resolver_name, DECL_ATTRIBUTES (dispatch_decl));
37219 cgraph_node::create_same_body_alias (dispatch_decl, decl);
37221 return decl;
37224 /* This adds a condition to the basic_block NEW_BB in function FUNCTION_DECL to
37225 return a pointer to VERSION_DECL if we are running on a machine that
37226 supports the index CLONE_ISA hardware architecture bits. This function will
37227 be called during version dispatch to decide which function version to
37228 execute. It returns the basic block at the end, to which more conditions
37229 can be added. */
37231 static basic_block
37232 add_condition_to_bb (tree function_decl, tree version_decl,
37233 int clone_isa, basic_block new_bb)
37235 push_cfun (DECL_STRUCT_FUNCTION (function_decl));
37237 gcc_assert (new_bb != NULL);
37238 gimple_seq gseq = bb_seq (new_bb);
37241 tree convert_expr = build1 (CONVERT_EXPR, ptr_type_node,
37242 build_fold_addr_expr (version_decl));
37243 tree result_var = create_tmp_var (ptr_type_node);
37244 gimple *convert_stmt = gimple_build_assign (result_var, convert_expr);
37245 gimple *return_stmt = gimple_build_return (result_var);
37247 if (clone_isa == CLONE_DEFAULT)
37249 gimple_seq_add_stmt (&gseq, convert_stmt);
37250 gimple_seq_add_stmt (&gseq, return_stmt);
37251 set_bb_seq (new_bb, gseq);
37252 gimple_set_bb (convert_stmt, new_bb);
37253 gimple_set_bb (return_stmt, new_bb);
37254 pop_cfun ();
37255 return new_bb;
37258 tree bool_zero = build_int_cst (bool_int_type_node, 0);
37259 tree cond_var = create_tmp_var (bool_int_type_node);
37260 tree predicate_decl = rs6000_builtin_decls [(int) RS6000_BUILTIN_CPU_SUPPORTS];
37261 const char *arg_str = rs6000_clone_map[clone_isa].name;
37262 tree predicate_arg = build_string_literal (strlen (arg_str) + 1, arg_str);
37263 gimple *call_cond_stmt = gimple_build_call (predicate_decl, 1, predicate_arg);
37264 gimple_call_set_lhs (call_cond_stmt, cond_var);
37266 gimple_set_block (call_cond_stmt, DECL_INITIAL (function_decl));
37267 gimple_set_bb (call_cond_stmt, new_bb);
37268 gimple_seq_add_stmt (&gseq, call_cond_stmt);
37270 gimple *if_else_stmt = gimple_build_cond (NE_EXPR, cond_var, bool_zero,
37271 NULL_TREE, NULL_TREE);
37272 gimple_set_block (if_else_stmt, DECL_INITIAL (function_decl));
37273 gimple_set_bb (if_else_stmt, new_bb);
37274 gimple_seq_add_stmt (&gseq, if_else_stmt);
37276 gimple_seq_add_stmt (&gseq, convert_stmt);
37277 gimple_seq_add_stmt (&gseq, return_stmt);
37278 set_bb_seq (new_bb, gseq);
37280 basic_block bb1 = new_bb;
37281 edge e12 = split_block (bb1, if_else_stmt);
37282 basic_block bb2 = e12->dest;
37283 e12->flags &= ~EDGE_FALLTHRU;
37284 e12->flags |= EDGE_TRUE_VALUE;
37286 edge e23 = split_block (bb2, return_stmt);
37287 gimple_set_bb (convert_stmt, bb2);
37288 gimple_set_bb (return_stmt, bb2);
37290 basic_block bb3 = e23->dest;
37291 make_edge (bb1, bb3, EDGE_FALSE_VALUE);
37293 remove_edge (e23);
37294 make_edge (bb2, EXIT_BLOCK_PTR_FOR_FN (cfun), 0);
37296 pop_cfun ();
37297 return bb3;
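/* A sketch, in C-like pseudocode, of the resolver body these conditions
   build up; clone and feature names are illustrative, the real strings
   come from rs6000_clone_map:

     void *sum_resolver (void)
     {
       if (__builtin_cpu_supports ("arch_3_00"))
         return sum_power9;
       if (__builtin_cpu_supports ("arch_2_07"))
         return sum_power8;
       return sum_default;
     }

   The CLONE_DEFAULT version needs no condition and is the final return.  */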
37300 /* This function generates the dispatch function for multi-versioned functions.
37301 DISPATCH_DECL is the function which will contain the dispatch logic.
37302 FNDECLS are the function choices for dispatch, and is a tree chain.
37303 EMPTY_BB is the basic block pointer in DISPATCH_DECL in which the dispatch
37304 code is generated. */
37306 static int
37307 dispatch_function_versions (tree dispatch_decl,
37308 void *fndecls_p,
37309 basic_block *empty_bb)
37311 int ix;
37312 tree ele;
37313 vec<tree> *fndecls;
37314 tree clones[CLONE_MAX];
37316 if (TARGET_DEBUG_TARGET)
37317 fputs ("dispatch_function_versions, top\n", stderr);
37319 gcc_assert (dispatch_decl != NULL
37320 && fndecls_p != NULL
37321 && empty_bb != NULL);
37323 /* fndecls_p is actually a vector. */
37324 fndecls = static_cast<vec<tree> *> (fndecls_p);
37326 /* At least one more version other than the default. */
37327 gcc_assert (fndecls->length () >= 2);
37329 /* The first version in the vector is the default decl. */
37330 memset ((void *) clones, '\0', sizeof (clones));
37331 clones[CLONE_DEFAULT] = (*fndecls)[0];
37333 /* On the PowerPC, we do not need to call __builtin_cpu_init, which is a NOP
37334 on the PowerPC (on the x86_64, it is not a NOP). The builtin function
37335 __builtin_cpu_supports ensures that the TOC fields are set up by requiring a
37336 recent glibc. If we ever need to call __builtin_cpu_init, we would need
37337 to insert the code here to do the call. */
37339 for (ix = 1; fndecls->iterate (ix, &ele); ++ix)
37341 int priority = rs6000_clone_priority (ele);
37342 if (!clones[priority])
37343 clones[priority] = ele;
37346 for (ix = CLONE_MAX - 1; ix >= 0; ix--)
37347 if (clones[ix])
37349 if (TARGET_DEBUG_TARGET)
37350 fprintf (stderr, "dispatch_function_versions, clone %d, %s\n",
37351 ix, get_decl_name (clones[ix]));
37353 *empty_bb = add_condition_to_bb (dispatch_decl, clones[ix], ix,
37354 *empty_bb);
37357 return 0;
37360 /* Generate the dispatching code body to dispatch multi-versioned function
37361 DECL. The target hook is called to process the "target" attributes and
37362 provide the code to dispatch the right function at run-time. NODE points
37363 to the dispatcher decl whose body will be created. */
37365 static tree
37366 rs6000_generate_version_dispatcher_body (void *node_p)
37368 tree resolver;
37369 basic_block empty_bb;
37370 struct cgraph_node *node = (cgraph_node *) node_p;
37371 struct cgraph_function_version_info *ninfo = node->function_version ();
37373 if (ninfo->dispatcher_resolver)
37374 return ninfo->dispatcher_resolver;
37376 /* node is going to be an alias, so remove the finalized bit. */
37377 node->definition = false;
37379 /* The first version in the chain corresponds to the default version. */
37380 ninfo->dispatcher_resolver = resolver
37381 = make_resolver_func (ninfo->next->this_node->decl, node->decl, &empty_bb);
37383 if (TARGET_DEBUG_TARGET)
37384 fprintf (stderr, "rs6000_generate_version_dispatcher_body, %s\n",
37385 get_decl_name (resolver));
37387 push_cfun (DECL_STRUCT_FUNCTION (resolver));
37388 auto_vec<tree, 2> fn_ver_vec;
37390 for (struct cgraph_function_version_info *vinfo = ninfo->next;
37391 vinfo;
37392 vinfo = vinfo->next)
37394 struct cgraph_node *version = vinfo->this_node;
37395 /* Check for virtual functions here again, as by this time it should
37396 have been determined if this function needs a vtable index or
37397 not. This happens for methods in derived classes that override
37398 virtual methods in base classes but are not explicitly marked as
37399 virtual. */
37400 if (DECL_VINDEX (version->decl))
37401 sorry ("Virtual function multiversioning not supported");
37403 fn_ver_vec.safe_push (version->decl);
37406 dispatch_function_versions (resolver, &fn_ver_vec, &empty_bb);
37407 cgraph_edge::rebuild_edges ();
37408 pop_cfun ();
37409 return resolver;
37413 /* Hook to determine if one function can safely inline another. */
37415 static bool
37416 rs6000_can_inline_p (tree caller, tree callee)
37418 bool ret = false;
37419 tree caller_tree = DECL_FUNCTION_SPECIFIC_TARGET (caller);
37420 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (callee);
37422 /* If callee has no option attributes, then it is ok to inline. */
37423 if (!callee_tree)
37424 ret = true;
37426 /* If caller has no option attributes, but callee does then it is not ok to
37427 inline. */
37428 else if (!caller_tree)
37429 ret = false;
37431 else
37433 struct cl_target_option *caller_opts = TREE_TARGET_OPTION (caller_tree);
37434 struct cl_target_option *callee_opts = TREE_TARGET_OPTION (callee_tree);
37436 /* Callee's options should be a subset of the caller's, i.e. a vsx function
37437 can inline an altivec function but a non-vsx function can't inline a
37438 vsx function. */
37439 if ((caller_opts->x_rs6000_isa_flags & callee_opts->x_rs6000_isa_flags)
37440 == callee_opts->x_rs6000_isa_flags)
37441 ret = true;
37444 if (TARGET_DEBUG_TARGET)
37445 fprintf (stderr, "rs6000_can_inline_p: caller %s, callee %s, %s inline\n",
37446 get_decl_name (caller), get_decl_name (callee),
37447 (ret ? "can" : "cannot"));
37449 return ret;
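/* Illustrative example with hypothetical functions: the subset test above
   allows inlining when the caller's ISA flags cover the callee's.

     __attribute__ ((target ("vsx")))
     double caller (double *p);

     __attribute__ ((target ("altivec")))
     double callee (double *p);

   Since -mvsx implies -maltivec, caller may inline callee, but a plain
   non-vsx function could not inline a vsx one.  */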
37452 /* Allocate a stack temp and fixup the address so it meets the particular
37453 memory requirements (either offsettable or REG+REG addressing). */
37455 rtx
37456 rs6000_allocate_stack_temp (machine_mode mode,
37457 bool offsettable_p,
37458 bool reg_reg_p)
37460 rtx stack = assign_stack_temp (mode, GET_MODE_SIZE (mode));
37461 rtx addr = XEXP (stack, 0);
37462 int strict_p = (reload_in_progress || reload_completed);
37464 if (!legitimate_indirect_address_p (addr, strict_p))
37466 if (offsettable_p
37467 && !rs6000_legitimate_offset_address_p (mode, addr, strict_p, true))
37468 stack = replace_equiv_address (stack, copy_addr_to_reg (addr));
37470 else if (reg_reg_p && !legitimate_indexed_address_p (addr, strict_p))
37471 stack = replace_equiv_address (stack, copy_addr_to_reg (addr));
37474 return stack;
37477 /* Given a memory reference, if it is not a reg or reg+reg addressing, convert
37478 to such a form to deal with memory reference instructions like STFIWX that
37479 only take reg+reg addressing. */
37481 rtx
37482 rs6000_address_for_fpconvert (rtx x)
37484 int strict_p = (reload_in_progress || reload_completed);
37485 rtx addr;
37487 gcc_assert (MEM_P (x));
37488 addr = XEXP (x, 0);
37489 if (! legitimate_indirect_address_p (addr, strict_p)
37490 && ! legitimate_indexed_address_p (addr, strict_p))
37492 if (GET_CODE (addr) == PRE_INC || GET_CODE (addr) == PRE_DEC)
37494 rtx reg = XEXP (addr, 0);
37495 HOST_WIDE_INT size = GET_MODE_SIZE (GET_MODE (x));
37496 rtx size_rtx = GEN_INT ((GET_CODE (addr) == PRE_DEC) ? -size : size);
37497 gcc_assert (REG_P (reg));
37498 emit_insn (gen_add3_insn (reg, reg, size_rtx));
37499 addr = reg;
37501 else if (GET_CODE (addr) == PRE_MODIFY)
37503 rtx reg = XEXP (addr, 0);
37504 rtx expr = XEXP (addr, 1);
37505 gcc_assert (REG_P (reg));
37506 gcc_assert (GET_CODE (expr) == PLUS);
37507 emit_insn (gen_add3_insn (reg, XEXP (expr, 0), XEXP (expr, 1)));
37508 addr = reg;
37511 x = replace_equiv_address (x, copy_addr_to_reg (addr));
37514 return x;
37517 /* Given a memory reference, if it is not in the form for altivec memory
37518 reference instructions (i.e. reg or reg+reg addressing with AND of -16),
37519 convert to the altivec format. */
37521 rtx
37522 rs6000_address_for_altivec (rtx x)
37524 gcc_assert (MEM_P (x));
37525 if (!altivec_indexed_or_indirect_operand (x, GET_MODE (x)))
37527 rtx addr = XEXP (x, 0);
37528 int strict_p = (reload_in_progress || reload_completed);
37530 if (!legitimate_indexed_address_p (addr, strict_p)
37531 && !legitimate_indirect_address_p (addr, strict_p))
37532 addr = copy_to_mode_reg (Pmode, addr);
37534 addr = gen_rtx_AND (Pmode, addr, GEN_INT (-16));
37535 x = change_address (x, GET_MODE (x), addr);
37538 return x;
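/* For example, a V4SImode reference whose address is (plus (reg) (const))
   is first copied into a base register and then rewritten as

     (mem:V4SI (and (reg) (const_int -16)))

   mirroring the way lvx/stvx ignore the low four address bits; the mode of
   the AND follows Pmode.  */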
37541 /* Implement TARGET_LEGITIMATE_CONSTANT_P.
37543 On the RS/6000, all integer constants are acceptable, though most won't be
37544 valid for particular insns. Only easy FP constants are acceptable. */
37546 static bool
37547 rs6000_legitimate_constant_p (machine_mode mode, rtx x)
37549 if (TARGET_ELF && tls_referenced_p (x))
37550 return false;
37552 return ((GET_CODE (x) != CONST_DOUBLE && GET_CODE (x) != CONST_VECTOR)
37553 || GET_MODE (x) == VOIDmode
37554 || (TARGET_POWERPC64 && mode == DImode)
37555 || easy_fp_constant (x, mode)
37556 || easy_vector_constant (x, mode));
37560 /* Return TRUE iff the sequence ending in LAST sets the static chain. */
37562 static bool
37563 chain_already_loaded (rtx_insn *last)
37565 for (; last != NULL; last = PREV_INSN (last))
37567 if (NONJUMP_INSN_P (last))
37569 rtx patt = PATTERN (last);
37571 if (GET_CODE (patt) == SET)
37573 rtx lhs = XEXP (patt, 0);
37575 if (REG_P (lhs) && REGNO (lhs) == STATIC_CHAIN_REGNUM)
37576 return true;
37580 return false;
37583 /* Expand code to perform a call under the AIX or ELFv2 ABI. */
37585 void
37586 rs6000_call_aix (rtx value, rtx func_desc, rtx flag, rtx cookie)
37588 const bool direct_call_p
37589 = GET_CODE (func_desc) == SYMBOL_REF && SYMBOL_REF_FUNCTION_P (func_desc);
37590 rtx toc_reg = gen_rtx_REG (Pmode, TOC_REGNUM);
37591 rtx toc_load = NULL_RTX;
37592 rtx toc_restore = NULL_RTX;
37593 rtx func_addr;
37594 rtx abi_reg = NULL_RTX;
37595 rtx call[4];
37596 int n_call;
37597 rtx insn;
37599 /* Handle longcall attributes. */
37600 if (INTVAL (cookie) & CALL_LONG)
37601 func_desc = rs6000_longcall_ref (func_desc);
37603 /* Handle indirect calls. */
37604 if (GET_CODE (func_desc) != SYMBOL_REF
37605 || (DEFAULT_ABI == ABI_AIX && !SYMBOL_REF_FUNCTION_P (func_desc)))
37607 /* Save the TOC into its reserved slot before the call,
37608 and prepare to restore it after the call. */
37609 rtx stack_ptr = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
37610 rtx stack_toc_offset = GEN_INT (RS6000_TOC_SAVE_SLOT);
37611 rtx stack_toc_mem = gen_frame_mem (Pmode,
37612 gen_rtx_PLUS (Pmode, stack_ptr,
37613 stack_toc_offset));
37614 rtx stack_toc_unspec = gen_rtx_UNSPEC (Pmode,
37615 gen_rtvec (1, stack_toc_offset),
37616 UNSPEC_TOCSLOT);
37617 toc_restore = gen_rtx_SET (toc_reg, stack_toc_unspec);
37619 /* Can we optimize saving the TOC in the prologue or
37620 do we need to do it at every call? */
37621 if (TARGET_SAVE_TOC_INDIRECT && !cfun->calls_alloca)
37622 cfun->machine->save_toc_in_prologue = true;
37623 else
37625 MEM_VOLATILE_P (stack_toc_mem) = 1;
37626 emit_move_insn (stack_toc_mem, toc_reg);
37629 if (DEFAULT_ABI == ABI_ELFv2)
37631 /* A function pointer in the ELFv2 ABI is just a plain address, but
37632 the ABI requires it to be loaded into r12 before the call. */
37633 func_addr = gen_rtx_REG (Pmode, 12);
37634 emit_move_insn (func_addr, func_desc);
37635 abi_reg = func_addr;
37637 else
37639 /* A function pointer under AIX is a pointer to a data area whose
37640 first word contains the actual address of the function, whose
37641 second word contains a pointer to its TOC, and whose third word
37642 contains a value to place in the static chain register (r11).
37643 Note that if we load the static chain, our "trampoline" need
37644 not have any executable code. */
37646 /* Load up address of the actual function. */
37647 func_desc = force_reg (Pmode, func_desc);
37648 func_addr = gen_reg_rtx (Pmode);
37649 emit_move_insn (func_addr, gen_rtx_MEM (Pmode, func_desc));
37651 /* Prepare to load the TOC of the called function. Note that the
37652 TOC load must happen immediately before the actual call so
37653 that unwinding the TOC registers works correctly. See the
37654 comment in frob_update_context. */
37655 rtx func_toc_offset = GEN_INT (GET_MODE_SIZE (Pmode));
37656 rtx func_toc_mem = gen_rtx_MEM (Pmode,
37657 gen_rtx_PLUS (Pmode, func_desc,
37658 func_toc_offset));
37659 toc_load = gen_rtx_USE (VOIDmode, func_toc_mem);
37661 /* If we have a static chain, load it up. But, if the call was
37662 originally direct, the 3rd word has not been written since no
37663 trampoline has been built, so we ought not to load it, lest we
37664 override a static chain value. */
37665 if (!direct_call_p
37666 && TARGET_POINTERS_TO_NESTED_FUNCTIONS
37667 && !chain_already_loaded (get_current_sequence ()->next->last))
37669 rtx sc_reg = gen_rtx_REG (Pmode, STATIC_CHAIN_REGNUM);
37670 rtx func_sc_offset = GEN_INT (2 * GET_MODE_SIZE (Pmode));
37671 rtx func_sc_mem = gen_rtx_MEM (Pmode,
37672 gen_rtx_PLUS (Pmode, func_desc,
37673 func_sc_offset));
37674 emit_move_insn (sc_reg, func_sc_mem);
37675 abi_reg = sc_reg;
37679 else
37681 /* Direct calls use the TOC: for local calls, the callee will
37682 assume the TOC register is set; for non-local calls, the
37683 PLT stub needs the TOC register. */
37684 abi_reg = toc_reg;
37685 func_addr = func_desc;
37688 /* Create the call. */
37689 call[0] = gen_rtx_CALL (VOIDmode, gen_rtx_MEM (SImode, func_addr), flag);
37690 if (value != NULL_RTX)
37691 call[0] = gen_rtx_SET (value, call[0]);
37692 n_call = 1;
37694 if (toc_load)
37695 call[n_call++] = toc_load;
37696 if (toc_restore)
37697 call[n_call++] = toc_restore;
37699 call[n_call++] = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (Pmode, LR_REGNO));
37701 insn = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (n_call, call));
37702 insn = emit_call_insn (insn);
37704 /* Mention all registers defined by the ABI to hold information
37705 as uses in CALL_INSN_FUNCTION_USAGE. */
37706 if (abi_reg)
37707 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), abi_reg);
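/* A sketch of the sequence an indirect call gets under the AIX ABI, where
   the descriptor register rD points at a three-word function descriptor
   (register numbers and offsets are illustrative, assuming 64-bit Pmode):

     std   r2,40(r1)       save caller's TOC in its reserved slot
     ld    r0,0(rD)        function address from descriptor word 0
     ld    r11,16(rD)      static chain from word 2, if needed
     mtctr r0
     ld    r2,8(rD)        callee's TOC from word 1, right before the call
     bctrl
     ld    r2,40(r1)       restore caller's TOC

   The TOC save can instead be hoisted into the prologue when
   TARGET_SAVE_TOC_INDIRECT allows it.  */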
37710 /* Expand code to perform a sibling call under the AIX or ELFv2 ABI. */
37712 void
37713 rs6000_sibcall_aix (rtx value, rtx func_desc, rtx flag, rtx cookie)
37715 rtx call[2];
37716 rtx insn;
37718 gcc_assert (INTVAL (cookie) == 0);
37720 /* Create the call. */
37721 call[0] = gen_rtx_CALL (VOIDmode, gen_rtx_MEM (SImode, func_desc), flag);
37722 if (value != NULL_RTX)
37723 call[0] = gen_rtx_SET (value, call[0]);
37725 call[1] = simple_return_rtx;
37727 insn = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (2, call));
37728 insn = emit_call_insn (insn);
37730 /* Note use of the TOC register. */
37731 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), gen_rtx_REG (Pmode, TOC_REGNUM));
37734 /* Return whether we need to always update the saved TOC pointer when we update
37735 the stack pointer. */
37737 static bool
37738 rs6000_save_toc_in_prologue_p (void)
37740 return (cfun && cfun->machine && cfun->machine->save_toc_in_prologue);
37743 #ifdef HAVE_GAS_HIDDEN
37744 # define USE_HIDDEN_LINKONCE 1
37745 #else
37746 # define USE_HIDDEN_LINKONCE 0
37747 #endif
37749 /* Fills in the label name that should be used for a 476 link stack thunk. */
37751 void
37752 get_ppc476_thunk_name (char name[32])
37754 gcc_assert (TARGET_LINK_STACK);
37756 if (USE_HIDDEN_LINKONCE)
37757 sprintf (name, "__ppc476.get_thunk");
37758 else
37759 ASM_GENERATE_INTERNAL_LABEL (name, "LPPC476_", 0);
37762 /* This function emits the simple thunk routine that is used to preserve
37763 the link stack on the 476 cpu. */
37765 static void rs6000_code_end (void) ATTRIBUTE_UNUSED;
37766 static void
37767 rs6000_code_end (void)
37769 char name[32];
37770 tree decl;
37772 if (!TARGET_LINK_STACK)
37773 return;
37775 get_ppc476_thunk_name (name);
37777 decl = build_decl (BUILTINS_LOCATION, FUNCTION_DECL, get_identifier (name),
37778 build_function_type_list (void_type_node, NULL_TREE));
37779 DECL_RESULT (decl) = build_decl (BUILTINS_LOCATION, RESULT_DECL,
37780 NULL_TREE, void_type_node);
37781 TREE_PUBLIC (decl) = 1;
37782 TREE_STATIC (decl) = 1;
37784 #if RS6000_WEAK
37785 if (USE_HIDDEN_LINKONCE && !TARGET_XCOFF)
37787 cgraph_node::create (decl)->set_comdat_group (DECL_ASSEMBLER_NAME (decl));
37788 targetm.asm_out.unique_section (decl, 0);
37789 switch_to_section (get_named_section (decl, NULL, 0));
37790 DECL_WEAK (decl) = 1;
37791 ASM_WEAKEN_DECL (asm_out_file, decl, name, 0);
37792 targetm.asm_out.globalize_label (asm_out_file, name);
37793 targetm.asm_out.assemble_visibility (decl, VISIBILITY_HIDDEN);
37794 ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
37796 else
37797 #endif
37799 switch_to_section (text_section);
37800 ASM_OUTPUT_LABEL (asm_out_file, name);
37803 DECL_INITIAL (decl) = make_node (BLOCK);
37804 current_function_decl = decl;
37805 allocate_struct_function (decl, false);
37806 init_function_start (decl);
37807 first_function_block_is_cold = false;
37808 /* Make sure unwind info is emitted for the thunk if needed. */
37809 final_start_function (emit_barrier (), asm_out_file, 1);
37811 fputs ("\tblr\n", asm_out_file);
37813 final_end_function ();
37814 init_insn_lengths ();
37815 free_after_compilation (cfun);
37816 set_cfun (NULL);
37817 current_function_decl = NULL;
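/* The emitted thunk body is simply a labelled return:

     __ppc476.get_thunk:
             blr

   wrapped in whatever weak/hidden/comdat directives the configuration
   supports (see the USE_HIDDEN_LINKONCE path above).  */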
37820 /* Add r30 to hard reg set if the prologue sets it up and it is not
37821 pic_offset_table_rtx. */
37823 static void
37824 rs6000_set_up_by_prologue (struct hard_reg_set_container *set)
37826 if (!TARGET_SINGLE_PIC_BASE
37827 && TARGET_TOC
37828 && TARGET_MINIMAL_TOC
37829 && !constant_pool_empty_p ())
37830 add_to_hard_reg_set (&set->set, Pmode, RS6000_PIC_OFFSET_TABLE_REGNUM);
37831 if (cfun->machine->split_stack_argp_used)
37832 add_to_hard_reg_set (&set->set, Pmode, 12);
37836 /* Helper function for rs6000_split_logical to emit a logical instruction after
36837 splitting the operation to single GPR registers.
37839 DEST is the destination register.
37840 OP1 and OP2 are the input source registers.
37841 CODE is the base operation (AND, IOR, XOR, NOT).
37842 MODE is the machine mode.
37843 If COMPLEMENT_FINAL_P is true, wrap the whole operation with NOT.
37844 If COMPLEMENT_OP1_P is true, wrap operand1 with NOT.
37845 If COMPLEMENT_OP2_P is true, wrap operand2 with NOT. */
37847 static void
37848 rs6000_split_logical_inner (rtx dest,
37849 rtx op1,
37850 rtx op2,
37851 enum rtx_code code,
37852 machine_mode mode,
37853 bool complement_final_p,
37854 bool complement_op1_p,
37855 bool complement_op2_p)
37857 rtx bool_rtx;
37859 /* Optimize AND of 0/0xffffffff and IOR/XOR of 0. */
37860 if (op2 && GET_CODE (op2) == CONST_INT
37861 && (mode == SImode || (mode == DImode && TARGET_POWERPC64))
37862 && !complement_final_p && !complement_op1_p && !complement_op2_p)
37864 HOST_WIDE_INT mask = GET_MODE_MASK (mode);
37865 HOST_WIDE_INT value = INTVAL (op2) & mask;
37867 /* Optimize AND of 0 to just set 0. Optimize AND of -1 to be a move. */
37868 if (code == AND)
37870 if (value == 0)
37872 emit_insn (gen_rtx_SET (dest, const0_rtx));
37873 return;
37876 else if (value == mask)
37878 if (!rtx_equal_p (dest, op1))
37879 emit_insn (gen_rtx_SET (dest, op1));
37880 return;
37884 /* Optimize IOR/XOR of 0 to be a simple move. Split large operations
37885 into separate ORI/ORIS or XORI/XORIS instructions. */
37886 else if (code == IOR || code == XOR)
37888 if (value == 0)
37890 if (!rtx_equal_p (dest, op1))
37891 emit_insn (gen_rtx_SET (dest, op1));
37892 return;
37897 if (code == AND && mode == SImode
37898 && !complement_final_p && !complement_op1_p && !complement_op2_p)
37900 emit_insn (gen_andsi3 (dest, op1, op2));
37901 return;
37904 if (complement_op1_p)
37905 op1 = gen_rtx_NOT (mode, op1);
37907 if (complement_op2_p)
37908 op2 = gen_rtx_NOT (mode, op2);
37910 /* For canonical RTL, if only one arm is inverted it is the first. */
37911 if (!complement_op1_p && complement_op2_p)
37912 std::swap (op1, op2);
37914 bool_rtx = ((code == NOT)
37915 ? gen_rtx_NOT (mode, op1)
37916 : gen_rtx_fmt_ee (code, mode, op1, op2));
37918 if (complement_final_p)
37919 bool_rtx = gen_rtx_NOT (mode, bool_rtx);
37921 emit_insn (gen_rtx_SET (dest, bool_rtx));
37924 /* Split a DImode AND/IOR/XOR with a constant on a 32-bit system. These
37925 operations are split immediately during RTL generation to allow for more
37926 optimizations of the AND/IOR/XOR.
37928 OPERANDS is an array containing the destination and two input operands.
37929 CODE is the base operation (AND, IOR, XOR, NOT).
37931 If COMPLEMENT_FINAL_P is true, wrap the whole operation with NOT.
37932 If COMPLEMENT_OP1_P is true, wrap operand1 with NOT.
37933 If COMPLEMENT_OP2_P is true, wrap operand2 with NOT. */
37937 static void
37938 rs6000_split_logical_di (rtx operands[3],
37939 enum rtx_code code,
37940 bool complement_final_p,
37941 bool complement_op1_p,
37942 bool complement_op2_p)
37944 const HOST_WIDE_INT lower_32bits = HOST_WIDE_INT_C(0xffffffff);
37945 const HOST_WIDE_INT upper_32bits = ~ lower_32bits;
37946 const HOST_WIDE_INT sign_bit = HOST_WIDE_INT_C(0x80000000);
37947 enum hi_lo { hi = 0, lo = 1 };
37948 rtx op0_hi_lo[2], op1_hi_lo[2], op2_hi_lo[2];
37949 size_t i;
37951 op0_hi_lo[hi] = gen_highpart (SImode, operands[0]);
37952 op1_hi_lo[hi] = gen_highpart (SImode, operands[1]);
37953 op0_hi_lo[lo] = gen_lowpart (SImode, operands[0]);
37954 op1_hi_lo[lo] = gen_lowpart (SImode, operands[1]);
37956 if (code == NOT)
37957 op2_hi_lo[hi] = op2_hi_lo[lo] = NULL_RTX;
37958 else
37960 if (GET_CODE (operands[2]) != CONST_INT)
37962 op2_hi_lo[hi] = gen_highpart_mode (SImode, DImode, operands[2]);
37963 op2_hi_lo[lo] = gen_lowpart (SImode, operands[2]);
37965 else
37967 HOST_WIDE_INT value = INTVAL (operands[2]);
37968 HOST_WIDE_INT value_hi_lo[2];
37970 gcc_assert (!complement_final_p);
37971 gcc_assert (!complement_op1_p);
37972 gcc_assert (!complement_op2_p);
37974 value_hi_lo[hi] = value >> 32;
37975 value_hi_lo[lo] = value & lower_32bits;
37977 for (i = 0; i < 2; i++)
37979 HOST_WIDE_INT sub_value = value_hi_lo[i];
37981 if (sub_value & sign_bit)
37982 sub_value |= upper_32bits;
37984 op2_hi_lo[i] = GEN_INT (sub_value);
37986 /* If this is an AND instruction, check to see if we need to load
37987 the value in a register. */
37988 if (code == AND && sub_value != -1 && sub_value != 0
37989 && !and_operand (op2_hi_lo[i], SImode))
37990 op2_hi_lo[i] = force_reg (SImode, op2_hi_lo[i]);
37995 for (i = 0; i < 2; i++)
37997 /* Split large IOR/XOR operations. */
37998 if ((code == IOR || code == XOR)
37999 && GET_CODE (op2_hi_lo[i]) == CONST_INT
38000 && !complement_final_p
38001 && !complement_op1_p
38002 && !complement_op2_p
38003 && !logical_const_operand (op2_hi_lo[i], SImode))
38005 HOST_WIDE_INT value = INTVAL (op2_hi_lo[i]);
38006 HOST_WIDE_INT hi_16bits = value & HOST_WIDE_INT_C(0xffff0000);
38007 HOST_WIDE_INT lo_16bits = value & HOST_WIDE_INT_C(0x0000ffff);
38008 rtx tmp = gen_reg_rtx (SImode);
38010 /* Make sure the constant is sign extended. */
38011 if ((hi_16bits & sign_bit) != 0)
38012 hi_16bits |= upper_32bits;
38014 rs6000_split_logical_inner (tmp, op1_hi_lo[i], GEN_INT (hi_16bits),
38015 code, SImode, false, false, false);
38017 rs6000_split_logical_inner (op0_hi_lo[i], tmp, GEN_INT (lo_16bits),
38018 code, SImode, false, false, false);
38020 else
38021 rs6000_split_logical_inner (op0_hi_lo[i], op1_hi_lo[i], op2_hi_lo[i],
38022 code, SImode, complement_final_p,
38023 complement_op1_p, complement_op2_p);
38026 return;
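/* Illustrative example on a 32-bit target: XORing a DImode register with
   0x12345678 splits into two SImode halves.  The high word is an XOR with
   0, so it degenerates into a move; the low word's constant is not a
   single logical constant, so it is split again into

     xoris dl,sl,0x1234
     xori  dl,dl,0x5678

   with register names illustrative.  */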
38029 /* Split the insns that make up boolean operations operating on multiple GPR
38030 registers. The boolean MD patterns ensure that the inputs either are
38031 exactly the same as the output registers, or there is no overlap.
38033 OPERANDS is an array containing the destination and two input operands.
38034 CODE is the base operation (AND, IOR, XOR, NOT).
38035 If COMPLEMENT_FINAL_P is true, wrap the whole operation with NOT.
38036 If COMPLEMENT_OP1_P is true, wrap operand1 with NOT.
38037 If COMPLEMENT_OP2_P is true, wrap operand2 with NOT. */
38039 void
38040 rs6000_split_logical (rtx operands[3],
38041 enum rtx_code code,
38042 bool complement_final_p,
38043 bool complement_op1_p,
38044 bool complement_op2_p)
38046 machine_mode mode = GET_MODE (operands[0]);
38047 machine_mode sub_mode;
38048 rtx op0, op1, op2;
38049 int sub_size, regno0, regno1, nregs, i;
38051 /* If this is DImode, use the specialized version that can run before
38052 register allocation. */
38053 if (mode == DImode && !TARGET_POWERPC64)
38055 rs6000_split_logical_di (operands, code, complement_final_p,
38056 complement_op1_p, complement_op2_p);
38057 return;
38060 op0 = operands[0];
38061 op1 = operands[1];
38062 op2 = (code == NOT) ? NULL_RTX : operands[2];
38063 sub_mode = (TARGET_POWERPC64) ? DImode : SImode;
38064 sub_size = GET_MODE_SIZE (sub_mode);
38065 regno0 = REGNO (op0);
38066 regno1 = REGNO (op1);
38068 gcc_assert (reload_completed);
38069 gcc_assert (IN_RANGE (regno0, FIRST_GPR_REGNO, LAST_GPR_REGNO));
38070 gcc_assert (IN_RANGE (regno1, FIRST_GPR_REGNO, LAST_GPR_REGNO));
38072 nregs = rs6000_hard_regno_nregs[(int)mode][regno0];
38073 gcc_assert (nregs > 1);
38075 if (op2 && REG_P (op2))
38076 gcc_assert (IN_RANGE (REGNO (op2), FIRST_GPR_REGNO, LAST_GPR_REGNO));
38078 for (i = 0; i < nregs; i++)
38080 int offset = i * sub_size;
38081 rtx sub_op0 = simplify_subreg (sub_mode, op0, mode, offset);
38082 rtx sub_op1 = simplify_subreg (sub_mode, op1, mode, offset);
38083 rtx sub_op2 = ((code == NOT)
38084 ? NULL_RTX
38085 : simplify_subreg (sub_mode, op2, mode, offset));
38087 rs6000_split_logical_inner (sub_op0, sub_op1, sub_op2, code, sub_mode,
38088 complement_final_p, complement_op1_p,
38089 complement_op2_p);
38092 return;
38096 /* Return true if the peephole2 can combine an addis instruction with a load
38097 that has an offset, forming a pair that can be fused together on a power8. */
38100 bool
38101 fusion_gpr_load_p (rtx addis_reg, /* register set via addis. */
38102 rtx addis_value, /* addis value. */
38103 rtx target, /* target register that is loaded. */
38104 rtx mem) /* bottom part of the memory addr. */
38106 rtx addr;
38107 rtx base_reg;
38109 /* Validate arguments. */
38110 if (!base_reg_operand (addis_reg, GET_MODE (addis_reg)))
38111 return false;
38113 if (!base_reg_operand (target, GET_MODE (target)))
38114 return false;
38116 if (!fusion_gpr_addis (addis_value, GET_MODE (addis_value)))
38117 return false;
38119 /* Allow sign/zero extension. */
38120 if (GET_CODE (mem) == ZERO_EXTEND
38121 || (GET_CODE (mem) == SIGN_EXTEND && TARGET_P8_FUSION_SIGN))
38122 mem = XEXP (mem, 0);
38124 if (!MEM_P (mem))
38125 return false;
38127 if (!fusion_gpr_mem_load (mem, GET_MODE (mem)))
38128 return false;
38130 addr = XEXP (mem, 0); /* either PLUS or LO_SUM. */
38131 if (GET_CODE (addr) != PLUS && GET_CODE (addr) != LO_SUM)
38132 return false;
38134 /* Validate that the register used to load the high value is either the
38135 register being loaded, or we can safely replace its use.
38137 This function is only called from the peephole2 pass and we assume that
38138 there are 2 instructions in the peephole (addis and load), so we want to
38139 check if the target register was not used in the memory address and the
38140 register to hold the addis result is dead after the peephole. */
38141 if (REGNO (addis_reg) != REGNO (target))
38143 if (reg_mentioned_p (target, mem))
38144 return false;
38146 if (!peep2_reg_dead_p (2, addis_reg))
38147 return false;
38149 /* If the target register being loaded is the stack pointer, we must
38150 avoid loading any other value into it, even temporarily. */
38151 if (REG_P (target) && REGNO (target) == STACK_POINTER_REGNUM)
38152 return false;
38155 base_reg = XEXP (addr, 0);
38156 return REGNO (addis_reg) == REGNO (base_reg);
38159 /* During the peephole2 pass, adjust and expand the insns for a load fusion
38160 sequence. We adjust the addis register to use the target register. If the
38161 load sign extends, we adjust the code to do the zero extending load, and an
38162 explicit sign extension later since the fusion only covers zero extending
38163 loads.
38165 The operands are:
38166 operands[0] register set with addis (to be replaced with target)
38167 operands[1] value set via addis
38168 operands[2] target register being loaded
38169 operands[3] D-form memory reference using operands[0]. */
38171 void
38172 expand_fusion_gpr_load (rtx *operands)
38174 rtx addis_value = operands[1];
38175 rtx target = operands[2];
38176 rtx orig_mem = operands[3];
38177 rtx new_addr, new_mem, orig_addr, offset;
38178 enum rtx_code plus_or_lo_sum;
38179 machine_mode target_mode = GET_MODE (target);
38180 machine_mode extend_mode = target_mode;
38181 machine_mode ptr_mode = Pmode;
38182 enum rtx_code extend = UNKNOWN;
38184 if (GET_CODE (orig_mem) == ZERO_EXTEND
38185 || (TARGET_P8_FUSION_SIGN && GET_CODE (orig_mem) == SIGN_EXTEND))
38187 extend = GET_CODE (orig_mem);
38188 orig_mem = XEXP (orig_mem, 0);
38189 target_mode = GET_MODE (orig_mem);
38192 gcc_assert (MEM_P (orig_mem));
38194 orig_addr = XEXP (orig_mem, 0);
38195 plus_or_lo_sum = GET_CODE (orig_addr);
38196 gcc_assert (plus_or_lo_sum == PLUS || plus_or_lo_sum == LO_SUM);
38198 offset = XEXP (orig_addr, 1);
38199 new_addr = gen_rtx_fmt_ee (plus_or_lo_sum, ptr_mode, addis_value, offset);
38200 new_mem = replace_equiv_address_nv (orig_mem, new_addr, false);
38202 if (extend != UNKNOWN)
38203 new_mem = gen_rtx_fmt_e (ZERO_EXTEND, extend_mode, new_mem);
38205 new_mem = gen_rtx_UNSPEC (extend_mode, gen_rtvec (1, new_mem),
38206 UNSPEC_FUSION_GPR);
38207 emit_insn (gen_rtx_SET (target, new_mem));
38209 if (extend == SIGN_EXTEND)
38211 int sub_off = ((BYTES_BIG_ENDIAN)
38212 ? GET_MODE_SIZE (extend_mode) - GET_MODE_SIZE (target_mode)
38213 : 0);
38214 rtx sign_reg
38215 = simplify_subreg (target_mode, target, extend_mode, sub_off);
38217 emit_insn (gen_rtx_SET (target,
38218 gen_rtx_SIGN_EXTEND (extend_mode, sign_reg)));
38221 return;
38224 /* Emit the addis instruction that will be part of a fused instruction
38225 sequence. */
38227 void
38228 emit_fusion_addis (rtx target, rtx addis_value, const char *comment,
38229 const char *mode_name)
38231 rtx fuse_ops[10];
38232 char insn_template[80];
38233 const char *addis_str = NULL;
38234 const char *comment_str = ASM_COMMENT_START;
38236 if (*comment_str == ' ')
38237 comment_str++;
38239 /* Emit the addis instruction. */
38240 fuse_ops[0] = target;
38241 if (satisfies_constraint_L (addis_value))
38243 fuse_ops[1] = addis_value;
38244 addis_str = "lis %0,%v1";
38247 else if (GET_CODE (addis_value) == PLUS)
38249 rtx op0 = XEXP (addis_value, 0);
38250 rtx op1 = XEXP (addis_value, 1);
38252 if (REG_P (op0) && CONST_INT_P (op1)
38253 && satisfies_constraint_L (op1))
38255 fuse_ops[1] = op0;
38256 fuse_ops[2] = op1;
38257 addis_str = "addis %0,%1,%v2";
38261 else if (GET_CODE (addis_value) == HIGH)
38263 rtx value = XEXP (addis_value, 0);
38264 if (GET_CODE (value) == UNSPEC && XINT (value, 1) == UNSPEC_TOCREL)
38266 fuse_ops[1] = XVECEXP (value, 0, 0); /* symbol ref. */
38267 fuse_ops[2] = XVECEXP (value, 0, 1); /* TOC register. */
38268 if (TARGET_ELF)
38269 addis_str = "addis %0,%2,%1@toc@ha";
38271 else if (TARGET_XCOFF)
38272 addis_str = "addis %0,%1@u(%2)";
38274 else
38275 gcc_unreachable ();
38278 else if (GET_CODE (value) == PLUS)
38280 rtx op0 = XEXP (value, 0);
38281 rtx op1 = XEXP (value, 1);
38283 if (GET_CODE (op0) == UNSPEC
38284 && XINT (op0, 1) == UNSPEC_TOCREL
38285 && CONST_INT_P (op1))
38287 fuse_ops[1] = XVECEXP (op0, 0, 0); /* symbol ref. */
38288 fuse_ops[2] = XVECEXP (op0, 0, 1); /* TOC register. */
38289 fuse_ops[3] = op1;
38290 if (TARGET_ELF)
38291 addis_str = "addis %0,%2,%1+%3@toc@ha";
38293 else if (TARGET_XCOFF)
38294 addis_str = "addis %0,%1+%3@u(%2)";
38296 else
38297 gcc_unreachable ();
38301 else if (satisfies_constraint_L (value))
38303 fuse_ops[1] = value;
38304 addis_str = "lis %0,%v1";
38307 else if (TARGET_ELF && !TARGET_POWERPC64 && CONSTANT_P (value))
38309 fuse_ops[1] = value;
38310 addis_str = "lis %0,%1@ha";
38314 if (!addis_str)
38315 fatal_insn ("Could not generate addis value for fusion", addis_value);
38317 sprintf (insn_template, "%s\t\t%s %s, type %s", addis_str, comment_str,
38318 comment, mode_name);
38319 output_asm_insn (insn_template, fuse_ops);
38322 /* Emit a D-form load or store instruction that is the second instruction
38323 of a fusion sequence. */
38325 void
38326 emit_fusion_load_store (rtx load_store_reg, rtx addis_reg, rtx offset,
38327 const char *insn_str)
38329 rtx fuse_ops[10];
38330 char insn_template[80];
38332 fuse_ops[0] = load_store_reg;
38333 fuse_ops[1] = addis_reg;
38335 if (CONST_INT_P (offset) && satisfies_constraint_I (offset))
38337 sprintf (insn_template, "%s %%0,%%2(%%1)", insn_str);
38338 fuse_ops[2] = offset;
38339 output_asm_insn (insn_template, fuse_ops);
38342 else if (GET_CODE (offset) == UNSPEC
38343 && XINT (offset, 1) == UNSPEC_TOCREL)
38345 if (TARGET_ELF)
38346 sprintf (insn_template, "%s %%0,%%2@toc@l(%%1)", insn_str);
38348 else if (TARGET_XCOFF)
38349 sprintf (insn_template, "%s %%0,%%2@l(%%1)", insn_str);
38351 else
38352 gcc_unreachable ();
38354 fuse_ops[2] = XVECEXP (offset, 0, 0);
38355 output_asm_insn (insn_template, fuse_ops);
38358 else if (GET_CODE (offset) == PLUS
38359 && GET_CODE (XEXP (offset, 0)) == UNSPEC
38360 && XINT (XEXP (offset, 0), 1) == UNSPEC_TOCREL
38361 && CONST_INT_P (XEXP (offset, 1)))
38363 rtx tocrel_unspec = XEXP (offset, 0);
38364 if (TARGET_ELF)
38365 sprintf (insn_template, "%s %%0,%%2+%%3@toc@l(%%1)", insn_str);
38367 else if (TARGET_XCOFF)
38368 sprintf (insn_template, "%s %%0,%%2+%%3@l(%%1)", insn_str);
38370 else
38371 gcc_unreachable ();
38373 fuse_ops[2] = XVECEXP (tocrel_unspec, 0, 0);
38374 fuse_ops[3] = XEXP (offset, 1);
38375 output_asm_insn (insn_template, fuse_ops);
38378 else if (TARGET_ELF && !TARGET_POWERPC64 && CONSTANT_P (offset))
38380 sprintf (insn_template, "%s %%0,%%2@l(%%1)", insn_str);
38382 fuse_ops[2] = offset;
38383 output_asm_insn (insn_template, fuse_ops);
38386 else
38387 fatal_insn ("Unable to generate load/store offset for fusion", offset);
38389 return;
38392 /* Wrap a TOC address that can be fused to indicate that special fusion
38393 processing is needed. */
38395 static rtx
38396 fusion_wrap_memory_address (rtx old_mem)
38398 rtx old_addr = XEXP (old_mem, 0);
38399 rtvec v = gen_rtvec (1, old_addr);
38400 rtx new_addr = gen_rtx_UNSPEC (Pmode, v, UNSPEC_FUSION_ADDIS);
38401 return replace_equiv_address_nv (old_mem, new_addr, false);
38404 /* Given an address, convert it into the addis and load offset parts. Addresses
38405 created during the peephole2 process look like:
38406 (lo_sum (high (unspec [(sym)] UNSPEC_TOCREL))
38407 (unspec [(...)] UNSPEC_TOCREL))
38409 Addresses created via toc fusion look like:
38410 (unspec [(unspec [(...)] UNSPEC_TOCREL)] UNSPEC_FUSION_ADDIS)) */
38412 static void
38413 fusion_split_address (rtx addr, rtx *p_hi, rtx *p_lo)
38415 rtx hi, lo;
38417 if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_FUSION_ADDIS)
38419 lo = XVECEXP (addr, 0, 0);
38420 hi = gen_rtx_HIGH (Pmode, lo);
38422 else if (GET_CODE (addr) == PLUS || GET_CODE (addr) == LO_SUM)
38424 hi = XEXP (addr, 0);
38425 lo = XEXP (addr, 1);
38427 else
38428 gcc_unreachable ();
38430 *p_hi = hi;
38431 *p_lo = lo;
38434 /* Return a string to fuse an addis instruction with a gpr load to the same
38435 register that the addis instruction set up. The address that is used
38436 is the logical address that was formed during peephole2:
38437 (lo_sum (high) (low-part))
38439 Or the address is the TOC address that is wrapped before register allocation:
38440 (unspec [(addr) (toc-reg)] UNSPEC_FUSION_ADDIS)
38442 The code is complicated, so we call output_asm_insn directly, and just
38443 return "". */
38445 const char *
38446 emit_fusion_gpr_load (rtx target, rtx mem)
38448 rtx addis_value;
38449 rtx addr;
38450 rtx load_offset;
38451 const char *load_str = NULL;
38452 const char *mode_name = NULL;
38453 machine_mode mode;
38455 if (GET_CODE (mem) == ZERO_EXTEND)
38456 mem = XEXP (mem, 0);
38458 gcc_assert (REG_P (target) && MEM_P (mem));
38460 addr = XEXP (mem, 0);
38461 fusion_split_address (addr, &addis_value, &load_offset);
38463 /* Now emit the load instruction to the same register. */
38464 mode = GET_MODE (mem);
38465 switch (mode)
38467 case QImode:
38468 mode_name = "char";
38469 load_str = "lbz";
38470 break;
38472 case HImode:
38473 mode_name = "short";
38474 load_str = "lhz";
38475 break;
38477 case SImode:
38478 case SFmode:
38479 mode_name = (mode == SFmode) ? "float" : "int";
38480 load_str = "lwz";
38481 break;
38483 case DImode:
38484 case DFmode:
38485 gcc_assert (TARGET_POWERPC64);
38486 mode_name = (mode == DFmode) ? "double" : "long";
38487 load_str = "ld";
38488 break;
38490 default:
38491 fatal_insn ("Bad GPR fusion", gen_rtx_SET (target, mem));
38494 /* Emit the addis instruction. */
38495 emit_fusion_addis (target, addis_value, "gpr load fusion", mode_name);
38497 /* Emit the D-form load instruction. */
38498 emit_fusion_load_store (target, target, load_offset, load_str);
38500 return "";
38504 /* Return true if the peephole2 can combine a load/store involving a
38505 combination of an addis instruction and the memory operation. This fusion
38506 was added in ISA 3.0 (power9) hardware. */
38508 bool
38509 fusion_p9_p (rtx addis_reg, /* register set via addis. */
38510 rtx addis_value, /* addis value. */
38511 rtx dest, /* destination (memory or register). */
38512 rtx src) /* source (register or memory). */
38514 rtx addr, mem, offset;
38515 machine_mode mode = GET_MODE (src);
38517 /* Validate arguments. */
38518 if (!base_reg_operand (addis_reg, GET_MODE (addis_reg)))
38519 return false;
38521 if (!fusion_gpr_addis (addis_value, GET_MODE (addis_value)))
38522 return false;
38524 /* Ignore extend operations that are part of the load. */
38525 if (GET_CODE (src) == FLOAT_EXTEND || GET_CODE (src) == ZERO_EXTEND)
38526 src = XEXP (src, 0);
38528 /* Test for memory<-register or register<-memory. */
38529 if (fpr_reg_operand (src, mode) || int_reg_operand (src, mode))
38531 if (!MEM_P (dest))
38532 return false;
38534 mem = dest;
38537 else if (MEM_P (src))
38539 if (!fpr_reg_operand (dest, mode) && !int_reg_operand (dest, mode))
38540 return false;
38542 mem = src;
38545 else
38546 return false;
38548 addr = XEXP (mem, 0); /* either PLUS or LO_SUM. */
38549 if (GET_CODE (addr) == PLUS)
38551 if (!rtx_equal_p (addis_reg, XEXP (addr, 0)))
38552 return false;
38554 return satisfies_constraint_I (XEXP (addr, 1));
38557 else if (GET_CODE (addr) == LO_SUM)
38559 if (!rtx_equal_p (addis_reg, XEXP (addr, 0)))
38560 return false;
38562 offset = XEXP (addr, 1);
38563 if (TARGET_XCOFF || (TARGET_ELF && TARGET_POWERPC64))
38564 return small_toc_ref (offset, GET_MODE (offset));
38566 else if (TARGET_ELF && !TARGET_POWERPC64)
38567 return CONSTANT_P (offset);
38570 return false;
38573 /* During the peephole2 pass, adjust and expand the insns for an extended fusion
38574 load sequence.
38576 The operands are:
38577 operands[0] register set with addis
38578 operands[1] value set via addis
38579 operands[2] target register being loaded
38580 operands[3] D-form memory reference using operands[0].
38582 This is similar to the fusion introduced with power8, except it scales to
38583 both loads/stores and does not require the result register to be the same as
38584 the base register. At the moment, we only do this if the register set with addis
38585 is dead. */
38587 void
38588 expand_fusion_p9_load (rtx *operands)
38590 rtx tmp_reg = operands[0];
38591 rtx addis_value = operands[1];
38592 rtx target = operands[2];
38593 rtx orig_mem = operands[3];
38594 rtx new_addr, new_mem, orig_addr, offset, set, clobber, insn;
38595 enum rtx_code plus_or_lo_sum;
38596 machine_mode target_mode = GET_MODE (target);
38597 machine_mode extend_mode = target_mode;
38598 machine_mode ptr_mode = Pmode;
38599 enum rtx_code extend = UNKNOWN;
38601 if (GET_CODE (orig_mem) == FLOAT_EXTEND || GET_CODE (orig_mem) == ZERO_EXTEND)
38603 extend = GET_CODE (orig_mem);
38604 orig_mem = XEXP (orig_mem, 0);
38605 target_mode = GET_MODE (orig_mem);
38608 gcc_assert (MEM_P (orig_mem));
38610 orig_addr = XEXP (orig_mem, 0);
38611 plus_or_lo_sum = GET_CODE (orig_addr);
38612 gcc_assert (plus_or_lo_sum == PLUS || plus_or_lo_sum == LO_SUM);
38614 offset = XEXP (orig_addr, 1);
38615 new_addr = gen_rtx_fmt_ee (plus_or_lo_sum, ptr_mode, addis_value, offset);
38616 new_mem = replace_equiv_address_nv (orig_mem, new_addr, false);
38618 if (extend != UNKNOWN)
38619 new_mem = gen_rtx_fmt_e (extend, extend_mode, new_mem);
38621 new_mem = gen_rtx_UNSPEC (extend_mode, gen_rtvec (1, new_mem),
38622 UNSPEC_FUSION_P9);
38624 set = gen_rtx_SET (target, new_mem);
38625 clobber = gen_rtx_CLOBBER (VOIDmode, tmp_reg);
38626 insn = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, set, clobber));
38627 emit_insn (insn);
38629 return;
38632 /* During the peephole2 pass, adjust and expand the insns for an extended fusion
38633 store sequence.
38635 The operands are:
38636 operands[0] register set with addis
38637 operands[1] value set via addis
38638 operands[2] target D-form memory being stored to
38639 operands[3] register being stored
38641 This is similar to the fusion introduced with power8, except it scales to
38642 both loads/stores and does not require the result register to be the same as
38643 the base register. At the moment, we only do this if the register set with addis
38644 is dead. */
38646 void
38647 expand_fusion_p9_store (rtx *operands)
38649 rtx tmp_reg = operands[0];
38650 rtx addis_value = operands[1];
38651 rtx orig_mem = operands[2];
38652 rtx src = operands[3];
38653 rtx new_addr, new_mem, orig_addr, offset, set, clobber, insn, new_src;
38654 enum rtx_code plus_or_lo_sum;
38655 machine_mode target_mode = GET_MODE (orig_mem);
38656 machine_mode ptr_mode = Pmode;
38658 gcc_assert (MEM_P (orig_mem));
38660 orig_addr = XEXP (orig_mem, 0);
38661 plus_or_lo_sum = GET_CODE (orig_addr);
38662 gcc_assert (plus_or_lo_sum == PLUS || plus_or_lo_sum == LO_SUM);
38664 offset = XEXP (orig_addr, 1);
38665 new_addr = gen_rtx_fmt_ee (plus_or_lo_sum, ptr_mode, addis_value, offset);
38666 new_mem = replace_equiv_address_nv (orig_mem, new_addr, false);
38668 new_src = gen_rtx_UNSPEC (target_mode, gen_rtvec (1, src),
38669 UNSPEC_FUSION_P9);
38671 set = gen_rtx_SET (new_mem, new_src);
38672 clobber = gen_rtx_CLOBBER (VOIDmode, tmp_reg);
38673 insn = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, set, clobber));
38674 emit_insn (insn);
38676 return;
/* Return a string to fuse an addis instruction with a load using extended
   fusion.  The address that is used is the logical address that was formed
   during peephole2: (lo_sum (high) (low-part))

   The code is complicated, so we call output_asm_insn directly, and just
   return "".  */

const char *
emit_fusion_p9_load (rtx reg, rtx mem, rtx tmp_reg)
{
  machine_mode mode = GET_MODE (reg);
  rtx hi;
  rtx lo;
  rtx addr;
  const char *load_string;
  int r;

  if (GET_CODE (mem) == FLOAT_EXTEND || GET_CODE (mem) == ZERO_EXTEND)
    {
      mem = XEXP (mem, 0);
      mode = GET_MODE (mem);
    }

  if (GET_CODE (reg) == SUBREG)
    {
      gcc_assert (SUBREG_BYTE (reg) == 0);
      reg = SUBREG_REG (reg);
    }

  if (!REG_P (reg))
    fatal_insn ("emit_fusion_p9_load, bad reg #1", reg);

  r = REGNO (reg);
  if (FP_REGNO_P (r))
    {
      if (mode == SFmode)
        load_string = "lfs";
      else if (mode == DFmode || mode == DImode)
        load_string = "lfd";
      else
        gcc_unreachable ();
    }
  else if (ALTIVEC_REGNO_P (r) && TARGET_P9_DFORM_SCALAR)
    {
      if (mode == SFmode)
        load_string = "lxssp";
      else if (mode == DFmode || mode == DImode)
        load_string = "lxsd";
      else
        gcc_unreachable ();
    }
  else if (INT_REGNO_P (r))
    {
      switch (mode)
        {
        case QImode:
          load_string = "lbz";
          break;
        case HImode:
          load_string = "lhz";
          break;
        case SImode:
        case SFmode:
          load_string = "lwz";
          break;
        case DImode:
        case DFmode:
          if (!TARGET_POWERPC64)
            gcc_unreachable ();
          load_string = "ld";
          break;
        default:
          gcc_unreachable ();
        }
    }
  else
    fatal_insn ("emit_fusion_p9_load, bad reg #2", reg);

  if (!MEM_P (mem))
    fatal_insn ("emit_fusion_p9_load not MEM", mem);

  addr = XEXP (mem, 0);
  fusion_split_address (addr, &hi, &lo);

  /* Emit the addis instruction.  */
  emit_fusion_addis (tmp_reg, hi, "power9 load fusion", GET_MODE_NAME (mode));

  /* Emit the D-form load instruction.  */
  emit_fusion_load_store (reg, tmp_reg, lo, load_string);

  return "";
}
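/* Illustrative sketch only: for an SImode load into a GPR, the two insns
   printed here look schematically like

        addis tmp_reg,base,offset@ha
        lwz   reg,offset@l(tmp_reg)

   which power9 can fuse in the instruction front end because the load
   consumes tmp_reg immediately.  */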
/* Return a string to fuse an addis instruction with a store using extended
   fusion.  The address that is used is the logical address that was formed
   during peephole2: (lo_sum (high) (low-part))

   The code is complicated, so we call output_asm_insn directly, and just
   return "".  */

const char *
emit_fusion_p9_store (rtx mem, rtx reg, rtx tmp_reg)
{
  machine_mode mode = GET_MODE (reg);
  rtx hi;
  rtx lo;
  rtx addr;
  const char *store_string;
  int r;

  if (GET_CODE (reg) == SUBREG)
    {
      gcc_assert (SUBREG_BYTE (reg) == 0);
      reg = SUBREG_REG (reg);
    }

  if (!REG_P (reg))
    fatal_insn ("emit_fusion_p9_store, bad reg #1", reg);

  r = REGNO (reg);
  if (FP_REGNO_P (r))
    {
      if (mode == SFmode)
        store_string = "stfs";
      else if (mode == DFmode)
        store_string = "stfd";
      else
        gcc_unreachable ();
    }
  else if (ALTIVEC_REGNO_P (r) && TARGET_P9_DFORM_SCALAR)
    {
      if (mode == SFmode)
        store_string = "stxssp";
      else if (mode == DFmode || mode == DImode)
        store_string = "stxsd";
      else
        gcc_unreachable ();
    }
  else if (INT_REGNO_P (r))
    {
      switch (mode)
        {
        case QImode:
          store_string = "stb";
          break;
        case HImode:
          store_string = "sth";
          break;
        case SImode:
        case SFmode:
          store_string = "stw";
          break;
        case DImode:
        case DFmode:
          if (!TARGET_POWERPC64)
            gcc_unreachable ();
          store_string = "std";
          break;
        default:
          gcc_unreachable ();
        }
    }
  else
    fatal_insn ("emit_fusion_p9_store, bad reg #2", reg);

  if (!MEM_P (mem))
    fatal_insn ("emit_fusion_p9_store not MEM", mem);

  addr = XEXP (mem, 0);
  fusion_split_address (addr, &hi, &lo);

  /* Emit the addis instruction.  */
  emit_fusion_addis (tmp_reg, hi, "power9 store fusion", GET_MODE_NAME (mode));

  /* Emit the D-form store instruction.  */
  emit_fusion_load_store (reg, tmp_reg, lo, store_string);

  return "";
}
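/* Illustrative sketch only: the emitted pair for an SImode GPR store looks
   schematically like

        addis tmp_reg,base,offset@ha
        stw   reg,offset@l(tmp_reg)  */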
#ifdef RS6000_GLIBC_ATOMIC_FENV
/* Function declarations for rs6000_atomic_assign_expand_fenv.  */
static tree atomic_hold_decl, atomic_clear_decl, atomic_update_decl;
#endif

/* Implement the TARGET_ATOMIC_ASSIGN_EXPAND_FENV hook.  */

static void
rs6000_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update)
{
  if (!TARGET_HARD_FLOAT)
    {
#ifdef RS6000_GLIBC_ATOMIC_FENV
      if (atomic_hold_decl == NULL_TREE)
        {
          atomic_hold_decl
            = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
                          get_identifier ("__atomic_feholdexcept"),
                          build_function_type_list (void_type_node,
                                                    double_ptr_type_node,
                                                    NULL_TREE));
          TREE_PUBLIC (atomic_hold_decl) = 1;
          DECL_EXTERNAL (atomic_hold_decl) = 1;
        }

      if (atomic_clear_decl == NULL_TREE)
        {
          atomic_clear_decl
            = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
                          get_identifier ("__atomic_feclearexcept"),
                          build_function_type_list (void_type_node,
                                                    NULL_TREE));
          TREE_PUBLIC (atomic_clear_decl) = 1;
          DECL_EXTERNAL (atomic_clear_decl) = 1;
        }

      tree const_double = build_qualified_type (double_type_node,
                                                TYPE_QUAL_CONST);
      tree const_double_ptr = build_pointer_type (const_double);
      if (atomic_update_decl == NULL_TREE)
        {
          atomic_update_decl
            = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
                          get_identifier ("__atomic_feupdateenv"),
                          build_function_type_list (void_type_node,
                                                    const_double_ptr,
                                                    NULL_TREE));
          TREE_PUBLIC (atomic_update_decl) = 1;
          DECL_EXTERNAL (atomic_update_decl) = 1;
        }

      tree fenv_var = create_tmp_var_raw (double_type_node);
      TREE_ADDRESSABLE (fenv_var) = 1;
      tree fenv_addr = build1 (ADDR_EXPR, double_ptr_type_node, fenv_var);

      *hold = build_call_expr (atomic_hold_decl, 1, fenv_addr);
      *clear = build_call_expr (atomic_clear_decl, 0);
      *update = build_call_expr (atomic_update_decl, 1,
                                 fold_convert (const_double_ptr, fenv_addr));
#endif
      return;
    }
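  /* Sketch of the soft-float expansion built above (assuming the glibc
     helpers of the same names): the three sequences reduce to plain calls

        __atomic_feholdexcept (&fenv_var);
        __atomic_feclearexcept ();
        __atomic_feupdateenv (&fenv_var);

     so the FP environment handling is delegated to the C library.  */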
  tree mffs = rs6000_builtin_decls[RS6000_BUILTIN_MFFS];
  tree mtfsf = rs6000_builtin_decls[RS6000_BUILTIN_MTFSF];
  tree call_mffs = build_call_expr (mffs, 0);

  /* Generates the equivalent of feholdexcept (&fenv_var):

        fenv_var = __builtin_mffs ();
        double fenv_hold;
        *(uint64_t*)&fenv_hold = *(uint64_t*)&fenv_var & 0xffffffff00000007LL;
        __builtin_mtfsf (0xff, fenv_hold);  */

  /* Mask to clear everything except for the rounding modes and non-IEEE
     arithmetic flag.  */
  const unsigned HOST_WIDE_INT hold_exception_mask =
    HOST_WIDE_INT_C (0xffffffff00000007);

  tree fenv_var = create_tmp_var_raw (double_type_node);

  tree hold_mffs = build2 (MODIFY_EXPR, void_type_node, fenv_var, call_mffs);

  tree fenv_llu = build1 (VIEW_CONVERT_EXPR, uint64_type_node, fenv_var);
  tree fenv_llu_and = build2 (BIT_AND_EXPR, uint64_type_node, fenv_llu,
                              build_int_cst (uint64_type_node,
                                             hold_exception_mask));

  tree fenv_hold_mtfsf = build1 (VIEW_CONVERT_EXPR, double_type_node,
                                 fenv_llu_and);

  tree hold_mtfsf = build_call_expr (mtfsf, 2,
                                     build_int_cst (unsigned_type_node, 0xff),
                                     fenv_hold_mtfsf);

  *hold = build2 (COMPOUND_EXPR, void_type_node, hold_mffs, hold_mtfsf);

  /* Generates the equivalent of feclearexcept (FE_ALL_EXCEPT):

        double fenv_clear = __builtin_mffs ();
        *(uint64_t*)&fenv_clear &= 0xffffffff00000000LL;
        __builtin_mtfsf (0xff, fenv_clear);  */

  /* Mask to clear everything, including the rounding modes and non-IEEE
     arithmetic flag.  */
  const unsigned HOST_WIDE_INT clear_exception_mask =
    HOST_WIDE_INT_C (0xffffffff00000000);

  tree fenv_clear = create_tmp_var_raw (double_type_node);

  tree clear_mffs = build2 (MODIFY_EXPR, void_type_node, fenv_clear, call_mffs);

  tree fenv_clean_llu = build1 (VIEW_CONVERT_EXPR, uint64_type_node, fenv_clear);
  tree fenv_clear_llu_and = build2 (BIT_AND_EXPR, uint64_type_node,
                                    fenv_clean_llu,
                                    build_int_cst (uint64_type_node,
                                                   clear_exception_mask));

  tree fenv_clear_mtfsf = build1 (VIEW_CONVERT_EXPR, double_type_node,
                                  fenv_clear_llu_and);

  tree clear_mtfsf = build_call_expr (mtfsf, 2,
                                      build_int_cst (unsigned_type_node, 0xff),
                                      fenv_clear_mtfsf);

  *clear = build2 (COMPOUND_EXPR, void_type_node, clear_mffs, clear_mtfsf);

  /* Generates the equivalent of feupdateenv (&fenv_var):

        double old_fenv = __builtin_mffs ();
        double fenv_update;
        *(uint64_t*)&fenv_update = (*(uint64_t*)&old_fenv & 0xffffffff1fffff00LL)
                                   | (*(uint64_t*)&fenv_var & 0x1ff80fff);
        __builtin_mtfsf (0xff, fenv_update);  */

  const unsigned HOST_WIDE_INT update_exception_mask =
    HOST_WIDE_INT_C (0xffffffff1fffff00);
  const unsigned HOST_WIDE_INT new_exception_mask =
    HOST_WIDE_INT_C (0x1ff80fff);

  tree old_fenv = create_tmp_var_raw (double_type_node);
  tree update_mffs = build2 (MODIFY_EXPR, void_type_node, old_fenv, call_mffs);

  tree old_llu = build1 (VIEW_CONVERT_EXPR, uint64_type_node, old_fenv);
  tree old_llu_and = build2 (BIT_AND_EXPR, uint64_type_node, old_llu,
                             build_int_cst (uint64_type_node,
                                            update_exception_mask));

  tree new_llu_and = build2 (BIT_AND_EXPR, uint64_type_node, fenv_llu,
                             build_int_cst (uint64_type_node,
                                            new_exception_mask));

  tree new_llu_mask = build2 (BIT_IOR_EXPR, uint64_type_node,
                              old_llu_and, new_llu_and);

  tree fenv_update_mtfsf = build1 (VIEW_CONVERT_EXPR, double_type_node,
                                   new_llu_mask);

  tree update_mtfsf = build_call_expr (mtfsf, 2,
                                       build_int_cst (unsigned_type_node, 0xff),
                                       fenv_update_mtfsf);

  *update = build2 (COMPOUND_EXPR, void_type_node, update_mffs, update_mtfsf);
}
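/* Usage sketch only: per C11, an atomic compound assignment on a
   floating-point object, e.g.

        _Atomic double d;
        d += x;

   must raise exactly the FP exceptions of the iteration whose result is
   successfully stored.  The middle end therefore emits <hold> before the
   compare-and-exchange loop, <clear> when an iteration's store fails and
   must be retried, and <update> once the loop succeeds.  */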
void
rs6000_generate_float2_code (bool signed_convert, rtx dst, rtx src1, rtx src2)
{
  rtx rtx_tmp0, rtx_tmp1, rtx_tmp2, rtx_tmp3;

  rtx_tmp0 = gen_reg_rtx (V2DImode);
  rtx_tmp1 = gen_reg_rtx (V2DImode);

  /* The destination of the vmrgew instruction layout is:
     rtx_tmp2[0] rtx_tmp3[0] rtx_tmp2[1] rtx_tmp3[1].
     Set up rtx_tmp0 and rtx_tmp1 to ensure the order of the elements after
     the vmrgew instruction will be correct.  */
  if (VECTOR_ELT_ORDER_BIG)
    {
      emit_insn (gen_vsx_xxpermdi_v2di_be (rtx_tmp0, src1, src2, GEN_INT (0)));
      emit_insn (gen_vsx_xxpermdi_v2di_be (rtx_tmp1, src1, src2, GEN_INT (3)));
    }
  else
    {
      emit_insn (gen_vsx_xxpermdi_v2di (rtx_tmp0, src1, src2, GEN_INT (3)));
      emit_insn (gen_vsx_xxpermdi_v2di (rtx_tmp1, src1, src2, GEN_INT (0)));
    }

  rtx_tmp2 = gen_reg_rtx (V4SFmode);
  rtx_tmp3 = gen_reg_rtx (V4SFmode);

  if (signed_convert)
    {
      emit_insn (gen_vsx_xvcvsxdsp (rtx_tmp2, rtx_tmp0));
      emit_insn (gen_vsx_xvcvsxdsp (rtx_tmp3, rtx_tmp1));
    }
  else
    {
      emit_insn (gen_vsx_xvcvuxdsp (rtx_tmp2, rtx_tmp0));
      emit_insn (gen_vsx_xvcvuxdsp (rtx_tmp3, rtx_tmp1));
    }

  if (VECTOR_ELT_ORDER_BIG)
    emit_insn (gen_p8_vmrgew_v4sf (dst, rtx_tmp2, rtx_tmp3));
  else
    emit_insn (gen_p8_vmrgew_v4sf (dst, rtx_tmp3, rtx_tmp2));
}
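/* Usage sketch only (assuming the vec_float2 interface from altivec.h):

        vector signed long long a, b;
        vector float f = vec_float2 (a, b);

   ends up here and produces, in element order,
   { (float) a[0], (float) a[1], (float) b[0], (float) b[1] }.  */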
void
rs6000_generate_vsigned2_code (bool signed_convert, rtx dst, rtx src1,
                               rtx src2)
{
  rtx rtx_tmp0, rtx_tmp1, rtx_tmp2, rtx_tmp3;

  rtx_tmp0 = gen_reg_rtx (V2DFmode);
  rtx_tmp1 = gen_reg_rtx (V2DFmode);

  emit_insn (gen_vsx_xxpermdi_v2df (rtx_tmp0, src1, src2, GEN_INT (0)));
  emit_insn (gen_vsx_xxpermdi_v2df (rtx_tmp1, src1, src2, GEN_INT (3)));

  rtx_tmp2 = gen_reg_rtx (V4SImode);
  rtx_tmp3 = gen_reg_rtx (V4SImode);

  if (signed_convert)
    {
      emit_insn (gen_vsx_xvcvdpsxws (rtx_tmp2, rtx_tmp0));
      emit_insn (gen_vsx_xvcvdpsxws (rtx_tmp3, rtx_tmp1));
    }
  else
    {
      emit_insn (gen_vsx_xvcvdpuxws (rtx_tmp2, rtx_tmp0));
      emit_insn (gen_vsx_xvcvdpuxws (rtx_tmp3, rtx_tmp1));
    }

  emit_insn (gen_p8_vmrgew_v4si (dst, rtx_tmp2, rtx_tmp3));
}
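/* Usage sketch only (assuming the vec_signed2/vec_unsigned2 interface from
   altivec.h):

        vector double a, b;
        vector signed int v = vec_signed2 (a, b);

   converts the four doubles to
   { (int) a[0], (int) a[1], (int) b[0], (int) b[1] }.  */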
/* Implement the TARGET_OPTAB_SUPPORTED_P hook.  */

static bool
rs6000_optab_supported_p (int op, machine_mode mode1, machine_mode,
                          optimization_type opt_type)
{
  switch (op)
    {
    case rsqrt_optab:
      return (opt_type == OPTIMIZE_FOR_SPEED
              && RS6000_RECIP_AUTO_RSQRTE_P (mode1));

    default:
      return true;
    }
}
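/* Illustrative note: this gates the reciprocal square root expansion, so a
   computation like 1.0 / sqrt (x) is rewritten via the rsqrte estimate plus
   Newton-Raphson refinement only when optimizing for speed and the
   -mrecip-style options enable it for the mode; otherwise the generic
   sqrt-and-divide sequence is used.  */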
struct gcc_target targetm = TARGET_INITIALIZER;

#include "gt-rs6000.h"