re PR target/85657 (Make __ibm128 a separate type, even if long double uses the IBM...
[official-gcc.git] / gcc / config / rs6000 / rs6000.c
blob bbee6cbc1e3e0f2ad0438ddc79cbe0d420001860

/* Subroutines used for code generation on IBM RS/6000.
   Copyright (C) 1991-2018 Free Software Foundation, Inc.
   Contributed by Richard Kenner (kenner@vlsi1.ultra.nyu.edu)

   This file is part of GCC.

   GCC is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published
   by the Free Software Foundation; either version 3, or (at your
   option) any later version.

   GCC is distributed in the hope that it will be useful, but WITHOUT
   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
   or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
   License for more details.

   You should have received a copy of the GNU General Public License
   along with GCC; see the file COPYING3.  If not see
   <http://www.gnu.org/licenses/>.  */

#define IN_TARGET_CODE 1

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "rtl.h"
#include "tree.h"
#include "memmodel.h"
#include "gimple.h"
#include "cfghooks.h"
#include "cfgloop.h"
#include "df.h"
#include "tm_p.h"
#include "stringpool.h"
#include "expmed.h"
#include "optabs.h"
#include "regs.h"
#include "ira.h"
#include "recog.h"
#include "cgraph.h"
#include "diagnostic-core.h"
#include "insn-attr.h"
#include "flags.h"
#include "alias.h"
#include "fold-const.h"
#include "attribs.h"
#include "stor-layout.h"
#include "calls.h"
#include "print-tree.h"
#include "varasm.h"
#include "explow.h"
#include "expr.h"
#include "output.h"
#include "dbxout.h"
#include "common/common-target.h"
#include "langhooks.h"
#include "reload.h"
#include "sched-int.h"
#include "gimplify.h"
#include "gimple-fold.h"
#include "gimple-iterator.h"
#include "gimple-ssa.h"
#include "gimple-walk.h"
#include "intl.h"
#include "params.h"
#include "tm-constrs.h"
#include "tree-vectorizer.h"
#include "target-globals.h"
#include "builtins.h"
#include "tree-vector-builder.h"
#include "context.h"
#include "tree-pass.h"
#include "except.h"
#if TARGET_XCOFF
#include "xcoffout.h"  /* get declarations of xcoff_*_section_name */
#endif
#if TARGET_MACHO
#include "gstab.h"  /* for N_SLINE */
#endif
#include "case-cfn-macros.h"
#include "ppc-auxv.h"
#include "tree-ssa-propagate.h"

/* This file should be included last.  */
#include "target-def.h"

#ifndef TARGET_NO_PROTOTYPE
#define TARGET_NO_PROTOTYPE 0
#endif

/* Set -mabi=ieeelongdouble on some old targets.  In the future, power server
   systems will also set long double to be IEEE 128-bit.  AIX and Darwin
   explicitly redefine TARGET_IEEEQUAD and TARGET_IEEEQUAD_DEFAULT to 0, so
   those systems will not pick up this default.  This needs to be after all
   of the include files, so that POWERPC_LINUX and POWERPC_FREEBSD are
   properly defined.  */
#ifndef TARGET_IEEEQUAD_DEFAULT
#if !defined (POWERPC_LINUX) && !defined (POWERPC_FREEBSD)
#define TARGET_IEEEQUAD_DEFAULT 1
#else
#define TARGET_IEEEQUAD_DEFAULT 0
#endif
#endif

#define min(A,B)  ((A) < (B) ? (A) : (B))
#define max(A,B)  ((A) > (B) ? (A) : (B))

static pad_direction rs6000_function_arg_padding (machine_mode, const_tree);

/* Structure used to define the rs6000 stack.  */
typedef struct rs6000_stack {
  int reload_completed;         /* stack info won't change from here on */
  int first_gp_reg_save;        /* first callee saved GP register used */
  int first_fp_reg_save;        /* first callee saved FP register used */
  int first_altivec_reg_save;   /* first callee saved AltiVec register used */
  int lr_save_p;                /* true if the link reg needs to be saved */
  int cr_save_p;                /* true if the CR reg needs to be saved */
  unsigned int vrsave_mask;     /* mask of vec registers to save */
  int push_p;                   /* true if we need to allocate stack space */
  int calls_p;                  /* true if the function makes any calls */
  int world_save_p;             /* true if we're saving *everything*:
                                   r13-r31, cr, f14-f31, vrsave, v20-v31 */
  enum rs6000_abi abi;          /* which ABI to use */
  int gp_save_offset;           /* offset to save GP regs from initial SP */
  int fp_save_offset;           /* offset to save FP regs from initial SP */
  int altivec_save_offset;      /* offset to save AltiVec regs from initial SP */
  int lr_save_offset;           /* offset to save LR from initial SP */
  int cr_save_offset;           /* offset to save CR from initial SP */
  int vrsave_save_offset;       /* offset to save VRSAVE from initial SP */
  int varargs_save_offset;      /* offset to save the varargs registers */
  int ehrd_offset;              /* offset to EH return data */
  int ehcr_offset;              /* offset to EH CR field data */
  int reg_size;                 /* register size (4 or 8) */
  HOST_WIDE_INT vars_size;      /* variable save area size */
  int parm_size;                /* outgoing parameter size */
  int save_size;                /* save area size */
  int fixed_size;               /* fixed size of stack frame */
  int gp_size;                  /* size of saved GP registers */
  int fp_size;                  /* size of saved FP registers */
  int altivec_size;             /* size of saved AltiVec registers */
  int cr_size;                  /* size to hold CR if not in fixed area */
  int vrsave_size;              /* size to hold VRSAVE */
  int altivec_padding_size;     /* size of altivec alignment padding */
  HOST_WIDE_INT total_size;     /* total bytes allocated for stack */
  int savres_strategy;
} rs6000_stack_t;

/* A C structure for machine-specific, per-function data.
   This is added to the cfun structure.  */
typedef struct GTY(()) machine_function
{
  /* Flags if __builtin_return_address (n) with n >= 1 was used.  */
  int ra_needs_full_frame;
  /* Flags if __builtin_return_address (0) was used.  */
  int ra_need_lr;
  /* Cache lr_save_p after expansion of builtin_eh_return.  */
  int lr_save_state;
  /* Whether we need to save the TOC to the reserved stack location in the
     function prologue.  */
  bool save_toc_in_prologue;
  /* Offset from virtual_stack_vars_rtx to the start of the ABI_V4
     varargs save area.  */
  HOST_WIDE_INT varargs_save_offset;
  /* Alternative internal arg pointer for -fsplit-stack.  */
  rtx split_stack_arg_pointer;
  bool split_stack_argp_used;
  /* Flag if r2 setup is needed with ELFv2 ABI.  */
  bool r2_setup_needed;
  /* The number of components we use for separate shrink-wrapping.  */
  int n_components;
  /* The components already handled by separate shrink-wrapping, which should
     not be considered by the prologue and epilogue.  */
  bool gpr_is_wrapped_separately[32];
  bool fpr_is_wrapped_separately[32];
  bool lr_is_wrapped_separately;
  bool toc_is_wrapped_separately;
} machine_function;
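
/* Usage sketch: this per-function data hangs off cfun, so code elsewhere in
   this file reaches it as cfun->machine, e.g. setting

     cfun->machine->ra_need_lr = 1;

   when __builtin_return_address (0) is expanded.  */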

/* Support targetm.vectorize.builtin_mask_for_load.  */
static GTY(()) tree altivec_builtin_mask_for_load;

/* Set to nonzero once AIX common-mode calls have been defined.  */
static GTY(()) int common_mode_defined;

/* Label number of the label created for -mrelocatable, which we call to
   get the address of the GOT section.  */
static int rs6000_pic_labelno;

#ifdef USING_ELFOS_H
/* Counter for labels which are to be placed in .fixup.  */
int fixuplabelno = 0;
#endif

/* Whether to use variant of AIX ABI for PowerPC64 Linux.  */
int dot_symbols;

/* Specify the machine mode that pointers have.  After generation of rtl, the
   compiler makes no further distinction between pointers and any other objects
   of this machine mode.  */
scalar_int_mode rs6000_pmode;

/* Note whether IEEE 128-bit floating point was passed or returned, either as
   the __float128/_Float128 explicit type, or when long double is IEEE 128-bit
   floating point.  We changed the default C++ mangling for these types and we
   may want to generate a weak alias of the old mangling (U10__float128) to the
   new mangling (u9__ieee128).  */
static bool rs6000_passes_ieee128;

/* Generate the mangled name (i.e. U10__float128) used in GCC 8.1, and not the
   name used in current releases (i.e. u9__ieee128).  */
static bool ieee128_mangling_gcc_8_1;

/* Width in bits of a pointer.  */
unsigned rs6000_pointer_size;

#ifdef HAVE_AS_GNU_ATTRIBUTE
# ifndef HAVE_LD_PPC_GNU_ATTR_LONG_DOUBLE
# define HAVE_LD_PPC_GNU_ATTR_LONG_DOUBLE 0
# endif
/* Flag whether floating point values have been passed/returned.
   Note that this doesn't say whether fprs are used, since the
   Tag_GNU_Power_ABI_FP .gnu.attributes value this flag controls
   should be set for soft-float values passed in gprs and ieee128
   values passed in vsx registers.  */
static bool rs6000_passes_float;
static bool rs6000_passes_long_double;
/* Flag whether vector values have been passed/returned.  */
static bool rs6000_passes_vector;
/* Flag whether small (<= 8 byte) structures have been returned.  */
static bool rs6000_returns_struct;
#endif

/* Value is TRUE if register/mode pair is acceptable.  */
static bool rs6000_hard_regno_mode_ok_p
  [NUM_MACHINE_MODES][FIRST_PSEUDO_REGISTER];

/* Maximum number of registers needed for a given register class and mode.  */
unsigned char rs6000_class_max_nregs[NUM_MACHINE_MODES][LIM_REG_CLASSES];

/* How many registers are needed for a given register and mode.  */
unsigned char rs6000_hard_regno_nregs[NUM_MACHINE_MODES][FIRST_PSEUDO_REGISTER];

/* Map register number to register class.  */
enum reg_class rs6000_regno_regclass[FIRST_PSEUDO_REGISTER];

static int dbg_cost_ctrl;

/* Built in types.  */
tree rs6000_builtin_types[RS6000_BTI_MAX];
tree rs6000_builtin_decls[RS6000_BUILTIN_COUNT];

/* Flag to say the TOC is initialized.  */
int toc_initialized, need_toc_init;
char toc_label_name[10];

/* Cached value of rs6000_variable_issue.  This is cached in
   rs6000_variable_issue hook and returned from rs6000_sched_reorder2.  */
static short cached_can_issue_more;

static GTY(()) section *read_only_data_section;
static GTY(()) section *private_data_section;
static GTY(()) section *tls_data_section;
static GTY(()) section *tls_private_data_section;
static GTY(()) section *read_only_private_data_section;
static GTY(()) section *sdata2_section;
static GTY(()) section *toc_section;

struct builtin_description
{
  const HOST_WIDE_INT mask;
  const enum insn_code icode;
  const char *const name;
  const enum rs6000_builtins code;
};

/* Describe the vector unit used for modes.  */
enum rs6000_vector rs6000_vector_unit[NUM_MACHINE_MODES];
enum rs6000_vector rs6000_vector_mem[NUM_MACHINE_MODES];

/* Register classes for various constraints that are based on the target
   switches.  */
enum reg_class rs6000_constraints[RS6000_CONSTRAINT_MAX];

/* Describe the alignment of a vector.  */
int rs6000_vector_align[NUM_MACHINE_MODES];

/* Map selected modes to types for builtins.  */
static GTY(()) tree builtin_mode_to_type[MAX_MACHINE_MODE][2];

/* What modes to automatically generate reciprocal divide estimate (fre) and
   reciprocal sqrt (frsqrte) for.  */
unsigned char rs6000_recip_bits[MAX_MACHINE_MODE];

/* Masks to determine which reciprocal estimate instructions to generate
   automatically.  */
enum rs6000_recip_mask {
  RECIP_SF_DIV       = 0x001,   /* Use divide estimate.  */
  RECIP_DF_DIV       = 0x002,
  RECIP_V4SF_DIV     = 0x004,
  RECIP_V2DF_DIV     = 0x008,

  RECIP_SF_RSQRT     = 0x010,   /* Use reciprocal sqrt estimate.  */
  RECIP_DF_RSQRT     = 0x020,
  RECIP_V4SF_RSQRT   = 0x040,
  RECIP_V2DF_RSQRT   = 0x080,

  /* Various combinations of flags for -mrecip=xxx.  */
  RECIP_NONE         = 0,
  RECIP_ALL          = (RECIP_SF_DIV | RECIP_DF_DIV | RECIP_V4SF_DIV
                        | RECIP_V2DF_DIV | RECIP_SF_RSQRT | RECIP_DF_RSQRT
                        | RECIP_V4SF_RSQRT | RECIP_V2DF_RSQRT),

  RECIP_HIGH_PRECISION = RECIP_ALL,

  /* On low precision machines like the power5, don't enable double precision
     reciprocal square root estimate, since it isn't accurate enough.  */
  RECIP_LOW_PRECISION = (RECIP_ALL & ~(RECIP_DF_RSQRT | RECIP_V2DF_RSQRT))
};

/* -mrecip options.  */
static struct
{
  const char *string;           /* option name */
  unsigned int mask;            /* mask bits to set */
} recip_options[] = {
  { "all",    RECIP_ALL },
  { "none",   RECIP_NONE },
  { "div",    (RECIP_SF_DIV | RECIP_DF_DIV | RECIP_V4SF_DIV
               | RECIP_V2DF_DIV) },
  { "divf",   (RECIP_SF_DIV | RECIP_V4SF_DIV) },
  { "divd",   (RECIP_DF_DIV | RECIP_V2DF_DIV) },
  { "rsqrt",  (RECIP_SF_RSQRT | RECIP_DF_RSQRT | RECIP_V4SF_RSQRT
               | RECIP_V2DF_RSQRT) },
  { "rsqrtf", (RECIP_SF_RSQRT | RECIP_V4SF_RSQRT) },
  { "rsqrtd", (RECIP_DF_RSQRT | RECIP_V2DF_RSQRT) },
};

/* Used by __builtin_cpu_is(), mapping from PLATFORM names to values.  */
static const struct
{
  const char *cpu;
  unsigned int cpuid;
} cpu_is_info[] = {
  { "power9",      PPC_PLATFORM_POWER9 },
  { "power8",      PPC_PLATFORM_POWER8 },
  { "power7",      PPC_PLATFORM_POWER7 },
  { "power6x",     PPC_PLATFORM_POWER6X },
  { "power6",      PPC_PLATFORM_POWER6 },
  { "power5+",     PPC_PLATFORM_POWER5_PLUS },
  { "power5",      PPC_PLATFORM_POWER5 },
  { "ppc970",      PPC_PLATFORM_PPC970 },
  { "power4",      PPC_PLATFORM_POWER4 },
  { "ppca2",       PPC_PLATFORM_PPCA2 },
  { "ppc476",      PPC_PLATFORM_PPC476 },
  { "ppc464",      PPC_PLATFORM_PPC464 },
  { "ppc440",      PPC_PLATFORM_PPC440 },
  { "ppc405",      PPC_PLATFORM_PPC405 },
  { "ppc-cell-be", PPC_PLATFORM_CELL_BE }
};

/* Used by __builtin_cpu_supports(), mapping from HWCAP names to masks.  */
static const struct
{
  const char *hwcap;
  int mask;
  unsigned int id;
} cpu_supports_info[] = {
  /* AT_HWCAP masks.  */
  { "4xxmac",           PPC_FEATURE_HAS_4xxMAC,         0 },
  { "altivec",          PPC_FEATURE_HAS_ALTIVEC,        0 },
  { "arch_2_05",        PPC_FEATURE_ARCH_2_05,          0 },
  { "arch_2_06",        PPC_FEATURE_ARCH_2_06,          0 },
  { "archpmu",          PPC_FEATURE_PERFMON_COMPAT,     0 },
  { "booke",            PPC_FEATURE_BOOKE,              0 },
  { "cellbe",           PPC_FEATURE_CELL_BE,            0 },
  { "dfp",              PPC_FEATURE_HAS_DFP,            0 },
  { "efpdouble",        PPC_FEATURE_HAS_EFP_DOUBLE,     0 },
  { "efpsingle",        PPC_FEATURE_HAS_EFP_SINGLE,     0 },
  { "fpu",              PPC_FEATURE_HAS_FPU,            0 },
  { "ic_snoop",         PPC_FEATURE_ICACHE_SNOOP,       0 },
  { "mmu",              PPC_FEATURE_HAS_MMU,            0 },
  { "notb",             PPC_FEATURE_NO_TB,              0 },
  { "pa6t",             PPC_FEATURE_PA6T,               0 },
  { "power4",           PPC_FEATURE_POWER4,             0 },
  { "power5",           PPC_FEATURE_POWER5,             0 },
  { "power5+",          PPC_FEATURE_POWER5_PLUS,        0 },
  { "power6x",          PPC_FEATURE_POWER6_EXT,         0 },
  { "ppc32",            PPC_FEATURE_32,                 0 },
  { "ppc601",           PPC_FEATURE_601_INSTR,          0 },
  { "ppc64",            PPC_FEATURE_64,                 0 },
  { "ppcle",            PPC_FEATURE_PPC_LE,             0 },
  { "smt",              PPC_FEATURE_SMT,                0 },
  { "spe",              PPC_FEATURE_HAS_SPE,            0 },
  { "true_le",          PPC_FEATURE_TRUE_LE,            0 },
  { "ucache",           PPC_FEATURE_UNIFIED_CACHE,      0 },
  { "vsx",              PPC_FEATURE_HAS_VSX,            0 },

  /* AT_HWCAP2 masks.  */
  { "arch_2_07",        PPC_FEATURE2_ARCH_2_07,         1 },
  { "dscr",             PPC_FEATURE2_HAS_DSCR,          1 },
  { "ebb",              PPC_FEATURE2_HAS_EBB,           1 },
  { "htm",              PPC_FEATURE2_HAS_HTM,           1 },
  { "htm-nosc",         PPC_FEATURE2_HTM_NOSC,          1 },
  { "htm-no-suspend",   PPC_FEATURE2_HTM_NO_SUSPEND,    1 },
  { "isel",             PPC_FEATURE2_HAS_ISEL,          1 },
  { "tar",              PPC_FEATURE2_HAS_TAR,           1 },
  { "vcrypto",          PPC_FEATURE2_HAS_VEC_CRYPTO,    1 },
  { "arch_3_00",        PPC_FEATURE2_ARCH_3_00,         1 },
  { "ieee128",          PPC_FEATURE2_HAS_IEEE128,       1 },
  { "darn",             PPC_FEATURE2_DARN,              1 },
  { "scv",              PPC_FEATURE2_SCV,               1 }
};
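
/* Usage sketch: these tables back the GCC builtins, e.g.

     if (__builtin_cpu_supports ("vsx"))
       vsx_path ();         // hypothetical user function
     else if (__builtin_cpu_is ("power8"))
       power8_path ();      // hypothetical user function

   where "vsx" is looked up in cpu_supports_info and "power8" in
   cpu_is_info above.  */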

/* On PowerPC, we have a limited number of target clones that we care about
   which means we can use an array to hold the options, rather than having more
   elaborate data structures to identify each possible variation.  Order the
   clones from the default to the highest ISA.  */
enum {
  CLONE_DEFAULT = 0,            /* default clone.  */
  CLONE_ISA_2_05,               /* ISA 2.05 (power6).  */
  CLONE_ISA_2_06,               /* ISA 2.06 (power7).  */
  CLONE_ISA_2_07,               /* ISA 2.07 (power8).  */
  CLONE_ISA_3_00,               /* ISA 3.00 (power9).  */
  CLONE_MAX
};

/* Map compiler ISA bits into HWCAP names.  */
struct clone_map {
  HOST_WIDE_INT isa_mask;       /* rs6000_isa mask */
  const char *name;             /* name to use in __builtin_cpu_supports.  */
};

static const struct clone_map rs6000_clone_map[CLONE_MAX] = {
  { 0,                      "" },           /* Default options.  */
  { OPTION_MASK_CMPB,       "arch_2_05" },  /* ISA 2.05 (power6).  */
  { OPTION_MASK_POPCNTD,    "arch_2_06" },  /* ISA 2.06 (power7).  */
  { OPTION_MASK_P8_VECTOR,  "arch_2_07" },  /* ISA 2.07 (power8).  */
  { OPTION_MASK_P9_VECTOR,  "arch_3_00" },  /* ISA 3.00 (power9).  */
};

/* Newer LIBCs explicitly export this symbol to declare that they provide
   the AT_PLATFORM and AT_HWCAP/AT_HWCAP2 values in the TCB.  We emit a
   reference to this symbol whenever we expand a CPU builtin, so that
   we never link against an old LIBC.  */
const char *tcb_verification_symbol = "__parse_hwcap_and_convert_at_platform";

/* True if we have expanded a CPU builtin.  */
bool cpu_builtin_p;

/* Pointer to function (in rs6000-c.c) that can define or undefine target
   macros that have changed.  Languages that don't support the preprocessor
   don't link in rs6000-c.c, so we can't call it directly.  */
void (*rs6000_target_modify_macros_ptr) (bool, HOST_WIDE_INT, HOST_WIDE_INT);

/* Simplify register classes into simpler classifications.  We assume
   GPR_REG_TYPE - FPR_REG_TYPE are ordered so that we can use a simple range
   check for standard register classes (gpr/floating/altivec/vsx) and
   floating/vector classes (float/altivec/vsx).  */

enum rs6000_reg_type {
  NO_REG_TYPE,
  PSEUDO_REG_TYPE,
  GPR_REG_TYPE,
  VSX_REG_TYPE,
  ALTIVEC_REG_TYPE,
  FPR_REG_TYPE,
  SPR_REG_TYPE,
  CR_REG_TYPE
};

/* Map register class to register type.  */
static enum rs6000_reg_type reg_class_to_reg_type[N_REG_CLASSES];

/* First/last register type for the 'normal' register types (i.e. general
   purpose, floating point, altivec, and VSX registers).  */
#define IS_STD_REG_TYPE(RTYPE) IN_RANGE(RTYPE, GPR_REG_TYPE, FPR_REG_TYPE)

#define IS_FP_VECT_REG_TYPE(RTYPE) IN_RANGE(RTYPE, VSX_REG_TYPE, FPR_REG_TYPE)

/* Register classes we care about in secondary reload or in legitimate
   address checking.  We only need to worry about GPR, FPR, and Altivec
   registers here, along with an ANY field that is the OR of the 3 register
   classes.  */

enum rs6000_reload_reg_type {
  RELOAD_REG_GPR,               /* General purpose registers.  */
  RELOAD_REG_FPR,               /* Traditional floating point regs.  */
  RELOAD_REG_VMX,               /* Altivec (VMX) registers.  */
  RELOAD_REG_ANY,               /* OR of GPR, FPR, Altivec masks.  */
  N_RELOAD_REG
};

/* For setting up register classes, loop through the 3 register classes mapping
   into real registers, and skip the ANY class, which is just an OR of the
   bits.  */
#define FIRST_RELOAD_REG_CLASS  RELOAD_REG_GPR
#define LAST_RELOAD_REG_CLASS   RELOAD_REG_VMX

/* Map reload register type to a register in the register class.  */
struct reload_reg_map_type {
  const char *name;             /* Register class name.  */
  int reg;                      /* Register in the register class.  */
};

static const struct reload_reg_map_type reload_reg_map[N_RELOAD_REG] = {
  { "Gpr", FIRST_GPR_REGNO },           /* RELOAD_REG_GPR.  */
  { "Fpr", FIRST_FPR_REGNO },           /* RELOAD_REG_FPR.  */
  { "VMX", FIRST_ALTIVEC_REGNO },       /* RELOAD_REG_VMX.  */
  { "Any", -1 },                        /* RELOAD_REG_ANY.  */
};

/* Mask bits for each register class, indexed per mode.  Historically the
   compiler has been more restrictive about which types can do PRE_MODIFY
   instead of PRE_INC and PRE_DEC, so keep track of separate bits for these
   two.  */
typedef unsigned char addr_mask_type;

#define RELOAD_REG_VALID        0x01    /* Mode valid in register.  */
#define RELOAD_REG_MULTIPLE     0x02    /* Mode takes multiple registers.  */
#define RELOAD_REG_INDEXED      0x04    /* Reg+reg addressing.  */
#define RELOAD_REG_OFFSET       0x08    /* Reg+offset addressing.  */
#define RELOAD_REG_PRE_INCDEC   0x10    /* PRE_INC/PRE_DEC valid.  */
#define RELOAD_REG_PRE_MODIFY   0x20    /* PRE_MODIFY valid.  */
#define RELOAD_REG_AND_M16      0x40    /* AND -16 addressing.  */
#define RELOAD_REG_QUAD_OFFSET  0x80    /* quad offset is limited.  */
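
/* Worked example of how the bits above combine: a mode that is valid in a
   register and supports reg+reg, reg+offset, and PRE_INC/PRE_DEC addressing
   would carry the mask

     RELOAD_REG_VALID | RELOAD_REG_INDEXED | RELOAD_REG_OFFSET
       | RELOAD_REG_PRE_INCDEC   ==   0x01 | 0x04 | 0x08 | 0x10   ==   0x1d

   and the mode_supports_* helpers below simply test individual bits of
   reg_addr[mode].addr_mask.  */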

/* Per-mode reload insns and masks of valid addressing modes, indexed by
   register type.  */
struct rs6000_reg_addr {
  enum insn_code reload_load;     /* INSN to reload for loading.  */
  enum insn_code reload_store;    /* INSN to reload for storing.  */
  enum insn_code reload_fpr_gpr;  /* INSN to move from FPR to GPR.  */
  enum insn_code reload_gpr_vsx;  /* INSN to move from GPR to VSX.  */
  enum insn_code reload_vsx_gpr;  /* INSN to move from VSX to GPR.  */
  enum insn_code fusion_gpr_ld;   /* INSN for fusing gpr ADDIS/loads.  */
                                  /* INSNs for fusing addi with loads
                                     or stores for each reg. class.  */
  enum insn_code fusion_addi_ld[(int)N_RELOAD_REG];
  enum insn_code fusion_addi_st[(int)N_RELOAD_REG];
                                  /* INSNs for fusing addis with loads
                                     or stores for each reg. class.  */
  enum insn_code fusion_addis_ld[(int)N_RELOAD_REG];
  enum insn_code fusion_addis_st[(int)N_RELOAD_REG];
  addr_mask_type addr_mask[(int)N_RELOAD_REG]; /* Valid address masks.  */
  bool scalar_in_vmx_p;           /* Scalar value can go in VMX.  */
  bool fused_toc;                 /* Mode supports TOC fusion.  */
};

static struct rs6000_reg_addr reg_addr[NUM_MACHINE_MODES];

/* Helper function to say whether a mode supports PRE_INC or PRE_DEC.  */
static inline bool
mode_supports_pre_incdec_p (machine_mode mode)
{
  return ((reg_addr[mode].addr_mask[RELOAD_REG_ANY] & RELOAD_REG_PRE_INCDEC)
          != 0);
}

/* Helper function to say whether a mode supports PRE_MODIFY.  */
static inline bool
mode_supports_pre_modify_p (machine_mode mode)
{
  return ((reg_addr[mode].addr_mask[RELOAD_REG_ANY] & RELOAD_REG_PRE_MODIFY)
          != 0);
}

/* Return true if we have D-form addressing in altivec registers.  */
static inline bool
mode_supports_vmx_dform (machine_mode mode)
{
  return ((reg_addr[mode].addr_mask[RELOAD_REG_VMX] & RELOAD_REG_OFFSET) != 0);
}

/* Return true if we have D-form addressing in VSX registers.  This addressing
   is more limited than normal d-form addressing in that the offset must be
   aligned on a 16-byte boundary.  */
static inline bool
mode_supports_dq_form (machine_mode mode)
{
  return ((reg_addr[mode].addr_mask[RELOAD_REG_ANY] & RELOAD_REG_QUAD_OFFSET)
          != 0);
}

/* Given that there exists at least one variable that is set (produced)
   by OUT_INSN and read (consumed) by IN_INSN, return true iff
   IN_INSN represents one or more memory store operations and none of
   the variables set by OUT_INSN is used by IN_INSN as the address of a
   store operation.  If either IN_INSN or OUT_INSN does not represent
   a "single" RTL SET expression (as loosely defined by the
   implementation of the single_set function) or a PARALLEL with only
   SETs, CLOBBERs, and USEs inside, this function returns false.

   This rs6000-specific version of store_data_bypass_p checks for
   certain conditions that result in assertion failures (and internal
   compiler errors) in the generic store_data_bypass_p function and
   returns false rather than calling store_data_bypass_p if one of the
   problematic conditions is detected.  */

bool
rs6000_store_data_bypass_p (rtx_insn *out_insn, rtx_insn *in_insn)
{
  rtx out_set, in_set;
  rtx out_pat, in_pat;
  rtx out_exp, in_exp;
  int i, j;

  in_set = single_set (in_insn);
  if (in_set)
    {
      if (MEM_P (SET_DEST (in_set)))
        {
          out_set = single_set (out_insn);
          if (!out_set)
            {
              out_pat = PATTERN (out_insn);
              if (GET_CODE (out_pat) == PARALLEL)
                {
                  for (i = 0; i < XVECLEN (out_pat, 0); i++)
                    {
                      out_exp = XVECEXP (out_pat, 0, i);
                      if ((GET_CODE (out_exp) == CLOBBER)
                          || (GET_CODE (out_exp) == USE))
                        continue;
                      else if (GET_CODE (out_exp) != SET)
                        return false;
                    }
                }
            }
        }
    }
  else
    {
      in_pat = PATTERN (in_insn);
      if (GET_CODE (in_pat) != PARALLEL)
        return false;

      for (i = 0; i < XVECLEN (in_pat, 0); i++)
        {
          in_exp = XVECEXP (in_pat, 0, i);
          if ((GET_CODE (in_exp) == CLOBBER) || (GET_CODE (in_exp) == USE))
            continue;
          else if (GET_CODE (in_exp) != SET)
            return false;

          if (MEM_P (SET_DEST (in_exp)))
            {
              out_set = single_set (out_insn);
              if (!out_set)
                {
                  out_pat = PATTERN (out_insn);
                  if (GET_CODE (out_pat) != PARALLEL)
                    return false;
                  for (j = 0; j < XVECLEN (out_pat, 0); j++)
                    {
                      out_exp = XVECEXP (out_pat, 0, j);
                      if ((GET_CODE (out_exp) == CLOBBER)
                          || (GET_CODE (out_exp) == USE))
                        continue;
                      else if (GET_CODE (out_exp) != SET)
                        return false;
                    }
                }
            }
        }
    }
  return store_data_bypass_p (out_insn, in_insn);
}

/* Processor costs (relative to an add).  */

const struct processor_costs *rs6000_cost;

/* Instruction size costs on 32bit processors.  */
static const
struct processor_costs size32_cost = {
  COSTS_N_INSNS (1),    /* mulsi */
  COSTS_N_INSNS (1),    /* mulsi_const */
  COSTS_N_INSNS (1),    /* mulsi_const9 */
  COSTS_N_INSNS (1),    /* muldi */
  COSTS_N_INSNS (1),    /* divsi */
  COSTS_N_INSNS (1),    /* divdi */
  COSTS_N_INSNS (1),    /* fp */
  COSTS_N_INSNS (1),    /* dmul */
  COSTS_N_INSNS (1),    /* sdiv */
  COSTS_N_INSNS (1),    /* ddiv */
  32,                   /* cache line size */
  0,                    /* l1 cache */
  0,                    /* l2 cache */
  0,                    /* streams */
  0,                    /* SF->DF convert */
};

/* Instruction size costs on 64bit processors.  */
static const
struct processor_costs size64_cost = {
  COSTS_N_INSNS (1),    /* mulsi */
  COSTS_N_INSNS (1),    /* mulsi_const */
  COSTS_N_INSNS (1),    /* mulsi_const9 */
  COSTS_N_INSNS (1),    /* muldi */
  COSTS_N_INSNS (1),    /* divsi */
  COSTS_N_INSNS (1),    /* divdi */
  COSTS_N_INSNS (1),    /* fp */
  COSTS_N_INSNS (1),    /* dmul */
  COSTS_N_INSNS (1),    /* sdiv */
  COSTS_N_INSNS (1),    /* ddiv */
  128,                  /* cache line size */
  0,                    /* l1 cache */
  0,                    /* l2 cache */
  0,                    /* streams */
  0,                    /* SF->DF convert */
};

/* Instruction costs on RS64A processors.  */
static const
struct processor_costs rs64a_cost = {
  COSTS_N_INSNS (20),   /* mulsi */
  COSTS_N_INSNS (12),   /* mulsi_const */
  COSTS_N_INSNS (8),    /* mulsi_const9 */
  COSTS_N_INSNS (34),   /* muldi */
  COSTS_N_INSNS (65),   /* divsi */
  COSTS_N_INSNS (67),   /* divdi */
  COSTS_N_INSNS (4),    /* fp */
  COSTS_N_INSNS (4),    /* dmul */
  COSTS_N_INSNS (31),   /* sdiv */
  COSTS_N_INSNS (31),   /* ddiv */
  128,                  /* cache line size */
  128,                  /* l1 cache */
  2048,                 /* l2 cache */
  1,                    /* streams */
  0,                    /* SF->DF convert */
};

/* Instruction costs on MPCCORE processors.  */
static const
struct processor_costs mpccore_cost = {
  COSTS_N_INSNS (2),    /* mulsi */
  COSTS_N_INSNS (2),    /* mulsi_const */
  COSTS_N_INSNS (2),    /* mulsi_const9 */
  COSTS_N_INSNS (2),    /* muldi */
  COSTS_N_INSNS (6),    /* divsi */
  COSTS_N_INSNS (6),    /* divdi */
  COSTS_N_INSNS (4),    /* fp */
  COSTS_N_INSNS (5),    /* dmul */
  COSTS_N_INSNS (10),   /* sdiv */
  COSTS_N_INSNS (17),   /* ddiv */
  32,                   /* cache line size */
  4,                    /* l1 cache */
  16,                   /* l2 cache */
  1,                    /* streams */
  0,                    /* SF->DF convert */
};

/* Instruction costs on PPC403 processors.  */
static const
struct processor_costs ppc403_cost = {
  COSTS_N_INSNS (4),    /* mulsi */
  COSTS_N_INSNS (4),    /* mulsi_const */
  COSTS_N_INSNS (4),    /* mulsi_const9 */
  COSTS_N_INSNS (4),    /* muldi */
  COSTS_N_INSNS (33),   /* divsi */
  COSTS_N_INSNS (33),   /* divdi */
  COSTS_N_INSNS (11),   /* fp */
  COSTS_N_INSNS (11),   /* dmul */
  COSTS_N_INSNS (11),   /* sdiv */
  COSTS_N_INSNS (11),   /* ddiv */
  32,                   /* cache line size */
  4,                    /* l1 cache */
  16,                   /* l2 cache */
  1,                    /* streams */
  0,                    /* SF->DF convert */
};

/* Instruction costs on PPC405 processors.  */
static const
struct processor_costs ppc405_cost = {
  COSTS_N_INSNS (5),    /* mulsi */
  COSTS_N_INSNS (4),    /* mulsi_const */
  COSTS_N_INSNS (3),    /* mulsi_const9 */
  COSTS_N_INSNS (5),    /* muldi */
  COSTS_N_INSNS (35),   /* divsi */
  COSTS_N_INSNS (35),   /* divdi */
  COSTS_N_INSNS (11),   /* fp */
  COSTS_N_INSNS (11),   /* dmul */
  COSTS_N_INSNS (11),   /* sdiv */
  COSTS_N_INSNS (11),   /* ddiv */
  32,                   /* cache line size */
  16,                   /* l1 cache */
  128,                  /* l2 cache */
  1,                    /* streams */
  0,                    /* SF->DF convert */
};

/* Instruction costs on PPC440 processors.  */
static const
struct processor_costs ppc440_cost = {
  COSTS_N_INSNS (3),    /* mulsi */
  COSTS_N_INSNS (2),    /* mulsi_const */
  COSTS_N_INSNS (2),    /* mulsi_const9 */
  COSTS_N_INSNS (3),    /* muldi */
  COSTS_N_INSNS (34),   /* divsi */
  COSTS_N_INSNS (34),   /* divdi */
  COSTS_N_INSNS (5),    /* fp */
  COSTS_N_INSNS (5),    /* dmul */
  COSTS_N_INSNS (19),   /* sdiv */
  COSTS_N_INSNS (33),   /* ddiv */
  32,                   /* cache line size */
  32,                   /* l1 cache */
  256,                  /* l2 cache */
  1,                    /* streams */
  0,                    /* SF->DF convert */
};

/* Instruction costs on PPC476 processors.  */
static const
struct processor_costs ppc476_cost = {
  COSTS_N_INSNS (4),    /* mulsi */
  COSTS_N_INSNS (4),    /* mulsi_const */
  COSTS_N_INSNS (4),    /* mulsi_const9 */
  COSTS_N_INSNS (4),    /* muldi */
  COSTS_N_INSNS (11),   /* divsi */
  COSTS_N_INSNS (11),   /* divdi */
  COSTS_N_INSNS (6),    /* fp */
  COSTS_N_INSNS (6),    /* dmul */
  COSTS_N_INSNS (19),   /* sdiv */
  COSTS_N_INSNS (33),   /* ddiv */
  32,                   /* l1 cache line size */
  32,                   /* l1 cache */
  512,                  /* l2 cache */
  1,                    /* streams */
  0,                    /* SF->DF convert */
};

/* Instruction costs on PPC601 processors.  */
static const
struct processor_costs ppc601_cost = {
  COSTS_N_INSNS (5),    /* mulsi */
  COSTS_N_INSNS (5),    /* mulsi_const */
  COSTS_N_INSNS (5),    /* mulsi_const9 */
  COSTS_N_INSNS (5),    /* muldi */
  COSTS_N_INSNS (36),   /* divsi */
  COSTS_N_INSNS (36),   /* divdi */
  COSTS_N_INSNS (4),    /* fp */
  COSTS_N_INSNS (5),    /* dmul */
  COSTS_N_INSNS (17),   /* sdiv */
  COSTS_N_INSNS (31),   /* ddiv */
  32,                   /* cache line size */
  32,                   /* l1 cache */
  256,                  /* l2 cache */
  1,                    /* streams */
  0,                    /* SF->DF convert */
};

/* Instruction costs on PPC603 processors.  */
static const
struct processor_costs ppc603_cost = {
  COSTS_N_INSNS (5),    /* mulsi */
  COSTS_N_INSNS (3),    /* mulsi_const */
  COSTS_N_INSNS (2),    /* mulsi_const9 */
  COSTS_N_INSNS (5),    /* muldi */
  COSTS_N_INSNS (37),   /* divsi */
  COSTS_N_INSNS (37),   /* divdi */
  COSTS_N_INSNS (3),    /* fp */
  COSTS_N_INSNS (4),    /* dmul */
  COSTS_N_INSNS (18),   /* sdiv */
  COSTS_N_INSNS (33),   /* ddiv */
  32,                   /* cache line size */
  8,                    /* l1 cache */
  64,                   /* l2 cache */
  1,                    /* streams */
  0,                    /* SF->DF convert */
};

/* Instruction costs on PPC604 processors.  */
static const
struct processor_costs ppc604_cost = {
  COSTS_N_INSNS (4),    /* mulsi */
  COSTS_N_INSNS (4),    /* mulsi_const */
  COSTS_N_INSNS (4),    /* mulsi_const9 */
  COSTS_N_INSNS (4),    /* muldi */
  COSTS_N_INSNS (20),   /* divsi */
  COSTS_N_INSNS (20),   /* divdi */
  COSTS_N_INSNS (3),    /* fp */
  COSTS_N_INSNS (3),    /* dmul */
  COSTS_N_INSNS (18),   /* sdiv */
  COSTS_N_INSNS (32),   /* ddiv */
  32,                   /* cache line size */
  16,                   /* l1 cache */
  512,                  /* l2 cache */
  1,                    /* streams */
  0,                    /* SF->DF convert */
};

/* Instruction costs on PPC604e processors.  */
static const
struct processor_costs ppc604e_cost = {
  COSTS_N_INSNS (2),    /* mulsi */
  COSTS_N_INSNS (2),    /* mulsi_const */
  COSTS_N_INSNS (2),    /* mulsi_const9 */
  COSTS_N_INSNS (2),    /* muldi */
  COSTS_N_INSNS (20),   /* divsi */
  COSTS_N_INSNS (20),   /* divdi */
  COSTS_N_INSNS (3),    /* fp */
  COSTS_N_INSNS (3),    /* dmul */
  COSTS_N_INSNS (18),   /* sdiv */
  COSTS_N_INSNS (32),   /* ddiv */
  32,                   /* cache line size */
  32,                   /* l1 cache */
  1024,                 /* l2 cache */
  1,                    /* streams */
  0,                    /* SF->DF convert */
};

/* Instruction costs on PPC620 processors.  */
static const
struct processor_costs ppc620_cost = {
  COSTS_N_INSNS (5),    /* mulsi */
  COSTS_N_INSNS (4),    /* mulsi_const */
  COSTS_N_INSNS (3),    /* mulsi_const9 */
  COSTS_N_INSNS (7),    /* muldi */
  COSTS_N_INSNS (21),   /* divsi */
  COSTS_N_INSNS (37),   /* divdi */
  COSTS_N_INSNS (3),    /* fp */
  COSTS_N_INSNS (3),    /* dmul */
  COSTS_N_INSNS (18),   /* sdiv */
  COSTS_N_INSNS (32),   /* ddiv */
  128,                  /* cache line size */
  32,                   /* l1 cache */
  1024,                 /* l2 cache */
  1,                    /* streams */
  0,                    /* SF->DF convert */
};

/* Instruction costs on PPC630 processors.  */
static const
struct processor_costs ppc630_cost = {
  COSTS_N_INSNS (5),    /* mulsi */
  COSTS_N_INSNS (4),    /* mulsi_const */
  COSTS_N_INSNS (3),    /* mulsi_const9 */
  COSTS_N_INSNS (7),    /* muldi */
  COSTS_N_INSNS (21),   /* divsi */
  COSTS_N_INSNS (37),   /* divdi */
  COSTS_N_INSNS (3),    /* fp */
  COSTS_N_INSNS (3),    /* dmul */
  COSTS_N_INSNS (17),   /* sdiv */
  COSTS_N_INSNS (21),   /* ddiv */
  128,                  /* cache line size */
  64,                   /* l1 cache */
  1024,                 /* l2 cache */
  1,                    /* streams */
  0,                    /* SF->DF convert */
};

/* Instruction costs on Cell processor.  */
/* COSTS_N_INSNS (1) ~ one add.  */
static const
struct processor_costs ppccell_cost = {
  COSTS_N_INSNS (9/2)+2,    /* mulsi */
  COSTS_N_INSNS (6/2),      /* mulsi_const */
  COSTS_N_INSNS (6/2),      /* mulsi_const9 */
  COSTS_N_INSNS (15/2)+2,   /* muldi */
  COSTS_N_INSNS (38/2),     /* divsi */
  COSTS_N_INSNS (70/2),     /* divdi */
  COSTS_N_INSNS (10/2),     /* fp */
  COSTS_N_INSNS (10/2),     /* dmul */
  COSTS_N_INSNS (74/2),     /* sdiv */
  COSTS_N_INSNS (74/2),     /* ddiv */
  128,                      /* cache line size */
  32,                       /* l1 cache */
  512,                      /* l2 cache */
  6,                        /* streams */
  0,                        /* SF->DF convert */
};

/* Instruction costs on PPC750 and PPC7400 processors.  */
static const
struct processor_costs ppc750_cost = {
  COSTS_N_INSNS (5),    /* mulsi */
  COSTS_N_INSNS (3),    /* mulsi_const */
  COSTS_N_INSNS (2),    /* mulsi_const9 */
  COSTS_N_INSNS (5),    /* muldi */
  COSTS_N_INSNS (17),   /* divsi */
  COSTS_N_INSNS (17),   /* divdi */
  COSTS_N_INSNS (3),    /* fp */
  COSTS_N_INSNS (3),    /* dmul */
  COSTS_N_INSNS (17),   /* sdiv */
  COSTS_N_INSNS (31),   /* ddiv */
  32,                   /* cache line size */
  32,                   /* l1 cache */
  512,                  /* l2 cache */
  1,                    /* streams */
  0,                    /* SF->DF convert */
};

/* Instruction costs on PPC7450 processors.  */
static const
struct processor_costs ppc7450_cost = {
  COSTS_N_INSNS (4),    /* mulsi */
  COSTS_N_INSNS (3),    /* mulsi_const */
  COSTS_N_INSNS (3),    /* mulsi_const9 */
  COSTS_N_INSNS (4),    /* muldi */
  COSTS_N_INSNS (23),   /* divsi */
  COSTS_N_INSNS (23),   /* divdi */
  COSTS_N_INSNS (5),    /* fp */
  COSTS_N_INSNS (5),    /* dmul */
  COSTS_N_INSNS (21),   /* sdiv */
  COSTS_N_INSNS (35),   /* ddiv */
  32,                   /* cache line size */
  32,                   /* l1 cache */
  1024,                 /* l2 cache */
  1,                    /* streams */
  0,                    /* SF->DF convert */
};

/* Instruction costs on PPC8540 processors.  */
static const
struct processor_costs ppc8540_cost = {
  COSTS_N_INSNS (4),    /* mulsi */
  COSTS_N_INSNS (4),    /* mulsi_const */
  COSTS_N_INSNS (4),    /* mulsi_const9 */
  COSTS_N_INSNS (4),    /* muldi */
  COSTS_N_INSNS (19),   /* divsi */
  COSTS_N_INSNS (19),   /* divdi */
  COSTS_N_INSNS (4),    /* fp */
  COSTS_N_INSNS (4),    /* dmul */
  COSTS_N_INSNS (29),   /* sdiv */
  COSTS_N_INSNS (29),   /* ddiv */
  32,                   /* cache line size */
  32,                   /* l1 cache */
  256,                  /* l2 cache */
  1,                    /* prefetch streams */
  0,                    /* SF->DF convert */
};

/* Instruction costs on E300C2 and E300C3 cores.  */
static const
struct processor_costs ppce300c2c3_cost = {
  COSTS_N_INSNS (4),    /* mulsi */
  COSTS_N_INSNS (4),    /* mulsi_const */
  COSTS_N_INSNS (4),    /* mulsi_const9 */
  COSTS_N_INSNS (4),    /* muldi */
  COSTS_N_INSNS (19),   /* divsi */
  COSTS_N_INSNS (19),   /* divdi */
  COSTS_N_INSNS (3),    /* fp */
  COSTS_N_INSNS (4),    /* dmul */
  COSTS_N_INSNS (18),   /* sdiv */
  COSTS_N_INSNS (33),   /* ddiv */
  32,                   /* cache line size */
  16,                   /* l1 cache */
  16,                   /* l2 cache */
  1,                    /* prefetch streams */
  0,                    /* SF->DF convert */
};

/* Instruction costs on PPCE500MC processors.  */
static const
struct processor_costs ppce500mc_cost = {
  COSTS_N_INSNS (4),    /* mulsi */
  COSTS_N_INSNS (4),    /* mulsi_const */
  COSTS_N_INSNS (4),    /* mulsi_const9 */
  COSTS_N_INSNS (4),    /* muldi */
  COSTS_N_INSNS (14),   /* divsi */
  COSTS_N_INSNS (14),   /* divdi */
  COSTS_N_INSNS (8),    /* fp */
  COSTS_N_INSNS (10),   /* dmul */
  COSTS_N_INSNS (36),   /* sdiv */
  COSTS_N_INSNS (66),   /* ddiv */
  64,                   /* cache line size */
  32,                   /* l1 cache */
  128,                  /* l2 cache */
  1,                    /* prefetch streams */
  0,                    /* SF->DF convert */
};

/* Instruction costs on PPCE500MC64 processors.  */
static const
struct processor_costs ppce500mc64_cost = {
  COSTS_N_INSNS (4),    /* mulsi */
  COSTS_N_INSNS (4),    /* mulsi_const */
  COSTS_N_INSNS (4),    /* mulsi_const9 */
  COSTS_N_INSNS (4),    /* muldi */
  COSTS_N_INSNS (14),   /* divsi */
  COSTS_N_INSNS (14),   /* divdi */
  COSTS_N_INSNS (4),    /* fp */
  COSTS_N_INSNS (10),   /* dmul */
  COSTS_N_INSNS (36),   /* sdiv */
  COSTS_N_INSNS (66),   /* ddiv */
  64,                   /* cache line size */
  32,                   /* l1 cache */
  128,                  /* l2 cache */
  1,                    /* prefetch streams */
  0,                    /* SF->DF convert */
};

/* Instruction costs on PPCE5500 processors.  */
static const
struct processor_costs ppce5500_cost = {
  COSTS_N_INSNS (5),    /* mulsi */
  COSTS_N_INSNS (5),    /* mulsi_const */
  COSTS_N_INSNS (4),    /* mulsi_const9 */
  COSTS_N_INSNS (5),    /* muldi */
  COSTS_N_INSNS (14),   /* divsi */
  COSTS_N_INSNS (14),   /* divdi */
  COSTS_N_INSNS (7),    /* fp */
  COSTS_N_INSNS (10),   /* dmul */
  COSTS_N_INSNS (36),   /* sdiv */
  COSTS_N_INSNS (66),   /* ddiv */
  64,                   /* cache line size */
  32,                   /* l1 cache */
  128,                  /* l2 cache */
  1,                    /* prefetch streams */
  0,                    /* SF->DF convert */
};

/* Instruction costs on PPCE6500 processors.  */
static const
struct processor_costs ppce6500_cost = {
  COSTS_N_INSNS (5),    /* mulsi */
  COSTS_N_INSNS (5),    /* mulsi_const */
  COSTS_N_INSNS (4),    /* mulsi_const9 */
  COSTS_N_INSNS (5),    /* muldi */
  COSTS_N_INSNS (14),   /* divsi */
  COSTS_N_INSNS (14),   /* divdi */
  COSTS_N_INSNS (7),    /* fp */
  COSTS_N_INSNS (10),   /* dmul */
  COSTS_N_INSNS (36),   /* sdiv */
  COSTS_N_INSNS (66),   /* ddiv */
  64,                   /* cache line size */
  32,                   /* l1 cache */
  128,                  /* l2 cache */
  1,                    /* prefetch streams */
  0,                    /* SF->DF convert */
};

/* Instruction costs on AppliedMicro Titan processors.  */
static const
struct processor_costs titan_cost = {
  COSTS_N_INSNS (5),    /* mulsi */
  COSTS_N_INSNS (5),    /* mulsi_const */
  COSTS_N_INSNS (5),    /* mulsi_const9 */
  COSTS_N_INSNS (5),    /* muldi */
  COSTS_N_INSNS (18),   /* divsi */
  COSTS_N_INSNS (18),   /* divdi */
  COSTS_N_INSNS (10),   /* fp */
  COSTS_N_INSNS (10),   /* dmul */
  COSTS_N_INSNS (46),   /* sdiv */
  COSTS_N_INSNS (72),   /* ddiv */
  32,                   /* cache line size */
  32,                   /* l1 cache */
  512,                  /* l2 cache */
  1,                    /* prefetch streams */
  0,                    /* SF->DF convert */
};

/* Instruction costs on POWER4 and POWER5 processors.  */
static const
struct processor_costs power4_cost = {
  COSTS_N_INSNS (3),    /* mulsi */
  COSTS_N_INSNS (2),    /* mulsi_const */
  COSTS_N_INSNS (2),    /* mulsi_const9 */
  COSTS_N_INSNS (4),    /* muldi */
  COSTS_N_INSNS (18),   /* divsi */
  COSTS_N_INSNS (34),   /* divdi */
  COSTS_N_INSNS (3),    /* fp */
  COSTS_N_INSNS (3),    /* dmul */
  COSTS_N_INSNS (17),   /* sdiv */
  COSTS_N_INSNS (17),   /* ddiv */
  128,                  /* cache line size */
  32,                   /* l1 cache */
  1024,                 /* l2 cache */
  8,                    /* prefetch streams */
  0,                    /* SF->DF convert */
};

/* Instruction costs on POWER6 processors.  */
static const
struct processor_costs power6_cost = {
  COSTS_N_INSNS (8),    /* mulsi */
  COSTS_N_INSNS (8),    /* mulsi_const */
  COSTS_N_INSNS (8),    /* mulsi_const9 */
  COSTS_N_INSNS (8),    /* muldi */
  COSTS_N_INSNS (22),   /* divsi */
  COSTS_N_INSNS (28),   /* divdi */
  COSTS_N_INSNS (3),    /* fp */
  COSTS_N_INSNS (3),    /* dmul */
  COSTS_N_INSNS (13),   /* sdiv */
  COSTS_N_INSNS (16),   /* ddiv */
  128,                  /* cache line size */
  64,                   /* l1 cache */
  2048,                 /* l2 cache */
  16,                   /* prefetch streams */
  0,                    /* SF->DF convert */
};

/* Instruction costs on POWER7 processors.  */
static const
struct processor_costs power7_cost = {
  COSTS_N_INSNS (2),    /* mulsi */
  COSTS_N_INSNS (2),    /* mulsi_const */
  COSTS_N_INSNS (2),    /* mulsi_const9 */
  COSTS_N_INSNS (2),    /* muldi */
  COSTS_N_INSNS (18),   /* divsi */
  COSTS_N_INSNS (34),   /* divdi */
  COSTS_N_INSNS (3),    /* fp */
  COSTS_N_INSNS (3),    /* dmul */
  COSTS_N_INSNS (13),   /* sdiv */
  COSTS_N_INSNS (16),   /* ddiv */
  128,                  /* cache line size */
  32,                   /* l1 cache */
  256,                  /* l2 cache */
  12,                   /* prefetch streams */
  COSTS_N_INSNS (3),    /* SF->DF convert */
};

/* Instruction costs on POWER8 processors.  */
static const
struct processor_costs power8_cost = {
  COSTS_N_INSNS (3),    /* mulsi */
  COSTS_N_INSNS (3),    /* mulsi_const */
  COSTS_N_INSNS (3),    /* mulsi_const9 */
  COSTS_N_INSNS (3),    /* muldi */
  COSTS_N_INSNS (19),   /* divsi */
  COSTS_N_INSNS (35),   /* divdi */
  COSTS_N_INSNS (3),    /* fp */
  COSTS_N_INSNS (3),    /* dmul */
  COSTS_N_INSNS (14),   /* sdiv */
  COSTS_N_INSNS (17),   /* ddiv */
  128,                  /* cache line size */
  32,                   /* l1 cache */
  256,                  /* l2 cache */
  12,                   /* prefetch streams */
  COSTS_N_INSNS (3),    /* SF->DF convert */
};

/* Instruction costs on POWER9 processors.  */
static const
struct processor_costs power9_cost = {
  COSTS_N_INSNS (3),    /* mulsi */
  COSTS_N_INSNS (3),    /* mulsi_const */
  COSTS_N_INSNS (3),    /* mulsi_const9 */
  COSTS_N_INSNS (3),    /* muldi */
  COSTS_N_INSNS (8),    /* divsi */
  COSTS_N_INSNS (12),   /* divdi */
  COSTS_N_INSNS (3),    /* fp */
  COSTS_N_INSNS (3),    /* dmul */
  COSTS_N_INSNS (13),   /* sdiv */
  COSTS_N_INSNS (18),   /* ddiv */
  128,                  /* cache line size */
  32,                   /* l1 cache */
  512,                  /* l2 cache */
  8,                    /* prefetch streams */
  COSTS_N_INSNS (3),    /* SF->DF convert */
};

/* Instruction costs on POWER A2 processors.  */
static const
struct processor_costs ppca2_cost = {
  COSTS_N_INSNS (16),   /* mulsi */
  COSTS_N_INSNS (16),   /* mulsi_const */
  COSTS_N_INSNS (16),   /* mulsi_const9 */
  COSTS_N_INSNS (16),   /* muldi */
  COSTS_N_INSNS (22),   /* divsi */
  COSTS_N_INSNS (28),   /* divdi */
  COSTS_N_INSNS (3),    /* fp */
  COSTS_N_INSNS (3),    /* dmul */
  COSTS_N_INSNS (59),   /* sdiv */
  COSTS_N_INSNS (72),   /* ddiv */
  64,                   /* cache line size */
  16,                   /* l1 cache */
  2048,                 /* l2 cache */
  16,                   /* prefetch streams */
  0,                    /* SF->DF convert */
};
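
/* Usage sketch: rs6000_cost points at one of the tables above, and the cost
   hooks charge instructions through it, e.g. a SImode multiply costs about
   rs6000_cost->mulsi, which is COSTS_N_INSNS (3) under power9_cost but
   COSTS_N_INSNS (16) under ppca2_cost.  */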

/* Table that classifies rs6000 builtin functions (pure, const, etc.).  */
#undef RS6000_BUILTIN_0
#undef RS6000_BUILTIN_1
#undef RS6000_BUILTIN_2
#undef RS6000_BUILTIN_3
#undef RS6000_BUILTIN_A
#undef RS6000_BUILTIN_D
#undef RS6000_BUILTIN_H
#undef RS6000_BUILTIN_P
#undef RS6000_BUILTIN_X

#define RS6000_BUILTIN_0(ENUM, NAME, MASK, ATTR, ICODE) \
  { NAME, ICODE, MASK, ATTR },

#define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE) \
  { NAME, ICODE, MASK, ATTR },

#define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE) \
  { NAME, ICODE, MASK, ATTR },

#define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE) \
  { NAME, ICODE, MASK, ATTR },

#define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE) \
  { NAME, ICODE, MASK, ATTR },

#define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE) \
  { NAME, ICODE, MASK, ATTR },

#define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE) \
  { NAME, ICODE, MASK, ATTR },

#define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE) \
  { NAME, ICODE, MASK, ATTR },

#define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE) \
  { NAME, ICODE, MASK, ATTR },

struct rs6000_builtin_info_type {
  const char *name;
  const enum insn_code icode;
  const HOST_WIDE_INT mask;
  const unsigned attr;
};

static const struct rs6000_builtin_info_type rs6000_builtin_info[] =
{
#include "rs6000-builtin.def"
};

#undef RS6000_BUILTIN_0
#undef RS6000_BUILTIN_1
#undef RS6000_BUILTIN_2
#undef RS6000_BUILTIN_3
#undef RS6000_BUILTIN_A
#undef RS6000_BUILTIN_D
#undef RS6000_BUILTIN_H
#undef RS6000_BUILTIN_P
#undef RS6000_BUILTIN_X

/* Support for -mveclibabi=<xxx> to control which vector library to use.  */
static tree (*rs6000_veclib_handler) (combined_fn, tree, tree);

static bool rs6000_debug_legitimate_address_p (machine_mode, rtx, bool);
static struct machine_function * rs6000_init_machine_status (void);
static int rs6000_ra_ever_killed (void);
static tree rs6000_handle_longcall_attribute (tree *, tree, tree, int, bool *);
static tree rs6000_handle_altivec_attribute (tree *, tree, tree, int, bool *);
static tree rs6000_handle_struct_attribute (tree *, tree, tree, int, bool *);
static tree rs6000_builtin_vectorized_libmass (combined_fn, tree, tree);
static void rs6000_emit_set_long_const (rtx, HOST_WIDE_INT);
static int rs6000_memory_move_cost (machine_mode, reg_class_t, bool);
static bool rs6000_debug_rtx_costs (rtx, machine_mode, int, int, int *, bool);
static int rs6000_debug_address_cost (rtx, machine_mode, addr_space_t,
                                      bool);
static int rs6000_debug_adjust_cost (rtx_insn *, int, rtx_insn *, int,
                                     unsigned int);
static bool is_microcoded_insn (rtx_insn *);
static bool is_nonpipeline_insn (rtx_insn *);
static bool is_cracked_insn (rtx_insn *);
static bool is_load_insn (rtx, rtx *);
static bool is_store_insn (rtx, rtx *);
static bool set_to_load_agen (rtx_insn *, rtx_insn *);
static bool insn_terminates_group_p (rtx_insn *, enum group_termination);
static bool insn_must_be_first_in_group (rtx_insn *);
static bool insn_must_be_last_in_group (rtx_insn *);
static void altivec_init_builtins (void);
static tree builtin_function_type (machine_mode, machine_mode,
                                   machine_mode, machine_mode,
                                   enum rs6000_builtins, const char *name);
static void rs6000_common_init_builtins (void);
static void htm_init_builtins (void);
static rs6000_stack_t *rs6000_stack_info (void);
static void is_altivec_return_reg (rtx, void *);
int easy_vector_constant (rtx, machine_mode);
static rtx rs6000_debug_legitimize_address (rtx, rtx, machine_mode);
static rtx rs6000_legitimize_tls_address (rtx, enum tls_model);
static rtx rs6000_darwin64_record_arg (CUMULATIVE_ARGS *, const_tree,
                                       bool, bool);
#if TARGET_MACHO
static void macho_branch_islands (void);
#endif
static rtx rs6000_legitimize_reload_address (rtx, machine_mode, int, int,
                                             int, int *);
static rtx rs6000_debug_legitimize_reload_address (rtx, machine_mode, int,
                                                   int, int, int *);
static bool rs6000_mode_dependent_address (const_rtx);
static bool rs6000_debug_mode_dependent_address (const_rtx);
static bool rs6000_offsettable_memref_p (rtx, machine_mode, bool);
static enum reg_class rs6000_secondary_reload_class (enum reg_class,
                                                     machine_mode, rtx);
static enum reg_class rs6000_debug_secondary_reload_class (enum reg_class,
                                                           machine_mode,
                                                           rtx);
static enum reg_class rs6000_preferred_reload_class (rtx, enum reg_class);
static enum reg_class rs6000_debug_preferred_reload_class (rtx,
                                                           enum reg_class);
static bool rs6000_debug_secondary_memory_needed (machine_mode,
                                                  reg_class_t,
                                                  reg_class_t);
static bool rs6000_debug_can_change_mode_class (machine_mode,
                                                machine_mode,
                                                reg_class_t);
static bool rs6000_save_toc_in_prologue_p (void);
static rtx rs6000_internal_arg_pointer (void);

rtx (*rs6000_legitimize_reload_address_ptr) (rtx, machine_mode, int, int,
                                             int, int *)
  = rs6000_legitimize_reload_address;

static bool (*rs6000_mode_dependent_address_ptr) (const_rtx)
  = rs6000_mode_dependent_address;

enum reg_class (*rs6000_secondary_reload_class_ptr) (enum reg_class,
                                                     machine_mode, rtx)
  = rs6000_secondary_reload_class;

enum reg_class (*rs6000_preferred_reload_class_ptr) (rtx, enum reg_class)
  = rs6000_preferred_reload_class;

const int INSN_NOT_AVAILABLE = -1;

static void rs6000_print_isa_options (FILE *, int, const char *,
                                      HOST_WIDE_INT);
static void rs6000_print_builtin_options (FILE *, int, const char *,
                                          HOST_WIDE_INT);
static HOST_WIDE_INT rs6000_disable_incompatible_switches (void);

static enum rs6000_reg_type register_to_reg_type (rtx, bool *);
static bool rs6000_secondary_reload_move (enum rs6000_reg_type,
                                          enum rs6000_reg_type,
                                          machine_mode,
                                          secondary_reload_info *,
                                          bool);
rtl_opt_pass *make_pass_analyze_swaps (gcc::context*);
static bool rs6000_keep_leaf_when_profiled () __attribute__ ((unused));
static tree rs6000_fold_builtin (tree, int, tree *, bool);

/* Hash table stuff for keeping track of TOC entries.  */

struct GTY((for_user)) toc_hash_struct
{
  /* `key' will satisfy CONSTANT_P; in fact, it will satisfy
     ASM_OUTPUT_SPECIAL_POOL_ENTRY_P.  */
  rtx key;
  machine_mode key_mode;
  int labelno;
};

struct toc_hasher : ggc_ptr_hash<toc_hash_struct>
{
  static hashval_t hash (toc_hash_struct *);
  static bool equal (toc_hash_struct *, toc_hash_struct *);
};

static GTY (()) hash_table<toc_hasher> *toc_hash_table;

/* Hash table to keep track of the argument types for builtin functions.  */

struct GTY((for_user)) builtin_hash_struct
{
  tree type;
  machine_mode mode[4];         /* return value + 3 arguments.  */
  unsigned char uns_p[4];       /* and whether the types are unsigned.  */
};

struct builtin_hasher : ggc_ptr_hash<builtin_hash_struct>
{
  static hashval_t hash (builtin_hash_struct *);
  static bool equal (builtin_hash_struct *, builtin_hash_struct *);
};

static GTY (()) hash_table<builtin_hasher> *builtin_hash_table;

/* Default register names.  */
char rs6000_reg_names[][8] =
{
  "0", "1", "2", "3", "4", "5", "6", "7",
  "8", "9", "10", "11", "12", "13", "14", "15",
  "16", "17", "18", "19", "20", "21", "22", "23",
  "24", "25", "26", "27", "28", "29", "30", "31",
  "0", "1", "2", "3", "4", "5", "6", "7",
  "8", "9", "10", "11", "12", "13", "14", "15",
  "16", "17", "18", "19", "20", "21", "22", "23",
  "24", "25", "26", "27", "28", "29", "30", "31",
  "mq", "lr", "ctr", "ap",
  "0", "1", "2", "3", "4", "5", "6", "7",
  "ca",
  /* AltiVec registers.  */
  "0", "1", "2", "3", "4", "5", "6", "7",
  "8", "9", "10", "11", "12", "13", "14", "15",
  "16", "17", "18", "19", "20", "21", "22", "23",
  "24", "25", "26", "27", "28", "29", "30", "31",
  "vrsave", "vscr",
  /* Soft frame pointer.  */
  "sfp",
  /* HTM SPR registers.  */
  "tfhar", "tfiar", "texasr"
};

#ifdef TARGET_REGNAMES
static const char alt_reg_names[][8] =
{
  "%r0", "%r1", "%r2", "%r3", "%r4", "%r5", "%r6", "%r7",
  "%r8", "%r9", "%r10", "%r11", "%r12", "%r13", "%r14", "%r15",
  "%r16", "%r17", "%r18", "%r19", "%r20", "%r21", "%r22", "%r23",
  "%r24", "%r25", "%r26", "%r27", "%r28", "%r29", "%r30", "%r31",
  "%f0", "%f1", "%f2", "%f3", "%f4", "%f5", "%f6", "%f7",
  "%f8", "%f9", "%f10", "%f11", "%f12", "%f13", "%f14", "%f15",
  "%f16", "%f17", "%f18", "%f19", "%f20", "%f21", "%f22", "%f23",
  "%f24", "%f25", "%f26", "%f27", "%f28", "%f29", "%f30", "%f31",
  "mq", "lr", "ctr", "ap",
  "%cr0", "%cr1", "%cr2", "%cr3", "%cr4", "%cr5", "%cr6", "%cr7",
  "ca",
  /* AltiVec registers.  */
  "%v0", "%v1", "%v2", "%v3", "%v4", "%v5", "%v6", "%v7",
  "%v8", "%v9", "%v10", "%v11", "%v12", "%v13", "%v14", "%v15",
  "%v16", "%v17", "%v18", "%v19", "%v20", "%v21", "%v22", "%v23",
  "%v24", "%v25", "%v26", "%v27", "%v28", "%v29", "%v30", "%v31",
  "vrsave", "vscr",
  /* Soft frame pointer.  */
  "sfp",
  /* HTM SPR registers.  */
  "tfhar", "tfiar", "texasr"
};
#endif

/* Table of valid machine attributes.  */

static const struct attribute_spec rs6000_attribute_table[] =
{
  /* { name, min_len, max_len, decl_req, type_req, fn_type_req,
       affects_type_identity, handler, exclude } */
  { "altivec",    1, 1, false, true,  false, false,
    rs6000_handle_altivec_attribute, NULL },
  { "longcall",   0, 0, false, true,  true,  false,
    rs6000_handle_longcall_attribute, NULL },
  { "shortcall",  0, 0, false, true,  true,  false,
    rs6000_handle_longcall_attribute, NULL },
  { "ms_struct",  0, 0, false, false, false, false,
    rs6000_handle_struct_attribute, NULL },
  { "gcc_struct", 0, 0, false, false, false, false,
    rs6000_handle_struct_attribute, NULL },
#ifdef SUBTARGET_ATTRIBUTE_TABLE
  SUBTARGET_ATTRIBUTE_TABLE,
#endif
  { NULL,         0, 0, false, false, false, false, NULL, NULL }
};

#ifndef TARGET_PROFILE_KERNEL
#define TARGET_PROFILE_KERNEL 0
#endif

/* The VRSAVE bitmask puts bit %v0 as the most significant bit.  */
#define ALTIVEC_REG_BIT(REGNO) (0x80000000 >> ((REGNO) - FIRST_ALTIVEC_REGNO))
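
/* For example, ALTIVEC_REG_BIT (FIRST_ALTIVEC_REGNO) is 0x80000000 (%v0)
   and ALTIVEC_REG_BIT (FIRST_ALTIVEC_REGNO + 31) is 0x00000001 (%v31).  */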

/* Initialize the GCC target structure.  */
#undef TARGET_ATTRIBUTE_TABLE
#define TARGET_ATTRIBUTE_TABLE rs6000_attribute_table
#undef TARGET_SET_DEFAULT_TYPE_ATTRIBUTES
#define TARGET_SET_DEFAULT_TYPE_ATTRIBUTES rs6000_set_default_type_attributes
#undef TARGET_ATTRIBUTE_TAKES_IDENTIFIER_P
#define TARGET_ATTRIBUTE_TAKES_IDENTIFIER_P rs6000_attribute_takes_identifier_p

#undef TARGET_ASM_ALIGNED_DI_OP
#define TARGET_ASM_ALIGNED_DI_OP DOUBLE_INT_ASM_OP

/* Default unaligned ops are only provided for ELF.  Find the ops needed
   for non-ELF systems.  */
#ifndef OBJECT_FORMAT_ELF
#if TARGET_XCOFF
/* For XCOFF.  rs6000_assemble_integer will handle unaligned DIs on
   64-bit targets.  */
#undef TARGET_ASM_UNALIGNED_HI_OP
#define TARGET_ASM_UNALIGNED_HI_OP "\t.vbyte\t2,"
#undef TARGET_ASM_UNALIGNED_SI_OP
#define TARGET_ASM_UNALIGNED_SI_OP "\t.vbyte\t4,"
#undef TARGET_ASM_UNALIGNED_DI_OP
#define TARGET_ASM_UNALIGNED_DI_OP "\t.vbyte\t8,"
#else
/* For Darwin.  */
#undef TARGET_ASM_UNALIGNED_HI_OP
#define TARGET_ASM_UNALIGNED_HI_OP "\t.short\t"
#undef TARGET_ASM_UNALIGNED_SI_OP
#define TARGET_ASM_UNALIGNED_SI_OP "\t.long\t"
#undef TARGET_ASM_UNALIGNED_DI_OP
#define TARGET_ASM_UNALIGNED_DI_OP "\t.quad\t"
#undef TARGET_ASM_ALIGNED_DI_OP
#define TARGET_ASM_ALIGNED_DI_OP "\t.quad\t"
#endif
#endif

/* This hook deals with fixups for relocatable code and DI-mode objects
   in 64-bit code.  */
#undef TARGET_ASM_INTEGER
#define TARGET_ASM_INTEGER rs6000_assemble_integer

#if defined (HAVE_GAS_HIDDEN) && !TARGET_MACHO
#undef TARGET_ASM_ASSEMBLE_VISIBILITY
#define TARGET_ASM_ASSEMBLE_VISIBILITY rs6000_assemble_visibility
#endif

#undef TARGET_SET_UP_BY_PROLOGUE
#define TARGET_SET_UP_BY_PROLOGUE rs6000_set_up_by_prologue

#undef TARGET_SHRINK_WRAP_GET_SEPARATE_COMPONENTS
#define TARGET_SHRINK_WRAP_GET_SEPARATE_COMPONENTS rs6000_get_separate_components
#undef TARGET_SHRINK_WRAP_COMPONENTS_FOR_BB
#define TARGET_SHRINK_WRAP_COMPONENTS_FOR_BB rs6000_components_for_bb
#undef TARGET_SHRINK_WRAP_DISQUALIFY_COMPONENTS
#define TARGET_SHRINK_WRAP_DISQUALIFY_COMPONENTS rs6000_disqualify_components
#undef TARGET_SHRINK_WRAP_EMIT_PROLOGUE_COMPONENTS
#define TARGET_SHRINK_WRAP_EMIT_PROLOGUE_COMPONENTS rs6000_emit_prologue_components
#undef TARGET_SHRINK_WRAP_EMIT_EPILOGUE_COMPONENTS
#define TARGET_SHRINK_WRAP_EMIT_EPILOGUE_COMPONENTS rs6000_emit_epilogue_components
#undef TARGET_SHRINK_WRAP_SET_HANDLED_COMPONENTS
#define TARGET_SHRINK_WRAP_SET_HANDLED_COMPONENTS rs6000_set_handled_components

#undef TARGET_EXTRA_LIVE_ON_ENTRY
#define TARGET_EXTRA_LIVE_ON_ENTRY rs6000_live_on_entry

#undef TARGET_INTERNAL_ARG_POINTER
#define TARGET_INTERNAL_ARG_POINTER rs6000_internal_arg_pointer

#undef TARGET_HAVE_TLS
#define TARGET_HAVE_TLS HAVE_AS_TLS

#undef TARGET_CANNOT_FORCE_CONST_MEM
#define TARGET_CANNOT_FORCE_CONST_MEM rs6000_cannot_force_const_mem

#undef TARGET_DELEGITIMIZE_ADDRESS
#define TARGET_DELEGITIMIZE_ADDRESS rs6000_delegitimize_address

#undef TARGET_CONST_NOT_OK_FOR_DEBUG_P
#define TARGET_CONST_NOT_OK_FOR_DEBUG_P rs6000_const_not_ok_for_debug_p

#undef TARGET_LEGITIMATE_COMBINED_INSN
#define TARGET_LEGITIMATE_COMBINED_INSN rs6000_legitimate_combined_insn

#undef TARGET_ASM_FUNCTION_PROLOGUE
#define TARGET_ASM_FUNCTION_PROLOGUE rs6000_output_function_prologue
#undef TARGET_ASM_FUNCTION_EPILOGUE
#define TARGET_ASM_FUNCTION_EPILOGUE rs6000_output_function_epilogue

#undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
#define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA rs6000_output_addr_const_extra

#undef TARGET_LEGITIMIZE_ADDRESS
#define TARGET_LEGITIMIZE_ADDRESS rs6000_legitimize_address

#undef TARGET_SCHED_VARIABLE_ISSUE
#define TARGET_SCHED_VARIABLE_ISSUE rs6000_variable_issue

#undef TARGET_SCHED_ISSUE_RATE
#define TARGET_SCHED_ISSUE_RATE rs6000_issue_rate
#undef TARGET_SCHED_ADJUST_COST
#define TARGET_SCHED_ADJUST_COST rs6000_adjust_cost
#undef TARGET_SCHED_ADJUST_PRIORITY
#define TARGET_SCHED_ADJUST_PRIORITY rs6000_adjust_priority
#undef TARGET_SCHED_IS_COSTLY_DEPENDENCE
#define TARGET_SCHED_IS_COSTLY_DEPENDENCE rs6000_is_costly_dependence
#undef TARGET_SCHED_INIT
#define TARGET_SCHED_INIT rs6000_sched_init
#undef TARGET_SCHED_FINISH
#define TARGET_SCHED_FINISH rs6000_sched_finish
#undef TARGET_SCHED_REORDER
#define TARGET_SCHED_REORDER rs6000_sched_reorder
#undef TARGET_SCHED_REORDER2
#define TARGET_SCHED_REORDER2 rs6000_sched_reorder2

#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD rs6000_use_sched_lookahead

#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD
#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD rs6000_use_sched_lookahead_guard

#undef TARGET_SCHED_ALLOC_SCHED_CONTEXT
#define TARGET_SCHED_ALLOC_SCHED_CONTEXT rs6000_alloc_sched_context
#undef TARGET_SCHED_INIT_SCHED_CONTEXT
#define TARGET_SCHED_INIT_SCHED_CONTEXT rs6000_init_sched_context
#undef TARGET_SCHED_SET_SCHED_CONTEXT
#define TARGET_SCHED_SET_SCHED_CONTEXT rs6000_set_sched_context
#undef TARGET_SCHED_FREE_SCHED_CONTEXT
#define TARGET_SCHED_FREE_SCHED_CONTEXT rs6000_free_sched_context

#undef TARGET_SCHED_CAN_SPECULATE_INSN
#define TARGET_SCHED_CAN_SPECULATE_INSN rs6000_sched_can_speculate_insn

#undef TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD
#define TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD rs6000_builtin_mask_for_load
#undef TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT
#define TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT \
  rs6000_builtin_support_vector_misalignment
#undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
#define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE rs6000_vector_alignment_reachable
#undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
#define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
  rs6000_builtin_vectorization_cost
#undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
#define TARGET_VECTORIZE_PREFERRED_SIMD_MODE \
1698 rs6000_preferred_simd_mode
1699 #undef TARGET_VECTORIZE_INIT_COST
1700 #define TARGET_VECTORIZE_INIT_COST rs6000_init_cost
1701 #undef TARGET_VECTORIZE_ADD_STMT_COST
1702 #define TARGET_VECTORIZE_ADD_STMT_COST rs6000_add_stmt_cost
1703 #undef TARGET_VECTORIZE_FINISH_COST
1704 #define TARGET_VECTORIZE_FINISH_COST rs6000_finish_cost
1705 #undef TARGET_VECTORIZE_DESTROY_COST_DATA
1706 #define TARGET_VECTORIZE_DESTROY_COST_DATA rs6000_destroy_cost_data
1708 #undef TARGET_INIT_BUILTINS
1709 #define TARGET_INIT_BUILTINS rs6000_init_builtins
1710 #undef TARGET_BUILTIN_DECL
1711 #define TARGET_BUILTIN_DECL rs6000_builtin_decl
1713 #undef TARGET_FOLD_BUILTIN
1714 #define TARGET_FOLD_BUILTIN rs6000_fold_builtin
1715 #undef TARGET_GIMPLE_FOLD_BUILTIN
1716 #define TARGET_GIMPLE_FOLD_BUILTIN rs6000_gimple_fold_builtin
1718 #undef TARGET_EXPAND_BUILTIN
1719 #define TARGET_EXPAND_BUILTIN rs6000_expand_builtin
1721 #undef TARGET_MANGLE_TYPE
1722 #define TARGET_MANGLE_TYPE rs6000_mangle_type
1724 #undef TARGET_INIT_LIBFUNCS
1725 #define TARGET_INIT_LIBFUNCS rs6000_init_libfuncs
1727 #if TARGET_MACHO
1728 #undef TARGET_BINDS_LOCAL_P
1729 #define TARGET_BINDS_LOCAL_P darwin_binds_local_p
1730 #endif
1732 #undef TARGET_MS_BITFIELD_LAYOUT_P
1733 #define TARGET_MS_BITFIELD_LAYOUT_P rs6000_ms_bitfield_layout_p
1735 #undef TARGET_ASM_OUTPUT_MI_THUNK
1736 #define TARGET_ASM_OUTPUT_MI_THUNK rs6000_output_mi_thunk
1738 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
1739 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK hook_bool_const_tree_hwi_hwi_const_tree_true
1741 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
1742 #define TARGET_FUNCTION_OK_FOR_SIBCALL rs6000_function_ok_for_sibcall
1744 #undef TARGET_REGISTER_MOVE_COST
1745 #define TARGET_REGISTER_MOVE_COST rs6000_register_move_cost
1746 #undef TARGET_MEMORY_MOVE_COST
1747 #define TARGET_MEMORY_MOVE_COST rs6000_memory_move_cost
1748 #undef TARGET_CANNOT_COPY_INSN_P
1749 #define TARGET_CANNOT_COPY_INSN_P rs6000_cannot_copy_insn_p
1750 #undef TARGET_RTX_COSTS
1751 #define TARGET_RTX_COSTS rs6000_rtx_costs
1752 #undef TARGET_ADDRESS_COST
1753 #define TARGET_ADDRESS_COST hook_int_rtx_mode_as_bool_0
1754 #undef TARGET_INSN_COST
1755 #define TARGET_INSN_COST rs6000_insn_cost
1757 #undef TARGET_INIT_DWARF_REG_SIZES_EXTRA
1758 #define TARGET_INIT_DWARF_REG_SIZES_EXTRA rs6000_init_dwarf_reg_sizes_extra
1760 #undef TARGET_PROMOTE_FUNCTION_MODE
1761 #define TARGET_PROMOTE_FUNCTION_MODE rs6000_promote_function_mode
1763 #undef TARGET_RETURN_IN_MEMORY
1764 #define TARGET_RETURN_IN_MEMORY rs6000_return_in_memory
1766 #undef TARGET_RETURN_IN_MSB
1767 #define TARGET_RETURN_IN_MSB rs6000_return_in_msb
1769 #undef TARGET_SETUP_INCOMING_VARARGS
1770 #define TARGET_SETUP_INCOMING_VARARGS setup_incoming_varargs
1772 /* Always strict argument naming on rs6000. */
1773 #undef TARGET_STRICT_ARGUMENT_NAMING
1774 #define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
1775 #undef TARGET_PRETEND_OUTGOING_VARARGS_NAMED
1776 #define TARGET_PRETEND_OUTGOING_VARARGS_NAMED hook_bool_CUMULATIVE_ARGS_true
1777 #undef TARGET_SPLIT_COMPLEX_ARG
1778 #define TARGET_SPLIT_COMPLEX_ARG hook_bool_const_tree_true
1779 #undef TARGET_MUST_PASS_IN_STACK
1780 #define TARGET_MUST_PASS_IN_STACK rs6000_must_pass_in_stack
1781 #undef TARGET_PASS_BY_REFERENCE
1782 #define TARGET_PASS_BY_REFERENCE rs6000_pass_by_reference
1783 #undef TARGET_ARG_PARTIAL_BYTES
1784 #define TARGET_ARG_PARTIAL_BYTES rs6000_arg_partial_bytes
1785 #undef TARGET_FUNCTION_ARG_ADVANCE
1786 #define TARGET_FUNCTION_ARG_ADVANCE rs6000_function_arg_advance
1787 #undef TARGET_FUNCTION_ARG
1788 #define TARGET_FUNCTION_ARG rs6000_function_arg
1789 #undef TARGET_FUNCTION_ARG_PADDING
1790 #define TARGET_FUNCTION_ARG_PADDING rs6000_function_arg_padding
1791 #undef TARGET_FUNCTION_ARG_BOUNDARY
1792 #define TARGET_FUNCTION_ARG_BOUNDARY rs6000_function_arg_boundary
1794 #undef TARGET_BUILD_BUILTIN_VA_LIST
1795 #define TARGET_BUILD_BUILTIN_VA_LIST rs6000_build_builtin_va_list
1797 #undef TARGET_EXPAND_BUILTIN_VA_START
1798 #define TARGET_EXPAND_BUILTIN_VA_START rs6000_va_start
1800 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
1801 #define TARGET_GIMPLIFY_VA_ARG_EXPR rs6000_gimplify_va_arg
1803 #undef TARGET_EH_RETURN_FILTER_MODE
1804 #define TARGET_EH_RETURN_FILTER_MODE rs6000_eh_return_filter_mode
1806 #undef TARGET_SCALAR_MODE_SUPPORTED_P
1807 #define TARGET_SCALAR_MODE_SUPPORTED_P rs6000_scalar_mode_supported_p
1809 #undef TARGET_VECTOR_MODE_SUPPORTED_P
1810 #define TARGET_VECTOR_MODE_SUPPORTED_P rs6000_vector_mode_supported_p
1812 #undef TARGET_FLOATN_MODE
1813 #define TARGET_FLOATN_MODE rs6000_floatn_mode
1815 #undef TARGET_INVALID_ARG_FOR_UNPROTOTYPED_FN
1816 #define TARGET_INVALID_ARG_FOR_UNPROTOTYPED_FN invalid_arg_for_unprototyped_fn
1818 #undef TARGET_ASM_LOOP_ALIGN_MAX_SKIP
1819 #define TARGET_ASM_LOOP_ALIGN_MAX_SKIP rs6000_loop_align_max_skip
1821 #undef TARGET_MD_ASM_ADJUST
1822 #define TARGET_MD_ASM_ADJUST rs6000_md_asm_adjust
1824 #undef TARGET_OPTION_OVERRIDE
1825 #define TARGET_OPTION_OVERRIDE rs6000_option_override
1827 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
1828 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
1829 rs6000_builtin_vectorized_function
1831 #undef TARGET_VECTORIZE_BUILTIN_MD_VECTORIZED_FUNCTION
1832 #define TARGET_VECTORIZE_BUILTIN_MD_VECTORIZED_FUNCTION \
1833 rs6000_builtin_md_vectorized_function
1835 #undef TARGET_STACK_PROTECT_GUARD
1836 #define TARGET_STACK_PROTECT_GUARD rs6000_init_stack_protect_guard
1838 #if !TARGET_MACHO
1839 #undef TARGET_STACK_PROTECT_FAIL
1840 #define TARGET_STACK_PROTECT_FAIL rs6000_stack_protect_fail
1841 #endif
1843 #ifdef HAVE_AS_TLS
1844 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
1845 #define TARGET_ASM_OUTPUT_DWARF_DTPREL rs6000_output_dwarf_dtprel
1846 #endif
1848 /* Use a 32-bit anchor range. This leads to sequences like:
1850 addis tmp,anchor,high
1851 add dest,tmp,low
1853 where tmp itself acts as an anchor, and can be shared between
1854 accesses to the same 64k page. */
1855 #undef TARGET_MIN_ANCHOR_OFFSET
1856 #define TARGET_MIN_ANCHOR_OFFSET -0x7fffffff - 1
1857 #undef TARGET_MAX_ANCHOR_OFFSET
1858 #define TARGET_MAX_ANCHOR_OFFSET 0x7fffffff
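/* A rough illustration with hypothetical offsets: accesses to ANCHOR +
   0x10000 and ANCHOR + 0x10008 can share one anchor setup:

       addis tmp,anchor,0x1
       ld    r9,0(tmp)
       ld    r10,8(tmp)

   so the addis is emitted once for both loads.  */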
1859 #undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
1860 #define TARGET_USE_BLOCKS_FOR_CONSTANT_P rs6000_use_blocks_for_constant_p
1861 #undef TARGET_USE_BLOCKS_FOR_DECL_P
1862 #define TARGET_USE_BLOCKS_FOR_DECL_P rs6000_use_blocks_for_decl_p
1864 #undef TARGET_BUILTIN_RECIPROCAL
1865 #define TARGET_BUILTIN_RECIPROCAL rs6000_builtin_reciprocal
1867 #undef TARGET_SECONDARY_RELOAD
1868 #define TARGET_SECONDARY_RELOAD rs6000_secondary_reload
1869 #undef TARGET_SECONDARY_MEMORY_NEEDED
1870 #define TARGET_SECONDARY_MEMORY_NEEDED rs6000_secondary_memory_needed
1871 #undef TARGET_SECONDARY_MEMORY_NEEDED_MODE
1872 #define TARGET_SECONDARY_MEMORY_NEEDED_MODE rs6000_secondary_memory_needed_mode
1874 #undef TARGET_LEGITIMATE_ADDRESS_P
1875 #define TARGET_LEGITIMATE_ADDRESS_P rs6000_legitimate_address_p
1877 #undef TARGET_MODE_DEPENDENT_ADDRESS_P
1878 #define TARGET_MODE_DEPENDENT_ADDRESS_P rs6000_mode_dependent_address_p
1880 #undef TARGET_COMPUTE_PRESSURE_CLASSES
1881 #define TARGET_COMPUTE_PRESSURE_CLASSES rs6000_compute_pressure_classes
1883 #undef TARGET_CAN_ELIMINATE
1884 #define TARGET_CAN_ELIMINATE rs6000_can_eliminate
1886 #undef TARGET_CONDITIONAL_REGISTER_USAGE
1887 #define TARGET_CONDITIONAL_REGISTER_USAGE rs6000_conditional_register_usage
1889 #undef TARGET_SCHED_REASSOCIATION_WIDTH
1890 #define TARGET_SCHED_REASSOCIATION_WIDTH rs6000_reassociation_width
1892 #undef TARGET_TRAMPOLINE_INIT
1893 #define TARGET_TRAMPOLINE_INIT rs6000_trampoline_init
1895 #undef TARGET_FUNCTION_VALUE
1896 #define TARGET_FUNCTION_VALUE rs6000_function_value
1898 #undef TARGET_OPTION_VALID_ATTRIBUTE_P
1899 #define TARGET_OPTION_VALID_ATTRIBUTE_P rs6000_valid_attribute_p
1901 #undef TARGET_OPTION_SAVE
1902 #define TARGET_OPTION_SAVE rs6000_function_specific_save
1904 #undef TARGET_OPTION_RESTORE
1905 #define TARGET_OPTION_RESTORE rs6000_function_specific_restore
1907 #undef TARGET_OPTION_PRINT
1908 #define TARGET_OPTION_PRINT rs6000_function_specific_print
1910 #undef TARGET_CAN_INLINE_P
1911 #define TARGET_CAN_INLINE_P rs6000_can_inline_p
1913 #undef TARGET_SET_CURRENT_FUNCTION
1914 #define TARGET_SET_CURRENT_FUNCTION rs6000_set_current_function
1916 #undef TARGET_LEGITIMATE_CONSTANT_P
1917 #define TARGET_LEGITIMATE_CONSTANT_P rs6000_legitimate_constant_p
1919 #undef TARGET_VECTORIZE_VEC_PERM_CONST
1920 #define TARGET_VECTORIZE_VEC_PERM_CONST rs6000_vectorize_vec_perm_const
1922 #undef TARGET_CAN_USE_DOLOOP_P
1923 #define TARGET_CAN_USE_DOLOOP_P can_use_doloop_if_innermost
1925 #undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
1926 #define TARGET_ATOMIC_ASSIGN_EXPAND_FENV rs6000_atomic_assign_expand_fenv
1928 #undef TARGET_LIBGCC_CMP_RETURN_MODE
1929 #define TARGET_LIBGCC_CMP_RETURN_MODE rs6000_abi_word_mode
1930 #undef TARGET_LIBGCC_SHIFT_COUNT_MODE
1931 #define TARGET_LIBGCC_SHIFT_COUNT_MODE rs6000_abi_word_mode
1932 #undef TARGET_UNWIND_WORD_MODE
1933 #define TARGET_UNWIND_WORD_MODE rs6000_abi_word_mode
1935 #undef TARGET_OFFLOAD_OPTIONS
1936 #define TARGET_OFFLOAD_OPTIONS rs6000_offload_options
1938 #undef TARGET_C_MODE_FOR_SUFFIX
1939 #define TARGET_C_MODE_FOR_SUFFIX rs6000_c_mode_for_suffix
1941 #undef TARGET_INVALID_BINARY_OP
1942 #define TARGET_INVALID_BINARY_OP rs6000_invalid_binary_op
1944 #undef TARGET_OPTAB_SUPPORTED_P
1945 #define TARGET_OPTAB_SUPPORTED_P rs6000_optab_supported_p
1947 #undef TARGET_CUSTOM_FUNCTION_DESCRIPTORS
1948 #define TARGET_CUSTOM_FUNCTION_DESCRIPTORS 1
1950 #undef TARGET_COMPARE_VERSION_PRIORITY
1951 #define TARGET_COMPARE_VERSION_PRIORITY rs6000_compare_version_priority
1953 #undef TARGET_GENERATE_VERSION_DISPATCHER_BODY
1954 #define TARGET_GENERATE_VERSION_DISPATCHER_BODY \
1955 rs6000_generate_version_dispatcher_body
1957 #undef TARGET_GET_FUNCTION_VERSIONS_DISPATCHER
1958 #define TARGET_GET_FUNCTION_VERSIONS_DISPATCHER \
1959 rs6000_get_function_versions_dispatcher
1961 #undef TARGET_OPTION_FUNCTION_VERSIONS
1962 #define TARGET_OPTION_FUNCTION_VERSIONS common_function_versions
1964 #undef TARGET_HARD_REGNO_NREGS
1965 #define TARGET_HARD_REGNO_NREGS rs6000_hard_regno_nregs_hook
1966 #undef TARGET_HARD_REGNO_MODE_OK
1967 #define TARGET_HARD_REGNO_MODE_OK rs6000_hard_regno_mode_ok
1969 #undef TARGET_MODES_TIEABLE_P
1970 #define TARGET_MODES_TIEABLE_P rs6000_modes_tieable_p
1972 #undef TARGET_HARD_REGNO_CALL_PART_CLOBBERED
1973 #define TARGET_HARD_REGNO_CALL_PART_CLOBBERED \
1974 rs6000_hard_regno_call_part_clobbered
1976 #undef TARGET_SLOW_UNALIGNED_ACCESS
1977 #define TARGET_SLOW_UNALIGNED_ACCESS rs6000_slow_unaligned_access
1979 #undef TARGET_CAN_CHANGE_MODE_CLASS
1980 #define TARGET_CAN_CHANGE_MODE_CLASS rs6000_can_change_mode_class
1982 #undef TARGET_CONSTANT_ALIGNMENT
1983 #define TARGET_CONSTANT_ALIGNMENT rs6000_constant_alignment
1985 #undef TARGET_STARTING_FRAME_OFFSET
1986 #define TARGET_STARTING_FRAME_OFFSET rs6000_starting_frame_offset
1988 #if TARGET_ELF && RS6000_WEAK
1989 #undef TARGET_ASM_GLOBALIZE_DECL_NAME
1990 #define TARGET_ASM_GLOBALIZE_DECL_NAME rs6000_globalize_decl_name
1991 #endif
1994 /* Processor table. */
1995 struct rs6000_ptt
1997 const char *const name; /* Canonical processor name. */
1998 const enum processor_type processor; /* Processor type enum value. */
1999 const HOST_WIDE_INT target_enable; /* Target flags to enable. */
2002 static struct rs6000_ptt const processor_target_table[] =
2004 #define RS6000_CPU(NAME, CPU, FLAGS) { NAME, CPU, FLAGS },
2005 #include "rs6000-cpus.def"
2006 #undef RS6000_CPU
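/* Illustrative expansion of the X-macro above: an entry in
   rs6000-cpus.def written as RS6000_CPU ("power9", PROCESSOR_POWER9,
   <mask flags>) becomes { "power9", PROCESSOR_POWER9, <mask flags> }
   in processor_target_table.  */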
2009 /* Look up a processor name for -mcpu=xxx and -mtune=xxx. Return -1 if the
2010 name is invalid. */
2012 static int
2013 rs6000_cpu_name_lookup (const char *name)
2015 size_t i;
2017 if (name != NULL)
2019 for (i = 0; i < ARRAY_SIZE (processor_target_table); i++)
2020 if (! strcmp (name, processor_target_table[i].name))
2021 return (int)i;
2024 return -1;
2028 /* Return number of consecutive hard regs needed starting at reg REGNO
2029 to hold something of mode MODE.
2030 This is ordinarily the length in words of a value of mode MODE
2031 but can be less for certain modes in special long registers.
2033 POWER and PowerPC GPRs hold 32 bits worth;
2034 PowerPC64 GPRs and FPRs hold 64 bits worth. */
2036 static int
2037 rs6000_hard_regno_nregs_internal (int regno, machine_mode mode)
2039 unsigned HOST_WIDE_INT reg_size;
2041 /* 128-bit floating point usually takes 2 registers, unless it is IEEE
2042 128-bit floating point that can go in vector registers, which has VSX
2043 memory addressing. */
2044 if (FP_REGNO_P (regno))
2045 reg_size = (VECTOR_MEM_VSX_P (mode) || FLOAT128_VECTOR_P (mode)
2046 ? UNITS_PER_VSX_WORD
2047 : UNITS_PER_FP_WORD);
2049 else if (ALTIVEC_REGNO_P (regno))
2050 reg_size = UNITS_PER_ALTIVEC_WORD;
2052 else
2053 reg_size = UNITS_PER_WORD;
2055 return (GET_MODE_SIZE (mode) + reg_size - 1) / reg_size;
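/* Illustrative arithmetic for the computation above: IFmode, the
   16-byte IBM extended double type, needs (16 + 8 - 1) / 8 = 2
   traditional FPRs, while KFmode, the 16-byte IEEE 128-bit type with
   VSX addressing, fits in (16 + 16 - 1) / 16 = 1 VSX register.  */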
2058 /* Value is 1 if hard register REGNO can hold a value of machine-mode
2059 MODE. */
2060 static int
2061 rs6000_hard_regno_mode_ok_uncached (int regno, machine_mode mode)
2063 int last_regno = regno + rs6000_hard_regno_nregs[mode][regno] - 1;
2065 if (COMPLEX_MODE_P (mode))
2066 mode = GET_MODE_INNER (mode);
2068 /* PTImode can only go in GPRs. Quad word memory operations require even/odd
2069 register pairs, and PTImode is used where we need to deal with quad
2070 word memory operations. Don't allow quad words in the argument or frame
2071 pointer registers, just registers 0..31. */
2072 if (mode == PTImode)
2073 return (IN_RANGE (regno, FIRST_GPR_REGNO, LAST_GPR_REGNO)
2074 && IN_RANGE (last_regno, FIRST_GPR_REGNO, LAST_GPR_REGNO)
2075 && ((regno & 1) == 0));
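/* Illustrative register numbers for the test above: a PTImode value may
   start at r10 (even, with r11 completing the pair) but not at r9,
   since an odd starting register fails the even/odd pairing that quad
   word load/store instructions require.  */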
2077 /* VSX registers that overlap the FPR registers are larger than for non-VSX
2078 implementations. Don't allow an item to be split between a FP register
2079 and an Altivec register. Allow TImode in all VSX registers if the user
2080 asked for it. */
2081 if (TARGET_VSX && VSX_REGNO_P (regno)
2082 && (VECTOR_MEM_VSX_P (mode)
2083 || FLOAT128_VECTOR_P (mode)
2084 || reg_addr[mode].scalar_in_vmx_p
2085 || mode == TImode
2086 || (TARGET_VADDUQM && mode == V1TImode)))
2088 if (FP_REGNO_P (regno))
2089 return FP_REGNO_P (last_regno);
2091 if (ALTIVEC_REGNO_P (regno))
2093 if (GET_MODE_SIZE (mode) != 16 && !reg_addr[mode].scalar_in_vmx_p)
2094 return 0;
2096 return ALTIVEC_REGNO_P (last_regno);
2100 /* The GPRs can hold any mode, but values bigger than one register
2101 cannot go past R31. */
2102 if (INT_REGNO_P (regno))
2103 return INT_REGNO_P (last_regno);
2105 /* The float registers (except for VSX vector modes) can only hold floating
2106 modes and DImode. */
2107 if (FP_REGNO_P (regno))
2109 if (FLOAT128_VECTOR_P (mode))
2110 return false;
2112 if (SCALAR_FLOAT_MODE_P (mode)
2113 && (mode != TDmode || (regno % 2) == 0)
2114 && FP_REGNO_P (last_regno))
2115 return 1;
2117 if (GET_MODE_CLASS (mode) == MODE_INT)
2119 if (GET_MODE_SIZE (mode) == UNITS_PER_FP_WORD)
2120 return 1;
2122 if (TARGET_P8_VECTOR && (mode == SImode))
2123 return 1;
2125 if (TARGET_P9_VECTOR && (mode == QImode || mode == HImode))
2126 return 1;
2129 return 0;
2132 /* The CR register can only hold CC modes. */
2133 if (CR_REGNO_P (regno))
2134 return GET_MODE_CLASS (mode) == MODE_CC;
2136 if (CA_REGNO_P (regno))
2137 return mode == Pmode || mode == SImode;
2139 /* AltiVec modes can go only in AltiVec registers. */
2140 if (ALTIVEC_REGNO_P (regno))
2141 return (VECTOR_MEM_ALTIVEC_OR_VSX_P (mode)
2142 || mode == V1TImode);
2144 /* We cannot put non-VSX TImode or PTImode anywhere except general registers,
2145 and it must be able to fit within the register set. */
2147 return GET_MODE_SIZE (mode) <= UNITS_PER_WORD;
2150 /* Implement TARGET_HARD_REGNO_NREGS. */
2152 static unsigned int
2153 rs6000_hard_regno_nregs_hook (unsigned int regno, machine_mode mode)
2155 return rs6000_hard_regno_nregs[mode][regno];
2158 /* Implement TARGET_HARD_REGNO_MODE_OK. */
2160 static bool
2161 rs6000_hard_regno_mode_ok (unsigned int regno, machine_mode mode)
2163 return rs6000_hard_regno_mode_ok_p[mode][regno];
2166 /* Implement TARGET_MODES_TIEABLE_P.
2168 PTImode cannot tie with other modes because PTImode is restricted to even
2169 GPR registers, and TImode can go in any GPR as well as VSX registers (PR
2170 57744).
2172 Altivec/VSX vector tests were moved ahead of scalar float mode, so that IEEE
2173 128-bit floating point on VSX systems ties with other vectors. */
2175 static bool
2176 rs6000_modes_tieable_p (machine_mode mode1, machine_mode mode2)
2178 if (mode1 == PTImode)
2179 return mode2 == PTImode;
2180 if (mode2 == PTImode)
2181 return false;
2183 if (ALTIVEC_OR_VSX_VECTOR_MODE (mode1))
2184 return ALTIVEC_OR_VSX_VECTOR_MODE (mode2);
2185 if (ALTIVEC_OR_VSX_VECTOR_MODE (mode2))
2186 return false;
2188 if (SCALAR_FLOAT_MODE_P (mode1))
2189 return SCALAR_FLOAT_MODE_P (mode2);
2190 if (SCALAR_FLOAT_MODE_P (mode2))
2191 return false;
2193 if (GET_MODE_CLASS (mode1) == MODE_CC)
2194 return GET_MODE_CLASS (mode2) == MODE_CC;
2195 if (GET_MODE_CLASS (mode2) == MODE_CC)
2196 return false;
2198 return true;
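/* Some illustrative consequences of the ordering above: V2DFmode and
   V16QImode tie (both are Altivec/VSX vector modes); on VSX systems
   IEEE 128-bit scalars such as KFmode hit the vector test first and so
   tie with the vectors rather than the scalar floats; DFmode and
   SFmode tie as scalar float modes; and TImode never ties with
   PTImode.  */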
2201 /* Implement TARGET_HARD_REGNO_CALL_PART_CLOBBERED. */
2203 static bool
2204 rs6000_hard_regno_call_part_clobbered (unsigned int regno, machine_mode mode)
2206 if (TARGET_32BIT
2207 && TARGET_POWERPC64
2208 && GET_MODE_SIZE (mode) > 4
2209 && INT_REGNO_P (regno))
2210 return true;
2212 if (TARGET_VSX
2213 && FP_REGNO_P (regno)
2214 && GET_MODE_SIZE (mode) > 8
2215 && !FLOAT128_2REG_P (mode))
2216 return true;
2218 return false;
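/* Illustrative cases (ABI reasoning is an inference, not stated in the
   source): with -m32 -mpowerpc64, a DImode value occupies a single
   64-bit GPR but the 32-bit ABI only preserves the low 32 bits across
   calls, so the register is partially clobbered.  Similarly, under VSX
   a 16-byte value in a traditional FPR only has its 64-bit FPR portion
   preserved by the FP save/restore conventions.  */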
2221 /* Print interesting facts about registers. */
2222 static void
2223 rs6000_debug_reg_print (int first_regno, int last_regno, const char *reg_name)
2225 int r, m;
2227 for (r = first_regno; r <= last_regno; ++r)
2229 const char *comma = "";
2230 int len;
2232 if (first_regno == last_regno)
2233 fprintf (stderr, "%s:\t", reg_name);
2234 else
2235 fprintf (stderr, "%s%d:\t", reg_name, r - first_regno);
2237 len = 8;
2238 for (m = 0; m < NUM_MACHINE_MODES; ++m)
2239 if (rs6000_hard_regno_mode_ok_p[m][r] && rs6000_hard_regno_nregs[m][r])
2241 if (len > 70)
2243 fprintf (stderr, ",\n\t");
2244 len = 8;
2245 comma = "";
2248 if (rs6000_hard_regno_nregs[m][r] > 1)
2249 len += fprintf (stderr, "%s%s/%d", comma, GET_MODE_NAME (m),
2250 rs6000_hard_regno_nregs[m][r]);
2251 else
2252 len += fprintf (stderr, "%s%s", comma, GET_MODE_NAME (m));
2254 comma = ", ";
2257 if (call_used_regs[r])
2259 if (len > 70)
2261 fprintf (stderr, ",\n\t");
2262 len = 8;
2263 comma = "";
2266 len += fprintf (stderr, "%s%s", comma, "call-used");
2267 comma = ", ";
2270 if (fixed_regs[r])
2272 if (len > 70)
2274 fprintf (stderr, ",\n\t");
2275 len = 8;
2276 comma = "";
2279 len += fprintf (stderr, "%s%s", comma, "fixed");
2280 comma = ", ";
2283 if (len > 70)
2285 fprintf (stderr, ",\n\t");
2286 comma = "";
2289 len += fprintf (stderr, "%sreg-class = %s", comma,
2290 reg_class_names[(int)rs6000_regno_regclass[r]]);
2291 comma = ", ";
2293 if (len > 70)
2295 fprintf (stderr, ",\n\t");
2296 comma = "";
2299 fprintf (stderr, "%sregno = %d\n", comma, r);
2303 static const char *
2304 rs6000_debug_vector_unit (enum rs6000_vector v)
2306 const char *ret;
2308 switch (v)
2310 case VECTOR_NONE: ret = "none"; break;
2311 case VECTOR_ALTIVEC: ret = "altivec"; break;
2312 case VECTOR_VSX: ret = "vsx"; break;
2313 case VECTOR_P8_VECTOR: ret = "p8_vector"; break;
2314 case VECTOR_OTHER: ret = "other"; break;
2315 default: ret = "unknown"; break;
2318 return ret;
2321 /* Inner function printing just the address mask for a particular reload
2322 register class. */
2323 DEBUG_FUNCTION char *
2324 rs6000_debug_addr_mask (addr_mask_type mask, bool keep_spaces)
2326 static char ret[8];
2327 char *p = ret;
2329 if ((mask & RELOAD_REG_VALID) != 0)
2330 *p++ = 'v';
2331 else if (keep_spaces)
2332 *p++ = ' ';
2334 if ((mask & RELOAD_REG_MULTIPLE) != 0)
2335 *p++ = 'm';
2336 else if (keep_spaces)
2337 *p++ = ' ';
2339 if ((mask & RELOAD_REG_INDEXED) != 0)
2340 *p++ = 'i';
2341 else if (keep_spaces)
2342 *p++ = ' ';
2344 if ((mask & RELOAD_REG_QUAD_OFFSET) != 0)
2345 *p++ = 'O';
2346 else if ((mask & RELOAD_REG_OFFSET) != 0)
2347 *p++ = 'o';
2348 else if (keep_spaces)
2349 *p++ = ' ';
2351 if ((mask & RELOAD_REG_PRE_INCDEC) != 0)
2352 *p++ = '+';
2353 else if (keep_spaces)
2354 *p++ = ' ';
2356 if ((mask & RELOAD_REG_PRE_MODIFY) != 0)
2357 *p++ = '+';
2358 else if (keep_spaces)
2359 *p++ = ' ';
2361 if ((mask & RELOAD_REG_AND_M16) != 0)
2362 *p++ = '&';
2363 else if (keep_spaces)
2364 *p++ = ' ';
2366 *p = '\0';
2368 return ret;
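/* An illustrative reading of the output: with keep_spaces true, a mask
   with RELOAD_REG_VALID, RELOAD_REG_INDEXED, RELOAD_REG_OFFSET and
   RELOAD_REG_PRE_INCDEC prints as "v io+  ", each capability keeping
   its own column so masks line up across register classes.  */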
2371 /* Print the address masks in a human readable fashion. */
2372 DEBUG_FUNCTION void
2373 rs6000_debug_print_mode (ssize_t m)
2375 ssize_t rc;
2376 int spaces = 0;
2377 bool fuse_extra_p;
2379 fprintf (stderr, "Mode: %-5s", GET_MODE_NAME (m));
2380 for (rc = 0; rc < N_RELOAD_REG; rc++)
2381 fprintf (stderr, " %s: %s", reload_reg_map[rc].name,
2382 rs6000_debug_addr_mask (reg_addr[m].addr_mask[rc], true));
2384 if ((reg_addr[m].reload_store != CODE_FOR_nothing)
2385 || (reg_addr[m].reload_load != CODE_FOR_nothing))
2386 fprintf (stderr, " Reload=%c%c",
2387 (reg_addr[m].reload_store != CODE_FOR_nothing) ? 's' : '*',
2388 (reg_addr[m].reload_load != CODE_FOR_nothing) ? 'l' : '*');
2389 else
2390 spaces += sizeof (" Reload=sl") - 1;
2392 if (reg_addr[m].scalar_in_vmx_p)
2394 fprintf (stderr, "%*s Upper=y", spaces, "");
2395 spaces = 0;
2397 else
2398 spaces += sizeof (" Upper=y") - 1;
2400 fuse_extra_p = ((reg_addr[m].fusion_gpr_ld != CODE_FOR_nothing)
2401 || reg_addr[m].fused_toc);
2402 if (!fuse_extra_p)
2404 for (rc = 0; rc < N_RELOAD_REG; rc++)
2406 if (rc != RELOAD_REG_ANY)
2408 if (reg_addr[m].fusion_addi_ld[rc] != CODE_FOR_nothing
2410 || reg_addr[m].fusion_addi_st[rc] != CODE_FOR_nothing
2411 || reg_addr[m].fusion_addis_ld[rc] != CODE_FOR_nothing
2412 || reg_addr[m].fusion_addis_st[rc] != CODE_FOR_nothing)
2414 fuse_extra_p = true;
2415 break;
2421 if (fuse_extra_p)
2423 fprintf (stderr, "%*s Fuse:", spaces, "");
2424 spaces = 0;
2426 for (rc = 0; rc < N_RELOAD_REG; rc++)
2428 if (rc != RELOAD_REG_ANY)
2430 char load, store;
2432 if (reg_addr[m].fusion_addis_ld[rc] != CODE_FOR_nothing)
2433 load = 'l';
2434 else if (reg_addr[m].fusion_addi_ld[rc] != CODE_FOR_nothing)
2435 load = 'L';
2436 else
2437 load = '-';
2439 if (reg_addr[m].fusion_addis_st[rc] != CODE_FOR_nothing)
2440 store = 's';
2441 else if (reg_addr[m].fusion_addi_st[rc] != CODE_FOR_nothing)
2442 store = 'S';
2443 else
2444 store = '-';
2446 if (load == '-' && store == '-')
2447 spaces += 5;
2448 else
2450 fprintf (stderr, "%*s%c=%c%c", (spaces + 1), "",
2451 reload_reg_map[rc].name[0], load, store);
2452 spaces = 0;
2457 if (reg_addr[m].fusion_gpr_ld != CODE_FOR_nothing)
2459 fprintf (stderr, "%*sP8gpr", (spaces + 1), "");
2460 spaces = 0;
2462 else
2463 spaces += sizeof (" P8gpr") - 1;
2465 if (reg_addr[m].fused_toc)
2467 fprintf (stderr, "%*sToc", (spaces + 1), "");
2468 spaces = 0;
2470 else
2471 spaces += sizeof (" Toc") - 1;
2473 else
2474 spaces += sizeof (" Fuse: G=ls F=ls v=ls P8gpr Toc") - 1;
2476 if (rs6000_vector_unit[m] != VECTOR_NONE
2477 || rs6000_vector_mem[m] != VECTOR_NONE)
2479 fprintf (stderr, "%*s vector: arith=%-10s mem=%s",
2480 spaces, "",
2481 rs6000_debug_vector_unit (rs6000_vector_unit[m]),
2482 rs6000_debug_vector_unit (rs6000_vector_mem[m]));
2485 fputs ("\n", stderr);
2488 #define DEBUG_FMT_ID "%-32s= "
2489 #define DEBUG_FMT_D DEBUG_FMT_ID "%d\n"
2490 #define DEBUG_FMT_WX DEBUG_FMT_ID "%#.12" HOST_WIDE_INT_PRINT "x: "
2491 #define DEBUG_FMT_S DEBUG_FMT_ID "%s\n"
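/* Illustrative use of the formats above:

       fprintf (stderr, DEBUG_FMT_S, "abi", "aix");

   prints the label left-justified in a 32-column field followed by
   "= aix", so the -mdebug=reg report lines up in two columns.  */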
2493 /* Print various interesting information with -mdebug=reg. */
2494 static void
2495 rs6000_debug_reg_global (void)
2497 static const char *const tf[2] = { "false", "true" };
2498 const char *nl = (const char *)0;
2499 int m;
2500 size_t m1, m2, v;
2501 char costly_num[20];
2502 char nop_num[20];
2503 char flags_buffer[40];
2504 const char *costly_str;
2505 const char *nop_str;
2506 const char *trace_str;
2507 const char *abi_str;
2508 const char *cmodel_str;
2509 struct cl_target_option cl_opts;
2511 /* Modes we want tieable information on. */
2512 static const machine_mode print_tieable_modes[] = {
2513 QImode,
2514 HImode,
2515 SImode,
2516 DImode,
2517 TImode,
2518 PTImode,
2519 SFmode,
2520 DFmode,
2521 TFmode,
2522 IFmode,
2523 KFmode,
2524 SDmode,
2525 DDmode,
2526 TDmode,
2527 V16QImode,
2528 V8HImode,
2529 V4SImode,
2530 V2DImode,
2531 V1TImode,
2532 V32QImode,
2533 V16HImode,
2534 V8SImode,
2535 V4DImode,
2536 V2TImode,
2537 V4SFmode,
2538 V2DFmode,
2539 V8SFmode,
2540 V4DFmode,
2541 CCmode,
2542 CCUNSmode,
2543 CCEQmode,
2546 /* Virtual regs we are interested in. */
2547 static const struct {
2548 int regno; /* register number. */
2549 const char *name; /* register name. */
2550 } virtual_regs[] = {
2551 { STACK_POINTER_REGNUM, "stack pointer:" },
2552 { TOC_REGNUM, "toc: " },
2553 { STATIC_CHAIN_REGNUM, "static chain: " },
2554 { RS6000_PIC_OFFSET_TABLE_REGNUM, "pic offset: " },
2555 { HARD_FRAME_POINTER_REGNUM, "hard frame: " },
2556 { ARG_POINTER_REGNUM, "arg pointer: " },
2557 { FRAME_POINTER_REGNUM, "frame pointer:" },
2558 { FIRST_PSEUDO_REGISTER, "first pseudo: " },
2559 { FIRST_VIRTUAL_REGISTER, "first virtual:" },
2560 { VIRTUAL_INCOMING_ARGS_REGNUM, "incoming_args:" },
2561 { VIRTUAL_STACK_VARS_REGNUM, "stack_vars: " },
2562 { VIRTUAL_STACK_DYNAMIC_REGNUM, "stack_dynamic:" },
2563 { VIRTUAL_OUTGOING_ARGS_REGNUM, "outgoing_args:" },
2564 { VIRTUAL_CFA_REGNUM, "cfa (frame): " },
2565 { VIRTUAL_PREFERRED_STACK_BOUNDARY_REGNUM, "stack boundary:" },
2566 { LAST_VIRTUAL_REGISTER, "last virtual: " },
2569 fputs ("\nHard register information:\n", stderr);
2570 rs6000_debug_reg_print (FIRST_GPR_REGNO, LAST_GPR_REGNO, "gr");
2571 rs6000_debug_reg_print (FIRST_FPR_REGNO, LAST_FPR_REGNO, "fp");
2572 rs6000_debug_reg_print (FIRST_ALTIVEC_REGNO,
2573 LAST_ALTIVEC_REGNO,
2574 "vs");
2575 rs6000_debug_reg_print (LR_REGNO, LR_REGNO, "lr");
2576 rs6000_debug_reg_print (CTR_REGNO, CTR_REGNO, "ctr");
2577 rs6000_debug_reg_print (CR0_REGNO, CR7_REGNO, "cr");
2578 rs6000_debug_reg_print (CA_REGNO, CA_REGNO, "ca");
2579 rs6000_debug_reg_print (VRSAVE_REGNO, VRSAVE_REGNO, "vrsave");
2580 rs6000_debug_reg_print (VSCR_REGNO, VSCR_REGNO, "vscr");
2582 fputs ("\nVirtual/stack/frame registers:\n", stderr);
2583 for (v = 0; v < ARRAY_SIZE (virtual_regs); v++)
2584 fprintf (stderr, "%s regno = %3d\n", virtual_regs[v].name, virtual_regs[v].regno);
2586 fprintf (stderr,
2587 "\n"
2588 "d reg_class = %s\n"
2589 "f reg_class = %s\n"
2590 "v reg_class = %s\n"
2591 "wa reg_class = %s\n"
2592 "wb reg_class = %s\n"
2593 "wd reg_class = %s\n"
2594 "we reg_class = %s\n"
2595 "wf reg_class = %s\n"
2596 "wg reg_class = %s\n"
2597 "wh reg_class = %s\n"
2598 "wi reg_class = %s\n"
2599 "wj reg_class = %s\n"
2600 "wk reg_class = %s\n"
2601 "wl reg_class = %s\n"
2602 "wm reg_class = %s\n"
2603 "wo reg_class = %s\n"
2604 "wp reg_class = %s\n"
2605 "wq reg_class = %s\n"
2606 "wr reg_class = %s\n"
2607 "ws reg_class = %s\n"
2608 "wt reg_class = %s\n"
2609 "wu reg_class = %s\n"
2610 "wv reg_class = %s\n"
2611 "ww reg_class = %s\n"
2612 "wx reg_class = %s\n"
2613 "wy reg_class = %s\n"
2614 "wz reg_class = %s\n"
2615 "wA reg_class = %s\n"
2616 "wH reg_class = %s\n"
2617 "wI reg_class = %s\n"
2618 "wJ reg_class = %s\n"
2619 "wK reg_class = %s\n"
2620 "\n",
2621 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_d]],
2622 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_f]],
2623 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_v]],
2624 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wa]],
2625 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wb]],
2626 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wd]],
2627 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_we]],
2628 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wf]],
2629 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wg]],
2630 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wh]],
2631 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wi]],
2632 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wj]],
2633 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wk]],
2634 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wl]],
2635 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wm]],
2636 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wo]],
2637 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wp]],
2638 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wq]],
2639 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wr]],
2640 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_ws]],
2641 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wt]],
2642 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wu]],
2643 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wv]],
2644 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_ww]],
2645 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wx]],
2646 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wy]],
2647 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wz]],
2648 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wA]],
2649 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wH]],
2650 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wI]],
2651 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wJ]],
2652 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wK]]);
2654 nl = "\n";
2655 for (m = 0; m < NUM_MACHINE_MODES; ++m)
2656 rs6000_debug_print_mode (m);
2658 fputs ("\n", stderr);
2660 for (m1 = 0; m1 < ARRAY_SIZE (print_tieable_modes); m1++)
2662 machine_mode mode1 = print_tieable_modes[m1];
2663 bool first_time = true;
2665 nl = (const char *)0;
2666 for (m2 = 0; m2 < ARRAY_SIZE (print_tieable_modes); m2++)
2668 machine_mode mode2 = print_tieable_modes[m2];
2669 if (mode1 != mode2 && rs6000_modes_tieable_p (mode1, mode2))
2671 if (first_time)
2673 fprintf (stderr, "Tieable modes %s:", GET_MODE_NAME (mode1));
2674 nl = "\n";
2675 first_time = false;
2678 fprintf (stderr, " %s", GET_MODE_NAME (mode2));
2682 if (!first_time)
2683 fputs ("\n", stderr);
2686 if (nl)
2687 fputs (nl, stderr);
2689 if (rs6000_recip_control)
2691 fprintf (stderr, "\nReciprocal mask = 0x%x\n", rs6000_recip_control);
2693 for (m = 0; m < NUM_MACHINE_MODES; ++m)
2694 if (rs6000_recip_bits[m])
2696 fprintf (stderr,
2697 "Reciprocal estimate mode: %-5s divide: %s rsqrt: %s\n",
2698 GET_MODE_NAME (m),
2699 (RS6000_RECIP_AUTO_RE_P (m)
2700 ? "auto"
2701 : (RS6000_RECIP_HAVE_RE_P (m) ? "have" : "none")),
2702 (RS6000_RECIP_AUTO_RSQRTE_P (m)
2703 ? "auto"
2704 : (RS6000_RECIP_HAVE_RSQRTE_P (m) ? "have" : "none")));
2707 fputs ("\n", stderr);
2710 if (rs6000_cpu_index >= 0)
2712 const char *name = processor_target_table[rs6000_cpu_index].name;
2713 HOST_WIDE_INT flags
2714 = processor_target_table[rs6000_cpu_index].target_enable;
2716 sprintf (flags_buffer, "-mcpu=%s flags", name);
2717 rs6000_print_isa_options (stderr, 0, flags_buffer, flags);
2719 else
2720 fprintf (stderr, DEBUG_FMT_S, "cpu", "<none>");
2722 if (rs6000_tune_index >= 0)
2724 const char *name = processor_target_table[rs6000_tune_index].name;
2725 HOST_WIDE_INT flags
2726 = processor_target_table[rs6000_tune_index].target_enable;
2728 sprintf (flags_buffer, "-mtune=%s flags", name);
2729 rs6000_print_isa_options (stderr, 0, flags_buffer, flags);
2731 else
2732 fprintf (stderr, DEBUG_FMT_S, "tune", "<none>");
2734 cl_target_option_save (&cl_opts, &global_options);
2735 rs6000_print_isa_options (stderr, 0, "rs6000_isa_flags",
2736 rs6000_isa_flags);
2738 rs6000_print_isa_options (stderr, 0, "rs6000_isa_flags_explicit",
2739 rs6000_isa_flags_explicit);
2741 rs6000_print_builtin_options (stderr, 0, "rs6000_builtin_mask",
2742 rs6000_builtin_mask);
2744 rs6000_print_isa_options (stderr, 0, "TARGET_DEFAULT", TARGET_DEFAULT);
2746 fprintf (stderr, DEBUG_FMT_S, "--with-cpu default",
2747 OPTION_TARGET_CPU_DEFAULT ? OPTION_TARGET_CPU_DEFAULT : "<none>");
2749 switch (rs6000_sched_costly_dep)
2751 case max_dep_latency:
2752 costly_str = "max_dep_latency";
2753 break;
2755 case no_dep_costly:
2756 costly_str = "no_dep_costly";
2757 break;
2759 case all_deps_costly:
2760 costly_str = "all_deps_costly";
2761 break;
2763 case true_store_to_load_dep_costly:
2764 costly_str = "true_store_to_load_dep_costly";
2765 break;
2767 case store_to_load_dep_costly:
2768 costly_str = "store_to_load_dep_costly";
2769 break;
2771 default:
2772 costly_str = costly_num;
2773 sprintf (costly_num, "%d", (int)rs6000_sched_costly_dep);
2774 break;
2777 fprintf (stderr, DEBUG_FMT_S, "sched_costly_dep", costly_str);
2779 switch (rs6000_sched_insert_nops)
2781 case sched_finish_regroup_exact:
2782 nop_str = "sched_finish_regroup_exact";
2783 break;
2785 case sched_finish_pad_groups:
2786 nop_str = "sched_finish_pad_groups";
2787 break;
2789 case sched_finish_none:
2790 nop_str = "sched_finish_none";
2791 break;
2793 default:
2794 nop_str = nop_num;
2795 sprintf (nop_num, "%d", (int)rs6000_sched_insert_nops);
2796 break;
2799 fprintf (stderr, DEBUG_FMT_S, "sched_insert_nops", nop_str);
2801 switch (rs6000_sdata)
2803 default:
2804 case SDATA_NONE:
2805 break;
2807 case SDATA_DATA:
2808 fprintf (stderr, DEBUG_FMT_S, "sdata", "data");
2809 break;
2811 case SDATA_SYSV:
2812 fprintf (stderr, DEBUG_FMT_S, "sdata", "sysv");
2813 break;
2815 case SDATA_EABI:
2816 fprintf (stderr, DEBUG_FMT_S, "sdata", "eabi");
2817 break;
2821 switch (rs6000_traceback)
2823 case traceback_default: trace_str = "default"; break;
2824 case traceback_none: trace_str = "none"; break;
2825 case traceback_part: trace_str = "part"; break;
2826 case traceback_full: trace_str = "full"; break;
2827 default: trace_str = "unknown"; break;
2830 fprintf (stderr, DEBUG_FMT_S, "traceback", trace_str);
2832 switch (rs6000_current_cmodel)
2834 case CMODEL_SMALL: cmodel_str = "small"; break;
2835 case CMODEL_MEDIUM: cmodel_str = "medium"; break;
2836 case CMODEL_LARGE: cmodel_str = "large"; break;
2837 default: cmodel_str = "unknown"; break;
2840 fprintf (stderr, DEBUG_FMT_S, "cmodel", cmodel_str);
2842 switch (rs6000_current_abi)
2844 case ABI_NONE: abi_str = "none"; break;
2845 case ABI_AIX: abi_str = "aix"; break;
2846 case ABI_ELFv2: abi_str = "ELFv2"; break;
2847 case ABI_V4: abi_str = "V4"; break;
2848 case ABI_DARWIN: abi_str = "darwin"; break;
2849 default: abi_str = "unknown"; break;
2852 fprintf (stderr, DEBUG_FMT_S, "abi", abi_str);
2854 if (rs6000_altivec_abi)
2855 fprintf (stderr, DEBUG_FMT_S, "altivec_abi", "true");
2857 if (rs6000_darwin64_abi)
2858 fprintf (stderr, DEBUG_FMT_S, "darwin64_abi", "true");
2860 fprintf (stderr, DEBUG_FMT_S, "soft_float",
2861 (TARGET_SOFT_FLOAT ? "true" : "false"));
2863 if (TARGET_LINK_STACK)
2864 fprintf (stderr, DEBUG_FMT_S, "link_stack", "true");
2866 if (TARGET_P8_FUSION)
2868 char options[80];
2870 strcpy (options, (TARGET_P9_FUSION) ? "power9" : "power8");
2871 if (TARGET_TOC_FUSION)
2872 strcat (options, ", toc");
2874 if (TARGET_P8_FUSION_SIGN)
2875 strcat (options, ", sign");
2877 fprintf (stderr, DEBUG_FMT_S, "fusion", options);
2880 fprintf (stderr, DEBUG_FMT_S, "plt-format",
2881 TARGET_SECURE_PLT ? "secure" : "bss");
2882 fprintf (stderr, DEBUG_FMT_S, "struct-return",
2883 aix_struct_return ? "aix" : "sysv");
2884 fprintf (stderr, DEBUG_FMT_S, "always_hint", tf[!!rs6000_always_hint]);
2885 fprintf (stderr, DEBUG_FMT_S, "sched_groups", tf[!!rs6000_sched_groups]);
2886 fprintf (stderr, DEBUG_FMT_S, "align_branch",
2887 tf[!!rs6000_align_branch_targets]);
2888 fprintf (stderr, DEBUG_FMT_D, "tls_size", rs6000_tls_size);
2889 fprintf (stderr, DEBUG_FMT_D, "long_double_size",
2890 rs6000_long_double_type_size);
2891 if (rs6000_long_double_type_size == 128)
2893 fprintf (stderr, DEBUG_FMT_S, "long double type",
2894 TARGET_IEEEQUAD ? "IEEE" : "IBM");
2895 fprintf (stderr, DEBUG_FMT_S, "default long double type",
2896 TARGET_IEEEQUAD_DEFAULT ? "IEEE" : "IBM");
2898 fprintf (stderr, DEBUG_FMT_D, "sched_restricted_insns_priority",
2899 (int)rs6000_sched_restricted_insns_priority);
2900 fprintf (stderr, DEBUG_FMT_D, "Number of standard builtins",
2901 (int)END_BUILTINS);
2902 fprintf (stderr, DEBUG_FMT_D, "Number of rs6000 builtins",
2903 (int)RS6000_BUILTIN_COUNT);
2905 fprintf (stderr, DEBUG_FMT_D, "Enable float128 on VSX",
2906 (int)TARGET_FLOAT128_ENABLE_TYPE);
2908 if (TARGET_VSX)
2909 fprintf (stderr, DEBUG_FMT_D, "VSX easy 64-bit scalar element",
2910 (int)VECTOR_ELEMENT_SCALAR_64BIT);
2912 if (TARGET_DIRECT_MOVE_128)
2913 fprintf (stderr, DEBUG_FMT_D, "VSX easy 64-bit mfvsrld element",
2914 (int)VECTOR_ELEMENT_MFVSRLD_64BIT);
2918 /* Update the addr mask bits in reg_addr to help secondary reload and the
2919 legitimate address support figure out the appropriate addressing to
2920 use. */
2922 static void
2923 rs6000_setup_reg_addr_masks (void)
2925 ssize_t rc, reg, m, nregs;
2926 addr_mask_type any_addr_mask, addr_mask;
2928 for (m = 0; m < NUM_MACHINE_MODES; ++m)
2930 machine_mode m2 = (machine_mode) m;
2931 bool complex_p = false;
2932 bool small_int_p = (m2 == QImode || m2 == HImode || m2 == SImode);
2933 size_t msize;
2935 if (COMPLEX_MODE_P (m2))
2937 complex_p = true;
2938 m2 = GET_MODE_INNER (m2);
2941 msize = GET_MODE_SIZE (m2);
2943 /* SDmode is special in that we want to access it only via REG+REG
2944 addressing on power7 and above, since we want to use the LFIWZX and
2945 STFIWZX instructions to load it. */
2946 bool indexed_only_p = (m == SDmode && TARGET_NO_SDMODE_STACK);
2948 any_addr_mask = 0;
2949 for (rc = FIRST_RELOAD_REG_CLASS; rc <= LAST_RELOAD_REG_CLASS; rc++)
2951 addr_mask = 0;
2952 reg = reload_reg_map[rc].reg;
2954 /* Can mode values go in the GPR/FPR/Altivec registers? */
2955 if (reg >= 0 && rs6000_hard_regno_mode_ok_p[m][reg])
2957 bool small_int_vsx_p = (small_int_p
2958 && (rc == RELOAD_REG_FPR
2959 || rc == RELOAD_REG_VMX));
2961 nregs = rs6000_hard_regno_nregs[m][reg];
2962 addr_mask |= RELOAD_REG_VALID;
2964 /* Indicate if the mode takes more than 1 physical register. If
2965 it takes a single register, indicate it can do REG+REG
2966 addressing. Small integers in VSX registers can only do
2967 REG+REG addressing. */
2968 if (small_int_vsx_p)
2969 addr_mask |= RELOAD_REG_INDEXED;
2970 else if (nregs > 1 || m == BLKmode || complex_p)
2971 addr_mask |= RELOAD_REG_MULTIPLE;
2972 else
2973 addr_mask |= RELOAD_REG_INDEXED;
2975 /* Figure out if we can do PRE_INC, PRE_DEC, or PRE_MODIFY
2976 addressing. If we allow scalars into Altivec registers,
2977 don't allow PRE_INC, PRE_DEC, or PRE_MODIFY.
2979 For VSX systems, we don't allow update addressing for
2980 DFmode/SFmode if those registers can go in both the
2981 traditional floating point registers and Altivec registers.
2982 The load/store instructions for the Altivec registers do not
2983 have update forms. If we allowed update addressing, it seems
2984 to break IV-OPT code using floating point if the index type is
2985 int instead of long (PR target/81550 and target/84042). */
2987 if (TARGET_UPDATE
2988 && (rc == RELOAD_REG_GPR || rc == RELOAD_REG_FPR)
2989 && msize <= 8
2990 && !VECTOR_MODE_P (m2)
2991 && !FLOAT128_VECTOR_P (m2)
2992 && !complex_p
2993 && (m != E_DFmode || !TARGET_VSX)
2994 && (m != E_SFmode || !TARGET_P8_VECTOR)
2995 && !small_int_vsx_p)
2997 addr_mask |= RELOAD_REG_PRE_INCDEC;
2999 /* PRE_MODIFY is more restricted than PRE_INC/PRE_DEC in that
3000 we don't allow PRE_MODIFY for some multi-register
3001 operations. */
3002 switch (m)
3004 default:
3005 addr_mask |= RELOAD_REG_PRE_MODIFY;
3006 break;
3008 case E_DImode:
3009 if (TARGET_POWERPC64)
3010 addr_mask |= RELOAD_REG_PRE_MODIFY;
3011 break;
3013 case E_DFmode:
3014 case E_DDmode:
3015 if (TARGET_HARD_FLOAT)
3016 addr_mask |= RELOAD_REG_PRE_MODIFY;
3017 break;
3022 /* GPR and FPR registers can do REG+OFFSET addressing, except
3023 possibly for SDmode. ISA 3.0 (i.e. power9) adds D-form addressing
3024 for 64-bit scalars and 32-bit SFmode to altivec registers. */
3025 if ((addr_mask != 0) && !indexed_only_p
3026 && msize <= 8
3027 && (rc == RELOAD_REG_GPR
3028 || ((msize == 8 || m2 == SFmode)
3029 && (rc == RELOAD_REG_FPR
3030 || (rc == RELOAD_REG_VMX && TARGET_P9_VECTOR)))))
3031 addr_mask |= RELOAD_REG_OFFSET;
3033 /* VSX registers can do REG+OFFSET addressing if ISA 3.0
3034 instructions are enabled. The offset for 128-bit VSX registers is
3035 only 12 bits. While GPRs can handle the full offset range, VSX
3036 registers can only handle the restricted range. */
3037 else if ((addr_mask != 0) && !indexed_only_p
3038 && msize == 16 && TARGET_P9_VECTOR
3039 && (ALTIVEC_OR_VSX_VECTOR_MODE (m2)
3040 || (m2 == TImode && TARGET_VSX)))
3042 addr_mask |= RELOAD_REG_OFFSET;
3043 if (rc == RELOAD_REG_FPR || rc == RELOAD_REG_VMX)
3044 addr_mask |= RELOAD_REG_QUAD_OFFSET;
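/* An illustrative reading of the restriction above: the ISA 3.0
   lxv/stxv instructions encode a 12-bit DQ displacement that is
   implicitly extended with four zero bits, so only offsets that are
   multiples of 16 are directly encodable, whereas GPR loads use the
   wider D/DS displacement forms.  */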
3047 /* VMX registers can do (REG & -16) and ((REG+REG) & -16)
3048 addressing on 128-bit types. */
3049 if (rc == RELOAD_REG_VMX && msize == 16
3050 && (addr_mask & RELOAD_REG_VALID) != 0)
3051 addr_mask |= RELOAD_REG_AND_M16;
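/* This models the Altivec lvx/stvx instructions, which ignore the low
   four bits of the effective address; illustratively, such an access
   can be described as (mem (and (plus reg reg) -16)).  */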
3053 reg_addr[m].addr_mask[rc] = addr_mask;
3054 any_addr_mask |= addr_mask;
3057 reg_addr[m].addr_mask[RELOAD_REG_ANY] = any_addr_mask;
3062 /* Initialize the various global tables that are based on register size. */
3063 static void
3064 rs6000_init_hard_regno_mode_ok (bool global_init_p)
3066 ssize_t r, m, c;
3067 int align64;
3068 int align32;
3070 /* Precalculate REGNO_REG_CLASS. */
3071 rs6000_regno_regclass[0] = GENERAL_REGS;
3072 for (r = 1; r < 32; ++r)
3073 rs6000_regno_regclass[r] = BASE_REGS;
3075 for (r = 32; r < 64; ++r)
3076 rs6000_regno_regclass[r] = FLOAT_REGS;
3078 for (r = 64; r < FIRST_PSEUDO_REGISTER; ++r)
3079 rs6000_regno_regclass[r] = NO_REGS;
3081 for (r = FIRST_ALTIVEC_REGNO; r <= LAST_ALTIVEC_REGNO; ++r)
3082 rs6000_regno_regclass[r] = ALTIVEC_REGS;
3084 rs6000_regno_regclass[CR0_REGNO] = CR0_REGS;
3085 for (r = CR1_REGNO; r <= CR7_REGNO; ++r)
3086 rs6000_regno_regclass[r] = CR_REGS;
3088 rs6000_regno_regclass[LR_REGNO] = LINK_REGS;
3089 rs6000_regno_regclass[CTR_REGNO] = CTR_REGS;
3090 rs6000_regno_regclass[CA_REGNO] = NO_REGS;
3091 rs6000_regno_regclass[VRSAVE_REGNO] = VRSAVE_REGS;
3092 rs6000_regno_regclass[VSCR_REGNO] = VRSAVE_REGS;
3093 rs6000_regno_regclass[TFHAR_REGNO] = SPR_REGS;
3094 rs6000_regno_regclass[TFIAR_REGNO] = SPR_REGS;
3095 rs6000_regno_regclass[TEXASR_REGNO] = SPR_REGS;
3096 rs6000_regno_regclass[ARG_POINTER_REGNUM] = BASE_REGS;
3097 rs6000_regno_regclass[FRAME_POINTER_REGNUM] = BASE_REGS;
3099 /* Precalculate register class to simpler reload register class. We don't
3100 need all of the register classes that are combinations of different
3101 classes, just the simple ones that have constraint letters. */
3102 for (c = 0; c < N_REG_CLASSES; c++)
3103 reg_class_to_reg_type[c] = NO_REG_TYPE;
3105 reg_class_to_reg_type[(int)GENERAL_REGS] = GPR_REG_TYPE;
3106 reg_class_to_reg_type[(int)BASE_REGS] = GPR_REG_TYPE;
3107 reg_class_to_reg_type[(int)VSX_REGS] = VSX_REG_TYPE;
3108 reg_class_to_reg_type[(int)VRSAVE_REGS] = SPR_REG_TYPE;
3109 reg_class_to_reg_type[(int)VSCR_REGS] = SPR_REG_TYPE;
3110 reg_class_to_reg_type[(int)LINK_REGS] = SPR_REG_TYPE;
3111 reg_class_to_reg_type[(int)CTR_REGS] = SPR_REG_TYPE;
3112 reg_class_to_reg_type[(int)LINK_OR_CTR_REGS] = SPR_REG_TYPE;
3113 reg_class_to_reg_type[(int)CR_REGS] = CR_REG_TYPE;
3114 reg_class_to_reg_type[(int)CR0_REGS] = CR_REG_TYPE;
3116 if (TARGET_VSX)
3118 reg_class_to_reg_type[(int)FLOAT_REGS] = VSX_REG_TYPE;
3119 reg_class_to_reg_type[(int)ALTIVEC_REGS] = VSX_REG_TYPE;
3121 else
3123 reg_class_to_reg_type[(int)FLOAT_REGS] = FPR_REG_TYPE;
3124 reg_class_to_reg_type[(int)ALTIVEC_REGS] = ALTIVEC_REG_TYPE;
3127 /* Precalculate the valid memory formats as well as the vector information;
3128 this must be set up before the rs6000_hard_regno_nregs_internal calls
3129 below. */
3130 gcc_assert ((int)VECTOR_NONE == 0);
3131 memset ((void *) &rs6000_vector_unit[0], '\0', sizeof (rs6000_vector_unit));
3132 memset ((void *) &rs6000_vector_mem[0], '\0', sizeof (rs6000_vector_unit));
3134 gcc_assert ((int)CODE_FOR_nothing == 0);
3135 memset ((void *) &reg_addr[0], '\0', sizeof (reg_addr));
3137 gcc_assert ((int)NO_REGS == 0);
3138 memset ((void *) &rs6000_constraints[0], '\0', sizeof (rs6000_constraints));
3140 /* The VSX hardware allows native alignment for vectors, but we control whether
3141 the compiler believes it can use native alignment or still uses 128-bit alignment. */
3142 if (TARGET_VSX && !TARGET_VSX_ALIGN_128)
3144 align64 = 64;
3145 align32 = 32;
3147 else
3149 align64 = 128;
3150 align32 = 128;
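/* An illustration of the two regimes: with VSX and the default
   setting, V2DFmode below gets 64-bit (element) alignment and V4SFmode
   32-bit alignment, while with TARGET_VSX_ALIGN_128 both keep the
   traditional 128-bit vector alignment.  */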
3153 /* KF mode (IEEE 128-bit in VSX registers). We do not have arithmetic, so
3154 only set the memory modes. Include TFmode if -mabi=ieeelongdouble. */
3155 if (TARGET_FLOAT128_TYPE)
3157 rs6000_vector_mem[KFmode] = VECTOR_VSX;
3158 rs6000_vector_align[KFmode] = 128;
3160 if (FLOAT128_IEEE_P (TFmode))
3162 rs6000_vector_mem[TFmode] = VECTOR_VSX;
3163 rs6000_vector_align[TFmode] = 128;
3167 /* V2DF mode, VSX only. */
3168 if (TARGET_VSX)
3170 rs6000_vector_unit[V2DFmode] = VECTOR_VSX;
3171 rs6000_vector_mem[V2DFmode] = VECTOR_VSX;
3172 rs6000_vector_align[V2DFmode] = align64;
3175 /* V4SF mode, either VSX or Altivec. */
3176 if (TARGET_VSX)
3178 rs6000_vector_unit[V4SFmode] = VECTOR_VSX;
3179 rs6000_vector_mem[V4SFmode] = VECTOR_VSX;
3180 rs6000_vector_align[V4SFmode] = align32;
3182 else if (TARGET_ALTIVEC)
3184 rs6000_vector_unit[V4SFmode] = VECTOR_ALTIVEC;
3185 rs6000_vector_mem[V4SFmode] = VECTOR_ALTIVEC;
3186 rs6000_vector_align[V4SFmode] = align32;
3189 /* V16QImode, V8HImode, V4SImode are Altivec only, but possibly do VSX loads
3190 and stores. */
3191 if (TARGET_ALTIVEC)
3193 rs6000_vector_unit[V4SImode] = VECTOR_ALTIVEC;
3194 rs6000_vector_unit[V8HImode] = VECTOR_ALTIVEC;
3195 rs6000_vector_unit[V16QImode] = VECTOR_ALTIVEC;
3196 rs6000_vector_align[V4SImode] = align32;
3197 rs6000_vector_align[V8HImode] = align32;
3198 rs6000_vector_align[V16QImode] = align32;
3200 if (TARGET_VSX)
3202 rs6000_vector_mem[V4SImode] = VECTOR_VSX;
3203 rs6000_vector_mem[V8HImode] = VECTOR_VSX;
3204 rs6000_vector_mem[V16QImode] = VECTOR_VSX;
3206 else
3208 rs6000_vector_mem[V4SImode] = VECTOR_ALTIVEC;
3209 rs6000_vector_mem[V8HImode] = VECTOR_ALTIVEC;
3210 rs6000_vector_mem[V16QImode] = VECTOR_ALTIVEC;
3214 /* V2DImode: full arithmetic depends on the ISA 2.07 vector mode. Allow it
3215 under VSX to do insert/splat/extract. Altivec doesn't have 64-bit integer support. */
3216 if (TARGET_VSX)
3218 rs6000_vector_mem[V2DImode] = VECTOR_VSX;
3219 rs6000_vector_unit[V2DImode]
3220 = (TARGET_P8_VECTOR) ? VECTOR_P8_VECTOR : VECTOR_NONE;
3221 rs6000_vector_align[V2DImode] = align64;
3223 rs6000_vector_mem[V1TImode] = VECTOR_VSX;
3224 rs6000_vector_unit[V1TImode]
3225 = (TARGET_P8_VECTOR) ? VECTOR_P8_VECTOR : VECTOR_NONE;
3226 rs6000_vector_align[V1TImode] = 128;
3229 /* DFmode, see if we want to use the VSX unit. Memory is handled
3230 differently, so don't set rs6000_vector_mem. */
3231 if (TARGET_VSX)
3233 rs6000_vector_unit[DFmode] = VECTOR_VSX;
3234 rs6000_vector_align[DFmode] = 64;
3237 /* SFmode, see if we want to use the VSX unit. */
3238 if (TARGET_P8_VECTOR)
3240 rs6000_vector_unit[SFmode] = VECTOR_VSX;
3241 rs6000_vector_align[SFmode] = 32;
3244 /* Allow TImode in VSX register and set the VSX memory macros. */
3245 if (TARGET_VSX)
3247 rs6000_vector_mem[TImode] = VECTOR_VSX;
3248 rs6000_vector_align[TImode] = align64;
3251 /* Register class constraints for the constraints that depend on compile
3252 switches. When the VSX code was added, different constraints were added
3253 based on the type (DFmode, V2DFmode, V4SFmode). For the vector types, all
3254 of the VSX registers are used. The register classes for scalar floating
3255 point types are set based on whether we allow that type into the upper
3256 (Altivec) registers. GCC has register classes to target the Altivec
3257 registers for load/store operations, to select using a VSX memory
3258 operation instead of the traditional floating point operation. The
3259 constraints are:
3261 d - Register class to use with traditional DFmode instructions.
3262 f - Register class to use with traditional SFmode instructions.
3263 v - Altivec register.
3264 wa - Any VSX register.
3265 wc - Reserved to represent individual CR bits (used in LLVM).
3266 wd - Preferred register class for V2DFmode.
3267 wf - Preferred register class for V4SFmode.
3268 wg - Float register for power6x move insns.
3269 wh - FP register for direct move instructions.
3270 wi - FP or VSX register to hold 64-bit integers for VSX insns.
3271 wj - FP or VSX register to hold 64-bit integers for direct moves.
3272 wk - FP or VSX register to hold 64-bit doubles for direct moves.
3273 wl - Float register if we can do 32-bit signed int loads.
3274 wm - VSX register for ISA 2.07 direct move operations.
3275 wn - always NO_REGS.
3276 wr - GPR if 64-bit mode is permitted.
3277 ws - Register class to do ISA 2.06 DF operations.
3278 wt - VSX register for TImode in VSX registers.
3279 wu - Altivec register for ISA 2.07 VSX SF/SI load/stores.
3280 wv - Altivec register for ISA 2.06 VSX DF/DI load/stores.
3281 ww - Register class to do SF conversions in with VSX operations.
3282 wx - Float register if we can do 32-bit int stores.
3283 wy - Register class to do ISA 2.07 SF operations.
3284 wz - Float register if we can do 32-bit unsigned int loads.
3285 wH - Altivec register if SImode is allowed in VSX registers.
3286 wI - VSX register if SImode is allowed in VSX registers.
3287 wJ - VSX register if QImode/HImode are allowed in VSX registers.
3288 wK - Altivec register if QImode/HImode are allowed in VSX registers. */
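/* An illustrative consequence of the assignments below: every entry of
   rs6000_constraints was zeroed to NO_REGS above, so a constraint such
   as "wa" only becomes usable (VSX_REGS) when TARGET_VSX is enabled;
   on a non-VSX compile, insn alternatives written with "wa" never
   match.  */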
3290 if (TARGET_HARD_FLOAT)
3292 rs6000_constraints[RS6000_CONSTRAINT_f] = FLOAT_REGS; /* SFmode */
3293 rs6000_constraints[RS6000_CONSTRAINT_d] = FLOAT_REGS; /* DFmode */
3296 if (TARGET_VSX)
3298 rs6000_constraints[RS6000_CONSTRAINT_wa] = VSX_REGS;
3299 rs6000_constraints[RS6000_CONSTRAINT_wd] = VSX_REGS; /* V2DFmode */
3300 rs6000_constraints[RS6000_CONSTRAINT_wf] = VSX_REGS; /* V4SFmode */
3301 rs6000_constraints[RS6000_CONSTRAINT_ws] = VSX_REGS; /* DFmode */
3302 rs6000_constraints[RS6000_CONSTRAINT_wv] = ALTIVEC_REGS; /* DFmode */
3303 rs6000_constraints[RS6000_CONSTRAINT_wi] = VSX_REGS; /* DImode */
3304 rs6000_constraints[RS6000_CONSTRAINT_wt] = VSX_REGS; /* TImode */
3307 /* Add conditional constraints based on various options, to allow us to
3308 collapse multiple insn patterns. */
3309 if (TARGET_ALTIVEC)
3310 rs6000_constraints[RS6000_CONSTRAINT_v] = ALTIVEC_REGS;
3312 if (TARGET_MFPGPR) /* DFmode */
3313 rs6000_constraints[RS6000_CONSTRAINT_wg] = FLOAT_REGS;
3315 if (TARGET_LFIWAX)
3316 rs6000_constraints[RS6000_CONSTRAINT_wl] = FLOAT_REGS; /* DImode */
3318 if (TARGET_DIRECT_MOVE)
3320 rs6000_constraints[RS6000_CONSTRAINT_wh] = FLOAT_REGS;
3321 rs6000_constraints[RS6000_CONSTRAINT_wj] /* DImode */
3322 = rs6000_constraints[RS6000_CONSTRAINT_wi];
3323 rs6000_constraints[RS6000_CONSTRAINT_wk] /* DFmode */
3324 = rs6000_constraints[RS6000_CONSTRAINT_ws];
3325 rs6000_constraints[RS6000_CONSTRAINT_wm] = VSX_REGS;
3328 if (TARGET_POWERPC64)
3330 rs6000_constraints[RS6000_CONSTRAINT_wr] = GENERAL_REGS;
3331 rs6000_constraints[RS6000_CONSTRAINT_wA] = BASE_REGS;
3334 if (TARGET_P8_VECTOR) /* SFmode */
3336 rs6000_constraints[RS6000_CONSTRAINT_wu] = ALTIVEC_REGS;
3337 rs6000_constraints[RS6000_CONSTRAINT_wy] = VSX_REGS;
3338 rs6000_constraints[RS6000_CONSTRAINT_ww] = VSX_REGS;
3340 else if (TARGET_VSX)
3341 rs6000_constraints[RS6000_CONSTRAINT_ww] = FLOAT_REGS;
3343 if (TARGET_STFIWX)
3344 rs6000_constraints[RS6000_CONSTRAINT_wx] = FLOAT_REGS; /* DImode */
3346 if (TARGET_LFIWZX)
3347 rs6000_constraints[RS6000_CONSTRAINT_wz] = FLOAT_REGS; /* DImode */
3349 if (TARGET_FLOAT128_TYPE)
3351 rs6000_constraints[RS6000_CONSTRAINT_wq] = VSX_REGS; /* KFmode */
3352 if (FLOAT128_IEEE_P (TFmode))
3353 rs6000_constraints[RS6000_CONSTRAINT_wp] = VSX_REGS; /* TFmode */
3356 if (TARGET_P9_VECTOR)
3358 /* Support for new D-form instructions. */
3359 rs6000_constraints[RS6000_CONSTRAINT_wb] = ALTIVEC_REGS;
3361 /* Support for ISA 3.0 (power9) vectors. */
3362 rs6000_constraints[RS6000_CONSTRAINT_wo] = VSX_REGS;
3365 /* Support for new direct moves (ISA 3.0 + 64bit). */
3366 if (TARGET_DIRECT_MOVE_128)
3367 rs6000_constraints[RS6000_CONSTRAINT_we] = VSX_REGS;
3369 /* Support small integers in VSX registers. */
3370 if (TARGET_P8_VECTOR)
3372 rs6000_constraints[RS6000_CONSTRAINT_wH] = ALTIVEC_REGS;
3373 rs6000_constraints[RS6000_CONSTRAINT_wI] = FLOAT_REGS;
3374 if (TARGET_P9_VECTOR)
3376 rs6000_constraints[RS6000_CONSTRAINT_wJ] = FLOAT_REGS;
3377 rs6000_constraints[RS6000_CONSTRAINT_wK] = ALTIVEC_REGS;
3381 /* Set up the reload helper and direct move functions. */
3382 if (TARGET_VSX || TARGET_ALTIVEC)
3384 if (TARGET_64BIT)
3386 reg_addr[V16QImode].reload_store = CODE_FOR_reload_v16qi_di_store;
3387 reg_addr[V16QImode].reload_load = CODE_FOR_reload_v16qi_di_load;
3388 reg_addr[V8HImode].reload_store = CODE_FOR_reload_v8hi_di_store;
3389 reg_addr[V8HImode].reload_load = CODE_FOR_reload_v8hi_di_load;
3390 reg_addr[V4SImode].reload_store = CODE_FOR_reload_v4si_di_store;
3391 reg_addr[V4SImode].reload_load = CODE_FOR_reload_v4si_di_load;
3392 reg_addr[V2DImode].reload_store = CODE_FOR_reload_v2di_di_store;
3393 reg_addr[V2DImode].reload_load = CODE_FOR_reload_v2di_di_load;
3394 reg_addr[V1TImode].reload_store = CODE_FOR_reload_v1ti_di_store;
3395 reg_addr[V1TImode].reload_load = CODE_FOR_reload_v1ti_di_load;
3396 reg_addr[V4SFmode].reload_store = CODE_FOR_reload_v4sf_di_store;
3397 reg_addr[V4SFmode].reload_load = CODE_FOR_reload_v4sf_di_load;
3398 reg_addr[V2DFmode].reload_store = CODE_FOR_reload_v2df_di_store;
3399 reg_addr[V2DFmode].reload_load = CODE_FOR_reload_v2df_di_load;
3400 reg_addr[DFmode].reload_store = CODE_FOR_reload_df_di_store;
3401 reg_addr[DFmode].reload_load = CODE_FOR_reload_df_di_load;
3402 reg_addr[DDmode].reload_store = CODE_FOR_reload_dd_di_store;
3403 reg_addr[DDmode].reload_load = CODE_FOR_reload_dd_di_load;
3404 reg_addr[SFmode].reload_store = CODE_FOR_reload_sf_di_store;
3405 reg_addr[SFmode].reload_load = CODE_FOR_reload_sf_di_load;
3407 if (FLOAT128_VECTOR_P (KFmode))
3409 reg_addr[KFmode].reload_store = CODE_FOR_reload_kf_di_store;
3410 reg_addr[KFmode].reload_load = CODE_FOR_reload_kf_di_load;
3413 if (FLOAT128_VECTOR_P (TFmode))
3415 reg_addr[TFmode].reload_store = CODE_FOR_reload_tf_di_store;
3416 reg_addr[TFmode].reload_load = CODE_FOR_reload_tf_di_load;
3419 /* Only provide a reload handler for SDmode if lfiwzx/stfiwx are
3420 available. */
3421 if (TARGET_NO_SDMODE_STACK)
3423 reg_addr[SDmode].reload_store = CODE_FOR_reload_sd_di_store;
3424 reg_addr[SDmode].reload_load = CODE_FOR_reload_sd_di_load;
3427 if (TARGET_VSX)
3429 reg_addr[TImode].reload_store = CODE_FOR_reload_ti_di_store;
3430 reg_addr[TImode].reload_load = CODE_FOR_reload_ti_di_load;
3433 if (TARGET_DIRECT_MOVE && !TARGET_DIRECT_MOVE_128)
3435 reg_addr[TImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxti;
3436 reg_addr[V1TImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv1ti;
3437 reg_addr[V2DFmode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv2df;
3438 reg_addr[V2DImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv2di;
3439 reg_addr[V4SFmode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv4sf;
3440 reg_addr[V4SImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv4si;
3441 reg_addr[V8HImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv8hi;
3442 reg_addr[V16QImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv16qi;
3443 reg_addr[SFmode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxsf;
3445 reg_addr[TImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprti;
3446 reg_addr[V1TImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv1ti;
3447 reg_addr[V2DFmode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv2df;
3448 reg_addr[V2DImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv2di;
3449 reg_addr[V4SFmode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv4sf;
3450 reg_addr[V4SImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv4si;
3451 reg_addr[V8HImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv8hi;
3452 reg_addr[V16QImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv16qi;
3453 reg_addr[SFmode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprsf;
3455 if (FLOAT128_VECTOR_P (KFmode))
3457 reg_addr[KFmode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxkf;
3458 reg_addr[KFmode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprkf;
3461 if (FLOAT128_VECTOR_P (TFmode))
3463 reg_addr[TFmode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxtf;
3464 reg_addr[TFmode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprtf;
3468 else
3470 reg_addr[V16QImode].reload_store = CODE_FOR_reload_v16qi_si_store;
3471 reg_addr[V16QImode].reload_load = CODE_FOR_reload_v16qi_si_load;
3472 reg_addr[V8HImode].reload_store = CODE_FOR_reload_v8hi_si_store;
3473 reg_addr[V8HImode].reload_load = CODE_FOR_reload_v8hi_si_load;
3474 reg_addr[V4SImode].reload_store = CODE_FOR_reload_v4si_si_store;
3475 reg_addr[V4SImode].reload_load = CODE_FOR_reload_v4si_si_load;
3476 reg_addr[V2DImode].reload_store = CODE_FOR_reload_v2di_si_store;
3477 reg_addr[V2DImode].reload_load = CODE_FOR_reload_v2di_si_load;
3478 reg_addr[V1TImode].reload_store = CODE_FOR_reload_v1ti_si_store;
3479 reg_addr[V1TImode].reload_load = CODE_FOR_reload_v1ti_si_load;
3480 reg_addr[V4SFmode].reload_store = CODE_FOR_reload_v4sf_si_store;
3481 reg_addr[V4SFmode].reload_load = CODE_FOR_reload_v4sf_si_load;
3482 reg_addr[V2DFmode].reload_store = CODE_FOR_reload_v2df_si_store;
3483 reg_addr[V2DFmode].reload_load = CODE_FOR_reload_v2df_si_load;
3484 reg_addr[DFmode].reload_store = CODE_FOR_reload_df_si_store;
3485 reg_addr[DFmode].reload_load = CODE_FOR_reload_df_si_load;
3486 reg_addr[DDmode].reload_store = CODE_FOR_reload_dd_si_store;
3487 reg_addr[DDmode].reload_load = CODE_FOR_reload_dd_si_load;
3488 reg_addr[SFmode].reload_store = CODE_FOR_reload_sf_si_store;
3489 reg_addr[SFmode].reload_load = CODE_FOR_reload_sf_si_load;
3491 if (FLOAT128_VECTOR_P (KFmode))
3493 reg_addr[KFmode].reload_store = CODE_FOR_reload_kf_si_store;
3494 reg_addr[KFmode].reload_load = CODE_FOR_reload_kf_si_load;
3497 if (FLOAT128_IEEE_P (TFmode))
3499 reg_addr[TFmode].reload_store = CODE_FOR_reload_tf_si_store;
3500 reg_addr[TFmode].reload_load = CODE_FOR_reload_tf_si_load;
3503 /* Only provide a reload handler for SDmode if lfiwzx/stfiwx are
3504 available. */
3505 if (TARGET_NO_SDMODE_STACK)
3507 reg_addr[SDmode].reload_store = CODE_FOR_reload_sd_si_store;
3508 reg_addr[SDmode].reload_load = CODE_FOR_reload_sd_si_load;
3511 if (TARGET_VSX)
3513 reg_addr[TImode].reload_store = CODE_FOR_reload_ti_si_store;
3514 reg_addr[TImode].reload_load = CODE_FOR_reload_ti_si_load;
3517 if (TARGET_DIRECT_MOVE)
3519 reg_addr[DImode].reload_fpr_gpr = CODE_FOR_reload_fpr_from_gprdi;
3520 reg_addr[DDmode].reload_fpr_gpr = CODE_FOR_reload_fpr_from_gprdd;
3521 reg_addr[DFmode].reload_fpr_gpr = CODE_FOR_reload_fpr_from_gprdf;
3525 reg_addr[DFmode].scalar_in_vmx_p = true;
3526 reg_addr[DImode].scalar_in_vmx_p = true;
3528 if (TARGET_P8_VECTOR)
3530 reg_addr[SFmode].scalar_in_vmx_p = true;
3531 reg_addr[SImode].scalar_in_vmx_p = true;
3533 if (TARGET_P9_VECTOR)
3535 reg_addr[HImode].scalar_in_vmx_p = true;
3536 reg_addr[QImode].scalar_in_vmx_p = true;
3541 /* Set up the fusion operations. */
3542 if (TARGET_P8_FUSION)
3544 reg_addr[QImode].fusion_gpr_ld = CODE_FOR_fusion_gpr_load_qi;
3545 reg_addr[HImode].fusion_gpr_ld = CODE_FOR_fusion_gpr_load_hi;
3546 reg_addr[SImode].fusion_gpr_ld = CODE_FOR_fusion_gpr_load_si;
3547 if (TARGET_64BIT)
3548 reg_addr[DImode].fusion_gpr_ld = CODE_FOR_fusion_gpr_load_di;
3551 if (TARGET_P9_FUSION)
3553 struct fuse_insns {
3554 enum machine_mode mode; /* mode of the fused type. */
3555 enum machine_mode pmode; /* pointer mode. */
3556 enum rs6000_reload_reg_type rtype; /* register type. */
3557 enum insn_code load; /* load insn. */
3558 enum insn_code store; /* store insn. */
3561 static const struct fuse_insns addis_insns[] = {
3562 { E_SFmode, E_DImode, RELOAD_REG_FPR,
3563 CODE_FOR_fusion_vsx_di_sf_load,
3564 CODE_FOR_fusion_vsx_di_sf_store },
3566 { E_SFmode, E_SImode, RELOAD_REG_FPR,
3567 CODE_FOR_fusion_vsx_si_sf_load,
3568 CODE_FOR_fusion_vsx_si_sf_store },
3570 { E_DFmode, E_DImode, RELOAD_REG_FPR,
3571 CODE_FOR_fusion_vsx_di_df_load,
3572 CODE_FOR_fusion_vsx_di_df_store },
3574 { E_DFmode, E_SImode, RELOAD_REG_FPR,
3575 CODE_FOR_fusion_vsx_si_df_load,
3576 CODE_FOR_fusion_vsx_si_df_store },
3578 { E_DImode, E_DImode, RELOAD_REG_FPR,
3579 CODE_FOR_fusion_vsx_di_di_load,
3580 CODE_FOR_fusion_vsx_di_di_store },
3582 { E_DImode, E_SImode, RELOAD_REG_FPR,
3583 CODE_FOR_fusion_vsx_si_di_load,
3584 CODE_FOR_fusion_vsx_si_di_store },
3586 { E_QImode, E_DImode, RELOAD_REG_GPR,
3587 CODE_FOR_fusion_gpr_di_qi_load,
3588 CODE_FOR_fusion_gpr_di_qi_store },
3590 { E_QImode, E_SImode, RELOAD_REG_GPR,
3591 CODE_FOR_fusion_gpr_si_qi_load,
3592 CODE_FOR_fusion_gpr_si_qi_store },
3594 { E_HImode, E_DImode, RELOAD_REG_GPR,
3595 CODE_FOR_fusion_gpr_di_hi_load,
3596 CODE_FOR_fusion_gpr_di_hi_store },
3598 { E_HImode, E_SImode, RELOAD_REG_GPR,
3599 CODE_FOR_fusion_gpr_si_hi_load,
3600 CODE_FOR_fusion_gpr_si_hi_store },
3602 { E_SImode, E_DImode, RELOAD_REG_GPR,
3603 CODE_FOR_fusion_gpr_di_si_load,
3604 CODE_FOR_fusion_gpr_di_si_store },
3606 { E_SImode, E_SImode, RELOAD_REG_GPR,
3607 CODE_FOR_fusion_gpr_si_si_load,
3608 CODE_FOR_fusion_gpr_si_si_store },
3610 { E_SFmode, E_DImode, RELOAD_REG_GPR,
3611 CODE_FOR_fusion_gpr_di_sf_load,
3612 CODE_FOR_fusion_gpr_di_sf_store },
3614 { E_SFmode, E_SImode, RELOAD_REG_GPR,
3615 CODE_FOR_fusion_gpr_si_sf_load,
3616 CODE_FOR_fusion_gpr_si_sf_store },
3618 { E_DImode, E_DImode, RELOAD_REG_GPR,
3619 CODE_FOR_fusion_gpr_di_di_load,
3620 CODE_FOR_fusion_gpr_di_di_store },
3622 { E_DFmode, E_DImode, RELOAD_REG_GPR,
3623 CODE_FOR_fusion_gpr_di_df_load,
3624 CODE_FOR_fusion_gpr_di_df_store },
3627 machine_mode cur_pmode = Pmode;
3628 size_t i;
3630 for (i = 0; i < ARRAY_SIZE (addis_insns); i++)
3632 machine_mode xmode = addis_insns[i].mode;
3633 enum rs6000_reload_reg_type rtype = addis_insns[i].rtype;
3635 if (addis_insns[i].pmode != cur_pmode)
3636 continue;
3638 if (rtype == RELOAD_REG_FPR && !TARGET_HARD_FLOAT)
3639 continue;
3641 reg_addr[xmode].fusion_addis_ld[rtype] = addis_insns[i].load;
3642 reg_addr[xmode].fusion_addis_st[rtype] = addis_insns[i].store;
3644 if (rtype == RELOAD_REG_FPR && TARGET_P9_VECTOR)
3646 reg_addr[xmode].fusion_addis_ld[RELOAD_REG_VMX]
3647 = addis_insns[i].load;
3648 reg_addr[xmode].fusion_addis_st[RELOAD_REG_VMX]
3649 = addis_insns[i].store;
3654 /* Note for which types we support fusing a TOC setup plus a memory insn.
3655 We only do fused TOCs for medium/large code models. */
3656 if (TARGET_P8_FUSION && TARGET_TOC_FUSION && TARGET_POWERPC64
3657 && (TARGET_CMODEL != CMODEL_SMALL))
3659 reg_addr[QImode].fused_toc = true;
3660 reg_addr[HImode].fused_toc = true;
3661 reg_addr[SImode].fused_toc = true;
3662 reg_addr[DImode].fused_toc = true;
3663 if (TARGET_HARD_FLOAT)
3665 reg_addr[SFmode].fused_toc = true;
3666 reg_addr[DFmode].fused_toc = true;
3670 /* Precalculate HARD_REGNO_NREGS. */
3671 for (r = 0; r < FIRST_PSEUDO_REGISTER; ++r)
3672 for (m = 0; m < NUM_MACHINE_MODES; ++m)
3673 rs6000_hard_regno_nregs[m][r]
3674 = rs6000_hard_regno_nregs_internal (r, (machine_mode)m);
3676 /* Precalculate TARGET_HARD_REGNO_MODE_OK. */
3677 for (r = 0; r < FIRST_PSEUDO_REGISTER; ++r)
3678 for (m = 0; m < NUM_MACHINE_MODES; ++m)
3679 if (rs6000_hard_regno_mode_ok_uncached (r, (machine_mode)m))
3680 rs6000_hard_regno_mode_ok_p[m][r] = true;
3682 /* Precalculate CLASS_MAX_NREGS sizes. */
3683 for (c = 0; c < LIM_REG_CLASSES; ++c)
3685 int reg_size;
3687 if (TARGET_VSX && VSX_REG_CLASS_P (c))
3688 reg_size = UNITS_PER_VSX_WORD;
3690 else if (c == ALTIVEC_REGS)
3691 reg_size = UNITS_PER_ALTIVEC_WORD;
3693 else if (c == FLOAT_REGS)
3694 reg_size = UNITS_PER_FP_WORD;
3696 else
3697 reg_size = UNITS_PER_WORD;
3699 for (m = 0; m < NUM_MACHINE_MODES; ++m)
3701 machine_mode m2 = (machine_mode)m;
3702 int reg_size2 = reg_size;
3704 /* TDmode & IBM 128-bit floating point always take 2 registers, even
3705 in VSX. */
3706 if (TARGET_VSX && VSX_REG_CLASS_P (c) && FLOAT128_2REG_P (m))
3707 reg_size2 = UNITS_PER_FP_WORD;
3709 rs6000_class_max_nregs[m][c]
3710 = (GET_MODE_SIZE (m2) + reg_size2 - 1) / reg_size2;
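
/* As a sketch of the round-up above (worked example only): a 16-byte vector
   mode in a class with 8-byte registers needs (16 + 8 - 1) / 8 == 2
   registers, and an odd-sized mode still claims its final register whole.  */
#if 0
static int
class_nregs_example (int mode_size, int reg_size)
{
  return (mode_size + reg_size - 1) / reg_size;	/* ceiling division */
}
#endif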
3714 /* Calculate which modes to automatically generate code to use the
3715 reciprocal divide and square root instructions. In the future, possibly
3716 automatically generate the instructions even if the user did not specify
3717 -mrecip. The older machines' double-precision reciprocal sqrt estimate is
3718 not accurate enough. */
3719 memset (rs6000_recip_bits, 0, sizeof (rs6000_recip_bits));
3720 if (TARGET_FRES)
3721 rs6000_recip_bits[SFmode] = RS6000_RECIP_MASK_HAVE_RE;
3722 if (TARGET_FRE)
3723 rs6000_recip_bits[DFmode] = RS6000_RECIP_MASK_HAVE_RE;
3724 if (VECTOR_UNIT_ALTIVEC_OR_VSX_P (V4SFmode))
3725 rs6000_recip_bits[V4SFmode] = RS6000_RECIP_MASK_HAVE_RE;
3726 if (VECTOR_UNIT_VSX_P (V2DFmode))
3727 rs6000_recip_bits[V2DFmode] = RS6000_RECIP_MASK_HAVE_RE;
3729 if (TARGET_FRSQRTES)
3730 rs6000_recip_bits[SFmode] |= RS6000_RECIP_MASK_HAVE_RSQRTE;
3731 if (TARGET_FRSQRTE)
3732 rs6000_recip_bits[DFmode] |= RS6000_RECIP_MASK_HAVE_RSQRTE;
3733 if (VECTOR_UNIT_ALTIVEC_OR_VSX_P (V4SFmode))
3734 rs6000_recip_bits[V4SFmode] |= RS6000_RECIP_MASK_HAVE_RSQRTE;
3735 if (VECTOR_UNIT_VSX_P (V2DFmode))
3736 rs6000_recip_bits[V2DFmode] |= RS6000_RECIP_MASK_HAVE_RSQRTE;
3738 if (rs6000_recip_control)
3740 if (!flag_finite_math_only)
3741 warning (0, "%qs requires %qs or %qs", "-mrecip",
3742 "-ffinite-math-only", "-ffast-math");
3743 if (flag_trapping_math)
3744 warning (0, "%qs requires %qs or %qs", "-mrecip",
3745 "-fno-trapping-math", "-ffast-math");
3746 if (!flag_reciprocal_math)
3747 warning (0, "%qs requires %qs or %qs", "-mrecip", "-freciprocal-math",
3748 "-ffast-math");
3749 if (flag_finite_math_only && !flag_trapping_math && flag_reciprocal_math)
3751 if (RS6000_RECIP_HAVE_RE_P (SFmode)
3752 && (rs6000_recip_control & RECIP_SF_DIV) != 0)
3753 rs6000_recip_bits[SFmode] |= RS6000_RECIP_MASK_AUTO_RE;
3755 if (RS6000_RECIP_HAVE_RE_P (DFmode)
3756 && (rs6000_recip_control & RECIP_DF_DIV) != 0)
3757 rs6000_recip_bits[DFmode] |= RS6000_RECIP_MASK_AUTO_RE;
3759 if (RS6000_RECIP_HAVE_RE_P (V4SFmode)
3760 && (rs6000_recip_control & RECIP_V4SF_DIV) != 0)
3761 rs6000_recip_bits[V4SFmode] |= RS6000_RECIP_MASK_AUTO_RE;
3763 if (RS6000_RECIP_HAVE_RE_P (V2DFmode)
3764 && (rs6000_recip_control & RECIP_V2DF_DIV) != 0)
3765 rs6000_recip_bits[V2DFmode] |= RS6000_RECIP_MASK_AUTO_RE;
3767 if (RS6000_RECIP_HAVE_RSQRTE_P (SFmode)
3768 && (rs6000_recip_control & RECIP_SF_RSQRT) != 0)
3769 rs6000_recip_bits[SFmode] |= RS6000_RECIP_MASK_AUTO_RSQRTE;
3771 if (RS6000_RECIP_HAVE_RSQRTE_P (DFmode)
3772 && (rs6000_recip_control & RECIP_DF_RSQRT) != 0)
3773 rs6000_recip_bits[DFmode] |= RS6000_RECIP_MASK_AUTO_RSQRTE;
3775 if (RS6000_RECIP_HAVE_RSQRTE_P (V4SFmode)
3776 && (rs6000_recip_control & RECIP_V4SF_RSQRT) != 0)
3777 rs6000_recip_bits[V4SFmode] |= RS6000_RECIP_MASK_AUTO_RSQRTE;
3779 if (RS6000_RECIP_HAVE_RSQRTE_P (V2DFmode)
3780 && (rs6000_recip_control & RECIP_V2DF_RSQRT) != 0)
3781 rs6000_recip_bits[V2DFmode] |= RS6000_RECIP_MASK_AUTO_RSQRTE;
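
/* The RS6000_RECIP_MASK_AUTO_* bits permit open-coding a divide as a hardware
   reciprocal estimate (fres, fre, xvredp, etc.) refined by Newton-Raphson
   steps. A minimal numerical sketch of one refinement step (illustrative
   only; not the RTL GCC actually emits):  */
#if 0
static double
recip_newton_step (double d, double x0)
{
  /* x1 = x0 * (2 - d * x0); each step roughly doubles the number of
     correct bits in the approximation of 1/d.  */
  return x0 * (2.0 - d * x0);
}
#endif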
3785 /* Update the addr mask bits in reg_addr to help the secondary reload and
3786 legitimate-address support figure out the appropriate addressing to
3787 use. */
3788 rs6000_setup_reg_addr_masks ();
3790 if (global_init_p || TARGET_DEBUG_TARGET)
3792 if (TARGET_DEBUG_REG)
3793 rs6000_debug_reg_global ();
3795 if (TARGET_DEBUG_COST || TARGET_DEBUG_REG)
3796 fprintf (stderr,
3797 "SImode variable mult cost = %d\n"
3798 "SImode constant mult cost = %d\n"
3799 "SImode short constant mult cost = %d\n"
3800 "DImode multipliciation cost = %d\n"
3801 "SImode division cost = %d\n"
3802 "DImode division cost = %d\n"
3803 "Simple fp operation cost = %d\n"
3804 "DFmode multiplication cost = %d\n"
3805 "SFmode division cost = %d\n"
3806 "DFmode division cost = %d\n"
3807 "cache line size = %d\n"
3808 "l1 cache size = %d\n"
3809 "l2 cache size = %d\n"
3810 "simultaneous prefetches = %d\n"
3811 "\n",
3812 rs6000_cost->mulsi,
3813 rs6000_cost->mulsi_const,
3814 rs6000_cost->mulsi_const9,
3815 rs6000_cost->muldi,
3816 rs6000_cost->divsi,
3817 rs6000_cost->divdi,
3818 rs6000_cost->fp,
3819 rs6000_cost->dmul,
3820 rs6000_cost->sdiv,
3821 rs6000_cost->ddiv,
3822 rs6000_cost->cache_line_size,
3823 rs6000_cost->l1_cache_size,
3824 rs6000_cost->l2_cache_size,
3825 rs6000_cost->simultaneous_prefetches);
3829 #if TARGET_MACHO
3830 /* The Darwin version of SUBTARGET_OVERRIDE_OPTIONS. */
3832 static void
3833 darwin_rs6000_override_options (void)
3835 /* The Darwin ABI always includes AltiVec; it can't be (validly) turned
3836 off. */
3837 rs6000_altivec_abi = 1;
3838 TARGET_ALTIVEC_VRSAVE = 1;
3839 rs6000_current_abi = ABI_DARWIN;
3841 if (DEFAULT_ABI == ABI_DARWIN
3842 && TARGET_64BIT)
3843 darwin_one_byte_bool = 1;
3845 if (TARGET_64BIT && ! TARGET_POWERPC64)
3847 rs6000_isa_flags |= OPTION_MASK_POWERPC64;
3848 warning (0, "%qs requires PowerPC64 architecture, enabling", "-m64");
3850 if (flag_mkernel)
3852 rs6000_default_long_calls = 1;
3853 rs6000_isa_flags |= OPTION_MASK_SOFT_FLOAT;
3856 /* Make -m64 imply -maltivec. Darwin's 64-bit ABI includes
3857 Altivec. */
3858 if (!flag_mkernel && !flag_apple_kext
3859 && TARGET_64BIT
3860 && ! (rs6000_isa_flags_explicit & OPTION_MASK_ALTIVEC))
3861 rs6000_isa_flags |= OPTION_MASK_ALTIVEC;
3863 /* Unless the user (not the configurer) has explicitly overridden
3864 it with -mcpu=G3 or -mno-altivec, then 10.5+ targets default to
3865 G4 unless targeting the kernel. */
3866 if (!flag_mkernel
3867 && !flag_apple_kext
3868 && strverscmp (darwin_macosx_version_min, "10.5") >= 0
3869 && ! (rs6000_isa_flags_explicit & OPTION_MASK_ALTIVEC)
3870 && ! global_options_set.x_rs6000_cpu_index)
3872 rs6000_isa_flags |= OPTION_MASK_ALTIVEC;
3875 #endif
3877 /* If not otherwise specified by a target, make 'long double' equivalent to
3878 'double'. */
3880 #ifndef RS6000_DEFAULT_LONG_DOUBLE_SIZE
3881 #define RS6000_DEFAULT_LONG_DOUBLE_SIZE 64
3882 #endif
3884 /* Return the builtin mask of the various options that could affect which
3885 builtins are available. In the past we used target_flags, but we've run
3886 out of bits, and some options are no longer in target_flags. */
3888 HOST_WIDE_INT
3889 rs6000_builtin_mask_calculate (void)
3891 return (((TARGET_ALTIVEC) ? RS6000_BTM_ALTIVEC : 0)
3892 | ((TARGET_CMPB) ? RS6000_BTM_CMPB : 0)
3893 | ((TARGET_VSX) ? RS6000_BTM_VSX : 0)
3894 | ((TARGET_FRE) ? RS6000_BTM_FRE : 0)
3895 | ((TARGET_FRES) ? RS6000_BTM_FRES : 0)
3896 | ((TARGET_FRSQRTE) ? RS6000_BTM_FRSQRTE : 0)
3897 | ((TARGET_FRSQRTES) ? RS6000_BTM_FRSQRTES : 0)
3898 | ((TARGET_POPCNTD) ? RS6000_BTM_POPCNTD : 0)
3899 | ((rs6000_cpu == PROCESSOR_CELL) ? RS6000_BTM_CELL : 0)
3900 | ((TARGET_P8_VECTOR) ? RS6000_BTM_P8_VECTOR : 0)
3901 | ((TARGET_P9_VECTOR) ? RS6000_BTM_P9_VECTOR : 0)
3902 | ((TARGET_P9_MISC) ? RS6000_BTM_P9_MISC : 0)
3903 | ((TARGET_MODULO) ? RS6000_BTM_MODULO : 0)
3904 | ((TARGET_64BIT) ? RS6000_BTM_64BIT : 0)
3905 | ((TARGET_POWERPC64) ? RS6000_BTM_POWERPC64 : 0)
3906 | ((TARGET_CRYPTO) ? RS6000_BTM_CRYPTO : 0)
3907 | ((TARGET_HTM) ? RS6000_BTM_HTM : 0)
3908 | ((TARGET_DFP) ? RS6000_BTM_DFP : 0)
3909 | ((TARGET_HARD_FLOAT) ? RS6000_BTM_HARD_FLOAT : 0)
3910 | ((TARGET_LONG_DOUBLE_128) ? RS6000_BTM_LDBL128 : 0)
3911 | ((TARGET_FLOAT128_TYPE) ? RS6000_BTM_FLOAT128 : 0)
3912 | ((TARGET_FLOAT128_HW) ? RS6000_BTM_FLOAT128_HW : 0));
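
/* A hypothetical sketch of how such a mask is typically consumed: a builtin
   is usable only if every feature bit it needs is present in the mask
   computed above (the real checks live in the builtin expansion code):  */
#if 0
static bool
builtin_available_p (HOST_WIDE_INT builtin_needs)
{
  HOST_WIDE_INT have = rs6000_builtin_mask_calculate ();
  return (builtin_needs & ~have) == 0;	/* no missing feature bits */
}
#endif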
3915 /* Implement TARGET_MD_ASM_ADJUST. All asm statements are considered
3916 to clobber the XER[CA] bit because clobbering that bit without telling
3917 the compiler worked just fine with versions of GCC before GCC 5, and
3918 breaking a lot of older code in ways that are hard to track down is
3919 not such a great idea. */
3921 static rtx_insn *
3922 rs6000_md_asm_adjust (vec<rtx> &/*outputs*/, vec<rtx> &/*inputs*/,
3923 vec<const char *> &/*constraints*/,
3924 vec<rtx> &clobbers, HARD_REG_SET &clobbered_regs)
3926 clobbers.safe_push (gen_rtx_REG (SImode, CA_REGNO));
3927 SET_HARD_REG_BIT (clobbered_regs, CA_REGNO);
3928 return NULL;
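
/* As a sketch of why the implicit clobber matters: user asm such as the
   following alters XER[CA] via addic/addze without naming it, yet compiled
   fine before GCC 5 (hypothetical example; operand names are illustrative):  */
#if 0
static unsigned long
carry_demo (unsigned long a, unsigned long c)
{
  unsigned long lo, hi;
  /* addic writes XER[CA]; addze reads it. Neither lists the bit.  */
  __asm__ ("addic %0,%2,1\n\taddze %1,%3"
	   : "=&r" (lo), "=r" (hi)
	   : "r" (a), "r" (c));
  return lo + hi;
}
#endif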
3931 /* Override command line options.
3933 Combine build-specific configuration information with options
3934 specified on the command line to set various state variables which
3935 influence code generation, optimization, and expansion of built-in
3936 functions. Assure that command-line configuration preferences are
3937 compatible with each other and with the build configuration; issue
3938 warnings while adjusting configuration or error messages while
3939 rejecting configuration.
3941 Upon entry to this function:
3943 This function is called once at the beginning of
3944 compilation, and then again at the start and end of compiling
3945 each section of code that has a different configuration, as
3946 indicated, for example, by adding the
3948 __attribute__((__target__("cpu=power9")))
3950 qualifier to a function definition or, for example, by bracketing
3951 code between
3953 #pragma GCC target("altivec")
3957 #pragma GCC reset_options
3959 directives. Parameter global_init_p is true for the initial
3960 invocation, which initializes global variables, and false for all
3961 subsequent invocations.
3964 Various global state information is assumed to be valid. This
3965 includes OPTION_TARGET_CPU_DEFAULT, representing the name of the
3966 default CPU specified at build configure time, TARGET_DEFAULT,
3967 representing the default set of option flags for the default
3968 target, and global_options_set.x_rs6000_isa_flags, representing
3969 which options were requested on the command line.
3971 Upon return from this function:
3973 rs6000_isa_flags_explicit has a non-zero bit for each flag that
3974 was set by name on the command line. Additionally, if certain
3975 attributes are automatically enabled or disabled by this function
3976 in order to assure compatibility between options and
3977 configuration, the flags associated with those attributes are
3978 also set. By setting these "explicit bits", we avoid the risk
3979 that other code might accidentally overwrite these particular
3980 attributes with "default values".
3982 The various bits of rs6000_isa_flags are set to indicate the
3983 target options that have been selected for the most current
3984 compilation efforts. This has the effect of also turning on the
3985 associated TARGET_XXX values since these are macros which are
3986 generally defined to test the corresponding bit of the
3987 rs6000_isa_flags variable.
3989 The variable rs6000_builtin_mask is set to represent the target
3990 options for the most current compilation efforts, consistent with
3991 the current contents of rs6000_isa_flags. This variable controls
3992 expansion of built-in functions.
3994 Various other global variables and fields of global structures
3995 (over 50 in all) are initialized to reflect the desired options
3996 for the most current compilation efforts. */
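
/* For example, a single translation unit can trigger the re-invocations
   described above with code along these lines (illustrative only):  */
#if 0
__attribute__((__target__("cpu=power9")))
void fast_path (void);			/* reconfigures for power9 here ...  */

#pragma GCC target("altivec")
void vector_path (void);		/* ... for AltiVec here ...  */
#pragma GCC reset_options		/* ... then restores the command line.  */
#endif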
3998 static bool
3999 rs6000_option_override_internal (bool global_init_p)
4001 bool ret = true;
4003 HOST_WIDE_INT set_masks;
4004 HOST_WIDE_INT ignore_masks;
4005 int cpu_index = -1;
4006 int tune_index;
4007 struct cl_target_option *main_target_opt
4008 = ((global_init_p || target_option_default_node == NULL)
4009 ? NULL : TREE_TARGET_OPTION (target_option_default_node));
4011 /* Print defaults. */
4012 if ((TARGET_DEBUG_REG || TARGET_DEBUG_TARGET) && global_init_p)
4013 rs6000_print_isa_options (stderr, 0, "TARGET_DEFAULT", TARGET_DEFAULT);
4015 /* Remember the explicit arguments. */
4016 if (global_init_p)
4017 rs6000_isa_flags_explicit = global_options_set.x_rs6000_isa_flags;
4019 /* On 64-bit Darwin, power alignment is ABI-incompatible with some C
4020 library functions, so warn about it. The flag may be useful for
4021 performance studies from time to time though, so don't disable it
4022 entirely. */
4023 if (global_options_set.x_rs6000_alignment_flags
4024 && rs6000_alignment_flags == MASK_ALIGN_POWER
4025 && DEFAULT_ABI == ABI_DARWIN
4026 && TARGET_64BIT)
4027 warning (0, "%qs is not supported for 64-bit Darwin;"
4028 " it is incompatible with the installed C and C++ libraries",
4029 "-malign-power");
4031 /* Numerous experiments show that IRA-based loop pressure
4032 calculation works better for RTL loop-invariant motion on targets
4033 with enough (>= 32) registers. It is an expensive optimization,
4034 so it is enabled only for peak performance. */
4035 if (optimize >= 3 && global_init_p
4036 && !global_options_set.x_flag_ira_loop_pressure)
4037 flag_ira_loop_pressure = 1;
4039 /* -fsanitize=address needs to turn on -fasynchronous-unwind-tables in order
4040 for tracebacks to be complete but not if any -fasynchronous-unwind-tables
4041 options were already specified. */
4042 if (flag_sanitize & SANITIZE_USER_ADDRESS
4043 && !global_options_set.x_flag_asynchronous_unwind_tables)
4044 flag_asynchronous_unwind_tables = 1;
4046 /* Set the pointer size. */
4047 if (TARGET_64BIT)
4049 rs6000_pmode = DImode;
4050 rs6000_pointer_size = 64;
4052 else
4054 rs6000_pmode = SImode;
4055 rs6000_pointer_size = 32;
4058 /* Some OSs don't support saving the high part of 64-bit registers on context
4059 switch. Other OSs don't support saving Altivec registers. On those OSs,
4060 we don't touch the OPTION_MASK_POWERPC64 or OPTION_MASK_ALTIVEC settings;
4061 if the user wants either, the user must explicitly specify them and we
4062 won't interfere with the user's specification. */
4064 set_masks = POWERPC_MASKS;
4065 #ifdef OS_MISSING_POWERPC64
4066 if (OS_MISSING_POWERPC64)
4067 set_masks &= ~OPTION_MASK_POWERPC64;
4068 #endif
4069 #ifdef OS_MISSING_ALTIVEC
4070 if (OS_MISSING_ALTIVEC)
4071 set_masks &= ~(OPTION_MASK_ALTIVEC | OPTION_MASK_VSX
4072 | OTHER_VSX_VECTOR_MASKS);
4073 #endif
4075 /* Don't let the processor default override options given explicitly. */
4076 set_masks &= ~rs6000_isa_flags_explicit;
4078 /* Process the -mcpu=<xxx> and -mtune=<xxx> arguments. If the user changed
4079 the cpu in a target attribute or pragma, but did not specify a tuning
4080 option, use the cpu for the tuning option rather than the option specified
4081 with -mtune on the command line. Process a '--with-cpu' configuration
4082 request as an implicit -mcpu. */
4083 if (rs6000_cpu_index >= 0)
4084 cpu_index = rs6000_cpu_index;
4085 else if (main_target_opt != NULL && main_target_opt->x_rs6000_cpu_index >= 0)
4086 cpu_index = main_target_opt->x_rs6000_cpu_index;
4087 else if (OPTION_TARGET_CPU_DEFAULT)
4088 cpu_index = rs6000_cpu_name_lookup (OPTION_TARGET_CPU_DEFAULT);
4090 if (cpu_index >= 0)
4092 const char *unavailable_cpu = NULL;
4093 switch (processor_target_table[cpu_index].processor)
4095 #ifndef HAVE_AS_POWER9
4096 case PROCESSOR_POWER9:
4097 unavailable_cpu = "power9";
4098 break;
4099 #endif
4100 #ifndef HAVE_AS_POWER8
4101 case PROCESSOR_POWER8:
4102 unavailable_cpu = "power8";
4103 break;
4104 #endif
4105 #ifndef HAVE_AS_POPCNTD
4106 case PROCESSOR_POWER7:
4107 unavailable_cpu = "power7";
4108 break;
4109 #endif
4110 #ifndef HAVE_AS_DFP
4111 case PROCESSOR_POWER6:
4112 unavailable_cpu = "power6";
4113 break;
4114 #endif
4115 #ifndef HAVE_AS_POPCNTB
4116 case PROCESSOR_POWER5:
4117 unavailable_cpu = "power5";
4118 break;
4119 #endif
4120 default:
4121 break;
4123 if (unavailable_cpu)
4125 cpu_index = -1;
4126 warning (0, "will not generate %qs instructions because "
4127 "assembler lacks %qs support", unavailable_cpu,
4128 unavailable_cpu);
4132 /* If we have a cpu, either through an explicit -mcpu=<xxx> or if the
4133 compiler was configured with --with-cpu=<xxx>, replace all of the ISA bits
4134 with those from the cpu, except for options that were explicitly set. If
4135 we don't have a cpu, do not override the target bits set in
4136 TARGET_DEFAULT. */
4137 if (cpu_index >= 0)
4139 rs6000_cpu_index = cpu_index;
4140 rs6000_isa_flags &= ~set_masks;
4141 rs6000_isa_flags |= (processor_target_table[cpu_index].target_enable
4142 & set_masks);
4144 else
4146 /* If no -mcpu=<xxx>, inherit any default options that were cleared via
4147 POWERPC_MASKS. Originally, TARGET_DEFAULT was used to initialize
4148 target_flags via the TARGET_DEFAULT_TARGET_FLAGS hook. When we switched
4149 to using rs6000_isa_flags, we need to do the initialization here.
4151 If there is a TARGET_DEFAULT, use that. Otherwise fall back to using
4152 -mcpu=powerpc, -mcpu=powerpc64, or -mcpu=powerpc64le defaults. */
4153 HOST_WIDE_INT flags;
4154 if (TARGET_DEFAULT)
4155 flags = TARGET_DEFAULT;
4156 else
4158 /* PowerPC 64-bit LE requires at least ISA 2.07. */
4159 const char *default_cpu = (!TARGET_POWERPC64
4160 ? "powerpc"
4161 : (BYTES_BIG_ENDIAN
4162 ? "powerpc64"
4163 : "powerpc64le"));
4164 int default_cpu_index = rs6000_cpu_name_lookup (default_cpu);
4165 flags = processor_target_table[default_cpu_index].target_enable;
4167 rs6000_isa_flags |= (flags & ~rs6000_isa_flags_explicit);
4170 if (rs6000_tune_index >= 0)
4171 tune_index = rs6000_tune_index;
4172 else if (cpu_index >= 0)
4173 rs6000_tune_index = tune_index = cpu_index;
4174 else
4176 size_t i;
4177 enum processor_type tune_proc
4178 = (TARGET_POWERPC64 ? PROCESSOR_DEFAULT64 : PROCESSOR_DEFAULT);
4180 tune_index = -1;
4181 for (i = 0; i < ARRAY_SIZE (processor_target_table); i++)
4182 if (processor_target_table[i].processor == tune_proc)
4184 tune_index = i;
4185 break;
4189 if (cpu_index >= 0)
4190 rs6000_cpu = processor_target_table[cpu_index].processor;
4191 else
4192 rs6000_cpu = TARGET_POWERPC64 ? PROCESSOR_DEFAULT64 : PROCESSOR_DEFAULT;
4194 gcc_assert (tune_index >= 0);
4195 rs6000_tune = processor_target_table[tune_index].processor;
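
/* E.g. "-mcpu=power8 -mtune=power9" selects the power8 ISA bits with power9
   scheduling, while plain "-mcpu=power9" makes the tuning follow the cpu.  */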
4197 if (rs6000_cpu == PROCESSOR_PPCE300C2 || rs6000_cpu == PROCESSOR_PPCE300C3
4198 || rs6000_cpu == PROCESSOR_PPCE500MC || rs6000_cpu == PROCESSOR_PPCE500MC64
4199 || rs6000_cpu == PROCESSOR_PPCE5500)
4201 if (TARGET_ALTIVEC)
4202 error ("AltiVec not supported in this target");
4205 /* If we are optimizing big endian systems for space, use the load/store
4206 multiple instructions. */
4207 if (BYTES_BIG_ENDIAN && optimize_size)
4208 rs6000_isa_flags |= ~rs6000_isa_flags_explicit & OPTION_MASK_MULTIPLE;
4210 /* Don't allow -mmultiple on little endian systems unless the cpu is a 750,
4211 because the hardware doesn't support the instructions used in little
4212 endian mode, and they cause an alignment trap. The 750 does not cause an
4213 alignment trap (except when the target is unaligned). */
4215 if (!BYTES_BIG_ENDIAN && rs6000_cpu != PROCESSOR_PPC750 && TARGET_MULTIPLE)
4217 rs6000_isa_flags &= ~OPTION_MASK_MULTIPLE;
4218 if ((rs6000_isa_flags_explicit & OPTION_MASK_MULTIPLE) != 0)
4219 warning (0, "%qs is not supported on little endian systems",
4220 "-mmultiple");
4223 /* If little-endian, default to -mstrict-align on older processors.
4224 Testing for htm matches power8 and later. */
4225 if (!BYTES_BIG_ENDIAN
4226 && !(processor_target_table[tune_index].target_enable & OPTION_MASK_HTM))
4227 rs6000_isa_flags |= ~rs6000_isa_flags_explicit & OPTION_MASK_STRICT_ALIGN;
4229 if (!rs6000_fold_gimple)
4230 fprintf (stderr,
4231 "gimple folding of rs6000 builtins has been disabled.\n");
4233 /* Add some warnings for VSX. */
4234 if (TARGET_VSX)
4236 const char *msg = NULL;
4237 if (!TARGET_HARD_FLOAT)
4239 if (rs6000_isa_flags_explicit & OPTION_MASK_VSX)
4240 msg = N_("-mvsx requires hardware floating point");
4241 else
4243 rs6000_isa_flags &= ~ OPTION_MASK_VSX;
4244 rs6000_isa_flags_explicit |= OPTION_MASK_VSX;
4247 else if (TARGET_AVOID_XFORM > 0)
4248 msg = N_("-mvsx needs indexed addressing");
4249 else if (!TARGET_ALTIVEC && (rs6000_isa_flags_explicit
4250 & OPTION_MASK_ALTIVEC))
4252 if (rs6000_isa_flags_explicit & OPTION_MASK_VSX)
4253 msg = N_("-mvsx and -mno-altivec are incompatible");
4254 else
4255 msg = N_("-mno-altivec disables vsx");
4258 if (msg)
4260 warning (0, msg);
4261 rs6000_isa_flags &= ~ OPTION_MASK_VSX;
4262 rs6000_isa_flags_explicit |= OPTION_MASK_VSX;
4266 /* If hard-float/altivec/vsx were explicitly turned off then don't allow
4267 the -mcpu setting to enable options that conflict. */
4268 if ((!TARGET_HARD_FLOAT || !TARGET_ALTIVEC || !TARGET_VSX)
4269 && (rs6000_isa_flags_explicit & (OPTION_MASK_SOFT_FLOAT
4270 | OPTION_MASK_ALTIVEC
4271 | OPTION_MASK_VSX)) != 0)
4272 rs6000_isa_flags &= ~((OPTION_MASK_P8_VECTOR | OPTION_MASK_CRYPTO
4273 | OPTION_MASK_DIRECT_MOVE)
4274 & ~rs6000_isa_flags_explicit);
4276 if (TARGET_DEBUG_REG || TARGET_DEBUG_TARGET)
4277 rs6000_print_isa_options (stderr, 0, "before defaults", rs6000_isa_flags);
4279 /* Handle explicit -mno-{altivec,vsx,power8-vector,power9-vector} and turn
4280 off all of the options that depend on those flags. */
4281 ignore_masks = rs6000_disable_incompatible_switches ();
4283 /* For the newer switches (vsx, dfp, etc.) set some of the older options,
4284 unless the user explicitly used the -mno-<option> to disable the code. */
4285 if (TARGET_P9_VECTOR || TARGET_MODULO || TARGET_P9_MISC)
4286 rs6000_isa_flags |= (ISA_3_0_MASKS_SERVER & ~ignore_masks);
4287 else if (TARGET_P9_MINMAX)
4289 if (cpu_index >= 0)
4291 if (cpu_index == PROCESSOR_POWER9)
4293 /* legacy behavior: allow -mcpu=power9 with certain
4294 capabilities explicitly disabled. */
4295 rs6000_isa_flags |= (ISA_3_0_MASKS_SERVER & ~ignore_masks);
4297 else
4298 error ("power9 target option is incompatible with %<%s=<xxx>%> "
4299 "for <xxx> less than power9", "-mcpu");
4301 else if ((ISA_3_0_MASKS_SERVER & rs6000_isa_flags_explicit)
4302 != (ISA_3_0_MASKS_SERVER & rs6000_isa_flags
4303 & rs6000_isa_flags_explicit))
4304 /* Enforce that none of the ISA_3_0_MASKS_SERVER flags
4305 were explicitly cleared. */
4306 error ("%qs incompatible with explicitly disabled options",
4307 "-mpower9-minmax");
4308 else
4309 rs6000_isa_flags |= ISA_3_0_MASKS_SERVER;
4311 else if (TARGET_P8_VECTOR || TARGET_DIRECT_MOVE || TARGET_CRYPTO)
4312 rs6000_isa_flags |= (ISA_2_7_MASKS_SERVER & ~ignore_masks);
4313 else if (TARGET_VSX)
4314 rs6000_isa_flags |= (ISA_2_6_MASKS_SERVER & ~ignore_masks);
4315 else if (TARGET_POPCNTD)
4316 rs6000_isa_flags |= (ISA_2_6_MASKS_EMBEDDED & ~ignore_masks);
4317 else if (TARGET_DFP)
4318 rs6000_isa_flags |= (ISA_2_5_MASKS_SERVER & ~ignore_masks);
4319 else if (TARGET_CMPB)
4320 rs6000_isa_flags |= (ISA_2_5_MASKS_EMBEDDED & ~ignore_masks);
4321 else if (TARGET_FPRND)
4322 rs6000_isa_flags |= (ISA_2_4_MASKS & ~ignore_masks);
4323 else if (TARGET_POPCNTB)
4324 rs6000_isa_flags |= (ISA_2_2_MASKS & ~ignore_masks);
4325 else if (TARGET_ALTIVEC)
4326 rs6000_isa_flags |= (OPTION_MASK_PPC_GFXOPT & ~ignore_masks);
4328 if (TARGET_CRYPTO && !TARGET_ALTIVEC)
4330 if (rs6000_isa_flags_explicit & OPTION_MASK_CRYPTO)
4331 error ("%qs requires %qs", "-mcrypto", "-maltivec");
4332 rs6000_isa_flags &= ~OPTION_MASK_CRYPTO;
4335 if (TARGET_DIRECT_MOVE && !TARGET_VSX)
4337 if (rs6000_isa_flags_explicit & OPTION_MASK_DIRECT_MOVE)
4338 error ("%qs requires %qs", "-mdirect-move", "-mvsx");
4339 rs6000_isa_flags &= ~OPTION_MASK_DIRECT_MOVE;
4342 if (TARGET_P8_VECTOR && !TARGET_ALTIVEC)
4344 if (rs6000_isa_flags_explicit & OPTION_MASK_P8_VECTOR)
4345 error ("%qs requires %qs", "-mpower8-vector", "-maltivec");
4346 rs6000_isa_flags &= ~OPTION_MASK_P8_VECTOR;
4349 if (TARGET_P8_VECTOR && !TARGET_VSX)
4351 if ((rs6000_isa_flags_explicit & OPTION_MASK_P8_VECTOR)
4352 && (rs6000_isa_flags_explicit & OPTION_MASK_VSX))
4353 error ("%qs requires %qs", "-mpower8-vector", "-mvsx");
4354 else if ((rs6000_isa_flags_explicit & OPTION_MASK_P8_VECTOR) == 0)
4356 rs6000_isa_flags &= ~OPTION_MASK_P8_VECTOR;
4357 if (rs6000_isa_flags_explicit & OPTION_MASK_VSX)
4358 rs6000_isa_flags_explicit |= OPTION_MASK_P8_VECTOR;
4360 else
4362 /* OPTION_MASK_P8_VECTOR is explicit, and OPTION_MASK_VSX is
4363 not explicit. */
4364 rs6000_isa_flags |= OPTION_MASK_VSX;
4365 rs6000_isa_flags_explicit |= OPTION_MASK_VSX;
4369 if (TARGET_DFP && !TARGET_HARD_FLOAT)
4371 if (rs6000_isa_flags_explicit & OPTION_MASK_DFP)
4372 error ("%qs requires %qs", "-mhard-dfp", "-mhard-float");
4373 rs6000_isa_flags &= ~OPTION_MASK_DFP;
4376 /* The quad memory instructions only work in 64-bit mode. In 32-bit mode,
4377 silently turn off quad memory mode. */
4378 if ((TARGET_QUAD_MEMORY || TARGET_QUAD_MEMORY_ATOMIC) && !TARGET_POWERPC64)
4380 if ((rs6000_isa_flags_explicit & OPTION_MASK_QUAD_MEMORY) != 0)
4381 warning (0, N_("-mquad-memory requires 64-bit mode"));
4383 if ((rs6000_isa_flags_explicit & OPTION_MASK_QUAD_MEMORY_ATOMIC) != 0)
4384 warning (0, N_("-mquad-memory-atomic requires 64-bit mode"));
4386 rs6000_isa_flags &= ~(OPTION_MASK_QUAD_MEMORY
4387 | OPTION_MASK_QUAD_MEMORY_ATOMIC);
4390 /* Non-atomic quad memory loads/stores are disabled for little endian,
4391 since the words are reversed, but atomic operations can still be done by
4392 swapping the words. */
4393 if (TARGET_QUAD_MEMORY && !WORDS_BIG_ENDIAN)
4395 if ((rs6000_isa_flags_explicit & OPTION_MASK_QUAD_MEMORY) != 0)
4396 warning (0, N_("-mquad-memory is not available in little endian "
4397 "mode"));
4399 rs6000_isa_flags &= ~OPTION_MASK_QUAD_MEMORY;
4402 /* Assume that if the user asked for normal quad memory instructions, they
4403 want the atomic versions as well, unless they explicitly told us not to
4404 use quad word atomic instructions. */
4405 if (TARGET_QUAD_MEMORY
4406 && !TARGET_QUAD_MEMORY_ATOMIC
4407 && ((rs6000_isa_flags_explicit & OPTION_MASK_QUAD_MEMORY_ATOMIC) == 0))
4408 rs6000_isa_flags |= OPTION_MASK_QUAD_MEMORY_ATOMIC;
4410 /* If we can shrink-wrap the TOC register save separately, then use
4411 -msave-toc-indirect unless explicitly disabled. */
4412 if ((rs6000_isa_flags_explicit & OPTION_MASK_SAVE_TOC_INDIRECT) == 0
4413 && flag_shrink_wrap_separate
4414 && optimize_function_for_speed_p (cfun))
4415 rs6000_isa_flags |= OPTION_MASK_SAVE_TOC_INDIRECT;
4417 /* Enable power8 fusion if we are tuning for power8, even if we aren't
4418 generating power8 instructions. */
4419 if (!(rs6000_isa_flags_explicit & OPTION_MASK_P8_FUSION))
4420 rs6000_isa_flags |= (processor_target_table[tune_index].target_enable
4421 & OPTION_MASK_P8_FUSION);
4423 /* Setting additional fusion flags turns on base fusion. */
4424 if (!TARGET_P8_FUSION && (TARGET_P8_FUSION_SIGN || TARGET_TOC_FUSION))
4426 if (rs6000_isa_flags_explicit & OPTION_MASK_P8_FUSION)
4428 if (TARGET_P8_FUSION_SIGN)
4429 error ("%qs requires %qs", "-mpower8-fusion-sign",
4430 "-mpower8-fusion");
4432 if (TARGET_TOC_FUSION)
4433 error ("%qs requires %qs", "-mtoc-fusion", "-mpower8-fusion");
4435 rs6000_isa_flags &= ~OPTION_MASK_P8_FUSION;
4437 else
4438 rs6000_isa_flags |= OPTION_MASK_P8_FUSION;
4441 /* Power9 fusion is a superset of power8 fusion. */
4442 if (TARGET_P9_FUSION && !TARGET_P8_FUSION)
4444 if (rs6000_isa_flags_explicit & OPTION_MASK_P8_FUSION)
4446 /* We prefer to not mention undocumented options in
4447 error messages. However, if users have managed to select
4448 power9-fusion without selecting power8-fusion, they
4449 already know about undocumented flags. */
4450 error ("%qs requires %qs", "-mpower9-fusion", "-mpower8-fusion");
4451 rs6000_isa_flags &= ~OPTION_MASK_P9_FUSION;
4453 else
4454 rs6000_isa_flags |= OPTION_MASK_P8_FUSION;
4457 /* Enable power9 fusion if we are tuning for power9, even if we aren't
4458 generating power9 instructions. */
4459 if (!(rs6000_isa_flags_explicit & OPTION_MASK_P9_FUSION))
4460 rs6000_isa_flags |= (processor_target_table[tune_index].target_enable
4461 & OPTION_MASK_P9_FUSION);
4463 /* Power8 does not fuse sign-extended loads with the addis. If we are
4464 optimizing at high levels for speed, convert a sign-extended load into a
4465 zero-extending load plus an explicit sign extension. */
4466 if (TARGET_P8_FUSION
4467 && !(rs6000_isa_flags_explicit & OPTION_MASK_P8_FUSION_SIGN)
4468 && optimize_function_for_speed_p (cfun)
4469 && optimize >= 3)
4470 rs6000_isa_flags |= OPTION_MASK_P8_FUSION_SIGN;
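
/* I.e. at -O3 prefer a fusible zero-extending lhz/lwz followed by an explicit
   extsh/extsw over a single non-fusible sign-extending lha/lwa (a sketch of
   the intent; the actual choice is made in the fusion patterns).  */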
4472 /* TOC fusion requires 64-bit and medium/large code model. */
4473 if (TARGET_TOC_FUSION && !TARGET_POWERPC64)
4475 rs6000_isa_flags &= ~OPTION_MASK_TOC_FUSION;
4476 if ((rs6000_isa_flags_explicit & OPTION_MASK_TOC_FUSION) != 0)
4477 warning (0, N_("-mtoc-fusion requires 64-bit mode"));
4480 if (TARGET_TOC_FUSION && (TARGET_CMODEL == CMODEL_SMALL))
4482 rs6000_isa_flags &= ~OPTION_MASK_TOC_FUSION;
4483 if ((rs6000_isa_flags_explicit & OPTION_MASK_TOC_FUSION) != 0)
4484 warning (0, N_("-mtoc-fusion requires medium/large code model"));
4487 /* Turn on -mtoc-fusion by default if p8-fusion and 64-bit medium/large code
4488 model. */
4489 if (TARGET_P8_FUSION && !TARGET_TOC_FUSION && TARGET_POWERPC64
4490 && (TARGET_CMODEL != CMODEL_SMALL)
4491 && !(rs6000_isa_flags_explicit & OPTION_MASK_TOC_FUSION))
4492 rs6000_isa_flags |= OPTION_MASK_TOC_FUSION;
4494 /* ISA 3.0 vector instructions include ISA 2.07. */
4495 if (TARGET_P9_VECTOR && !TARGET_P8_VECTOR)
4497 /* We prefer to not mention undocumented options in
4498 error messages. However, if users have managed to select
4499 power9-vector without selecting power8-vector, they
4500 already know about undocumented flags. */
4501 if ((rs6000_isa_flags_explicit & OPTION_MASK_P9_VECTOR)
4502 && (rs6000_isa_flags_explicit & OPTION_MASK_P8_VECTOR))
4503 error ("%qs requires %qs", "-mpower9-vector", "-mpower8-vector");
4504 else if ((rs6000_isa_flags_explicit & OPTION_MASK_P9_VECTOR) == 0)
4506 rs6000_isa_flags &= ~OPTION_MASK_P9_VECTOR;
4507 if (rs6000_isa_flags_explicit & OPTION_MASK_P8_VECTOR)
4508 rs6000_isa_flags_explicit |= OPTION_MASK_P9_VECTOR;
4510 else
4512 /* OPTION_MASK_P9_VECTOR is explicit and
4513 OPTION_MASK_P8_VECTOR is not explicit. */
4514 rs6000_isa_flags |= OPTION_MASK_P8_VECTOR;
4515 rs6000_isa_flags_explicit |= OPTION_MASK_P8_VECTOR;
4519 /* Set -mallow-movmisalign explicitly on if we have full ISA 2.07
4520 support. If we only have ISA 2.06 support, and the user did not specify
4521 the switch, leave it set to -1 so the movmisalign patterns are enabled,
4522 but we don't enable the full vectorization support. */
4523 if (TARGET_ALLOW_MOVMISALIGN == -1 && TARGET_P8_VECTOR && TARGET_DIRECT_MOVE)
4524 TARGET_ALLOW_MOVMISALIGN = 1;
4526 else if (TARGET_ALLOW_MOVMISALIGN && !TARGET_VSX)
4528 if (TARGET_ALLOW_MOVMISALIGN > 0
4529 && global_options_set.x_TARGET_ALLOW_MOVMISALIGN)
4530 error ("%qs requires %qs", "-mallow-movmisalign", "-mvsx");
4532 TARGET_ALLOW_MOVMISALIGN = 0;
4535 /* Determine when unaligned vector accesses are permitted, and when
4536 they are preferred over masked Altivec loads. Note that if
4537 TARGET_ALLOW_MOVMISALIGN has been disabled by the user, then
4538 TARGET_EFFICIENT_UNALIGNED_VSX must be as well. The converse is
4539 not true. */
4540 if (TARGET_EFFICIENT_UNALIGNED_VSX)
4542 if (!TARGET_VSX)
4544 if (rs6000_isa_flags_explicit & OPTION_MASK_EFFICIENT_UNALIGNED_VSX)
4545 error ("%qs requires %qs", "-mefficient-unaligned-vsx", "-mvsx");
4547 rs6000_isa_flags &= ~OPTION_MASK_EFFICIENT_UNALIGNED_VSX;
4550 else if (!TARGET_ALLOW_MOVMISALIGN)
4552 if (rs6000_isa_flags_explicit & OPTION_MASK_EFFICIENT_UNALIGNED_VSX)
4553 error ("%qs requires %qs", "-mefficient-unaligned-vsx",
4554 "-mallow-movmisalign");
4556 rs6000_isa_flags &= ~OPTION_MASK_EFFICIENT_UNALIGNED_VSX;
4560 /* Set long double size before the IEEE 128-bit tests. */
4561 if (!global_options_set.x_rs6000_long_double_type_size)
4563 if (main_target_opt != NULL
4564 && (main_target_opt->x_rs6000_long_double_type_size
4565 != RS6000_DEFAULT_LONG_DOUBLE_SIZE))
4566 error ("target attribute or pragma changes long double size");
4567 else
4568 rs6000_long_double_type_size = RS6000_DEFAULT_LONG_DOUBLE_SIZE;
4571 /* Set -mabi=ieeelongdouble on some old targets. In the future, power server
4572 systems will also set long double to be IEEE 128-bit. AIX and Darwin
4573 explicitly redefine TARGET_IEEEQUAD and TARGET_IEEEQUAD_DEFAULT to 0, so
4574 those systems will not pick up this default. Warn if the user changes the
4575 default unless either the user used the -Wno-psabi option, or the compiler
4576 was built to enable multilibs to switch between the two long double
4577 types. */
4578 if (!global_options_set.x_rs6000_ieeequad)
4579 rs6000_ieeequad = TARGET_IEEEQUAD_DEFAULT;
4581 else if (!TARGET_IEEEQUAD_MULTILIB
4582 && rs6000_ieeequad != TARGET_IEEEQUAD_DEFAULT
4583 && TARGET_LONG_DOUBLE_128)
4585 static bool warned_change_long_double;
4586 if (!warned_change_long_double)
4588 warned_change_long_double = true;
4589 if (TARGET_IEEEQUAD)
4590 warning (OPT_Wpsabi, "Using IEEE extended precision long double");
4591 else
4592 warning (OPT_Wpsabi, "Using IBM extended precision long double");
4596 /* Enable the default support for IEEE 128-bit floating point on Linux VSX
4597 systems. In GCC 7, we would enable the IEEE 128-bit floating point
4598 infrastructure (-mfloat128-type) but not enable the actual __float128 type
4599 unless the user used the explicit -mfloat128. In GCC 8, we enable both
4600 the keyword and the type. */
4601 TARGET_FLOAT128_TYPE = TARGET_FLOAT128_ENABLE_TYPE && TARGET_VSX;
4603 /* IEEE 128-bit floating point requires VSX support. */
4604 if (TARGET_FLOAT128_KEYWORD)
4606 if (!TARGET_VSX)
4608 if ((rs6000_isa_flags_explicit & OPTION_MASK_FLOAT128_KEYWORD) != 0)
4609 error ("%qs requires VSX support", "-mfloat128");
4611 TARGET_FLOAT128_TYPE = 0;
4612 rs6000_isa_flags &= ~(OPTION_MASK_FLOAT128_KEYWORD
4613 | OPTION_MASK_FLOAT128_HW);
4615 else if (!TARGET_FLOAT128_TYPE)
4617 TARGET_FLOAT128_TYPE = 1;
4618 warning (0, "The -mfloat128 option may not be fully supported");
4622 /* Enable the __float128 keyword under Linux by default. */
4623 if (TARGET_FLOAT128_TYPE && !TARGET_FLOAT128_KEYWORD
4624 && (rs6000_isa_flags_explicit & OPTION_MASK_FLOAT128_KEYWORD) == 0)
4625 rs6000_isa_flags |= OPTION_MASK_FLOAT128_KEYWORD;
4627 /* If we are supporting the float128 type and have full ISA 3.0 support,
4628 enable -mfloat128-hardware by default. However, don't enable the
4629 __float128 keyword if it was explicitly turned off. 64-bit mode is needed
4630 because sometimes the compiler wants to put things in an integer
4631 container, and if we don't have __int128 support, it is impossible. */
4632 if (TARGET_FLOAT128_TYPE && !TARGET_FLOAT128_HW && TARGET_64BIT
4633 && (rs6000_isa_flags & ISA_3_0_MASKS_IEEE) == ISA_3_0_MASKS_IEEE
4634 && !(rs6000_isa_flags_explicit & OPTION_MASK_FLOAT128_HW))
4635 rs6000_isa_flags |= OPTION_MASK_FLOAT128_HW;
4637 if (TARGET_FLOAT128_HW
4638 && (rs6000_isa_flags & ISA_3_0_MASKS_IEEE) != ISA_3_0_MASKS_IEEE)
4640 if ((rs6000_isa_flags_explicit & OPTION_MASK_FLOAT128_HW) != 0)
4641 error ("%qs requires full ISA 3.0 support", "-mfloat128-hardware");
4643 rs6000_isa_flags &= ~OPTION_MASK_FLOAT128_HW;
4646 if (TARGET_FLOAT128_HW && !TARGET_64BIT)
4648 if ((rs6000_isa_flags_explicit & OPTION_MASK_FLOAT128_HW) != 0)
4649 error ("%qs requires %qs", "-mfloat128-hardware", "-m64");
4651 rs6000_isa_flags &= ~OPTION_MASK_FLOAT128_HW;
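
/* Under the defaults above, user code such as the following (a minimal
   sketch) uses the ISA 3.0 hardware instructions (e.g. xsaddqp) when
   -mfloat128-hardware is in effect, and software emulation otherwise:  */
#if 0
__float128
qadd (__float128 a, __float128 b)
{
  return a + b;
}
#endif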
4654 /* Print the options after updating the defaults. */
4655 if (TARGET_DEBUG_REG || TARGET_DEBUG_TARGET)
4656 rs6000_print_isa_options (stderr, 0, "after defaults", rs6000_isa_flags);
4658 /* E500mc does "better" if we inline more aggressively. Respect the
4659 user's opinion, though. */
4660 if (rs6000_block_move_inline_limit == 0
4661 && (rs6000_tune == PROCESSOR_PPCE500MC
4662 || rs6000_tune == PROCESSOR_PPCE500MC64
4663 || rs6000_tune == PROCESSOR_PPCE5500
4664 || rs6000_tune == PROCESSOR_PPCE6500))
4665 rs6000_block_move_inline_limit = 128;
4667 /* store_one_arg depends on expand_block_move to handle at least the
4668 size of reg_parm_stack_space. */
4669 if (rs6000_block_move_inline_limit < (TARGET_POWERPC64 ? 64 : 32))
4670 rs6000_block_move_inline_limit = (TARGET_POWERPC64 ? 64 : 32);
4672 if (global_init_p)
4674 /* If the appropriate debug option is enabled, replace the target hooks
4675 with debug versions that call the real version and then prints
4676 debugging information. */
4677 if (TARGET_DEBUG_COST)
4679 targetm.rtx_costs = rs6000_debug_rtx_costs;
4680 targetm.address_cost = rs6000_debug_address_cost;
4681 targetm.sched.adjust_cost = rs6000_debug_adjust_cost;
4684 if (TARGET_DEBUG_ADDR)
4686 targetm.legitimate_address_p = rs6000_debug_legitimate_address_p;
4687 targetm.legitimize_address = rs6000_debug_legitimize_address;
4688 rs6000_secondary_reload_class_ptr
4689 = rs6000_debug_secondary_reload_class;
4690 targetm.secondary_memory_needed
4691 = rs6000_debug_secondary_memory_needed;
4692 targetm.can_change_mode_class
4693 = rs6000_debug_can_change_mode_class;
4694 rs6000_preferred_reload_class_ptr
4695 = rs6000_debug_preferred_reload_class;
4696 rs6000_legitimize_reload_address_ptr
4697 = rs6000_debug_legitimize_reload_address;
4698 rs6000_mode_dependent_address_ptr
4699 = rs6000_debug_mode_dependent_address;
4702 if (rs6000_veclibabi_name)
4704 if (strcmp (rs6000_veclibabi_name, "mass") == 0)
4705 rs6000_veclib_handler = rs6000_builtin_vectorized_libmass;
4706 else
4708 error ("unknown vectorization library ABI type (%qs) for "
4709 "%qs switch", rs6000_veclibabi_name, "-mveclibabi=");
4710 ret = false;
4715 /* Disable VSX and Altivec silently if the user switched cpus to power7 in a
4716 target attribute or pragma which automatically enables both options,
4717 unless the altivec ABI was set. This is set by default for 64-bit, but
4718 not for 32-bit. */
4719 if (main_target_opt != NULL && !main_target_opt->x_rs6000_altivec_abi)
4721 TARGET_FLOAT128_TYPE = 0;
4722 rs6000_isa_flags &= ~((OPTION_MASK_VSX | OPTION_MASK_ALTIVEC
4723 | OPTION_MASK_FLOAT128_KEYWORD)
4724 & ~rs6000_isa_flags_explicit);
4727 /* Enable Altivec ABI for AIX -maltivec. */
4728 if (TARGET_XCOFF && (TARGET_ALTIVEC || TARGET_VSX))
4730 if (main_target_opt != NULL && !main_target_opt->x_rs6000_altivec_abi)
4731 error ("target attribute or pragma changes AltiVec ABI");
4732 else
4733 rs6000_altivec_abi = 1;
4736 /* The AltiVec ABI is the default for PowerPC-64 GNU/Linux. For
4737 PowerPC-32 GNU/Linux, -maltivec implies the AltiVec ABI. It can
4738 be explicitly overridden in either case. */
4739 if (TARGET_ELF)
4741 if (!global_options_set.x_rs6000_altivec_abi
4742 && (TARGET_64BIT || TARGET_ALTIVEC || TARGET_VSX))
4744 if (main_target_opt != NULL
4745 && !main_target_opt->x_rs6000_altivec_abi)
4746 error ("target attribute or pragma changes AltiVec ABI");
4747 else
4748 rs6000_altivec_abi = 1;
4752 /* Set the Darwin64 ABI as default for 64-bit Darwin.
4753 So far, the only darwin64 targets are also MACH-O. */
4754 if (TARGET_MACHO
4755 && DEFAULT_ABI == ABI_DARWIN
4756 && TARGET_64BIT)
4758 if (main_target_opt != NULL && !main_target_opt->x_rs6000_darwin64_abi)
4759 error ("target attribute or pragma changes darwin64 ABI");
4760 else
4762 rs6000_darwin64_abi = 1;
4763 /* Default to natural alignment, for better performance. */
4764 rs6000_alignment_flags = MASK_ALIGN_NATURAL;
4768 /* Place FP constants in the constant pool instead of the TOC
4769 if section anchors are enabled. */
4770 if (flag_section_anchors
4771 && !global_options_set.x_TARGET_NO_FP_IN_TOC)
4772 TARGET_NO_FP_IN_TOC = 1;
4774 if (TARGET_DEBUG_REG || TARGET_DEBUG_TARGET)
4775 rs6000_print_isa_options (stderr, 0, "before subtarget", rs6000_isa_flags);
4777 #ifdef SUBTARGET_OVERRIDE_OPTIONS
4778 SUBTARGET_OVERRIDE_OPTIONS;
4779 #endif
4780 #ifdef SUBSUBTARGET_OVERRIDE_OPTIONS
4781 SUBSUBTARGET_OVERRIDE_OPTIONS;
4782 #endif
4783 #ifdef SUB3TARGET_OVERRIDE_OPTIONS
4784 SUB3TARGET_OVERRIDE_OPTIONS;
4785 #endif
4787 if (TARGET_DEBUG_REG || TARGET_DEBUG_TARGET)
4788 rs6000_print_isa_options (stderr, 0, "after subtarget", rs6000_isa_flags);
4790 rs6000_always_hint = (rs6000_tune != PROCESSOR_POWER4
4791 && rs6000_tune != PROCESSOR_POWER5
4792 && rs6000_tune != PROCESSOR_POWER6
4793 && rs6000_tune != PROCESSOR_POWER7
4794 && rs6000_tune != PROCESSOR_POWER8
4795 && rs6000_tune != PROCESSOR_POWER9
4796 && rs6000_tune != PROCESSOR_PPCA2
4797 && rs6000_tune != PROCESSOR_CELL
4798 && rs6000_tune != PROCESSOR_PPC476);
4799 rs6000_sched_groups = (rs6000_tune == PROCESSOR_POWER4
4800 || rs6000_tune == PROCESSOR_POWER5
4801 || rs6000_tune == PROCESSOR_POWER7
4802 || rs6000_tune == PROCESSOR_POWER8);
4803 rs6000_align_branch_targets = (rs6000_tune == PROCESSOR_POWER4
4804 || rs6000_tune == PROCESSOR_POWER5
4805 || rs6000_tune == PROCESSOR_POWER6
4806 || rs6000_tune == PROCESSOR_POWER7
4807 || rs6000_tune == PROCESSOR_POWER8
4808 || rs6000_tune == PROCESSOR_POWER9
4809 || rs6000_tune == PROCESSOR_PPCE500MC
4810 || rs6000_tune == PROCESSOR_PPCE500MC64
4811 || rs6000_tune == PROCESSOR_PPCE5500
4812 || rs6000_tune == PROCESSOR_PPCE6500);
4814 /* Allow debug switches to override the above settings. These are set to -1
4815 in rs6000.opt to indicate the user hasn't directly set the switch. */
4816 if (TARGET_ALWAYS_HINT >= 0)
4817 rs6000_always_hint = TARGET_ALWAYS_HINT;
4819 if (TARGET_SCHED_GROUPS >= 0)
4820 rs6000_sched_groups = TARGET_SCHED_GROUPS;
4822 if (TARGET_ALIGN_BRANCH_TARGETS >= 0)
4823 rs6000_align_branch_targets = TARGET_ALIGN_BRANCH_TARGETS;
4825 rs6000_sched_restricted_insns_priority
4826 = (rs6000_sched_groups ? 1 : 0);
4828 /* Handle -msched-costly-dep option. */
4829 rs6000_sched_costly_dep
4830 = (rs6000_sched_groups ? true_store_to_load_dep_costly : no_dep_costly);
4832 if (rs6000_sched_costly_dep_str)
4834 if (! strcmp (rs6000_sched_costly_dep_str, "no"))
4835 rs6000_sched_costly_dep = no_dep_costly;
4836 else if (! strcmp (rs6000_sched_costly_dep_str, "all"))
4837 rs6000_sched_costly_dep = all_deps_costly;
4838 else if (! strcmp (rs6000_sched_costly_dep_str, "true_store_to_load"))
4839 rs6000_sched_costly_dep = true_store_to_load_dep_costly;
4840 else if (! strcmp (rs6000_sched_costly_dep_str, "store_to_load"))
4841 rs6000_sched_costly_dep = store_to_load_dep_costly;
4842 else
4843 rs6000_sched_costly_dep = ((enum rs6000_dependence_cost)
4844 atoi (rs6000_sched_costly_dep_str));
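
/* E.g. -msched-costly-dep=store_to_load, or a numeric latency threshold such
   as -msched-costly-dep=3, which is handled by the atoi fallback above.  */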
4847 /* Handle -minsert-sched-nops option. */
4848 rs6000_sched_insert_nops
4849 = (rs6000_sched_groups ? sched_finish_regroup_exact : sched_finish_none);
4851 if (rs6000_sched_insert_nops_str)
4853 if (! strcmp (rs6000_sched_insert_nops_str, "no"))
4854 rs6000_sched_insert_nops = sched_finish_none;
4855 else if (! strcmp (rs6000_sched_insert_nops_str, "pad"))
4856 rs6000_sched_insert_nops = sched_finish_pad_groups;
4857 else if (! strcmp (rs6000_sched_insert_nops_str, "regroup_exact"))
4858 rs6000_sched_insert_nops = sched_finish_regroup_exact;
4859 else
4860 rs6000_sched_insert_nops = ((enum rs6000_nop_insertion)
4861 atoi (rs6000_sched_insert_nops_str));
4864 /* Handle the stack protector options. */
4865 if (!global_options_set.x_rs6000_stack_protector_guard)
4866 #ifdef TARGET_THREAD_SSP_OFFSET
4867 rs6000_stack_protector_guard = SSP_TLS;
4868 #else
4869 rs6000_stack_protector_guard = SSP_GLOBAL;
4870 #endif
4872 #ifdef TARGET_THREAD_SSP_OFFSET
4873 rs6000_stack_protector_guard_offset = TARGET_THREAD_SSP_OFFSET;
4874 rs6000_stack_protector_guard_reg = TARGET_64BIT ? 13 : 2;
4875 #endif
4877 if (global_options_set.x_rs6000_stack_protector_guard_offset_str)
4879 char *endp;
4880 const char *str = rs6000_stack_protector_guard_offset_str;
4882 errno = 0;
4883 long offset = strtol (str, &endp, 0);
4884 if (!*str || *endp || errno)
4885 error ("%qs is not a valid number in %qs", str,
4886 "-mstack-protector-guard-offset=");
4888 if (!IN_RANGE (offset, -0x8000, 0x7fff)
4889 || (TARGET_64BIT && (offset & 3)))
4890 error ("%qs is not a valid offset in %qs", str,
4891 "-mstack-protector-guard-offset=");
4893 rs6000_stack_protector_guard_offset = offset;
4896 if (global_options_set.x_rs6000_stack_protector_guard_reg_str)
4898 const char *str = rs6000_stack_protector_guard_reg_str;
4899 int reg = decode_reg_name (str);
4901 if (!IN_RANGE (reg, 1, 31))
4902 error ("%qs is not a valid base register in %qs", str,
4903 "-mstack-protector-guard-reg=");
4905 rs6000_stack_protector_guard_reg = reg;
4908 if (rs6000_stack_protector_guard == SSP_TLS
4909 && !IN_RANGE (rs6000_stack_protector_guard_reg, 1, 31))
4910 error ("%qs needs a valid base register", "-mstack-protector-guard=tls");
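
/* A typical explicit TLS-guard configuration on 64-bit Linux would be
   (the offset value here is only illustrative):

       gcc -mstack-protector-guard=tls -mstack-protector-guard-reg=r13 \
           -mstack-protector-guard-offset=0x28 foo.c

   i.e. load the canary from 0x28(r13) rather than from a global symbol.  */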
4912 if (global_init_p)
4914 #ifdef TARGET_REGNAMES
4915 /* If the user desires alternate register names, copy in the
4916 alternate names now. */
4917 if (TARGET_REGNAMES)
4918 memcpy (rs6000_reg_names, alt_reg_names, sizeof (rs6000_reg_names));
4919 #endif
4921 /* Set aix_struct_return last, after the ABI is determined.
4922 If -maix-struct-return or -msvr4-struct-return was explicitly
4923 used, don't override with the ABI default. */
4924 if (!global_options_set.x_aix_struct_return)
4925 aix_struct_return = (DEFAULT_ABI != ABI_V4 || DRAFT_V4_STRUCT_RET);
4927 #if 0
4928 /* IBM XL compiler defaults to unsigned bitfields. */
4929 if (TARGET_XL_COMPAT)
4930 flag_signed_bitfields = 0;
4931 #endif
4933 if (TARGET_LONG_DOUBLE_128 && !TARGET_IEEEQUAD)
4934 REAL_MODE_FORMAT (TFmode) = &ibm_extended_format;
4936 ASM_GENERATE_INTERNAL_LABEL (toc_label_name, "LCTOC", 1);
4938 /* We can only guarantee the availability of DI pseudo-ops when
4939 assembling for 64-bit targets. */
4940 if (!TARGET_64BIT)
4942 targetm.asm_out.aligned_op.di = NULL;
4943 targetm.asm_out.unaligned_op.di = NULL;
4947 /* Set branch target alignment, if not optimizing for size. */
4948 if (!optimize_size)
4950 /* Cell wants to be aligned to 8 bytes for dual issue. Titan wants to be
4951 aligned to 8 bytes to avoid misprediction by the branch predictor. */
4952 if (rs6000_tune == PROCESSOR_TITAN
4953 || rs6000_tune == PROCESSOR_CELL)
4955 if (align_functions <= 0)
4956 align_functions = 8;
4957 if (align_jumps <= 0)
4958 align_jumps = 8;
4959 if (align_loops <= 0)
4960 align_loops = 8;
4962 if (rs6000_align_branch_targets)
4964 if (align_functions <= 0)
4965 align_functions = 16;
4966 if (align_jumps <= 0)
4967 align_jumps = 16;
4968 if (align_loops <= 0)
4970 can_override_loop_align = 1;
4971 align_loops = 16;
4974 if (align_jumps_max_skip <= 0)
4975 align_jumps_max_skip = 15;
4976 if (align_loops_max_skip <= 0)
4977 align_loops_max_skip = 15;
4980 /* Arrange to save and restore machine status around nested functions. */
4981 init_machine_status = rs6000_init_machine_status;
4983 /* We should always be splitting complex arguments, but we can't break
4984 Linux and Darwin ABIs at the moment. For now, only AIX is fixed. */
4985 if (DEFAULT_ABI == ABI_V4 || DEFAULT_ABI == ABI_DARWIN)
4986 targetm.calls.split_complex_arg = NULL;
4988 /* The AIX and ELFv1 ABIs define standard function descriptors. */
4989 if (DEFAULT_ABI == ABI_AIX)
4990 targetm.calls.custom_function_descriptors = 0;
4993 /* Initialize rs6000_cost with the appropriate target costs. */
4994 if (optimize_size)
4995 rs6000_cost = TARGET_POWERPC64 ? &size64_cost : &size32_cost;
4996 else
4997 switch (rs6000_tune)
4999 case PROCESSOR_RS64A:
5000 rs6000_cost = &rs64a_cost;
5001 break;
5003 case PROCESSOR_MPCCORE:
5004 rs6000_cost = &mpccore_cost;
5005 break;
5007 case PROCESSOR_PPC403:
5008 rs6000_cost = &ppc403_cost;
5009 break;
5011 case PROCESSOR_PPC405:
5012 rs6000_cost = &ppc405_cost;
5013 break;
5015 case PROCESSOR_PPC440:
5016 rs6000_cost = &ppc440_cost;
5017 break;
5019 case PROCESSOR_PPC476:
5020 rs6000_cost = &ppc476_cost;
5021 break;
5023 case PROCESSOR_PPC601:
5024 rs6000_cost = &ppc601_cost;
5025 break;
5027 case PROCESSOR_PPC603:
5028 rs6000_cost = &ppc603_cost;
5029 break;
5031 case PROCESSOR_PPC604:
5032 rs6000_cost = &ppc604_cost;
5033 break;
5035 case PROCESSOR_PPC604e:
5036 rs6000_cost = &ppc604e_cost;
5037 break;
5039 case PROCESSOR_PPC620:
5040 rs6000_cost = &ppc620_cost;
5041 break;
5043 case PROCESSOR_PPC630:
5044 rs6000_cost = &ppc630_cost;
5045 break;
5047 case PROCESSOR_CELL:
5048 rs6000_cost = &ppccell_cost;
5049 break;
5051 case PROCESSOR_PPC750:
5052 case PROCESSOR_PPC7400:
5053 rs6000_cost = &ppc750_cost;
5054 break;
5056 case PROCESSOR_PPC7450:
5057 rs6000_cost = &ppc7450_cost;
5058 break;
5060 case PROCESSOR_PPC8540:
5061 case PROCESSOR_PPC8548:
5062 rs6000_cost = &ppc8540_cost;
5063 break;
5065 case PROCESSOR_PPCE300C2:
5066 case PROCESSOR_PPCE300C3:
5067 rs6000_cost = &ppce300c2c3_cost;
5068 break;
5070 case PROCESSOR_PPCE500MC:
5071 rs6000_cost = &ppce500mc_cost;
5072 break;
5074 case PROCESSOR_PPCE500MC64:
5075 rs6000_cost = &ppce500mc64_cost;
5076 break;
5078 case PROCESSOR_PPCE5500:
5079 rs6000_cost = &ppce5500_cost;
5080 break;
5082 case PROCESSOR_PPCE6500:
5083 rs6000_cost = &ppce6500_cost;
5084 break;
5086 case PROCESSOR_TITAN:
5087 rs6000_cost = &titan_cost;
5088 break;
5090 case PROCESSOR_POWER4:
5091 case PROCESSOR_POWER5:
5092 rs6000_cost = &power4_cost;
5093 break;
5095 case PROCESSOR_POWER6:
5096 rs6000_cost = &power6_cost;
5097 break;
5099 case PROCESSOR_POWER7:
5100 rs6000_cost = &power7_cost;
5101 break;
5103 case PROCESSOR_POWER8:
5104 rs6000_cost = &power8_cost;
5105 break;
5107 case PROCESSOR_POWER9:
5108 rs6000_cost = &power9_cost;
5109 break;
5111 case PROCESSOR_PPCA2:
5112 rs6000_cost = &ppca2_cost;
5113 break;
5115 default:
5116 gcc_unreachable ();
5119 if (global_init_p)
5121 maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES,
5122 rs6000_cost->simultaneous_prefetches,
5123 global_options.x_param_values,
5124 global_options_set.x_param_values);
5125 maybe_set_param_value (PARAM_L1_CACHE_SIZE, rs6000_cost->l1_cache_size,
5126 global_options.x_param_values,
5127 global_options_set.x_param_values);
5128 maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE,
5129 rs6000_cost->cache_line_size,
5130 global_options.x_param_values,
5131 global_options_set.x_param_values);
5132 maybe_set_param_value (PARAM_L2_CACHE_SIZE, rs6000_cost->l2_cache_size,
5133 global_options.x_param_values,
5134 global_options_set.x_param_values);
5136 /* Increase loop peeling limits based on performance analysis. */
5137 maybe_set_param_value (PARAM_MAX_PEELED_INSNS, 400,
5138 global_options.x_param_values,
5139 global_options_set.x_param_values);
5140 maybe_set_param_value (PARAM_MAX_COMPLETELY_PEELED_INSNS, 400,
5141 global_options.x_param_values,
5142 global_options_set.x_param_values);
5144 /* Use the 'model' -fsched-pressure algorithm by default. */
5145 maybe_set_param_value (PARAM_SCHED_PRESSURE_ALGORITHM,
5146 SCHED_PRESSURE_MODEL,
5147 global_options.x_param_values,
5148 global_options_set.x_param_values);
5150 /* If using typedef char *va_list, signal that
5151 __builtin_va_start (&ap, 0) can be optimized to
5152 ap = __builtin_next_arg (0). */
5153 if (DEFAULT_ABI != ABI_V4)
5154 targetm.expand_builtin_va_start = NULL;
5157 /* If not explicitly specified via option, decide whether to generate indexed
5158 load/store instructions. A value of -1 indicates that the
5159 initial value of this variable has not been overwritten. During
5160 compilation, TARGET_AVOID_XFORM is either 0 or 1. */
5161 if (TARGET_AVOID_XFORM == -1)
5162 /* Avoid indexed addressing when targeting Power6 in order to avoid the
5163 DERAT mispredict penalty. However the LVE and STVE altivec instructions
5164 need indexed accesses and the type used is the scalar type of the element
5165 being loaded or stored. */
5166 TARGET_AVOID_XFORM = (rs6000_tune == PROCESSOR_POWER6 && TARGET_CMPB
5167 && !TARGET_ALTIVEC);
5169 /* Set the -mrecip options. */
5170 if (rs6000_recip_name)
5172 char *p = ASTRDUP (rs6000_recip_name);
5173 char *q;
5174 unsigned int mask, i;
5175 bool invert;
5177 while ((q = strtok (p, ",")) != NULL)
5179 p = NULL;
5180 if (*q == '!')
5182 invert = true;
5183 q++;
5185 else
5186 invert = false;
5188 if (!strcmp (q, "default"))
5189 mask = ((TARGET_RECIP_PRECISION)
5190 ? RECIP_HIGH_PRECISION : RECIP_LOW_PRECISION);
5191 else
5193 for (i = 0; i < ARRAY_SIZE (recip_options); i++)
5194 if (!strcmp (q, recip_options[i].string))
5196 mask = recip_options[i].mask;
5197 break;
5200 if (i == ARRAY_SIZE (recip_options))
5202 error ("unknown option for %<%s=%s%>", "-mrecip", q);
5203 invert = false;
5204 mask = 0;
5205 ret = false;
5209 if (invert)
5210 rs6000_recip_control &= ~mask;
5211 else
5212 rs6000_recip_control |= mask;
5216 /* Set the builtin mask of the various options used that could affect which
5217 builtins were used. In the past we used target_flags, but we've run out
5218 of bits, and some options are no longer in target_flags. */
5219 rs6000_builtin_mask = rs6000_builtin_mask_calculate ();
5220 if (TARGET_DEBUG_BUILTIN || TARGET_DEBUG_TARGET)
5221 rs6000_print_builtin_options (stderr, 0, "builtin mask",
5222 rs6000_builtin_mask);
5224 /* Initialize all of the registers. */
5225 rs6000_init_hard_regno_mode_ok (global_init_p);
5227 /* Save the initial options in case the user uses function-specific options.  */
5228 if (global_init_p)
5229 target_option_default_node = target_option_current_node
5230 = build_target_option_node (&global_options);
5232 /* If not explicitly specified via option, decide whether to generate the
5233 extra blr's required to preserve the link stack on some cpus (e.g., 476). */
5234 if (TARGET_LINK_STACK == -1)
5235 SET_TARGET_LINK_STACK (rs6000_tune == PROCESSOR_PPC476 && flag_pic);
5237 /* Deprecate use of -mno-speculate-indirect-jumps. */
5238 if (!rs6000_speculate_indirect_jumps)
5239 warning (0, "%qs is deprecated and not recommended in any circumstances",
5240 "-mno-speculate-indirect-jumps");
5242 return ret;
5245 /* Implement TARGET_OPTION_OVERRIDE. On the RS/6000 this is used to
5246 define the target cpu type. */
5248 static void
5249 rs6000_option_override (void)
5251 (void) rs6000_option_override_internal (true);
5255 /* Implement targetm.vectorize.builtin_mask_for_load. */
5256 static tree
5257 rs6000_builtin_mask_for_load (void)
5259 /* Don't use lvsl/vperm for P8 and similarly efficient machines. */
5260 if ((TARGET_ALTIVEC && !TARGET_VSX)
5261 || (TARGET_VSX && !TARGET_EFFICIENT_UNALIGNED_VSX))
5262 return altivec_builtin_mask_for_load;
5263 else
5264 return 0;
5267 /* Implement LOOP_ALIGN. */
5269 rs6000_loop_align (rtx label)
5271 basic_block bb;
5272 int ninsns;
5274 /* Don't override loop alignment if -falign-loops was specified. */
5275 if (!can_override_loop_align)
5276 return align_loops_log;
5278 bb = BLOCK_FOR_INSN (label);
5279 ninsns = num_loop_insns (bb->loop_father);
5281 /* Align small loops to 32 bytes to fit in an icache sector, otherwise return default. */
5282 if (ninsns > 4 && ninsns <= 8
5283 && (rs6000_tune == PROCESSOR_POWER4
5284 || rs6000_tune == PROCESSOR_POWER5
5285 || rs6000_tune == PROCESSOR_POWER6
5286 || rs6000_tune == PROCESSOR_POWER7
5287 || rs6000_tune == PROCESSOR_POWER8))
5288 return 5;
5289 else
5290 return align_loops_log;
5293 /* Implement TARGET_LOOP_ALIGN_MAX_SKIP. */
5294 static int
5295 rs6000_loop_align_max_skip (rtx_insn *label)
5297 return (1 << rs6000_loop_align (label)) - 1;
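/* A minimal sketch of the log-based alignment contract (illustrative only;
   alignment_example is hypothetical).  LOOP_ALIGN returns a log2, so the
   value 5 above means a 32-byte boundary -- one icache sector on the listed
   processors -- and the matching max-skip allows up to 31 bytes of
   padding.  */
#if 0
static void
alignment_example (void)
{
  int align_log = 5;                     /* as returned for a 5..8 insn loop */
  int align_bytes = 1 << align_log;      /* 32-byte boundary */
  int max_skip = (1 << align_log) - 1;   /* pad with at most 31 bytes */
  gcc_assert (align_bytes == 32 && max_skip == 31);
}
#endif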
5300 /* Return true iff a data reference of TYPE can reach vector alignment (16)
5301 after applying N iterations.  This routine does not determine
5302 how many iterations are required to reach desired alignment. */
5304 static bool
5305 rs6000_vector_alignment_reachable (const_tree type ATTRIBUTE_UNUSED, bool is_packed)
5307 if (is_packed)
5308 return false;
5310 if (TARGET_32BIT)
5312 if (rs6000_alignment_flags == MASK_ALIGN_NATURAL)
5313 return true;
5315 if (rs6000_alignment_flags == MASK_ALIGN_POWER)
5316 return true;
5318 return false;
5320 else
5322 if (TARGET_MACHO)
5323 return false;
5325 /* Assume that all other types are naturally aligned.  CHECKME!  */
5326 return true;
5330 /* Return true if the vector misalignment factor is supported by the
5331 target. */
5332 static bool
5333 rs6000_builtin_support_vector_misalignment (machine_mode mode,
5334 const_tree type,
5335 int misalignment,
5336 bool is_packed)
5338 if (TARGET_VSX)
5340 if (TARGET_EFFICIENT_UNALIGNED_VSX)
5341 return true;
5343 /* Return false if the movmisalign pattern is not supported for this mode. */
5344 if (optab_handler (movmisalign_optab, mode) == CODE_FOR_nothing)
5345 return false;
5347 if (misalignment == -1)
5349 /* Misalignment factor is unknown at compile time but we know
5350 it's word aligned. */
5351 if (rs6000_vector_alignment_reachable (type, is_packed))
5353 int element_size = TREE_INT_CST_LOW (TYPE_SIZE (type));
5355 if (element_size == 64 || element_size == 32)
5356 return true;
5359 return false;
5362 /* VSX supports word-aligned vectors. */
5363 if (misalignment % 4 == 0)
5364 return true;
5366 return false;
5369 /* Implement targetm.vectorize.builtin_vectorization_cost. */
5370 static int
5371 rs6000_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
5372 tree vectype, int misalign)
5374 unsigned elements;
5375 tree elem_type;
5377 switch (type_of_cost)
5379 case scalar_stmt:
5380 case scalar_load:
5381 case scalar_store:
5382 case vector_stmt:
5383 case vector_load:
5384 case vector_store:
5385 case vec_to_scalar:
5386 case scalar_to_vec:
5387 case cond_branch_not_taken:
5388 return 1;
5390 case vec_perm:
5391 if (TARGET_VSX)
5392 return 3;
5393 else
5394 return 1;
5396 case vec_promote_demote:
5397 if (TARGET_VSX)
5398 return 4;
5399 else
5400 return 1;
5402 case cond_branch_taken:
5403 return 3;
5405 case unaligned_load:
5406 case vector_gather_load:
5407 if (TARGET_EFFICIENT_UNALIGNED_VSX)
5408 return 1;
5410 if (TARGET_VSX && TARGET_ALLOW_MOVMISALIGN)
5412 elements = TYPE_VECTOR_SUBPARTS (vectype);
5413 if (elements == 2)
5414 /* Double word aligned. */
5415 return 2;
5417 if (elements == 4)
5419 switch (misalign)
5421 case 8:
5422 /* Double word aligned. */
5423 return 2;
5425 case -1:
5426 /* Unknown misalignment. */
5427 case 4:
5428 case 12:
5429 /* Word aligned. */
5430 return 22;
5432 default:
5433 gcc_unreachable ();
5438 if (TARGET_ALTIVEC)
5439 /* Misaligned loads are not supported. */
5440 gcc_unreachable ();
5442 return 2;
5444 case unaligned_store:
5445 case vector_scatter_store:
5446 if (TARGET_EFFICIENT_UNALIGNED_VSX)
5447 return 1;
5449 if (TARGET_VSX && TARGET_ALLOW_MOVMISALIGN)
5451 elements = TYPE_VECTOR_SUBPARTS (vectype);
5452 if (elements == 2)
5453 /* Double word aligned. */
5454 return 2;
5456 if (elements == 4)
5458 switch (misalign)
5460 case 8:
5461 /* Double word aligned. */
5462 return 2;
5464 case -1:
5465 /* Unknown misalignment. */
5466 case 4:
5467 case 12:
5468 /* Word aligned. */
5469 return 23;
5471 default:
5472 gcc_unreachable ();
5477 if (TARGET_ALTIVEC)
5478 /* Misaligned stores are not supported. */
5479 gcc_unreachable ();
5481 return 2;
5483 case vec_construct:
5484 /* This is a rough approximation assuming non-constant elements
5485 constructed into a vector via element insertion. FIXME:
5486 vec_construct is not granular enough for uniformly good
5487 decisions. If the initialization is a splat, this is
5488 cheaper than we estimate. Improve this someday. */
5489 elem_type = TREE_TYPE (vectype);
5490 /* 32-bit vectors loaded into registers are stored as double
5491 precision, so we need 2 permutes, 2 converts, and 1 merge
5492 to construct a vector of short floats from them. */
5493 if (SCALAR_FLOAT_TYPE_P (elem_type)
5494 && TYPE_PRECISION (elem_type) == 32)
5495 return 5;
5496 /* On POWER9, integer vector types are built up in GPRs and then
5497 use a direct move (2 cycles). For POWER8 this is even worse,
5498 as we need two direct moves and a merge, and the direct moves
5499 are five cycles. */
5500 else if (INTEGRAL_TYPE_P (elem_type))
5502 if (TARGET_P9_VECTOR)
5503 return TYPE_VECTOR_SUBPARTS (vectype) - 1 + 2;
5504 else
5505 return TYPE_VECTOR_SUBPARTS (vectype) - 1 + 5;
5507 else
5508 /* V2DFmode doesn't need a direct move. */
5509 return 2;
5511 default:
5512 gcc_unreachable ();
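/* Illustrative comparison of the unaligned-load entries above
   (example_unaligned_v4sf_load_cost is a hypothetical condensation; the
   values are copied from the switch): a V4SF load that is only word aligned
   costs 22 on pre-P9 VSX versus 1 when TARGET_EFFICIENT_UNALIGNED_VSX is
   set, which is often the difference between vectorizing a loop and
   leaving it scalar.  */
#if 0
static int
example_unaligned_v4sf_load_cost (bool efficient_unaligned, int misalign)
{
  if (efficient_unaligned)
    return 1;          /* P9 and later: unaligned loads are cheap */
  if (misalign == 8)
    return 2;          /* double-word aligned */
  return 22;           /* word aligned (4/12) or unknown (-1) */
}
#endif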
5516 /* Implement targetm.vectorize.preferred_simd_mode. */
5518 static machine_mode
5519 rs6000_preferred_simd_mode (scalar_mode mode)
5521 if (TARGET_VSX)
5522 switch (mode)
5524 case E_DFmode:
5525 return V2DFmode;
5526 default:;
5528 if (TARGET_ALTIVEC || TARGET_VSX)
5529 switch (mode)
5531 case E_SFmode:
5532 return V4SFmode;
5533 case E_TImode:
5534 return V1TImode;
5535 case E_DImode:
5536 return V2DImode;
5537 case E_SImode:
5538 return V4SImode;
5539 case E_HImode:
5540 return V8HImode;
5541 case E_QImode:
5542 return V16QImode;
5543 default:;
5545 return word_mode;
5548 typedef struct _rs6000_cost_data
5550 struct loop *loop_info;
5551 unsigned cost[3];
5552 } rs6000_cost_data;
5554 /* Test for likely overcommitment of vector hardware resources. If a
5555 loop iteration is relatively large, and too large a percentage of
5556 instructions in the loop are vectorized, the cost model may not
5557 adequately reflect delays from unavailable vector resources.
5558 Penalize the loop body cost for this case. */
5560 static void
5561 rs6000_density_test (rs6000_cost_data *data)
5563 const int DENSITY_PCT_THRESHOLD = 85;
5564 const int DENSITY_SIZE_THRESHOLD = 70;
5565 const int DENSITY_PENALTY = 10;
5566 struct loop *loop = data->loop_info;
5567 basic_block *bbs = get_loop_body (loop);
5568 int nbbs = loop->num_nodes;
5569 int vec_cost = data->cost[vect_body], not_vec_cost = 0;
5570 int i, density_pct;
5572 for (i = 0; i < nbbs; i++)
5574 basic_block bb = bbs[i];
5575 gimple_stmt_iterator gsi;
5577 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
5579 gimple *stmt = gsi_stmt (gsi);
5580 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
5582 if (!STMT_VINFO_RELEVANT_P (stmt_info)
5583 && !STMT_VINFO_IN_PATTERN_P (stmt_info))
5584 not_vec_cost++;
5588 free (bbs);
5589 density_pct = (vec_cost * 100) / (vec_cost + not_vec_cost);
5591 if (density_pct > DENSITY_PCT_THRESHOLD
5592 && vec_cost + not_vec_cost > DENSITY_SIZE_THRESHOLD)
5594 data->cost[vect_body] = vec_cost * (100 + DENSITY_PENALTY) / 100;
5595 if (dump_enabled_p ())
5596 dump_printf_loc (MSG_NOTE, vect_location,
5597 "density %d%%, cost %d exceeds threshold, penalizing "
5598 "loop body cost by %d%%", density_pct,
5599 vec_cost + not_vec_cost, DENSITY_PENALTY);
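/* A worked instance of the density heuristic (made-up counts): with
   vec_cost = 90 and not_vec_cost = 5, density_pct is 90*100/95 = 94, which
   exceeds 85, and the loop size 95 exceeds 70, so the body cost is scaled
   to 90 * 110 / 100 = 99.  density_example is a hypothetical condensation
   of the code above.  */
#if 0
static int
density_example (int vec_cost, int not_vec_cost)
{
  int density_pct = (vec_cost * 100) / (vec_cost + not_vec_cost);
  if (density_pct > 85 /* DENSITY_PCT_THRESHOLD */
      && vec_cost + not_vec_cost > 70 /* DENSITY_SIZE_THRESHOLD */)
    return vec_cost * (100 + 10 /* DENSITY_PENALTY */) / 100;
  return vec_cost;
}
#endif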
5603 /* Implement targetm.vectorize.init_cost. */
5605 /* For each vectorized loop, this var holds TRUE iff a non-memory vector
5606 instruction is needed by the vectorization. */
5607 static bool rs6000_vect_nonmem;
5609 static void *
5610 rs6000_init_cost (struct loop *loop_info)
5612 rs6000_cost_data *data = XNEW (struct _rs6000_cost_data);
5613 data->loop_info = loop_info;
5614 data->cost[vect_prologue] = 0;
5615 data->cost[vect_body] = 0;
5616 data->cost[vect_epilogue] = 0;
5617 rs6000_vect_nonmem = false;
5618 return data;
5621 /* Implement targetm.vectorize.add_stmt_cost. */
5623 static unsigned
5624 rs6000_add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind,
5625 struct _stmt_vec_info *stmt_info, int misalign,
5626 enum vect_cost_model_location where)
5628 rs6000_cost_data *cost_data = (rs6000_cost_data*) data;
5629 unsigned retval = 0;
5631 if (flag_vect_cost_model)
5633 tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
5634 int stmt_cost = rs6000_builtin_vectorization_cost (kind, vectype,
5635 misalign);
5636 /* Statements in an inner loop relative to the loop being
5637 vectorized are weighted more heavily. The value here is
5638 arbitrary and could potentially be improved with analysis. */
5639 if (where == vect_body && stmt_info && stmt_in_inner_loop_p (stmt_info))
5640 count *= 50; /* FIXME. */
5642 retval = (unsigned) (count * stmt_cost);
5643 cost_data->cost[where] += retval;
5645 /* Check whether we're doing something other than just a copy loop.
5646 Not all such loops may be profitably vectorized; see
5647 rs6000_finish_cost. */
5648 if ((kind == vec_to_scalar || kind == vec_perm
5649 || kind == vec_promote_demote || kind == vec_construct
5650 || kind == scalar_to_vec)
5651 || (where == vect_body && kind == vector_stmt))
5652 rs6000_vect_nonmem = true;
5655 return retval;
5658 /* Implement targetm.vectorize.finish_cost. */
5660 static void
5661 rs6000_finish_cost (void *data, unsigned *prologue_cost,
5662 unsigned *body_cost, unsigned *epilogue_cost)
5664 rs6000_cost_data *cost_data = (rs6000_cost_data*) data;
5666 if (cost_data->loop_info)
5667 rs6000_density_test (cost_data);
5669 /* Don't vectorize minimum-vectorization-factor, simple copy loops
5670 that require versioning for any reason. The vectorization is at
5671 best a wash inside the loop, and the versioning checks make
5672 profitability highly unlikely and potentially quite harmful. */
5673 if (cost_data->loop_info)
5675 loop_vec_info vec_info = loop_vec_info_for_loop (cost_data->loop_info);
5676 if (!rs6000_vect_nonmem
5677 && LOOP_VINFO_VECT_FACTOR (vec_info) == 2
5678 && LOOP_REQUIRES_VERSIONING (vec_info))
5679 cost_data->cost[vect_body] += 10000;
5682 *prologue_cost = cost_data->cost[vect_prologue];
5683 *body_cost = cost_data->cost[vect_body];
5684 *epilogue_cost = cost_data->cost[vect_epilogue];
5687 /* Implement targetm.vectorize.destroy_cost_data. */
5689 static void
5690 rs6000_destroy_cost_data (void *data)
5692 free (data);
5695 /* Handler for the Mathematical Acceleration Subsystem (mass) interface to a
5696 library with vectorized intrinsics. */
5698 static tree
5699 rs6000_builtin_vectorized_libmass (combined_fn fn, tree type_out,
5700 tree type_in)
5702 char name[32];
5703 const char *suffix = NULL;
5704 tree fntype, new_fndecl, bdecl = NULL_TREE;
5705 int n_args = 1;
5706 const char *bname;
5707 machine_mode el_mode, in_mode;
5708 int n, in_n;
5710 /* Libmass is suitable for unsafe math only as it does not correctly support
5711 parts of IEEE with the required precision such as denormals. Only support
5712 it if we have VSX to use the simd d2 or f4 functions.
5713 XXX: Add variable length support. */
5714 if (!flag_unsafe_math_optimizations || !TARGET_VSX)
5715 return NULL_TREE;
5717 el_mode = TYPE_MODE (TREE_TYPE (type_out));
5718 n = TYPE_VECTOR_SUBPARTS (type_out);
5719 in_mode = TYPE_MODE (TREE_TYPE (type_in));
5720 in_n = TYPE_VECTOR_SUBPARTS (type_in);
5721 if (el_mode != in_mode
5722 || n != in_n)
5723 return NULL_TREE;
5725 switch (fn)
5727 CASE_CFN_ATAN2:
5728 CASE_CFN_HYPOT:
5729 CASE_CFN_POW:
5730 n_args = 2;
5731 gcc_fallthrough ();
5733 CASE_CFN_ACOS:
5734 CASE_CFN_ACOSH:
5735 CASE_CFN_ASIN:
5736 CASE_CFN_ASINH:
5737 CASE_CFN_ATAN:
5738 CASE_CFN_ATANH:
5739 CASE_CFN_CBRT:
5740 CASE_CFN_COS:
5741 CASE_CFN_COSH:
5742 CASE_CFN_ERF:
5743 CASE_CFN_ERFC:
5744 CASE_CFN_EXP2:
5745 CASE_CFN_EXP:
5746 CASE_CFN_EXPM1:
5747 CASE_CFN_LGAMMA:
5748 CASE_CFN_LOG10:
5749 CASE_CFN_LOG1P:
5750 CASE_CFN_LOG2:
5751 CASE_CFN_LOG:
5752 CASE_CFN_SIN:
5753 CASE_CFN_SINH:
5754 CASE_CFN_SQRT:
5755 CASE_CFN_TAN:
5756 CASE_CFN_TANH:
5757 if (el_mode == DFmode && n == 2)
5759 bdecl = mathfn_built_in (double_type_node, fn);
5760 suffix = "d2"; /* pow -> powd2 */
5762 else if (el_mode == SFmode && n == 4)
5764 bdecl = mathfn_built_in (float_type_node, fn);
5765 suffix = "4"; /* powf -> powf4 */
5767 else
5768 return NULL_TREE;
5769 if (!bdecl)
5770 return NULL_TREE;
5771 break;
5773 default:
5774 return NULL_TREE;
5777 gcc_assert (suffix != NULL);
5778 bname = IDENTIFIER_POINTER (DECL_NAME (bdecl));
5779 if (!bname)
5780 return NULL_TREE;
5782 strcpy (name, bname + sizeof ("__builtin_") - 1);
5783 strcat (name, suffix);
5785 if (n_args == 1)
5786 fntype = build_function_type_list (type_out, type_in, NULL);
5787 else if (n_args == 2)
5788 fntype = build_function_type_list (type_out, type_in, type_in, NULL);
5789 else
5790 gcc_unreachable ();
5792 /* Build a function declaration for the vectorized function. */
5793 new_fndecl = build_decl (BUILTINS_LOCATION,
5794 FUNCTION_DECL, get_identifier (name), fntype);
5795 TREE_PUBLIC (new_fndecl) = 1;
5796 DECL_EXTERNAL (new_fndecl) = 1;
5797 DECL_IS_NOVOPS (new_fndecl) = 1;
5798 TREE_READONLY (new_fndecl) = 1;
5800 return new_fndecl;
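/* A minimal sketch of the MASS name mangling above (mangle_example is
   hypothetical; it uses only string routines already available via
   system.h): "__builtin_pow" plus the "d2" suffix becomes "powd2" for V2DF,
   and "__builtin_powf" plus "4" becomes "powf4" for V4SF.  */
#if 0
static void
mangle_example (void)
{
  char name[32];
  const char *bname = "__builtin_powf";
  /* Skip the "__builtin_" prefix, keeping "powf".  */
  strcpy (name, bname + sizeof ("__builtin_") - 1);
  strcat (name, "4");                    /* "powf4", the f4 variant */
  gcc_assert (strcmp (name, "powf4") == 0);
}
#endif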
5803 /* Returns a function decl for a vectorized version of the builtin function
5804 with builtin function code FN and the result vector type TYPE, or NULL_TREE
5805 if it is not available. */
5807 static tree
5808 rs6000_builtin_vectorized_function (unsigned int fn, tree type_out,
5809 tree type_in)
5811 machine_mode in_mode, out_mode;
5812 int in_n, out_n;
5814 if (TARGET_DEBUG_BUILTIN)
5815 fprintf (stderr, "rs6000_builtin_vectorized_function (%s, %s, %s)\n",
5816 combined_fn_name (combined_fn (fn)),
5817 GET_MODE_NAME (TYPE_MODE (type_out)),
5818 GET_MODE_NAME (TYPE_MODE (type_in)));
5820 if (TREE_CODE (type_out) != VECTOR_TYPE
5821 || TREE_CODE (type_in) != VECTOR_TYPE)
5822 return NULL_TREE;
5824 out_mode = TYPE_MODE (TREE_TYPE (type_out));
5825 out_n = TYPE_VECTOR_SUBPARTS (type_out);
5826 in_mode = TYPE_MODE (TREE_TYPE (type_in));
5827 in_n = TYPE_VECTOR_SUBPARTS (type_in);
5829 switch (fn)
5831 CASE_CFN_COPYSIGN:
5832 if (VECTOR_UNIT_VSX_P (V2DFmode)
5833 && out_mode == DFmode && out_n == 2
5834 && in_mode == DFmode && in_n == 2)
5835 return rs6000_builtin_decls[VSX_BUILTIN_CPSGNDP];
5836 if (VECTOR_UNIT_VSX_P (V4SFmode)
5837 && out_mode == SFmode && out_n == 4
5838 && in_mode == SFmode && in_n == 4)
5839 return rs6000_builtin_decls[VSX_BUILTIN_CPSGNSP];
5840 if (VECTOR_UNIT_ALTIVEC_P (V4SFmode)
5841 && out_mode == SFmode && out_n == 4
5842 && in_mode == SFmode && in_n == 4)
5843 return rs6000_builtin_decls[ALTIVEC_BUILTIN_COPYSIGN_V4SF];
5844 break;
5845 CASE_CFN_CEIL:
5846 if (VECTOR_UNIT_VSX_P (V2DFmode)
5847 && out_mode == DFmode && out_n == 2
5848 && in_mode == DFmode && in_n == 2)
5849 return rs6000_builtin_decls[VSX_BUILTIN_XVRDPIP];
5850 if (VECTOR_UNIT_VSX_P (V4SFmode)
5851 && out_mode == SFmode && out_n == 4
5852 && in_mode == SFmode && in_n == 4)
5853 return rs6000_builtin_decls[VSX_BUILTIN_XVRSPIP];
5854 if (VECTOR_UNIT_ALTIVEC_P (V4SFmode)
5855 && out_mode == SFmode && out_n == 4
5856 && in_mode == SFmode && in_n == 4)
5857 return rs6000_builtin_decls[ALTIVEC_BUILTIN_VRFIP];
5858 break;
5859 CASE_CFN_FLOOR:
5860 if (VECTOR_UNIT_VSX_P (V2DFmode)
5861 && out_mode == DFmode && out_n == 2
5862 && in_mode == DFmode && in_n == 2)
5863 return rs6000_builtin_decls[VSX_BUILTIN_XVRDPIM];
5864 if (VECTOR_UNIT_VSX_P (V4SFmode)
5865 && out_mode == SFmode && out_n == 4
5866 && in_mode == SFmode && in_n == 4)
5867 return rs6000_builtin_decls[VSX_BUILTIN_XVRSPIM];
5868 if (VECTOR_UNIT_ALTIVEC_P (V4SFmode)
5869 && out_mode == SFmode && out_n == 4
5870 && in_mode == SFmode && in_n == 4)
5871 return rs6000_builtin_decls[ALTIVEC_BUILTIN_VRFIM];
5872 break;
5873 CASE_CFN_FMA:
5874 if (VECTOR_UNIT_VSX_P (V2DFmode)
5875 && out_mode == DFmode && out_n == 2
5876 && in_mode == DFmode && in_n == 2)
5877 return rs6000_builtin_decls[VSX_BUILTIN_XVMADDDP];
5878 if (VECTOR_UNIT_VSX_P (V4SFmode)
5879 && out_mode == SFmode && out_n == 4
5880 && in_mode == SFmode && in_n == 4)
5881 return rs6000_builtin_decls[VSX_BUILTIN_XVMADDSP];
5882 if (VECTOR_UNIT_ALTIVEC_P (V4SFmode)
5883 && out_mode == SFmode && out_n == 4
5884 && in_mode == SFmode && in_n == 4)
5885 return rs6000_builtin_decls[ALTIVEC_BUILTIN_VMADDFP];
5886 break;
5887 CASE_CFN_TRUNC:
5888 if (VECTOR_UNIT_VSX_P (V2DFmode)
5889 && out_mode == DFmode && out_n == 2
5890 && in_mode == DFmode && in_n == 2)
5891 return rs6000_builtin_decls[VSX_BUILTIN_XVRDPIZ];
5892 if (VECTOR_UNIT_VSX_P (V4SFmode)
5893 && out_mode == SFmode && out_n == 4
5894 && in_mode == SFmode && in_n == 4)
5895 return rs6000_builtin_decls[VSX_BUILTIN_XVRSPIZ];
5896 if (VECTOR_UNIT_ALTIVEC_P (V4SFmode)
5897 && out_mode == SFmode && out_n == 4
5898 && in_mode == SFmode && in_n == 4)
5899 return rs6000_builtin_decls[ALTIVEC_BUILTIN_VRFIZ];
5900 break;
5901 CASE_CFN_NEARBYINT:
5902 if (VECTOR_UNIT_VSX_P (V2DFmode)
5903 && flag_unsafe_math_optimizations
5904 && out_mode == DFmode && out_n == 2
5905 && in_mode == DFmode && in_n == 2)
5906 return rs6000_builtin_decls[VSX_BUILTIN_XVRDPI];
5907 if (VECTOR_UNIT_VSX_P (V4SFmode)
5908 && flag_unsafe_math_optimizations
5909 && out_mode == SFmode && out_n == 4
5910 && in_mode == SFmode && in_n == 4)
5911 return rs6000_builtin_decls[VSX_BUILTIN_XVRSPI];
5912 break;
5913 CASE_CFN_RINT:
5914 if (VECTOR_UNIT_VSX_P (V2DFmode)
5915 && !flag_trapping_math
5916 && out_mode == DFmode && out_n == 2
5917 && in_mode == DFmode && in_n == 2)
5918 return rs6000_builtin_decls[VSX_BUILTIN_XVRDPIC];
5919 if (VECTOR_UNIT_VSX_P (V4SFmode)
5920 && !flag_trapping_math
5921 && out_mode == SFmode && out_n == 4
5922 && in_mode == SFmode && in_n == 4)
5923 return rs6000_builtin_decls[VSX_BUILTIN_XVRSPIC];
5924 break;
5925 default:
5926 break;
5929 /* Generate calls to libmass if appropriate. */
5930 if (rs6000_veclib_handler)
5931 return rs6000_veclib_handler (combined_fn (fn), type_out, type_in);
5933 return NULL_TREE;
5936 /* Implement TARGET_VECTORIZE_BUILTIN_MD_VECTORIZED_FUNCTION. */
5938 static tree
5939 rs6000_builtin_md_vectorized_function (tree fndecl, tree type_out,
5940 tree type_in)
5942 machine_mode in_mode, out_mode;
5943 int in_n, out_n;
5945 if (TARGET_DEBUG_BUILTIN)
5946 fprintf (stderr, "rs6000_builtin_md_vectorized_function (%s, %s, %s)\n",
5947 IDENTIFIER_POINTER (DECL_NAME (fndecl)),
5948 GET_MODE_NAME (TYPE_MODE (type_out)),
5949 GET_MODE_NAME (TYPE_MODE (type_in)));
5951 if (TREE_CODE (type_out) != VECTOR_TYPE
5952 || TREE_CODE (type_in) != VECTOR_TYPE)
5953 return NULL_TREE;
5955 out_mode = TYPE_MODE (TREE_TYPE (type_out));
5956 out_n = TYPE_VECTOR_SUBPARTS (type_out);
5957 in_mode = TYPE_MODE (TREE_TYPE (type_in));
5958 in_n = TYPE_VECTOR_SUBPARTS (type_in);
5960 enum rs6000_builtins fn
5961 = (enum rs6000_builtins) DECL_FUNCTION_CODE (fndecl);
5962 switch (fn)
5964 case RS6000_BUILTIN_RSQRTF:
5965 if (VECTOR_UNIT_ALTIVEC_OR_VSX_P (V4SFmode)
5966 && out_mode == SFmode && out_n == 4
5967 && in_mode == SFmode && in_n == 4)
5968 return rs6000_builtin_decls[ALTIVEC_BUILTIN_VRSQRTFP];
5969 break;
5970 case RS6000_BUILTIN_RSQRT:
5971 if (VECTOR_UNIT_VSX_P (V2DFmode)
5972 && out_mode == DFmode && out_n == 2
5973 && in_mode == DFmode && in_n == 2)
5974 return rs6000_builtin_decls[VSX_BUILTIN_RSQRT_2DF];
5975 break;
5976 case RS6000_BUILTIN_RECIPF:
5977 if (VECTOR_UNIT_ALTIVEC_OR_VSX_P (V4SFmode)
5978 && out_mode == SFmode && out_n == 4
5979 && in_mode == SFmode && in_n == 4)
5980 return rs6000_builtin_decls[ALTIVEC_BUILTIN_VRECIPFP];
5981 break;
5982 case RS6000_BUILTIN_RECIP:
5983 if (VECTOR_UNIT_VSX_P (V2DFmode)
5984 && out_mode == DFmode && out_n == 2
5985 && in_mode == DFmode && in_n == 2)
5986 return rs6000_builtin_decls[VSX_BUILTIN_RECIP_V2DF];
5987 break;
5988 default:
5989 break;
5991 return NULL_TREE;
5994 /* Default CPU string for rs6000*_file_start functions. */
5995 static const char *rs6000_default_cpu;
5997 /* Do anything needed at the start of the asm file. */
5999 static void
6000 rs6000_file_start (void)
6002 char buffer[80];
6003 const char *start = buffer;
6004 FILE *file = asm_out_file;
6006 rs6000_default_cpu = TARGET_CPU_DEFAULT;
6008 default_file_start ();
6010 if (flag_verbose_asm)
6012 sprintf (buffer, "\n%s rs6000/powerpc options:", ASM_COMMENT_START);
6014 if (rs6000_default_cpu != 0 && rs6000_default_cpu[0] != '\0')
6016 fprintf (file, "%s --with-cpu=%s", start, rs6000_default_cpu);
6017 start = "";
6020 if (global_options_set.x_rs6000_cpu_index)
6022 fprintf (file, "%s -mcpu=%s", start,
6023 processor_target_table[rs6000_cpu_index].name);
6024 start = "";
6027 if (global_options_set.x_rs6000_tune_index)
6029 fprintf (file, "%s -mtune=%s", start,
6030 processor_target_table[rs6000_tune_index].name);
6031 start = "";
6034 if (PPC405_ERRATUM77)
6036 fprintf (file, "%s PPC405CR_ERRATUM77", start);
6037 start = "";
6040 #ifdef USING_ELFOS_H
6041 switch (rs6000_sdata)
6043 case SDATA_NONE: fprintf (file, "%s -msdata=none", start); start = ""; break;
6044 case SDATA_DATA: fprintf (file, "%s -msdata=data", start); start = ""; break;
6045 case SDATA_SYSV: fprintf (file, "%s -msdata=sysv", start); start = ""; break;
6046 case SDATA_EABI: fprintf (file, "%s -msdata=eabi", start); start = ""; break;
6049 if (rs6000_sdata && g_switch_value)
6051 fprintf (file, "%s -G %d", start,
6052 g_switch_value);
6053 start = "";
6055 #endif
6057 if (*start == '\0')
6058 putc ('\n', file);
6061 #ifdef USING_ELFOS_H
6062 if (!(rs6000_default_cpu && rs6000_default_cpu[0])
6063 && !global_options_set.x_rs6000_cpu_index)
6065 fputs ("\t.machine ", asm_out_file);
6066 if ((rs6000_isa_flags & OPTION_MASK_MODULO) != 0)
6067 fputs ("power9\n", asm_out_file);
6068 else if ((rs6000_isa_flags & OPTION_MASK_DIRECT_MOVE) != 0)
6069 fputs ("power8\n", asm_out_file);
6070 else if ((rs6000_isa_flags & OPTION_MASK_POPCNTD) != 0)
6071 fputs ("power7\n", asm_out_file);
6072 else if ((rs6000_isa_flags & OPTION_MASK_CMPB) != 0)
6073 fputs ("power6\n", asm_out_file);
6074 else if ((rs6000_isa_flags & OPTION_MASK_POPCNTB) != 0)
6075 fputs ("power5\n", asm_out_file);
6076 else if ((rs6000_isa_flags & OPTION_MASK_MFCRF) != 0)
6077 fputs ("power4\n", asm_out_file);
6078 else if ((rs6000_isa_flags & OPTION_MASK_POWERPC64) != 0)
6079 fputs ("ppc64\n", asm_out_file);
6080 else
6081 fputs ("ppc\n", asm_out_file);
6083 #endif
6085 if (DEFAULT_ABI == ABI_ELFv2)
6086 fprintf (file, "\t.abiversion 2\n");
6090 /* Return nonzero if this function is known to have a null epilogue. */
6093 direct_return (void)
6095 if (reload_completed)
6097 rs6000_stack_t *info = rs6000_stack_info ();
6099 if (info->first_gp_reg_save == 32
6100 && info->first_fp_reg_save == 64
6101 && info->first_altivec_reg_save == LAST_ALTIVEC_REGNO + 1
6102 && ! info->lr_save_p
6103 && ! info->cr_save_p
6104 && info->vrsave_size == 0
6105 && ! info->push_p)
6106 return 1;
6109 return 0;
6112 /* Return the number of instructions it takes to form a constant in an
6113 integer register. */
6116 num_insns_constant_wide (HOST_WIDE_INT value)
6118 /* signed constant loadable with addi */
6119 if (((unsigned HOST_WIDE_INT) value + 0x8000) < 0x10000)
6120 return 1;
6122 /* constant loadable with addis */
6123 else if ((value & 0xffff) == 0
6124 && (value >> 31 == -1 || value >> 31 == 0))
6125 return 1;
6127 else if (TARGET_POWERPC64)
6129 HOST_WIDE_INT low = ((value & 0xffffffff) ^ 0x80000000) - 0x80000000;
6130 HOST_WIDE_INT high = value >> 31;
6132 if (high == 0 || high == -1)
6133 return 2;
6135 high >>= 1;
6137 if (low == 0)
6138 return num_insns_constant_wide (high) + 1;
6139 else if (high == 0)
6140 return num_insns_constant_wide (low) + 1;
6141 else
6142 return (num_insns_constant_wide (high)
6143 + num_insns_constant_wide (low) + 1);
6146 else
6147 return 2;
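/* Worked examples for the recurrence above, assuming a 64-bit target where
   TARGET_POWERPC64 is set (constant_cost_examples is a hypothetical test
   harness):
     0x7fff              -> 1  (li)
     0x12345678          -> 2  (lis + ori)
     0x1234567800000000  -> 3  (the low half is 0, so the cost is that of
                                the high half (2) plus one sldi 32)  */
#if 0
static void
constant_cost_examples (void)
{
  gcc_assert (num_insns_constant_wide (0x7fff) == 1);
  gcc_assert (num_insns_constant_wide (0x12345678) == 2);
  gcc_assert (num_insns_constant_wide
	       (HOST_WIDE_INT_C (0x1234567800000000)) == 3);
}
#endif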
6151 num_insns_constant (rtx op, machine_mode mode)
6153 HOST_WIDE_INT low, high;
6155 switch (GET_CODE (op))
6157 case CONST_INT:
6158 if ((INTVAL (op) >> 31) != 0 && (INTVAL (op) >> 31) != -1
6159 && rs6000_is_valid_and_mask (op, mode))
6160 return 2;
6161 else
6162 return num_insns_constant_wide (INTVAL (op));
6164 case CONST_WIDE_INT:
6166 int i;
6167 int ins = CONST_WIDE_INT_NUNITS (op) - 1;
6168 for (i = 0; i < CONST_WIDE_INT_NUNITS (op); i++)
6169 ins += num_insns_constant_wide (CONST_WIDE_INT_ELT (op, i));
6170 return ins;
6173 case CONST_DOUBLE:
6174 if (mode == SFmode || mode == SDmode)
6176 long l;
6178 if (DECIMAL_FLOAT_MODE_P (mode))
6179 REAL_VALUE_TO_TARGET_DECIMAL32
6180 (*CONST_DOUBLE_REAL_VALUE (op), l);
6181 else
6182 REAL_VALUE_TO_TARGET_SINGLE (*CONST_DOUBLE_REAL_VALUE (op), l);
6183 return num_insns_constant_wide ((HOST_WIDE_INT) l);
6186 long l[2];
6187 if (DECIMAL_FLOAT_MODE_P (mode))
6188 REAL_VALUE_TO_TARGET_DECIMAL64 (*CONST_DOUBLE_REAL_VALUE (op), l);
6189 else
6190 REAL_VALUE_TO_TARGET_DOUBLE (*CONST_DOUBLE_REAL_VALUE (op), l);
6191 high = l[WORDS_BIG_ENDIAN == 0];
6192 low = l[WORDS_BIG_ENDIAN != 0];
6194 if (TARGET_32BIT)
6195 return (num_insns_constant_wide (low)
6196 + num_insns_constant_wide (high));
6197 else
6199 if ((high == 0 && low >= 0)
6200 || (high == -1 && low < 0))
6201 return num_insns_constant_wide (low);
6203 else if (rs6000_is_valid_and_mask (op, mode))
6204 return 2;
6206 else if (low == 0)
6207 return num_insns_constant_wide (high) + 1;
6209 else
6210 return (num_insns_constant_wide (high)
6211 + num_insns_constant_wide (low) + 1);
6214 default:
6215 gcc_unreachable ();
6219 /* Interpret element ELT of the CONST_VECTOR OP as an integer value.
6220 If the mode of OP is MODE_VECTOR_INT, this simply returns the
6221 corresponding element of the vector, but for V4SFmode, the
6222 corresponding "float" is interpreted as an SImode integer. */
6224 HOST_WIDE_INT
6225 const_vector_elt_as_int (rtx op, unsigned int elt)
6227 rtx tmp;
6229 /* We can't handle V2DImode and V2DFmode vector constants here yet. */
6230 gcc_assert (GET_MODE (op) != V2DImode
6231 && GET_MODE (op) != V2DFmode);
6233 tmp = CONST_VECTOR_ELT (op, elt);
6234 if (GET_MODE (op) == V4SFmode)
6235 tmp = gen_lowpart (SImode, tmp);
6236 return INTVAL (tmp);
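/* For V4SFmode the element is reinterpreted rather than converted: the
   float 1.0f is returned as the SImode integer 0x3f800000, its IEEE-754
   single-precision bit pattern.  The same reinterpretation in plain C
   (float_bits_example is hypothetical):  */
#if 0
static int
float_bits_example (void)
{
  union { float f; int i; } u;
  u.f = 1.0f;
  return u.i;          /* 0x3f800000 */
}
#endif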
6239 /* Return true if OP can be synthesized with a particular vspltisb, vspltish
6240 or vspltisw instruction. OP is a CONST_VECTOR. Which instruction is used
6241 depends on STEP and COPIES, one of which will be 1. If COPIES > 1,
6242 all items are set to the same value and contain COPIES replicas of the
6243 vsplt's operand; if STEP > 1, one in STEP elements is set to the vsplt's
6244 operand and the others are set to the value of the operand's msb. */
6246 static bool
6247 vspltis_constant (rtx op, unsigned step, unsigned copies)
6249 machine_mode mode = GET_MODE (op);
6250 machine_mode inner = GET_MODE_INNER (mode);
6252 unsigned i;
6253 unsigned nunits;
6254 unsigned bitsize;
6255 unsigned mask;
6257 HOST_WIDE_INT val;
6258 HOST_WIDE_INT splat_val;
6259 HOST_WIDE_INT msb_val;
6261 if (mode == V2DImode || mode == V2DFmode || mode == V1TImode)
6262 return false;
6264 nunits = GET_MODE_NUNITS (mode);
6265 bitsize = GET_MODE_BITSIZE (inner);
6266 mask = GET_MODE_MASK (inner);
6268 val = const_vector_elt_as_int (op, BYTES_BIG_ENDIAN ? nunits - 1 : 0);
6269 splat_val = val;
6270 msb_val = val >= 0 ? 0 : -1;
6272 /* Construct the value to be splatted, if possible. If not, return 0. */
6273 for (i = 2; i <= copies; i *= 2)
6275 HOST_WIDE_INT small_val;
6276 bitsize /= 2;
6277 small_val = splat_val >> bitsize;
6278 mask >>= bitsize;
6279 if (splat_val != ((HOST_WIDE_INT)
6280 ((unsigned HOST_WIDE_INT) small_val << bitsize)
6281 | (small_val & mask)))
6282 return false;
6283 splat_val = small_val;
6286 /* Check if SPLAT_VAL can really be the operand of a vspltis[bhw]. */
6287 if (EASY_VECTOR_15 (splat_val))
6290 /* Also check if we can splat, and then add the result to itself. Do so if
6291 the value is positive, or if the splat instruction is using OP's mode;
6292 for splat_val < 0, the splat and the add should use the same mode. */
6293 else if (EASY_VECTOR_15_ADD_SELF (splat_val)
6294 && (splat_val >= 0 || (step == 1 && copies == 1)))
6297 /* Also check if we are loading up the most significant bit, which can be done by
6298 loading up -1 and shifting the value left by -1. */
6299 else if (EASY_VECTOR_MSB (splat_val, inner))
6302 else
6303 return false;
6305 /* Check if VAL is present in every STEP-th element, and the
6306 other elements are filled with its most significant bit. */
6307 for (i = 1; i < nunits; ++i)
6309 HOST_WIDE_INT desired_val;
6310 unsigned elt = BYTES_BIG_ENDIAN ? nunits - 1 - i : i;
6311 if ((i & (step - 1)) == 0)
6312 desired_val = val;
6313 else
6314 desired_val = msb_val;
6316 if (desired_val != const_vector_elt_as_int (op, elt))
6317 return false;
6320 return true;
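/* Two worked instances of the STEP/COPIES search (fold_copies_example is a
   hypothetical condensation of the folding loop above):

   - COPIES = 2: the V8HImode constant { 0x0505, ..., 0x0505 } folds to
     splat_val = 5, since each halfword is two copies of the byte 5; it is
     a single vspltisb 5 viewed as V16QImode.

   - STEP = 2: the big-endian V8HImode constant { 0, 5, 0, 5, ... } has
     val = 5 in one of every two halfwords and msb_val = 0 in the rest,
     i.e. it is { 5, 5, 5, 5 } viewed as V4SImode: a single vspltisw 5.  */
#if 0
static int
fold_copies_example (void)
{
  int splat_val = 0x0505, bitsize = 16, mask = 0xffff;
  bitsize /= 2;                            /* 8 */
  mask >>= bitsize;                        /* 0xff */
  int small_val = splat_val >> bitsize;    /* 5 */
  if (splat_val == ((small_val << bitsize) | (small_val & mask)))
    return small_val;                      /* foldable: splat the byte 5 */
  return -1;                               /* not a replicated pattern */
}
#endif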
6323 /* Like vspltis_constant, but allow the value to be shifted left with a VSLDOI
6324 instruction, filling in the bottom elements with 0 or -1.
6326 Return 0 if the constant cannot be generated with VSLDOI. Return positive
6327 for the number of zeroes to shift in, or negative for the number of 0xff
6328 bytes to shift in.
6330 OP is a CONST_VECTOR. */
6333 vspltis_shifted (rtx op)
6335 machine_mode mode = GET_MODE (op);
6336 machine_mode inner = GET_MODE_INNER (mode);
6338 unsigned i, j;
6339 unsigned nunits;
6340 unsigned mask;
6342 HOST_WIDE_INT val;
6344 if (mode != V16QImode && mode != V8HImode && mode != V4SImode)
6345 return 0;
6347 /* We need to create pseudo registers to do the shift, so don't recognize
6348 shift vector constants after reload. */
6349 if (!can_create_pseudo_p ())
6350 return false;
6352 nunits = GET_MODE_NUNITS (mode);
6353 mask = GET_MODE_MASK (inner);
6355 val = const_vector_elt_as_int (op, BYTES_BIG_ENDIAN ? 0 : nunits - 1);
6357 /* Check if the value can really be the operand of a vspltis[bhw]. */
6358 if (EASY_VECTOR_15 (val))
6361 /* Also check if we are loading up the most significant bit which can be done
6362 by loading up -1 and shifting the value left by -1. */
6363 else if (EASY_VECTOR_MSB (val, inner))
6366 else
6367 return 0;
6369 /* Check if VAL is present in every STEP-th element until we find elements
6370 that are 0 or all 1 bits. */
6371 for (i = 1; i < nunits; ++i)
6373 unsigned elt = BYTES_BIG_ENDIAN ? i : nunits - 1 - i;
6374 HOST_WIDE_INT elt_val = const_vector_elt_as_int (op, elt);
6376 /* If the value isn't the splat value, check for the remaining elements
6377 being 0/-1. */
6378 if (val != elt_val)
6380 if (elt_val == 0)
6382 for (j = i+1; j < nunits; ++j)
6384 unsigned elt2 = BYTES_BIG_ENDIAN ? j : nunits - 1 - j;
6385 if (const_vector_elt_as_int (op, elt2) != 0)
6386 return 0;
6389 return (nunits - i) * GET_MODE_SIZE (inner);
6392 else if ((elt_val & mask) == mask)
6394 for (j = i+1; j < nunits; ++j)
6396 unsigned elt2 = BYTES_BIG_ENDIAN ? j : nunits - 1 - j;
6397 if ((const_vector_elt_as_int (op, elt2) & mask) != mask)
6398 return 0;
6401 return -((nunits - i) * GET_MODE_SIZE (inner));
6404 else
6405 return 0;
6409 /* If all elements are equal, we don't need to do VSLDOI. */
6410 return 0;
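/* Worked example of the return-value convention above
   (shifted_bytes_example is a hypothetical condensation): on a big-endian
   target the V4SImode constant { 5, 5, 0, 0 } is a vspltisw 5 followed by
   a VSLDOI shifting in (4 - 2) * 4 = 8 zero bytes, so the function returns
   8; { 5, 5, -1, -1 } returns -8 (eight 0xff bytes shifted in instead).  */
#if 0
static int
shifted_bytes_example (int first_nonsplat, int nunits, int elt_size,
		       bool ones)
{
  int bytes = (nunits - first_nonsplat) * elt_size;
  return ones ? -bytes : bytes;    /* 8 or -8 for the examples above */
}
#endif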
6414 /* Return true if OP is of the given MODE and can be synthesized
6415 with a vspltisb, vspltish or vspltisw. */
6417 bool
6418 easy_altivec_constant (rtx op, machine_mode mode)
6420 unsigned step, copies;
6422 if (mode == VOIDmode)
6423 mode = GET_MODE (op);
6424 else if (mode != GET_MODE (op))
6425 return false;
6427 /* V2DI/V2DF was added with VSX. Only allow 0 and all 1's as easy
6428 constants. */
6429 if (mode == V2DFmode)
6430 return zero_constant (op, mode);
6432 else if (mode == V2DImode)
6434 if (GET_CODE (CONST_VECTOR_ELT (op, 0)) != CONST_INT
6435 || GET_CODE (CONST_VECTOR_ELT (op, 1)) != CONST_INT)
6436 return false;
6438 if (zero_constant (op, mode))
6439 return true;
6441 if (INTVAL (CONST_VECTOR_ELT (op, 0)) == -1
6442 && INTVAL (CONST_VECTOR_ELT (op, 1)) == -1)
6443 return true;
6445 return false;
6448 /* V1TImode is a special container for TImode. Ignore for now. */
6449 else if (mode == V1TImode)
6450 return false;
6452 /* Start with a vspltisw. */
6453 step = GET_MODE_NUNITS (mode) / 4;
6454 copies = 1;
6456 if (vspltis_constant (op, step, copies))
6457 return true;
6459 /* Then try with a vspltish. */
6460 if (step == 1)
6461 copies <<= 1;
6462 else
6463 step >>= 1;
6465 if (vspltis_constant (op, step, copies))
6466 return true;
6468 /* And finally a vspltisb. */
6469 if (step == 1)
6470 copies <<= 1;
6471 else
6472 step >>= 1;
6474 if (vspltis_constant (op, step, copies))
6475 return true;
6477 if (vspltis_shifted (op) != 0)
6478 return true;
6480 return false;
6483 /* Generate a VEC_DUPLICATE representing a vspltis[bhw] instruction whose
6484 result is OP. Abort if it is not possible. */
6487 gen_easy_altivec_constant (rtx op)
6489 machine_mode mode = GET_MODE (op);
6490 int nunits = GET_MODE_NUNITS (mode);
6491 rtx val = CONST_VECTOR_ELT (op, BYTES_BIG_ENDIAN ? nunits - 1 : 0);
6492 unsigned step = nunits / 4;
6493 unsigned copies = 1;
6495 /* Start with a vspltisw. */
6496 if (vspltis_constant (op, step, copies))
6497 return gen_rtx_VEC_DUPLICATE (V4SImode, gen_lowpart (SImode, val));
6499 /* Then try with a vspltish. */
6500 if (step == 1)
6501 copies <<= 1;
6502 else
6503 step >>= 1;
6505 if (vspltis_constant (op, step, copies))
6506 return gen_rtx_VEC_DUPLICATE (V8HImode, gen_lowpart (HImode, val));
6508 /* And finally a vspltisb. */
6509 if (step == 1)
6510 copies <<= 1;
6511 else
6512 step >>= 1;
6514 if (vspltis_constant (op, step, copies))
6515 return gen_rtx_VEC_DUPLICATE (V16QImode, gen_lowpart (QImode, val));
6517 gcc_unreachable ();
6520 /* Return true if OP is of the given MODE and can be synthesized with ISA 3.0
6521 instructions (xxspltib, vupkhsb/vextsb2w/vextsb2d).
6523 Return the number of instructions needed (1 or 2) via the address pointed
6524 to by NUM_INSNS_PTR.
6526 Return the constant that is being splatted via CONSTANT_PTR. */
6528 bool
6529 xxspltib_constant_p (rtx op,
6530 machine_mode mode,
6531 int *num_insns_ptr,
6532 int *constant_ptr)
6534 size_t nunits = GET_MODE_NUNITS (mode);
6535 size_t i;
6536 HOST_WIDE_INT value;
6537 rtx element;
6539 /* Set the returned values to out of bound values. */
6540 *num_insns_ptr = -1;
6541 *constant_ptr = 256;
6543 if (!TARGET_P9_VECTOR)
6544 return false;
6546 if (mode == VOIDmode)
6547 mode = GET_MODE (op);
6549 else if (mode != GET_MODE (op) && GET_MODE (op) != VOIDmode)
6550 return false;
6552 /* Handle (vec_duplicate <constant>). */
6553 if (GET_CODE (op) == VEC_DUPLICATE)
6555 if (mode != V16QImode && mode != V8HImode && mode != V4SImode
6556 && mode != V2DImode)
6557 return false;
6559 element = XEXP (op, 0);
6560 if (!CONST_INT_P (element))
6561 return false;
6563 value = INTVAL (element);
6564 if (!IN_RANGE (value, -128, 127))
6565 return false;
6568 /* Handle (const_vector [...]). */
6569 else if (GET_CODE (op) == CONST_VECTOR)
6571 if (mode != V16QImode && mode != V8HImode && mode != V4SImode
6572 && mode != V2DImode)
6573 return false;
6575 element = CONST_VECTOR_ELT (op, 0);
6576 if (!CONST_INT_P (element))
6577 return false;
6579 value = INTVAL (element);
6580 if (!IN_RANGE (value, -128, 127))
6581 return false;
6583 for (i = 1; i < nunits; i++)
6585 element = CONST_VECTOR_ELT (op, i);
6586 if (!CONST_INT_P (element))
6587 return false;
6589 if (value != INTVAL (element))
6590 return false;
6594 /* Handle integer constants being loaded into the upper part of the VSX
6595 register as a scalar. If the value isn't 0/-1, only allow it if the mode
6596 can go in Altivec registers. Prefer VSPLTISW/VUPKHSW over XXSPLTIB. */
6597 else if (CONST_INT_P (op))
6599 if (!SCALAR_INT_MODE_P (mode))
6600 return false;
6602 value = INTVAL (op);
6603 if (!IN_RANGE (value, -128, 127))
6604 return false;
6606 if (!IN_RANGE (value, -1, 0))
6608 if (!(reg_addr[mode].addr_mask[RELOAD_REG_VMX] & RELOAD_REG_VALID))
6609 return false;
6611 if (EASY_VECTOR_15 (value))
6612 return false;
6616 else
6617 return false;
6619 /* See if we could generate vspltisw/vspltish directly instead of xxspltib +
6620 sign extend. Special case 0/-1 to allow getting any VSX register instead
6621 of an Altivec register. */
6622 if ((mode == V4SImode || mode == V8HImode) && !IN_RANGE (value, -1, 0)
6623 && EASY_VECTOR_15 (value))
6624 return false;
6626 /* Return # of instructions and the constant byte for XXSPLTIB. */
6627 if (mode == V16QImode)
6628 *num_insns_ptr = 1;
6630 else if (IN_RANGE (value, -1, 0))
6631 *num_insns_ptr = 1;
6633 else
6634 *num_insns_ptr = 2;
6636 *constant_ptr = (int) value;
6637 return true;
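/* Decision examples for the predicate above, assuming TARGET_P9_VECTOR
   (xxspltib_insns_example is a simplified, hypothetical condensation that
   ignores the Altivec-register and vspltisw-preference checks):
     V16QI { 20, ..., 20 }     -> true,  1 insn  (xxspltib 20)
     V4SI  { 20, 20, 20, 20 }  -> true,  2 insns (xxspltib 20; vextsb2w)
     V4SI  { -1, -1, -1, -1 }  -> true,  1 insn  (0/-1 bytes splat to the
                                                  same words, no extend)
     V4SI  {  5,  5,  5,  5 }  -> false (EASY_VECTOR_15: a plain
                                         vspltisw 5 is preferred)  */
#if 0
static int
xxspltib_insns_example (int value, bool is_v16qi)
{
  if (value < -128 || value > 127)
    return 0;                  /* not representable as a splatted byte */
  if (is_v16qi || value == 0 || value == -1)
    return 1;                  /* xxspltib alone suffices */
  return 2;                    /* xxspltib plus a vexts* sign-extend */
}
#endif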
6640 const char *
6641 output_vec_const_move (rtx *operands)
6643 int shift;
6644 machine_mode mode;
6645 rtx dest, vec;
6647 dest = operands[0];
6648 vec = operands[1];
6649 mode = GET_MODE (dest);
6651 if (TARGET_VSX)
6653 bool dest_vmx_p = ALTIVEC_REGNO_P (REGNO (dest));
6654 int xxspltib_value = 256;
6655 int num_insns = -1;
6657 if (zero_constant (vec, mode))
6659 if (TARGET_P9_VECTOR)
6660 return "xxspltib %x0,0";
6662 else if (dest_vmx_p)
6663 return "vspltisw %0,0";
6665 else
6666 return "xxlxor %x0,%x0,%x0";
6669 if (all_ones_constant (vec, mode))
6671 if (TARGET_P9_VECTOR)
6672 return "xxspltib %x0,255";
6674 else if (dest_vmx_p)
6675 return "vspltisw %0,-1";
6677 else if (TARGET_P8_VECTOR)
6678 return "xxlorc %x0,%x0,%x0";
6680 else
6681 gcc_unreachable ();
6684 if (TARGET_P9_VECTOR
6685 && xxspltib_constant_p (vec, mode, &num_insns, &xxspltib_value))
6687 if (num_insns == 1)
6689 operands[2] = GEN_INT (xxspltib_value & 0xff);
6690 return "xxspltib %x0,%2";
6693 return "#";
6697 if (TARGET_ALTIVEC)
6699 rtx splat_vec;
6701 gcc_assert (ALTIVEC_REGNO_P (REGNO (dest)));
6702 if (zero_constant (vec, mode))
6703 return "vspltisw %0,0";
6705 if (all_ones_constant (vec, mode))
6706 return "vspltisw %0,-1";
6708 /* Do we need to construct a value using VSLDOI? */
6709 shift = vspltis_shifted (vec);
6710 if (shift != 0)
6711 return "#";
6713 splat_vec = gen_easy_altivec_constant (vec);
6714 gcc_assert (GET_CODE (splat_vec) == VEC_DUPLICATE);
6715 operands[1] = XEXP (splat_vec, 0);
6716 if (!EASY_VECTOR_15 (INTVAL (operands[1])))
6717 return "#";
6719 switch (GET_MODE (splat_vec))
6721 case E_V4SImode:
6722 return "vspltisw %0,%1";
6724 case E_V8HImode:
6725 return "vspltish %0,%1";
6727 case E_V16QImode:
6728 return "vspltisb %0,%1";
6730 default:
6731 gcc_unreachable ();
6735 gcc_unreachable ();
6738 /* Initialize vector TARGET to VALS. */
6740 void
6741 rs6000_expand_vector_init (rtx target, rtx vals)
6743 machine_mode mode = GET_MODE (target);
6744 machine_mode inner_mode = GET_MODE_INNER (mode);
6745 int n_elts = GET_MODE_NUNITS (mode);
6746 int n_var = 0, one_var = -1;
6747 bool all_same = true, all_const_zero = true;
6748 rtx x, mem;
6749 int i;
6751 for (i = 0; i < n_elts; ++i)
6753 x = XVECEXP (vals, 0, i);
6754 if (!(CONST_SCALAR_INT_P (x) || CONST_DOUBLE_P (x) || CONST_FIXED_P (x)))
6755 ++n_var, one_var = i;
6756 else if (x != CONST0_RTX (inner_mode))
6757 all_const_zero = false;
6759 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
6760 all_same = false;
6763 if (n_var == 0)
6765 rtx const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0));
6766 bool int_vector_p = (GET_MODE_CLASS (mode) == MODE_VECTOR_INT);
6767 if ((int_vector_p || TARGET_VSX) && all_const_zero)
6769 /* Zero register. */
6770 emit_move_insn (target, CONST0_RTX (mode));
6771 return;
6773 else if (int_vector_p && easy_vector_constant (const_vec, mode))
6775 /* Splat immediate. */
6776 emit_insn (gen_rtx_SET (target, const_vec));
6777 return;
6779 else
6781 /* Load from constant pool. */
6782 emit_move_insn (target, const_vec);
6783 return;
6787 /* Double word values on VSX can use xxpermdi or lxvdsx. */
6788 if (VECTOR_MEM_VSX_P (mode) && (mode == V2DFmode || mode == V2DImode))
6790 rtx op[2];
6791 size_t i;
6792 size_t num_elements = all_same ? 1 : 2;
6793 for (i = 0; i < num_elements; i++)
6795 op[i] = XVECEXP (vals, 0, i);
6796 /* Just in case there is a SUBREG with a smaller mode, do a
6797 conversion. */
6798 if (GET_MODE (op[i]) != inner_mode)
6800 rtx tmp = gen_reg_rtx (inner_mode);
6801 convert_move (tmp, op[i], 0);
6802 op[i] = tmp;
6804 /* Allow load with splat double word. */
6805 else if (MEM_P (op[i]))
6807 if (!all_same)
6808 op[i] = force_reg (inner_mode, op[i]);
6810 else if (!REG_P (op[i]))
6811 op[i] = force_reg (inner_mode, op[i]);
6814 if (all_same)
6816 if (mode == V2DFmode)
6817 emit_insn (gen_vsx_splat_v2df (target, op[0]));
6818 else
6819 emit_insn (gen_vsx_splat_v2di (target, op[0]));
6821 else
6823 if (mode == V2DFmode)
6824 emit_insn (gen_vsx_concat_v2df (target, op[0], op[1]));
6825 else
6826 emit_insn (gen_vsx_concat_v2di (target, op[0], op[1]));
6828 return;
6831 /* Special case initializing vector int if we are on 64-bit systems with
6832 direct move or we have the ISA 3.0 instructions. */
6833 if (mode == V4SImode && VECTOR_MEM_VSX_P (V4SImode)
6834 && TARGET_DIRECT_MOVE_64BIT)
6836 if (all_same)
6838 rtx element0 = XVECEXP (vals, 0, 0);
6839 if (MEM_P (element0))
6840 element0 = rs6000_address_for_fpconvert (element0);
6841 else
6842 element0 = force_reg (SImode, element0);
6844 if (TARGET_P9_VECTOR)
6845 emit_insn (gen_vsx_splat_v4si (target, element0));
6846 else
6848 rtx tmp = gen_reg_rtx (DImode);
6849 emit_insn (gen_zero_extendsidi2 (tmp, element0));
6850 emit_insn (gen_vsx_splat_v4si_di (target, tmp));
6852 return;
6854 else
6856 rtx elements[4];
6857 size_t i;
6859 for (i = 0; i < 4; i++)
6861 elements[i] = XVECEXP (vals, 0, i);
6862 if (!CONST_INT_P (elements[i]) && !REG_P (elements[i]))
6863 elements[i] = copy_to_mode_reg (SImode, elements[i]);
6866 emit_insn (gen_vsx_init_v4si (target, elements[0], elements[1],
6867 elements[2], elements[3]));
6868 return;
6872 /* With single precision floating point on VSX, know that internally single
6873 precision is actually represented as a double, and either make 2 V2DF
6874 vectors, and convert these vectors to single precision, or do one
6875 conversion, and splat the result to the other elements. */
6876 if (mode == V4SFmode && VECTOR_MEM_VSX_P (V4SFmode))
6878 if (all_same)
6880 rtx element0 = XVECEXP (vals, 0, 0);
6882 if (TARGET_P9_VECTOR)
6884 if (MEM_P (element0))
6885 element0 = rs6000_address_for_fpconvert (element0);
6887 emit_insn (gen_vsx_splat_v4sf (target, element0));
6890 else
6892 rtx freg = gen_reg_rtx (V4SFmode);
6893 rtx sreg = force_reg (SFmode, element0);
6894 rtx cvt = (TARGET_XSCVDPSPN
6895 ? gen_vsx_xscvdpspn_scalar (freg, sreg)
6896 : gen_vsx_xscvdpsp_scalar (freg, sreg));
6898 emit_insn (cvt);
6899 emit_insn (gen_vsx_xxspltw_v4sf_direct (target, freg,
6900 const0_rtx));
6903 else
6905 rtx dbl_even = gen_reg_rtx (V2DFmode);
6906 rtx dbl_odd = gen_reg_rtx (V2DFmode);
6907 rtx flt_even = gen_reg_rtx (V4SFmode);
6908 rtx flt_odd = gen_reg_rtx (V4SFmode);
6909 rtx op0 = force_reg (SFmode, XVECEXP (vals, 0, 0));
6910 rtx op1 = force_reg (SFmode, XVECEXP (vals, 0, 1));
6911 rtx op2 = force_reg (SFmode, XVECEXP (vals, 0, 2));
6912 rtx op3 = force_reg (SFmode, XVECEXP (vals, 0, 3));
6914 /* Use VMRGEW if we can instead of doing a permute. */
6915 if (TARGET_P8_VECTOR)
6917 emit_insn (gen_vsx_concat_v2sf (dbl_even, op0, op2));
6918 emit_insn (gen_vsx_concat_v2sf (dbl_odd, op1, op3));
6919 emit_insn (gen_vsx_xvcvdpsp (flt_even, dbl_even));
6920 emit_insn (gen_vsx_xvcvdpsp (flt_odd, dbl_odd));
6921 if (BYTES_BIG_ENDIAN)
6922 emit_insn (gen_p8_vmrgew_v4sf_direct (target, flt_even, flt_odd));
6923 else
6924 emit_insn (gen_p8_vmrgew_v4sf_direct (target, flt_odd, flt_even));
6926 else
6928 emit_insn (gen_vsx_concat_v2sf (dbl_even, op0, op1));
6929 emit_insn (gen_vsx_concat_v2sf (dbl_odd, op2, op3));
6930 emit_insn (gen_vsx_xvcvdpsp (flt_even, dbl_even));
6931 emit_insn (gen_vsx_xvcvdpsp (flt_odd, dbl_odd));
6932 rs6000_expand_extract_even (target, flt_even, flt_odd);
6935 return;
6938 /* Special case initializing vector short/char that are splats if we are on
6939 64-bit systems with direct move. */
6940 if (all_same && TARGET_DIRECT_MOVE_64BIT
6941 && (mode == V16QImode || mode == V8HImode))
6943 rtx op0 = XVECEXP (vals, 0, 0);
6944 rtx di_tmp = gen_reg_rtx (DImode);
6946 if (!REG_P (op0))
6947 op0 = force_reg (GET_MODE_INNER (mode), op0);
6949 if (mode == V16QImode)
6951 emit_insn (gen_zero_extendqidi2 (di_tmp, op0));
6952 emit_insn (gen_vsx_vspltb_di (target, di_tmp));
6953 return;
6956 if (mode == V8HImode)
6958 emit_insn (gen_zero_extendhidi2 (di_tmp, op0));
6959 emit_insn (gen_vsx_vsplth_di (target, di_tmp));
6960 return;
6964 /* Store value to stack temp. Load vector element. Splat. However, splat
6965 of 64-bit items is not supported on Altivec. */
6966 if (all_same && GET_MODE_SIZE (inner_mode) <= 4)
6968 mem = assign_stack_temp (mode, GET_MODE_SIZE (inner_mode));
6969 emit_move_insn (adjust_address_nv (mem, inner_mode, 0),
6970 XVECEXP (vals, 0, 0));
6971 x = gen_rtx_UNSPEC (VOIDmode,
6972 gen_rtvec (1, const0_rtx), UNSPEC_LVE);
6973 emit_insn (gen_rtx_PARALLEL (VOIDmode,
6974 gen_rtvec (2,
6975 gen_rtx_SET (target, mem),
6976 x)));
6977 x = gen_rtx_VEC_SELECT (inner_mode, target,
6978 gen_rtx_PARALLEL (VOIDmode,
6979 gen_rtvec (1, const0_rtx)));
6980 emit_insn (gen_rtx_SET (target, gen_rtx_VEC_DUPLICATE (mode, x)));
6981 return;
6984 /* One field is non-constant. Load constant then overwrite
6985 varying field. */
6986 if (n_var == 1)
6988 rtx copy = copy_rtx (vals);
6990 /* Load constant part of vector, substitute neighboring value for
6991 varying element. */
6992 XVECEXP (copy, 0, one_var) = XVECEXP (vals, 0, (one_var + 1) % n_elts);
6993 rs6000_expand_vector_init (target, copy);
6995 /* Insert variable. */
6996 rs6000_expand_vector_set (target, XVECEXP (vals, 0, one_var), one_var);
6997 return;
7000 /* Construct the vector in memory one field at a time
7001 and load the whole vector. */
7002 mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
7003 for (i = 0; i < n_elts; i++)
7004 emit_move_insn (adjust_address_nv (mem, inner_mode,
7005 i * GET_MODE_SIZE (inner_mode)),
7006 XVECEXP (vals, 0, i));
7007 emit_move_insn (target, mem);
7010 /* Set field ELT of TARGET to VAL. */
7012 void
7013 rs6000_expand_vector_set (rtx target, rtx val, int elt)
7015 machine_mode mode = GET_MODE (target);
7016 machine_mode inner_mode = GET_MODE_INNER (mode);
7017 rtx reg = gen_reg_rtx (mode);
7018 rtx mask, mem, x;
7019 int width = GET_MODE_SIZE (inner_mode);
7020 int i;
7022 val = force_reg (GET_MODE (val), val);
7024 if (VECTOR_MEM_VSX_P (mode))
7026 rtx insn = NULL_RTX;
7027 rtx elt_rtx = GEN_INT (elt);
7029 if (mode == V2DFmode)
7030 insn = gen_vsx_set_v2df (target, target, val, elt_rtx);
7032 else if (mode == V2DImode)
7033 insn = gen_vsx_set_v2di (target, target, val, elt_rtx);
7035 else if (TARGET_P9_VECTOR && TARGET_POWERPC64)
7037 if (mode == V4SImode)
7038 insn = gen_vsx_set_v4si_p9 (target, target, val, elt_rtx);
7039 else if (mode == V8HImode)
7040 insn = gen_vsx_set_v8hi_p9 (target, target, val, elt_rtx);
7041 else if (mode == V16QImode)
7042 insn = gen_vsx_set_v16qi_p9 (target, target, val, elt_rtx);
7043 else if (mode == V4SFmode)
7044 insn = gen_vsx_set_v4sf_p9 (target, target, val, elt_rtx);
7047 if (insn)
7049 emit_insn (insn);
7050 return;
7054 /* Simplify setting single element vectors like V1TImode. */
7055 if (GET_MODE_SIZE (mode) == GET_MODE_SIZE (inner_mode) && elt == 0)
7057 emit_move_insn (target, gen_lowpart (mode, val));
7058 return;
7061 /* Load single variable value. */
7062 mem = assign_stack_temp (mode, GET_MODE_SIZE (inner_mode));
7063 emit_move_insn (adjust_address_nv (mem, inner_mode, 0), val);
7064 x = gen_rtx_UNSPEC (VOIDmode,
7065 gen_rtvec (1, const0_rtx), UNSPEC_LVE);
7066 emit_insn (gen_rtx_PARALLEL (VOIDmode,
7067 gen_rtvec (2,
7068 gen_rtx_SET (reg, mem),
7069 x)));
7071 /* Linear sequence. */
7072 mask = gen_rtx_PARALLEL (V16QImode, rtvec_alloc (16));
7073 for (i = 0; i < 16; ++i)
7074 XVECEXP (mask, 0, i) = GEN_INT (i);
7076 /* Set permute mask to insert element into target. */
7077 for (i = 0; i < width; ++i)
7078 XVECEXP (mask, 0, elt * width + i)
7079 = GEN_INT (i + 0x10);
7080 x = gen_rtx_CONST_VECTOR (V16QImode, XVEC (mask, 0));
7082 if (BYTES_BIG_ENDIAN)
7083 x = gen_rtx_UNSPEC (mode,
7084 gen_rtvec (3, target, reg,
7085 force_reg (V16QImode, x)),
7086 UNSPEC_VPERM);
7087 else
7089 if (TARGET_P9_VECTOR)
7090 x = gen_rtx_UNSPEC (mode,
7091 gen_rtvec (3, reg, target,
7092 force_reg (V16QImode, x)),
7093 UNSPEC_VPERMR);
7094 else
7096 /* Invert selector. We prefer to generate VNAND on P8 so
7097 that future fusion opportunities can kick in, but must
7098 generate VNOR elsewhere. */
7099 rtx notx = gen_rtx_NOT (V16QImode, force_reg (V16QImode, x));
7100 rtx iorx = (TARGET_P8_VECTOR
7101 ? gen_rtx_IOR (V16QImode, notx, notx)
7102 : gen_rtx_AND (V16QImode, notx, notx));
7103 rtx tmp = gen_reg_rtx (V16QImode);
7104 emit_insn (gen_rtx_SET (tmp, iorx));
7106 /* Permute with operands reversed and adjusted selector. */
7107 x = gen_rtx_UNSPEC (mode, gen_rtvec (3, reg, target, tmp),
7108 UNSPEC_VPERM);
7112 emit_insn (gen_rtx_SET (target, x));
7115 /* Extract field ELT from VEC into TARGET. */
7117 void
7118 rs6000_expand_vector_extract (rtx target, rtx vec, rtx elt)
7120 machine_mode mode = GET_MODE (vec);
7121 machine_mode inner_mode = GET_MODE_INNER (mode);
7122 rtx mem;
7124 if (VECTOR_MEM_VSX_P (mode) && CONST_INT_P (elt))
7126 switch (mode)
7128 default:
7129 break;
7130 case E_V1TImode:
7131 gcc_assert (INTVAL (elt) == 0 && inner_mode == TImode);
7132 emit_move_insn (target, gen_lowpart (TImode, vec));
7133 break;
7134 case E_V2DFmode:
7135 emit_insn (gen_vsx_extract_v2df (target, vec, elt));
7136 return;
7137 case E_V2DImode:
7138 emit_insn (gen_vsx_extract_v2di (target, vec, elt));
7139 return;
7140 case E_V4SFmode:
7141 emit_insn (gen_vsx_extract_v4sf (target, vec, elt));
7142 return;
7143 case E_V16QImode:
7144 if (TARGET_DIRECT_MOVE_64BIT)
7146 emit_insn (gen_vsx_extract_v16qi (target, vec, elt));
7147 return;
7149 else
7150 break;
7151 case E_V8HImode:
7152 if (TARGET_DIRECT_MOVE_64BIT)
7154 emit_insn (gen_vsx_extract_v8hi (target, vec, elt));
7155 return;
7157 else
7158 break;
7159 case E_V4SImode:
7160 if (TARGET_DIRECT_MOVE_64BIT)
7162 emit_insn (gen_vsx_extract_v4si (target, vec, elt));
7163 return;
7165 break;
7168 else if (VECTOR_MEM_VSX_P (mode) && !CONST_INT_P (elt)
7169 && TARGET_DIRECT_MOVE_64BIT)
7171 if (GET_MODE (elt) != DImode)
7173 rtx tmp = gen_reg_rtx (DImode);
7174 convert_move (tmp, elt, 0);
7175 elt = tmp;
7177 else if (!REG_P (elt))
7178 elt = force_reg (DImode, elt);
7180 switch (mode)
7182 case E_V2DFmode:
7183 emit_insn (gen_vsx_extract_v2df_var (target, vec, elt));
7184 return;
7186 case E_V2DImode:
7187 emit_insn (gen_vsx_extract_v2di_var (target, vec, elt));
7188 return;
7190 case E_V4SFmode:
7191 emit_insn (gen_vsx_extract_v4sf_var (target, vec, elt));
7192 return;
7194 case E_V4SImode:
7195 emit_insn (gen_vsx_extract_v4si_var (target, vec, elt));
7196 return;
7198 case E_V8HImode:
7199 emit_insn (gen_vsx_extract_v8hi_var (target, vec, elt));
7200 return;
7202 case E_V16QImode:
7203 emit_insn (gen_vsx_extract_v16qi_var (target, vec, elt));
7204 return;
7206 default:
7207 gcc_unreachable ();
7211 gcc_assert (CONST_INT_P (elt));
7213 /* Allocate mode-sized buffer. */
7214 mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
7216 emit_move_insn (mem, vec);
7218 /* Add offset to field within buffer matching vector element. */
7219 mem = adjust_address_nv (mem, inner_mode,
7220 INTVAL (elt) * GET_MODE_SIZE (inner_mode));
7222 emit_move_insn (target, adjust_address_nv (mem, inner_mode, 0));
7225 /* Helper function to return the register number of an RTX.  */
7226 static inline int
7227 regno_or_subregno (rtx op)
7229 if (REG_P (op))
7230 return REGNO (op);
7231 else if (SUBREG_P (op))
7232 return subreg_regno (op);
7233 else
7234 gcc_unreachable ();
7237 /* Adjust a memory address (MEM) of a vector type to point to a scalar field
7238 within the vector (ELEMENT) with a mode (SCALAR_MODE). Use a base register
7239    temporary (BASE_TMP) to fix up the address.  Return the new memory address
7240 that is valid for reads or writes to a given register (SCALAR_REG). */
7243 rs6000_adjust_vec_address (rtx scalar_reg,
7244 rtx mem,
7245 rtx element,
7246 rtx base_tmp,
7247 machine_mode scalar_mode)
7249 unsigned scalar_size = GET_MODE_SIZE (scalar_mode);
7250 rtx addr = XEXP (mem, 0);
7251 rtx element_offset;
7252 rtx new_addr;
7253 bool valid_addr_p;
7255 /* Vector addresses should not have PRE_INC, PRE_DEC, or PRE_MODIFY. */
7256 gcc_assert (GET_RTX_CLASS (GET_CODE (addr)) != RTX_AUTOINC);
7258 /* Calculate what we need to add to the address to get the element
7259 address. */
7260 if (CONST_INT_P (element))
7261 element_offset = GEN_INT (INTVAL (element) * scalar_size);
7262 else
7264 int byte_shift = exact_log2 (scalar_size);
7265 gcc_assert (byte_shift >= 0);
7267 if (byte_shift == 0)
7268 element_offset = element;
7270 else
7272 if (TARGET_POWERPC64)
7273 emit_insn (gen_ashldi3 (base_tmp, element, GEN_INT (byte_shift)));
7274 else
7275 emit_insn (gen_ashlsi3 (base_tmp, element, GEN_INT (byte_shift)));
7277 element_offset = base_tmp;
7281 /* Create the new address pointing to the element within the vector. If we
7282 are adding 0, we don't have to change the address. */
7283 if (element_offset == const0_rtx)
7284 new_addr = addr;
7286 /* A simple indirect address can be converted into a reg + offset
7287 address. */
7288 else if (REG_P (addr) || SUBREG_P (addr))
7289 new_addr = gen_rtx_PLUS (Pmode, addr, element_offset);
7291 /* Optimize D-FORM addresses with constant offset with a constant element, to
7292 include the element offset in the address directly. */
7293 else if (GET_CODE (addr) == PLUS)
7295 rtx op0 = XEXP (addr, 0);
7296 rtx op1 = XEXP (addr, 1);
7297 rtx insn;
7299 gcc_assert (REG_P (op0) || SUBREG_P (op0));
7300 if (CONST_INT_P (op1) && CONST_INT_P (element_offset))
7302 HOST_WIDE_INT offset = INTVAL (op1) + INTVAL (element_offset);
7303 rtx offset_rtx = GEN_INT (offset);
7305 if (IN_RANGE (offset, -32768, 32767)
7306 && (scalar_size < 8 || (offset & 0x3) == 0))
7307 new_addr = gen_rtx_PLUS (Pmode, op0, offset_rtx);
7308 else
7310 emit_move_insn (base_tmp, offset_rtx);
7311 new_addr = gen_rtx_PLUS (Pmode, op0, base_tmp);
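      /* For example, extracting element 3 of a V4SImode vector stored at
	 (mem (plus r9 32)) folds to offset 32 + 3*4 = 44, giving the valid
	 D-form address (plus r9 44).  */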
7314 else
7316 bool op1_reg_p = (REG_P (op1) || SUBREG_P (op1));
7317 bool ele_reg_p = (REG_P (element_offset) || SUBREG_P (element_offset));
7319 /* Note, ADDI requires the register being added to be a base
7320 register. If the register was R0, load it up into the temporary
7321 and do the add. */
7322 if (op1_reg_p
7323 && (ele_reg_p || reg_or_subregno (op1) != FIRST_GPR_REGNO))
7325 insn = gen_add3_insn (base_tmp, op1, element_offset);
7326 gcc_assert (insn != NULL_RTX);
7327 emit_insn (insn);
7330 else if (ele_reg_p
7331 && reg_or_subregno (element_offset) != FIRST_GPR_REGNO)
7333 insn = gen_add3_insn (base_tmp, element_offset, op1);
7334 gcc_assert (insn != NULL_RTX);
7335 emit_insn (insn);
7338 else
7340 emit_move_insn (base_tmp, op1);
7341 emit_insn (gen_add2_insn (base_tmp, element_offset));
7344 new_addr = gen_rtx_PLUS (Pmode, op0, base_tmp);
7348 else
7350 emit_move_insn (base_tmp, addr);
7351 new_addr = gen_rtx_PLUS (Pmode, base_tmp, element_offset);
7354 /* If we have a PLUS, we need to see whether the particular register class
7355 allows for D-FORM or X-FORM addressing. */
7356 if (GET_CODE (new_addr) == PLUS)
7358 rtx op1 = XEXP (new_addr, 1);
7359 addr_mask_type addr_mask;
7360 int scalar_regno = regno_or_subregno (scalar_reg);
7362 gcc_assert (scalar_regno < FIRST_PSEUDO_REGISTER);
7363 if (INT_REGNO_P (scalar_regno))
7364 addr_mask = reg_addr[scalar_mode].addr_mask[RELOAD_REG_GPR];
7366 else if (FP_REGNO_P (scalar_regno))
7367 addr_mask = reg_addr[scalar_mode].addr_mask[RELOAD_REG_FPR];
7369 else if (ALTIVEC_REGNO_P (scalar_regno))
7370 addr_mask = reg_addr[scalar_mode].addr_mask[RELOAD_REG_VMX];
7372 else
7373 gcc_unreachable ();
7375 if (REG_P (op1) || SUBREG_P (op1))
7376 valid_addr_p = (addr_mask & RELOAD_REG_INDEXED) != 0;
7377 else
7378 valid_addr_p = (addr_mask & RELOAD_REG_OFFSET) != 0;
7381 else if (REG_P (new_addr) || SUBREG_P (new_addr))
7382 valid_addr_p = true;
7384 else
7385 valid_addr_p = false;
7387 if (!valid_addr_p)
7389 emit_move_insn (base_tmp, new_addr);
7390 new_addr = base_tmp;
7393 return change_address (mem, scalar_mode, new_addr);
7396 /* Split a variable vec_extract operation into the component instructions. */
7398 void
7399 rs6000_split_vec_extract_var (rtx dest, rtx src, rtx element, rtx tmp_gpr,
7400 rtx tmp_altivec)
7402 machine_mode mode = GET_MODE (src);
7403 machine_mode scalar_mode = GET_MODE (dest);
7404 unsigned scalar_size = GET_MODE_SIZE (scalar_mode);
7405 int byte_shift = exact_log2 (scalar_size);
7407 gcc_assert (byte_shift >= 0);
7409 /* If we are given a memory address, optimize to load just the element. We
7410 don't have to adjust the vector element number on little endian
7411 systems. */
7412 if (MEM_P (src))
7414 gcc_assert (REG_P (tmp_gpr));
7415 emit_move_insn (dest, rs6000_adjust_vec_address (dest, src, element,
7416 tmp_gpr, scalar_mode));
7417 return;
7420 else if (REG_P (src) || SUBREG_P (src))
7422 int bit_shift = byte_shift + 3;
7423 rtx element2;
7424 int dest_regno = regno_or_subregno (dest);
7425 int src_regno = regno_or_subregno (src);
7426 int element_regno = regno_or_subregno (element);
7428 gcc_assert (REG_P (tmp_gpr));
7430 /* See if we want to generate VEXTU{B,H,W}{L,R}X if the destination is in
7431 a general purpose register. */
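      /* E.g. a variable V16QImode extract then becomes a single vextublx
	 (big endian) or vextubrx (little endian) with the element number in
	 a GPR; the V8HI/V4SI cases first scale the index by the element
	 size.  */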
7432 if (TARGET_P9_VECTOR
7433 && (mode == V16QImode || mode == V8HImode || mode == V4SImode)
7434 && INT_REGNO_P (dest_regno)
7435 && ALTIVEC_REGNO_P (src_regno)
7436 && INT_REGNO_P (element_regno))
7438 rtx dest_si = gen_rtx_REG (SImode, dest_regno);
7439 rtx element_si = gen_rtx_REG (SImode, element_regno);
7441 if (mode == V16QImode)
7442 emit_insn (BYTES_BIG_ENDIAN
7443 ? gen_vextublx (dest_si, element_si, src)
7444 : gen_vextubrx (dest_si, element_si, src));
7446 else if (mode == V8HImode)
7448 rtx tmp_gpr_si = gen_rtx_REG (SImode, REGNO (tmp_gpr));
7449 emit_insn (gen_ashlsi3 (tmp_gpr_si, element_si, const1_rtx));
7450 emit_insn (BYTES_BIG_ENDIAN
7451 ? gen_vextuhlx (dest_si, tmp_gpr_si, src)
7452 : gen_vextuhrx (dest_si, tmp_gpr_si, src));
7456 else
7458 rtx tmp_gpr_si = gen_rtx_REG (SImode, REGNO (tmp_gpr));
7459 emit_insn (gen_ashlsi3 (tmp_gpr_si, element_si, const2_rtx));
7460 emit_insn (BYTES_BIG_ENDIAN
7461 ? gen_vextuwlx (dest_si, tmp_gpr_si, src)
7462 : gen_vextuwrx (dest_si, tmp_gpr_si, src));
7465 return;
7469 gcc_assert (REG_P (tmp_altivec));
7471 /* For little endian, adjust element ordering. For V2DI/V2DF, we can use
7472	 an XOR, otherwise we need to subtract.  The shift amount is chosen so
7473	 that VSLO will shift the element into the upper position (adding 3
7474	 converts a byte shift into a bit shift).  */
7475 if (scalar_size == 8)
7477 if (!BYTES_BIG_ENDIAN)
7479 emit_insn (gen_xordi3 (tmp_gpr, element, const1_rtx));
7480 element2 = tmp_gpr;
7482 else
7483 element2 = element;
7485 /* Generate RLDIC directly to shift left 6 bits and retrieve 1
7486 bit. */
7487 emit_insn (gen_rtx_SET (tmp_gpr,
7488 gen_rtx_AND (DImode,
7489 gen_rtx_ASHIFT (DImode,
7490 element2,
7491 GEN_INT (6)),
7492 GEN_INT (64))));
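	  /* I.e. tmp_gpr = (element2 << 6) & 64: a VSLO shift count of
	     0 bits for element 0 and 64 bits for element 1.  */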
7494 else
7496 if (!BYTES_BIG_ENDIAN)
7498 rtx num_ele_m1 = GEN_INT (GET_MODE_NUNITS (mode) - 1);
7500 emit_insn (gen_anddi3 (tmp_gpr, element, num_ele_m1));
7501 emit_insn (gen_subdi3 (tmp_gpr, num_ele_m1, tmp_gpr));
7502 element2 = tmp_gpr;
7504 else
7505 element2 = element;
7507 emit_insn (gen_ashldi3 (tmp_gpr, element2, GEN_INT (bit_shift)));
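	  /* E.g. for V8HImode, byte_shift is 1 and bit_shift is 4; extracting
	     element 5 on little endian becomes 7 - 5 = 2, shifted to
	     2 << 4 = 32 bits, i.e. a 4-byte VSLO.  */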
7510 /* Get the value into the lower byte of the Altivec register where VSLO
7511 expects it. */
7512 if (TARGET_P9_VECTOR)
7513 emit_insn (gen_vsx_splat_v2di (tmp_altivec, tmp_gpr));
7514 else if (can_create_pseudo_p ())
7515 emit_insn (gen_vsx_concat_v2di (tmp_altivec, tmp_gpr, tmp_gpr));
7516 else
7518 rtx tmp_di = gen_rtx_REG (DImode, REGNO (tmp_altivec));
7519 emit_move_insn (tmp_di, tmp_gpr);
7520 emit_insn (gen_vsx_concat_v2di (tmp_altivec, tmp_di, tmp_di));
7523 /* Do the VSLO to get the value into the final location. */
7524 switch (mode)
7526 case E_V2DFmode:
7527 emit_insn (gen_vsx_vslo_v2df (dest, src, tmp_altivec));
7528 return;
7530 case E_V2DImode:
7531 emit_insn (gen_vsx_vslo_v2di (dest, src, tmp_altivec));
7532 return;
7534 case E_V4SFmode:
7536 rtx tmp_altivec_di = gen_rtx_REG (DImode, REGNO (tmp_altivec));
7537 rtx tmp_altivec_v4sf = gen_rtx_REG (V4SFmode, REGNO (tmp_altivec));
7538 rtx src_v2di = gen_rtx_REG (V2DImode, REGNO (src));
7539 emit_insn (gen_vsx_vslo_v2di (tmp_altivec_di, src_v2di,
7540 tmp_altivec));
7542 emit_insn (gen_vsx_xscvspdp_scalar2 (dest, tmp_altivec_v4sf));
7543 return;
7546 case E_V4SImode:
7547 case E_V8HImode:
7548 case E_V16QImode:
7550 rtx tmp_altivec_di = gen_rtx_REG (DImode, REGNO (tmp_altivec));
7551 rtx src_v2di = gen_rtx_REG (V2DImode, REGNO (src));
7552 rtx tmp_gpr_di = gen_rtx_REG (DImode, REGNO (dest));
7553 emit_insn (gen_vsx_vslo_v2di (tmp_altivec_di, src_v2di,
7554 tmp_altivec));
7555 emit_move_insn (tmp_gpr_di, tmp_altivec_di);
7556 emit_insn (gen_ashrdi3 (tmp_gpr_di, tmp_gpr_di,
7557 GEN_INT (64 - (8 * scalar_size))));
7558 return;
7561 default:
7562 gcc_unreachable ();
7565 return;
7567 else
7568 gcc_unreachable ();
7571 /* Helper function for rs6000_split_v4si_init to build up a DImode value from
7572 two SImode values. */
7574 static void
7575 rs6000_split_v4si_init_di_reg (rtx dest, rtx si1, rtx si2, rtx tmp)
7577 const unsigned HOST_WIDE_INT mask_32bit = HOST_WIDE_INT_C (0xffffffff);
7579 if (CONST_INT_P (si1) && CONST_INT_P (si2))
7581 unsigned HOST_WIDE_INT const1 = (UINTVAL (si1) & mask_32bit) << 32;
7582 unsigned HOST_WIDE_INT const2 = UINTVAL (si2) & mask_32bit;
7584 emit_move_insn (dest, GEN_INT (const1 | const2));
7585 return;
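  /* E.g. si1 = 0x11111111 and si2 = 0x22222222 merge into the single
     constant 0x1111111122222222.  */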
7588 /* Put si1 into upper 32-bits of dest. */
7589 if (CONST_INT_P (si1))
7590 emit_move_insn (dest, GEN_INT ((UINTVAL (si1) & mask_32bit) << 32));
7591 else
7593 /* Generate RLDIC. */
7594 rtx si1_di = gen_rtx_REG (DImode, regno_or_subregno (si1));
7595 rtx shift_rtx = gen_rtx_ASHIFT (DImode, si1_di, GEN_INT (32));
7596 rtx mask_rtx = GEN_INT (mask_32bit << 32);
7597 rtx and_rtx = gen_rtx_AND (DImode, shift_rtx, mask_rtx);
7598 gcc_assert (!reg_overlap_mentioned_p (dest, si1));
7599 emit_insn (gen_rtx_SET (dest, and_rtx));
7602 /* Put si2 into the temporary. */
7603 gcc_assert (!reg_overlap_mentioned_p (dest, tmp));
7604 if (CONST_INT_P (si2))
7605 emit_move_insn (tmp, GEN_INT (UINTVAL (si2) & mask_32bit));
7606 else
7607 emit_insn (gen_zero_extendsidi2 (tmp, si2));
7609 /* Combine the two parts. */
7610 emit_insn (gen_iordi3 (dest, dest, tmp));
7611 return;
7614 /* Split a V4SI initialization. */
7616 void
7617 rs6000_split_v4si_init (rtx operands[])
7619 rtx dest = operands[0];
7621 /* Destination is a GPR, build up the two DImode parts in place. */
7622 if (REG_P (dest) || SUBREG_P (dest))
7624 int d_regno = regno_or_subregno (dest);
7625 rtx scalar1 = operands[1];
7626 rtx scalar2 = operands[2];
7627 rtx scalar3 = operands[3];
7628 rtx scalar4 = operands[4];
7629 rtx tmp1 = operands[5];
7630 rtx tmp2 = operands[6];
7632 /* Even though we only need one temporary (plus the destination, which
7633	 has an early clobber constraint), try to use two temporaries, one for
7634 each double word created. That way the 2nd insn scheduling pass can
7635 rearrange things so the two parts are done in parallel. */
7636 if (BYTES_BIG_ENDIAN)
7638 rtx di_lo = gen_rtx_REG (DImode, d_regno);
7639 rtx di_hi = gen_rtx_REG (DImode, d_regno + 1);
7640 rs6000_split_v4si_init_di_reg (di_lo, scalar1, scalar2, tmp1);
7641 rs6000_split_v4si_init_di_reg (di_hi, scalar3, scalar4, tmp2);
7643 else
7645 rtx di_lo = gen_rtx_REG (DImode, d_regno + 1);
7646 rtx di_hi = gen_rtx_REG (DImode, d_regno);
7647 rs6000_split_v4si_init_di_reg (di_lo, scalar4, scalar3, tmp1);
7648 rs6000_split_v4si_init_di_reg (di_hi, scalar2, scalar1, tmp2);
7650 return;
7653 else
7654 gcc_unreachable ();
7657 /* Return alignment of TYPE. Existing alignment is ALIGN. HOW
7658    selects whether the alignment is ABI-mandated, optional, or
7659    both.  */
7661 unsigned int
7662 rs6000_data_alignment (tree type, unsigned int align, enum data_align how)
7664 if (how != align_opt)
7666 if (TREE_CODE (type) == VECTOR_TYPE && align < 128)
7667 align = 128;
7670 if (how != align_abi)
7672 if (TREE_CODE (type) == ARRAY_TYPE
7673 && TYPE_MODE (TREE_TYPE (type)) == QImode)
7675 if (align < BITS_PER_WORD)
7676 align = BITS_PER_WORD;
7680 return align;
7683 /* Implement TARGET_SLOW_UNALIGNED_ACCESS. Altivec vector memory
7684 instructions simply ignore the low bits; VSX memory instructions
7685 are aligned to 4 or 8 bytes. */
7687 static bool
7688 rs6000_slow_unaligned_access (machine_mode mode, unsigned int align)
7690 return (STRICT_ALIGNMENT
7691 || (!TARGET_EFFICIENT_UNALIGNED_VSX
7692 && ((SCALAR_FLOAT_MODE_NOT_VECTOR_P (mode) && align < 32)
7693 || ((VECTOR_MODE_P (mode) || FLOAT128_VECTOR_P (mode))
7694 && (int) align < VECTOR_ALIGN (mode)))));
7697 /* Previous GCC releases forced all vector types to have 16-byte alignment. */
7699 bool
7700 rs6000_special_adjust_field_align_p (tree type, unsigned int computed)
7702 if (TARGET_ALTIVEC && TREE_CODE (type) == VECTOR_TYPE)
7704 if (computed != 128)
7706 static bool warned;
7707 if (!warned && warn_psabi)
7709 warned = true;
7710 inform (input_location,
7711 "the layout of aggregates containing vectors with"
7712 " %d-byte alignment has changed in GCC 5",
7713 computed / BITS_PER_UNIT);
7716 /* In current GCC there is no special case. */
7717 return false;
7720 return false;
7723 /* AIX increases natural record alignment to doubleword if the first
7724 field is an FP double while the FP fields remain word aligned. */
7726 unsigned int
7727 rs6000_special_round_type_align (tree type, unsigned int computed,
7728 unsigned int specified)
7730 unsigned int align = MAX (computed, specified);
7731 tree field = TYPE_FIELDS (type);
7733   /* Skip all non-field decls.  */
7734 while (field != NULL && TREE_CODE (field) != FIELD_DECL)
7735 field = DECL_CHAIN (field);
7737 if (field != NULL && field != type)
7739 type = TREE_TYPE (field);
7740 while (TREE_CODE (type) == ARRAY_TYPE)
7741 type = TREE_TYPE (type);
7743 if (type != error_mark_node && TYPE_MODE (type) == DFmode)
7744 align = MAX (align, 64);
7747 return align;
7750 /* Darwin increases record alignment to the natural alignment of
7751 the first field. */
7753 unsigned int
7754 darwin_rs6000_special_round_type_align (tree type, unsigned int computed,
7755 unsigned int specified)
7757 unsigned int align = MAX (computed, specified);
7759 if (TYPE_PACKED (type))
7760 return align;
7762 /* Find the first field, looking down into aggregates. */
7763 do {
7764 tree field = TYPE_FIELDS (type);
7765     /* Skip all non-field decls.  */
7766 while (field != NULL && TREE_CODE (field) != FIELD_DECL)
7767 field = DECL_CHAIN (field);
7768 if (! field)
7769 break;
7770 /* A packed field does not contribute any extra alignment. */
7771 if (DECL_PACKED (field))
7772 return align;
7773 type = TREE_TYPE (field);
7774 while (TREE_CODE (type) == ARRAY_TYPE)
7775 type = TREE_TYPE (type);
7776 } while (AGGREGATE_TYPE_P (type));
7778 if (! AGGREGATE_TYPE_P (type) && type != error_mark_node)
7779 align = MAX (align, TYPE_ALIGN (type));
7781 return align;
7784 /* Return 1 for an operand in small memory on V.4/eabi. */
7787 small_data_operand (rtx op ATTRIBUTE_UNUSED,
7788 machine_mode mode ATTRIBUTE_UNUSED)
7790 #if TARGET_ELF
7791 rtx sym_ref;
7793 if (rs6000_sdata == SDATA_NONE || rs6000_sdata == SDATA_DATA)
7794 return 0;
7796 if (DEFAULT_ABI != ABI_V4)
7797 return 0;
7799 if (GET_CODE (op) == SYMBOL_REF)
7800 sym_ref = op;
7802 else if (GET_CODE (op) != CONST
7803 || GET_CODE (XEXP (op, 0)) != PLUS
7804 || GET_CODE (XEXP (XEXP (op, 0), 0)) != SYMBOL_REF
7805 || GET_CODE (XEXP (XEXP (op, 0), 1)) != CONST_INT)
7806 return 0;
7808 else
7810 rtx sum = XEXP (op, 0);
7811 HOST_WIDE_INT summand;
7813 /* We have to be careful here, because it is the referenced address
7814 that must be 32k from _SDA_BASE_, not just the symbol. */
7815 summand = INTVAL (XEXP (sum, 1));
7816 if (summand < 0 || summand > g_switch_value)
7817 return 0;
7819 sym_ref = XEXP (sum, 0);
7822 return SYMBOL_REF_SMALL_P (sym_ref);
7823 #else
7824 return 0;
7825 #endif
7828 /* Return true if either operand is a general purpose register. */
7830 bool
7831 gpr_or_gpr_p (rtx op0, rtx op1)
7833 return ((REG_P (op0) && INT_REGNO_P (REGNO (op0)))
7834 || (REG_P (op1) && INT_REGNO_P (REGNO (op1))));
7837 /* Return true if this is a move direct operation between GPR registers and
7838 floating point/VSX registers. */
7840 bool
7841 direct_move_p (rtx op0, rtx op1)
7843 int regno0, regno1;
7845 if (!REG_P (op0) || !REG_P (op1))
7846 return false;
7848 if (!TARGET_DIRECT_MOVE && !TARGET_MFPGPR)
7849 return false;
7851 regno0 = REGNO (op0);
7852 regno1 = REGNO (op1);
7853 if (regno0 >= FIRST_PSEUDO_REGISTER || regno1 >= FIRST_PSEUDO_REGISTER)
7854 return false;
7856 if (INT_REGNO_P (regno0))
7857 return (TARGET_DIRECT_MOVE) ? VSX_REGNO_P (regno1) : FP_REGNO_P (regno1);
7859 else if (INT_REGNO_P (regno1))
7861 if (TARGET_MFPGPR && FP_REGNO_P (regno0))
7862 return true;
7864 else if (TARGET_DIRECT_MOVE && VSX_REGNO_P (regno0))
7865 return true;
7868 return false;
7871 /* Return true if the OFFSET is valid for the quad address instructions that
7872 use d-form (register + offset) addressing. */
7874 static inline bool
7875 quad_address_offset_p (HOST_WIDE_INT offset)
7877 return (IN_RANGE (offset, -32768, 32767) && ((offset) & 0xf) == 0);
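  /* E.g. offsets 0, 16, and -32768 are accepted; 8 is rejected (not a
     multiple of 16), as is 32768 (out of range).  */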
7880 /* Return true if the ADDR is an acceptable address for a quad memory
7881    operation of mode MODE (either LQ/STQ for general purpose registers, or
7882    LXV/STXV for vector registers under ISA 3.0).  STRICT is true if the
7883    address must satisfy the strict register checks used after reload.  */
7886 bool
7887 quad_address_p (rtx addr, machine_mode mode, bool strict)
7889 rtx op0, op1;
7891 if (GET_MODE_SIZE (mode) != 16)
7892 return false;
7894 if (legitimate_indirect_address_p (addr, strict))
7895 return true;
7897 if (VECTOR_MODE_P (mode) && !mode_supports_dq_form (mode))
7898 return false;
7900 if (GET_CODE (addr) != PLUS)
7901 return false;
7903 op0 = XEXP (addr, 0);
7904 if (!REG_P (op0) || !INT_REG_OK_FOR_BASE_P (op0, strict))
7905 return false;
7907 op1 = XEXP (addr, 1);
7908 if (!CONST_INT_P (op1))
7909 return false;
7911 return quad_address_offset_p (INTVAL (op1));
7914 /* Return true if this is a load or store quad operation. This function does
7915 not handle the atomic quad memory instructions. */
7917 bool
7918 quad_load_store_p (rtx op0, rtx op1)
7920 bool ret;
7922 if (!TARGET_QUAD_MEMORY)
7923 ret = false;
7925 else if (REG_P (op0) && MEM_P (op1))
7926 ret = (quad_int_reg_operand (op0, GET_MODE (op0))
7927 && quad_memory_operand (op1, GET_MODE (op1))
7928 && !reg_overlap_mentioned_p (op0, op1));
7930 else if (MEM_P (op0) && REG_P (op1))
7931 ret = (quad_memory_operand (op0, GET_MODE (op0))
7932 && quad_int_reg_operand (op1, GET_MODE (op1)));
7934 else
7935 ret = false;
7937 if (TARGET_DEBUG_ADDR)
7939 fprintf (stderr, "\n========== quad_load_store, return %s\n",
7940 ret ? "true" : "false");
7941 debug_rtx (gen_rtx_SET (op0, op1));
7944 return ret;
7947 /* Given an address, return a constant offset term if one exists. */
7949 static rtx
7950 address_offset (rtx op)
7952 if (GET_CODE (op) == PRE_INC
7953 || GET_CODE (op) == PRE_DEC)
7954 op = XEXP (op, 0);
7955 else if (GET_CODE (op) == PRE_MODIFY
7956 || GET_CODE (op) == LO_SUM)
7957 op = XEXP (op, 1);
7959 if (GET_CODE (op) == CONST)
7960 op = XEXP (op, 0);
7962 if (GET_CODE (op) == PLUS)
7963 op = XEXP (op, 1);
7965 if (CONST_INT_P (op))
7966 return op;
7968 return NULL_RTX;
7971 /* Return true if the MEM operand is a memory operand suitable for use
7972 with a (full width, possibly multiple) gpr load/store. On
7973 powerpc64 this means the offset must be divisible by 4.
7974 Implements 'Y' constraint.
7976 Accept direct, indexed, offset, lo_sum and tocref. Since this is
7977 a constraint function we know the operand has satisfied a suitable
7978 memory predicate. Also accept some odd rtl generated by reload
7979 (see rs6000_legitimize_reload_address for various forms). It is
7980 important that reload rtl be accepted by appropriate constraints
7981 but not by the operand predicate.
7983 Offsetting a lo_sum should not be allowed, except where we know by
7984 alignment that a 32k boundary is not crossed, but see the ???
7985 comment in rs6000_legitimize_reload_address. Note that by
7986 "offsetting" here we mean a further offset to access parts of the
7987 MEM. It's fine to have a lo_sum where the inner address is offset
7988 from a sym, since the same sym+offset will appear in the high part
7989 of the address calculation. */
7991 bool
7992 mem_operand_gpr (rtx op, machine_mode mode)
7994 unsigned HOST_WIDE_INT offset;
7995 int extra;
7996 rtx addr = XEXP (op, 0);
7998 /* Don't allow non-offsettable addresses. See PRs 83969 and 84279. */
7999 if (!rs6000_offsettable_memref_p (op, mode, false))
8000 return false;
8002 op = address_offset (addr);
8003 if (op == NULL_RTX)
8004 return true;
8006 offset = INTVAL (op);
8007 if (TARGET_POWERPC64 && (offset & 3) != 0)
8008 return false;
8010 extra = GET_MODE_SIZE (mode) - UNITS_PER_WORD;
8011 if (extra < 0)
8012 extra = 0;
8014 if (GET_CODE (addr) == LO_SUM)
8015 /* For lo_sum addresses, we must allow any offset except one that
8016 causes a wrap, so test only the low 16 bits. */
8017 offset = ((offset & 0xffff) ^ 0x8000) - 0x8000;
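    /* The XOR/subtract sign-extends the low 16 bits: e.g. 0x9000 becomes
       (0x9000 ^ 0x8000) - 0x8000 = -0x7000.  */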
8019 return offset + 0x8000 < 0x10000u - extra;
8022 /* As above, but for DS-FORM VSX insns. Unlike mem_operand_gpr,
8023 enforce an offset divisible by 4 even for 32-bit. */
8025 bool
8026 mem_operand_ds_form (rtx op, machine_mode mode)
8028 unsigned HOST_WIDE_INT offset;
8029 int extra;
8030 rtx addr = XEXP (op, 0);
8032 if (!offsettable_address_p (false, mode, addr))
8033 return false;
8035 op = address_offset (addr);
8036 if (op == NULL_RTX)
8037 return true;
8039 offset = INTVAL (op);
8040 if ((offset & 3) != 0)
8041 return false;
8043 extra = GET_MODE_SIZE (mode) - UNITS_PER_WORD;
8044 if (extra < 0)
8045 extra = 0;
8047 if (GET_CODE (addr) == LO_SUM)
8048 /* For lo_sum addresses, we must allow any offset except one that
8049 causes a wrap, so test only the low 16 bits. */
8050 offset = ((offset & 0xffff) ^ 0x8000) - 0x8000;
8052 return offset + 0x8000 < 0x10000u - extra;
8055 /* Subroutines of rs6000_legitimize_address and rs6000_legitimate_address_p. */
8057 static bool
8058 reg_offset_addressing_ok_p (machine_mode mode)
8060 switch (mode)
8062 case E_V16QImode:
8063 case E_V8HImode:
8064 case E_V4SFmode:
8065 case E_V4SImode:
8066 case E_V2DFmode:
8067 case E_V2DImode:
8068 case E_V1TImode:
8069 case E_TImode:
8070 case E_TFmode:
8071 case E_KFmode:
8072 /* AltiVec/VSX vector modes. Only reg+reg addressing was valid until the
8073 ISA 3.0 vector d-form addressing mode was added. While TImode is not
8074 a vector mode, if we want to use the VSX registers to move it around,
8075 we need to restrict ourselves to reg+reg addressing. Similarly for
8076 IEEE 128-bit floating point that is passed in a single vector
8077 register. */
8078 if (VECTOR_MEM_ALTIVEC_OR_VSX_P (mode))
8079 return mode_supports_dq_form (mode);
8080 break;
8082 case E_SDmode:
8083 /* If we can do direct load/stores of SDmode, restrict it to reg+reg
8084 addressing for the LFIWZX and STFIWX instructions. */
8085 if (TARGET_NO_SDMODE_STACK)
8086 return false;
8087 break;
8089 default:
8090 break;
8093 return true;
8096 static bool
8097 virtual_stack_registers_memory_p (rtx op)
8099 int regnum;
8101 if (GET_CODE (op) == REG)
8102 regnum = REGNO (op);
8104 else if (GET_CODE (op) == PLUS
8105 && GET_CODE (XEXP (op, 0)) == REG
8106 && GET_CODE (XEXP (op, 1)) == CONST_INT)
8107 regnum = REGNO (XEXP (op, 0));
8109 else
8110 return false;
8112 return (regnum >= FIRST_VIRTUAL_REGISTER
8113 && regnum <= LAST_VIRTUAL_POINTER_REGISTER);
8116 /* Return true if a MODE-sized memory access to OP plus OFFSET
8117    is known not to straddle a 32k boundary.  This function is used
8118 to determine whether -mcmodel=medium code can use TOC pointer
8119 relative addressing for OP. This means the alignment of the TOC
8120 pointer must also be taken into account, and unfortunately that is
8121 only 8 bytes. */
8123 #ifndef POWERPC64_TOC_POINTER_ALIGNMENT
8124 #define POWERPC64_TOC_POINTER_ALIGNMENT 8
8125 #endif
8127 static bool
8128 offsettable_ok_by_alignment (rtx op, HOST_WIDE_INT offset,
8129 machine_mode mode)
8131 tree decl;
8132 unsigned HOST_WIDE_INT dsize, dalign, lsb, mask;
8134 if (GET_CODE (op) != SYMBOL_REF)
8135 return false;
8137 /* ISA 3.0 vector d-form addressing is restricted, don't allow
8138 SYMBOL_REF. */
8139 if (mode_supports_dq_form (mode))
8140 return false;
8142 dsize = GET_MODE_SIZE (mode);
8143 decl = SYMBOL_REF_DECL (op);
8144 if (!decl)
8146 if (dsize == 0)
8147 return false;
8149 /* -fsection-anchors loses the original SYMBOL_REF_DECL when
8150 replacing memory addresses with an anchor plus offset. We
8151 could find the decl by rummaging around in the block->objects
8152 VEC for the given offset but that seems like too much work. */
8153 dalign = BITS_PER_UNIT;
8154 if (SYMBOL_REF_HAS_BLOCK_INFO_P (op)
8155 && SYMBOL_REF_ANCHOR_P (op)
8156 && SYMBOL_REF_BLOCK (op) != NULL)
8158 struct object_block *block = SYMBOL_REF_BLOCK (op);
8160 dalign = block->alignment;
8161 offset += SYMBOL_REF_BLOCK_OFFSET (op);
8163 else if (CONSTANT_POOL_ADDRESS_P (op))
8165       /* It would be nice to have get_pool_align ().  */
8166 machine_mode cmode = get_pool_mode (op);
8168 dalign = GET_MODE_ALIGNMENT (cmode);
8171 else if (DECL_P (decl))
8173 dalign = DECL_ALIGN (decl);
8175 if (dsize == 0)
8177 /* Allow BLKmode when the entire object is known to not
8178 cross a 32k boundary. */
8179 if (!DECL_SIZE_UNIT (decl))
8180 return false;
8182 if (!tree_fits_uhwi_p (DECL_SIZE_UNIT (decl)))
8183 return false;
8185 dsize = tree_to_uhwi (DECL_SIZE_UNIT (decl));
8186 if (dsize > 32768)
8187 return false;
8189 dalign /= BITS_PER_UNIT;
8190 if (dalign > POWERPC64_TOC_POINTER_ALIGNMENT)
8191 dalign = POWERPC64_TOC_POINTER_ALIGNMENT;
8192 return dalign >= dsize;
8195 else
8196 gcc_unreachable ();
8198 /* Find how many bits of the alignment we know for this access. */
8199 dalign /= BITS_PER_UNIT;
8200 if (dalign > POWERPC64_TOC_POINTER_ALIGNMENT)
8201 dalign = POWERPC64_TOC_POINTER_ALIGNMENT;
8202 mask = dalign - 1;
8203 lsb = offset & -offset;
8204 mask &= lsb - 1;
8205 dalign = mask + 1;
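  /* E.g. dalign = 8 with offset = 4: lsb = 4, so the access is only known
     to be 4-byte aligned; a 4-byte access passes, an 8-byte one fails.  */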
8207 return dalign >= dsize;
8210 static bool
8211 constant_pool_expr_p (rtx op)
8213 rtx base, offset;
8215 split_const (op, &base, &offset);
8216 return (GET_CODE (base) == SYMBOL_REF
8217 && CONSTANT_POOL_ADDRESS_P (base)
8218 && ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (get_pool_constant (base), Pmode));
8221 /* These are only used to pass through from print_operand/print_operand_address
8222 to rs6000_output_addr_const_extra over the intervening function
8223 output_addr_const which is not target code. */
8224 static const_rtx tocrel_base_oac, tocrel_offset_oac;
8226 /* Return true if OP is a toc pointer relative address (the output
8227 of create_TOC_reference). If STRICT, do not match non-split
8228 -mcmodel=large/medium toc pointer relative addresses. If the pointers
8229 are non-NULL, place base and offset pieces in TOCREL_BASE_RET and
8230 TOCREL_OFFSET_RET respectively. */
8232 bool
8233 toc_relative_expr_p (const_rtx op, bool strict, const_rtx *tocrel_base_ret,
8234 const_rtx *tocrel_offset_ret)
8236 if (!TARGET_TOC)
8237 return false;
8239 if (TARGET_CMODEL != CMODEL_SMALL)
8241       /* When strict, ensure we have everything tidy.  */
8242 if (strict
8243 && !(GET_CODE (op) == LO_SUM
8244 && REG_P (XEXP (op, 0))
8245 && INT_REG_OK_FOR_BASE_P (XEXP (op, 0), strict)))
8246 return false;
8248 /* When not strict, allow non-split TOC addresses and also allow
8249 (lo_sum (high ..)) TOC addresses created during reload. */
8250 if (GET_CODE (op) == LO_SUM)
8251 op = XEXP (op, 1);
8254 const_rtx tocrel_base = op;
8255 const_rtx tocrel_offset = const0_rtx;
8257 if (GET_CODE (op) == PLUS && add_cint_operand (XEXP (op, 1), GET_MODE (op)))
8259 tocrel_base = XEXP (op, 0);
8260 tocrel_offset = XEXP (op, 1);
8263 if (tocrel_base_ret)
8264 *tocrel_base_ret = tocrel_base;
8265 if (tocrel_offset_ret)
8266 *tocrel_offset_ret = tocrel_offset;
8268 return (GET_CODE (tocrel_base) == UNSPEC
8269 && XINT (tocrel_base, 1) == UNSPEC_TOCREL);
8272 /* Return true if X is a constant pool address, and also for cmodel=medium
8273 if X is a toc-relative address known to be offsettable within MODE. */
8275 bool
8276 legitimate_constant_pool_address_p (const_rtx x, machine_mode mode,
8277 bool strict)
8279 const_rtx tocrel_base, tocrel_offset;
8280 return (toc_relative_expr_p (x, strict, &tocrel_base, &tocrel_offset)
8281 && (TARGET_CMODEL != CMODEL_MEDIUM
8282 || constant_pool_expr_p (XVECEXP (tocrel_base, 0, 0))
8283 || mode == QImode
8284 || offsettable_ok_by_alignment (XVECEXP (tocrel_base, 0, 0),
8285 INTVAL (tocrel_offset), mode)));
8288 static bool
8289 legitimate_small_data_p (machine_mode mode, rtx x)
8291 return (DEFAULT_ABI == ABI_V4
8292 && !flag_pic && !TARGET_TOC
8293 && (GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == CONST)
8294 && small_data_operand (x, mode));
8297 bool
8298 rs6000_legitimate_offset_address_p (machine_mode mode, rtx x,
8299 bool strict, bool worst_case)
8301 unsigned HOST_WIDE_INT offset;
8302 unsigned int extra;
8304 if (GET_CODE (x) != PLUS)
8305 return false;
8306 if (!REG_P (XEXP (x, 0)))
8307 return false;
8308 if (!INT_REG_OK_FOR_BASE_P (XEXP (x, 0), strict))
8309 return false;
8310 if (mode_supports_dq_form (mode))
8311 return quad_address_p (x, mode, strict);
8312 if (!reg_offset_addressing_ok_p (mode))
8313 return virtual_stack_registers_memory_p (x);
8314 if (legitimate_constant_pool_address_p (x, mode, strict || lra_in_progress))
8315 return true;
8316 if (GET_CODE (XEXP (x, 1)) != CONST_INT)
8317 return false;
8319 offset = INTVAL (XEXP (x, 1));
8320 extra = 0;
8321 switch (mode)
8323 case E_DFmode:
8324 case E_DDmode:
8325 case E_DImode:
8326 /* If we are using VSX scalar loads, restrict ourselves to reg+reg
8327 addressing. */
8328 if (VECTOR_MEM_VSX_P (mode))
8329 return false;
8331 if (!worst_case)
8332 break;
8333 if (!TARGET_POWERPC64)
8334 extra = 4;
8335 else if (offset & 3)
8336 return false;
8337 break;
8339 case E_TFmode:
8340 case E_IFmode:
8341 case E_KFmode:
8342 case E_TDmode:
8343 case E_TImode:
8344 case E_PTImode:
8345 extra = 8;
8346 if (!worst_case)
8347 break;
8348 if (!TARGET_POWERPC64)
8349 extra = 12;
8350 else if (offset & 3)
8351 return false;
8352 break;
8354 default:
8355 break;
8358 offset += 0x8000;
8359 return offset < 0x10000 - extra;
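  /* I.e. accept signed 16-bit offsets in [-0x8000, 0x7fff - extra], where
     EXTRA leaves room for the remaining words of a multi-word access.  */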
8362 bool
8363 legitimate_indexed_address_p (rtx x, int strict)
8365 rtx op0, op1;
8367 if (GET_CODE (x) != PLUS)
8368 return false;
8370 op0 = XEXP (x, 0);
8371 op1 = XEXP (x, 1);
8373 return (REG_P (op0) && REG_P (op1)
8374 && ((INT_REG_OK_FOR_BASE_P (op0, strict)
8375 && INT_REG_OK_FOR_INDEX_P (op1, strict))
8376 || (INT_REG_OK_FOR_BASE_P (op1, strict)
8377 && INT_REG_OK_FOR_INDEX_P (op0, strict))));
8380 bool
8381 avoiding_indexed_address_p (machine_mode mode)
8383 /* Avoid indexed addressing for modes that have non-indexed
8384 load/store instruction forms. */
8385 return (TARGET_AVOID_XFORM && VECTOR_MEM_NONE_P (mode));
8388 bool
8389 legitimate_indirect_address_p (rtx x, int strict)
8391 return GET_CODE (x) == REG && INT_REG_OK_FOR_BASE_P (x, strict);
8394 bool
8395 macho_lo_sum_memory_operand (rtx x, machine_mode mode)
8397 if (!TARGET_MACHO || !flag_pic
8398 || mode != SImode || GET_CODE (x) != MEM)
8399 return false;
8400 x = XEXP (x, 0);
8402 if (GET_CODE (x) != LO_SUM)
8403 return false;
8404 if (GET_CODE (XEXP (x, 0)) != REG)
8405 return false;
8406 if (!INT_REG_OK_FOR_BASE_P (XEXP (x, 0), 0))
8407 return false;
8408 x = XEXP (x, 1);
8410 return CONSTANT_P (x);
8413 static bool
8414 legitimate_lo_sum_address_p (machine_mode mode, rtx x, int strict)
8416 if (GET_CODE (x) != LO_SUM)
8417 return false;
8418 if (GET_CODE (XEXP (x, 0)) != REG)
8419 return false;
8420 if (!INT_REG_OK_FOR_BASE_P (XEXP (x, 0), strict))
8421 return false;
8422 /* quad word addresses are restricted, and we can't use LO_SUM. */
8423 if (mode_supports_dq_form (mode))
8424 return false;
8425 x = XEXP (x, 1);
8427 if (TARGET_ELF || TARGET_MACHO)
8429 bool large_toc_ok;
8431 if (DEFAULT_ABI == ABI_V4 && flag_pic)
8432 return false;
8433 /* LRA doesn't use LEGITIMIZE_RELOAD_ADDRESS as it usually calls
8434 push_reload from reload pass code. LEGITIMIZE_RELOAD_ADDRESS
8435	 recognizes some LO_SUM addresses as valid although this
8436	 function says the opposite.  In most cases LRA can generate
8437	 correct code for address reloads through its various
8438	 transformations, but it cannot handle some LO_SUM cases on its
8439	 own.  So we need to add code here, analogous to that in
8440	 rs6000_legitimize_reload_address for LO_SUM, saying that some
	 addresses are still valid.  */
8441 large_toc_ok = (lra_in_progress && TARGET_CMODEL != CMODEL_SMALL
8442 && small_toc_ref (x, VOIDmode));
8443 if (TARGET_TOC && ! large_toc_ok)
8444 return false;
8445 if (GET_MODE_NUNITS (mode) != 1)
8446 return false;
8447 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD
8448 && !(/* ??? Assume floating point reg based on mode? */
8449 TARGET_HARD_FLOAT && (mode == DFmode || mode == DDmode)))
8450 return false;
8452 return CONSTANT_P (x) || large_toc_ok;
8455 return false;
8459 /* Try machine-dependent ways of modifying an illegitimate address
8460 to be legitimate. If we find one, return the new, valid address.
8461 This is used from only one place: `memory_address' in explow.c.
8463 OLDX is the address as it was before break_out_memory_refs was
8464 called. In some cases it is useful to look at this to decide what
8465 needs to be done.
8467 It is always safe for this function to do nothing. It exists to
8468 recognize opportunities to optimize the output.
8470 On RS/6000, first check for the sum of a register with a constant
8471 integer that is out of range. If so, generate code to add the
8472 constant with the low-order 16 bits masked to the register and force
8473 this result into another register (this can be done with `cau').
8474 Then generate an address of REG+(CONST&0xffff), allowing for the
8475 possibility of bit 16 being a one.
8477 Then check for the sum of a register and something not constant, try to
8478 load the other things into a register and return the sum. */
8480 static rtx
8481 rs6000_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
8482 machine_mode mode)
8484 unsigned int extra;
8486 if (!reg_offset_addressing_ok_p (mode)
8487 || mode_supports_dq_form (mode))
8489 if (virtual_stack_registers_memory_p (x))
8490 return x;
8492 /* In theory we should not be seeing addresses of the form reg+0,
8493 but just in case it is generated, optimize it away. */
8494 if (GET_CODE (x) == PLUS && XEXP (x, 1) == const0_rtx)
8495 return force_reg (Pmode, XEXP (x, 0));
8497 /* For TImode with load/store quad, restrict addresses to just a single
8498 pointer, so it works with both GPRs and VSX registers. */
8499 /* Make sure both operands are registers. */
8500 else if (GET_CODE (x) == PLUS
8501 && (mode != TImode || !TARGET_VSX))
8502 return gen_rtx_PLUS (Pmode,
8503 force_reg (Pmode, XEXP (x, 0)),
8504 force_reg (Pmode, XEXP (x, 1)));
8505 else
8506 return force_reg (Pmode, x);
8508 if (GET_CODE (x) == SYMBOL_REF)
8510 enum tls_model model = SYMBOL_REF_TLS_MODEL (x);
8511 if (model != 0)
8512 return rs6000_legitimize_tls_address (x, model);
8515 extra = 0;
8516 switch (mode)
8518 case E_TFmode:
8519 case E_TDmode:
8520 case E_TImode:
8521 case E_PTImode:
8522 case E_IFmode:
8523 case E_KFmode:
8524 /* As in legitimate_offset_address_p we do not assume
8525 worst-case. The mode here is just a hint as to the registers
8526 used. A TImode is usually in gprs, but may actually be in
8527 fprs. Leave worst-case scenario for reload to handle via
8528 insn constraints. PTImode is only GPRs. */
8529 extra = 8;
8530 break;
8531 default:
8532 break;
8535 if (GET_CODE (x) == PLUS
8536 && GET_CODE (XEXP (x, 0)) == REG
8537 && GET_CODE (XEXP (x, 1)) == CONST_INT
8538 && ((unsigned HOST_WIDE_INT) (INTVAL (XEXP (x, 1)) + 0x8000)
8539 >= 0x10000 - extra))
8541 HOST_WIDE_INT high_int, low_int;
8542 rtx sum;
8543 low_int = ((INTVAL (XEXP (x, 1)) & 0xffff) ^ 0x8000) - 0x8000;
8544 if (low_int >= 0x8000 - extra)
8545 low_int = 0;
8546 high_int = INTVAL (XEXP (x, 1)) - low_int;
8547 sum = force_operand (gen_rtx_PLUS (Pmode, XEXP (x, 0),
8548 GEN_INT (high_int)), 0);
8549 return plus_constant (Pmode, sum, low_int);
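      /* E.g. r3 + 0x12345 splits into high_int = 0x10000 and
	 low_int = 0x2345: an addis adds the high part, and the mem insn
	 supplies the 0x2345 displacement.  */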
8551 else if (GET_CODE (x) == PLUS
8552 && GET_CODE (XEXP (x, 0)) == REG
8553 && GET_CODE (XEXP (x, 1)) != CONST_INT
8554 && GET_MODE_NUNITS (mode) == 1
8555 && (GET_MODE_SIZE (mode) <= UNITS_PER_WORD
8556 || (/* ??? Assume floating point reg based on mode? */
8557 TARGET_HARD_FLOAT && (mode == DFmode || mode == DDmode)))
8558 && !avoiding_indexed_address_p (mode))
8560 return gen_rtx_PLUS (Pmode, XEXP (x, 0),
8561 force_reg (Pmode, force_operand (XEXP (x, 1), 0)));
8563 else if ((TARGET_ELF
8564 #if TARGET_MACHO
8565 || !MACHO_DYNAMIC_NO_PIC_P
8566 #endif
8568 && TARGET_32BIT
8569 && TARGET_NO_TOC
8570 && ! flag_pic
8571 && GET_CODE (x) != CONST_INT
8572 && GET_CODE (x) != CONST_WIDE_INT
8573 && GET_CODE (x) != CONST_DOUBLE
8574 && CONSTANT_P (x)
8575 && GET_MODE_NUNITS (mode) == 1
8576 && (GET_MODE_SIZE (mode) <= UNITS_PER_WORD
8577 || (/* ??? Assume floating point reg based on mode? */
8578 TARGET_HARD_FLOAT && (mode == DFmode || mode == DDmode))))
8580 rtx reg = gen_reg_rtx (Pmode);
8581 if (TARGET_ELF)
8582 emit_insn (gen_elf_high (reg, x));
8583 else
8584 emit_insn (gen_macho_high (reg, x));
8585 return gen_rtx_LO_SUM (Pmode, reg, x);
8587 else if (TARGET_TOC
8588 && GET_CODE (x) == SYMBOL_REF
8589 && constant_pool_expr_p (x)
8590 && ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (get_pool_constant (x), Pmode))
8591 return create_TOC_reference (x, NULL_RTX);
8592 else
8593 return x;
8596 /* Debug version of rs6000_legitimize_address. */
8597 static rtx
8598 rs6000_debug_legitimize_address (rtx x, rtx oldx, machine_mode mode)
8600 rtx ret;
8601 rtx_insn *insns;
8603 start_sequence ();
8604 ret = rs6000_legitimize_address (x, oldx, mode);
8605 insns = get_insns ();
8606 end_sequence ();
8608 if (ret != x)
8610 fprintf (stderr,
8611 "\nrs6000_legitimize_address: mode %s, old code %s, "
8612 "new code %s, modified\n",
8613 GET_MODE_NAME (mode), GET_RTX_NAME (GET_CODE (x)),
8614 GET_RTX_NAME (GET_CODE (ret)));
8616 fprintf (stderr, "Original address:\n");
8617 debug_rtx (x);
8619 fprintf (stderr, "oldx:\n");
8620 debug_rtx (oldx);
8622 fprintf (stderr, "New address:\n");
8623 debug_rtx (ret);
8625 if (insns)
8627 fprintf (stderr, "Insns added:\n");
8628 debug_rtx_list (insns, 20);
8631 else
8633 fprintf (stderr,
8634 "\nrs6000_legitimize_address: mode %s, code %s, no change:\n",
8635 GET_MODE_NAME (mode), GET_RTX_NAME (GET_CODE (x)));
8637 debug_rtx (x);
8640 if (insns)
8641 emit_insn (insns);
8643 return ret;
8646 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
8647 We need to emit DTP-relative relocations. */
8649 static void rs6000_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
8650 static void
8651 rs6000_output_dwarf_dtprel (FILE *file, int size, rtx x)
8653 switch (size)
8655 case 4:
8656 fputs ("\t.long\t", file);
8657 break;
8658 case 8:
8659 fputs (DOUBLE_INT_ASM_OP, file);
8660 break;
8661 default:
8662 gcc_unreachable ();
8664 output_addr_const (file, x);
8665 if (TARGET_ELF)
8666 fputs ("@dtprel+0x8000", file);
8667 else if (TARGET_XCOFF && GET_CODE (x) == SYMBOL_REF)
8669 switch (SYMBOL_REF_TLS_MODEL (x))
8671 case 0:
8672 break;
8673 case TLS_MODEL_LOCAL_EXEC:
8674 fputs ("@le", file);
8675 break;
8676 case TLS_MODEL_INITIAL_EXEC:
8677 fputs ("@ie", file);
8678 break;
8679 case TLS_MODEL_GLOBAL_DYNAMIC:
8680 case TLS_MODEL_LOCAL_DYNAMIC:
8681 fputs ("@m", file);
8682 break;
8683 default:
8684 gcc_unreachable ();
8689 /* Return true if X is a symbol that refers to real (rather than emulated)
8690 TLS. */
8692 static bool
8693 rs6000_real_tls_symbol_ref_p (rtx x)
8695 return (GET_CODE (x) == SYMBOL_REF
8696 && SYMBOL_REF_TLS_MODEL (x) >= TLS_MODEL_REAL);
8699 /* In the name of slightly smaller debug output, and to cater to
8700 general assembler lossage, recognize various UNSPEC sequences
8701 and turn them back into a direct symbol reference. */
8703 static rtx
8704 rs6000_delegitimize_address (rtx orig_x)
8706 rtx x, y, offset;
8708 orig_x = delegitimize_mem_from_attrs (orig_x);
8709 x = orig_x;
8710 if (MEM_P (x))
8711 x = XEXP (x, 0);
8713 y = x;
8714 if (TARGET_CMODEL != CMODEL_SMALL
8715 && GET_CODE (y) == LO_SUM)
8716 y = XEXP (y, 1);
8718 offset = NULL_RTX;
8719 if (GET_CODE (y) == PLUS
8720 && GET_MODE (y) == Pmode
8721 && CONST_INT_P (XEXP (y, 1)))
8723 offset = XEXP (y, 1);
8724 y = XEXP (y, 0);
8727 if (GET_CODE (y) == UNSPEC
8728 && XINT (y, 1) == UNSPEC_TOCREL)
8730 y = XVECEXP (y, 0, 0);
8732 #ifdef HAVE_AS_TLS
8733 /* Do not associate thread-local symbols with the original
8734 constant pool symbol. */
8735 if (TARGET_XCOFF
8736 && GET_CODE (y) == SYMBOL_REF
8737 && CONSTANT_POOL_ADDRESS_P (y)
8738 && rs6000_real_tls_symbol_ref_p (get_pool_constant (y)))
8739 return orig_x;
8740 #endif
8742 if (offset != NULL_RTX)
8743 y = gen_rtx_PLUS (Pmode, y, offset);
8744 if (!MEM_P (orig_x))
8745 return y;
8746 else
8747 return replace_equiv_address_nv (orig_x, y);
8750 if (TARGET_MACHO
8751 && GET_CODE (orig_x) == LO_SUM
8752 && GET_CODE (XEXP (orig_x, 1)) == CONST)
8754 y = XEXP (XEXP (orig_x, 1), 0);
8755 if (GET_CODE (y) == UNSPEC
8756 && XINT (y, 1) == UNSPEC_MACHOPIC_OFFSET)
8757 return XVECEXP (y, 0, 0);
8760 return orig_x;
8763 /* Return true if X shouldn't be emitted into the debug info.
8764 The linker doesn't like .toc section references from
8765 .debug_* sections, so reject .toc section symbols. */
8767 static bool
8768 rs6000_const_not_ok_for_debug_p (rtx x)
8770 if (GET_CODE (x) == UNSPEC)
8771 return true;
8772 if (GET_CODE (x) == SYMBOL_REF
8773 && CONSTANT_POOL_ADDRESS_P (x))
8775 rtx c = get_pool_constant (x);
8776 machine_mode cmode = get_pool_mode (x);
8777 if (ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (c, cmode))
8778 return true;
8781 return false;
8785 /* Implement the TARGET_LEGITIMATE_COMBINED_INSN hook. */
8787 static bool
8788 rs6000_legitimate_combined_insn (rtx_insn *insn)
8790 int icode = INSN_CODE (insn);
8792 /* Reject creating doloop insns. Combine should not be allowed
8793 to create these for a number of reasons:
8794 1) In a nested loop, if combine creates one of these in an
8795 outer loop and the register allocator happens to allocate ctr
8796 to the outer loop insn, then the inner loop can't use ctr.
8797 Inner loops ought to be more highly optimized.
8798 2) Combine often wants to create one of these from what was
8799 originally a three insn sequence, first combining the three
8800 insns to two, then to ctrsi/ctrdi. When ctrsi/ctrdi is not
8801      allocated ctr, the splitter takes us back to the three insn
8802 sequence. It's better to stop combine at the two insn
8803 sequence.
8804      3) Faced with not being able to allocate ctr for ctrsi/ctrdi
8805 insns, the register allocator sometimes uses floating point
8806 or vector registers for the pseudo. Since ctrsi/ctrdi is a
8807 jump insn and output reloads are not implemented for jumps,
8808 the ctrsi/ctrdi splitters need to handle all possible cases.
8809 That's a pain, and it gets to be seriously difficult when a
8810 splitter that runs after reload needs memory to transfer from
8811 a gpr to fpr. See PR70098 and PR71763 which are not fixed
8812 for the difficult case. It's better to not create problems
8813 in the first place. */
8814 if (icode != CODE_FOR_nothing
8815 && (icode == CODE_FOR_bdz_si
8816 || icode == CODE_FOR_bdz_di
8817 || icode == CODE_FOR_bdnz_si
8818 || icode == CODE_FOR_bdnz_di
8819 || icode == CODE_FOR_bdztf_si
8820 || icode == CODE_FOR_bdztf_di
8821 || icode == CODE_FOR_bdnztf_si
8822 || icode == CODE_FOR_bdnztf_di))
8823 return false;
8825 return true;
8828 /* Construct the SYMBOL_REF for the tls_get_addr function. */
8830 static GTY(()) rtx rs6000_tls_symbol;
8831 static rtx
8832 rs6000_tls_get_addr (void)
8834 if (!rs6000_tls_symbol)
8835 rs6000_tls_symbol = init_one_libfunc ("__tls_get_addr");
8837 return rs6000_tls_symbol;
8840 /* Construct the SYMBOL_REF for TLS GOT references. */
8842 static GTY(()) rtx rs6000_got_symbol;
8843 static rtx
8844 rs6000_got_sym (void)
8846 if (!rs6000_got_symbol)
8848 rs6000_got_symbol = gen_rtx_SYMBOL_REF (Pmode, "_GLOBAL_OFFSET_TABLE_");
8849 SYMBOL_REF_FLAGS (rs6000_got_symbol) |= SYMBOL_FLAG_LOCAL;
8850 SYMBOL_REF_FLAGS (rs6000_got_symbol) |= SYMBOL_FLAG_EXTERNAL;
8853 return rs6000_got_symbol;
8856 /* AIX Thread-Local Address support. */
8858 static rtx
8859 rs6000_legitimize_tls_address_aix (rtx addr, enum tls_model model)
8861 rtx sym, mem, tocref, tlsreg, tmpreg, dest, tlsaddr;
8862 const char *name;
8863 char *tlsname;
8865 name = XSTR (addr, 0);
8866 /* Append TLS CSECT qualifier, unless the symbol already is qualified
8867 or the symbol will be in TLS private data section. */
8868 if (name[strlen (name) - 1] != ']'
8869 && (TREE_PUBLIC (SYMBOL_REF_DECL (addr))
8870 || bss_initializer_p (SYMBOL_REF_DECL (addr))))
8872 tlsname = XALLOCAVEC (char, strlen (name) + 4);
8873 strcpy (tlsname, name);
8874 strcat (tlsname,
8875 bss_initializer_p (SYMBOL_REF_DECL (addr)) ? "[UL]" : "[TL]");
8876 tlsaddr = copy_rtx (addr);
8877 XSTR (tlsaddr, 0) = ggc_strdup (tlsname);
8879 else
8880 tlsaddr = addr;
8882 /* Place addr into TOC constant pool. */
8883 sym = force_const_mem (GET_MODE (tlsaddr), tlsaddr);
8885 /* Output the TOC entry and create the MEM referencing the value. */
8886 if (constant_pool_expr_p (XEXP (sym, 0))
8887 && ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (get_pool_constant (XEXP (sym, 0)), Pmode))
8889 tocref = create_TOC_reference (XEXP (sym, 0), NULL_RTX);
8890 mem = gen_const_mem (Pmode, tocref);
8891 set_mem_alias_set (mem, get_TOC_alias_set ());
8893 else
8894 return sym;
8896 /* Use global-dynamic for local-dynamic. */
8897 if (model == TLS_MODEL_GLOBAL_DYNAMIC
8898 || model == TLS_MODEL_LOCAL_DYNAMIC)
8900 /* Create new TOC reference for @m symbol. */
8901 name = XSTR (XVECEXP (XEXP (mem, 0), 0, 0), 0);
8902 tlsname = XALLOCAVEC (char, strlen (name) + 1);
8903 strcpy (tlsname, "*LCM");
8904 strcat (tlsname, name + 3);
8905 rtx modaddr = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (tlsname));
8906 SYMBOL_REF_FLAGS (modaddr) |= SYMBOL_FLAG_LOCAL;
8907 tocref = create_TOC_reference (modaddr, NULL_RTX);
8908 rtx modmem = gen_const_mem (Pmode, tocref);
8909 set_mem_alias_set (modmem, get_TOC_alias_set ());
8911 rtx modreg = gen_reg_rtx (Pmode);
8912 emit_insn (gen_rtx_SET (modreg, modmem));
8914 tmpreg = gen_reg_rtx (Pmode);
8915 emit_insn (gen_rtx_SET (tmpreg, mem));
8917 dest = gen_reg_rtx (Pmode);
8918 if (TARGET_32BIT)
8919 emit_insn (gen_tls_get_addrsi (dest, modreg, tmpreg));
8920 else
8921 emit_insn (gen_tls_get_addrdi (dest, modreg, tmpreg));
8922 return dest;
8924 /* Obtain TLS pointer: 32 bit call or 64 bit GPR 13. */
8925 else if (TARGET_32BIT)
8927 tlsreg = gen_reg_rtx (SImode);
8928 emit_insn (gen_tls_get_tpointer (tlsreg));
8930 else
8931 tlsreg = gen_rtx_REG (DImode, 13);
8933 /* Load the TOC value into temporary register. */
8934 tmpreg = gen_reg_rtx (Pmode);
8935 emit_insn (gen_rtx_SET (tmpreg, mem));
8936 set_unique_reg_note (get_last_insn (), REG_EQUAL,
8937 gen_rtx_MINUS (Pmode, addr, tlsreg));
8939 /* Add TOC symbol value to TLS pointer. */
8940 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, tmpreg, tlsreg));
8942 return dest;
8945 /* ADDR contains a thread-local SYMBOL_REF. Generate code to compute
8946 this (thread-local) address. */
8948 static rtx
8949 rs6000_legitimize_tls_address (rtx addr, enum tls_model model)
8951 rtx dest, insn;
8953 if (TARGET_XCOFF)
8954 return rs6000_legitimize_tls_address_aix (addr, model);
8956 dest = gen_reg_rtx (Pmode);
8957 if (model == TLS_MODEL_LOCAL_EXEC && rs6000_tls_size == 16)
8959 rtx tlsreg;
8961 if (TARGET_64BIT)
8963 tlsreg = gen_rtx_REG (Pmode, 13);
8964 insn = gen_tls_tprel_64 (dest, tlsreg, addr);
8966 else
8968 tlsreg = gen_rtx_REG (Pmode, 2);
8969 insn = gen_tls_tprel_32 (dest, tlsreg, addr);
8971 emit_insn (insn);
8973 else if (model == TLS_MODEL_LOCAL_EXEC && rs6000_tls_size == 32)
8975 rtx tlsreg, tmp;
8977 tmp = gen_reg_rtx (Pmode);
8978 if (TARGET_64BIT)
8980 tlsreg = gen_rtx_REG (Pmode, 13);
8981 insn = gen_tls_tprel_ha_64 (tmp, tlsreg, addr);
8983 else
8985 tlsreg = gen_rtx_REG (Pmode, 2);
8986 insn = gen_tls_tprel_ha_32 (tmp, tlsreg, addr);
8988 emit_insn (insn);
8989 if (TARGET_64BIT)
8990 insn = gen_tls_tprel_lo_64 (dest, tmp, addr);
8991 else
8992 insn = gen_tls_tprel_lo_32 (dest, tmp, addr);
8993 emit_insn (insn);
8995 else
8997 rtx r3, got, tga, tmp1, tmp2, call_insn;
8999 /* We currently use relocations like @got@tlsgd for tls, which
9000 means the linker will handle allocation of tls entries, placing
9001 them in the .got section. So use a pointer to the .got section,
9002 not one to secondary TOC sections used by 64-bit -mminimal-toc,
9003 or to secondary GOT sections used by 32-bit -fPIC. */
9004 if (TARGET_64BIT)
9005 got = gen_rtx_REG (Pmode, 2);
9006 else
9008 if (flag_pic == 1)
9009 got = gen_rtx_REG (Pmode, RS6000_PIC_OFFSET_TABLE_REGNUM);
9010 else
9012 rtx gsym = rs6000_got_sym ();
9013 got = gen_reg_rtx (Pmode);
9014 if (flag_pic == 0)
9015 rs6000_emit_move (got, gsym, Pmode);
9016 else
9018 rtx mem, lab;
9020 tmp1 = gen_reg_rtx (Pmode);
9021 tmp2 = gen_reg_rtx (Pmode);
9022 mem = gen_const_mem (Pmode, tmp1);
9023 lab = gen_label_rtx ();
9024 emit_insn (gen_load_toc_v4_PIC_1b (gsym, lab));
9025 emit_move_insn (tmp1, gen_rtx_REG (Pmode, LR_REGNO));
9026 if (TARGET_LINK_STACK)
9027 emit_insn (gen_addsi3 (tmp1, tmp1, GEN_INT (4)));
9028 emit_move_insn (tmp2, mem);
9029 rtx_insn *last = emit_insn (gen_addsi3 (got, tmp1, tmp2));
9030 set_unique_reg_note (last, REG_EQUAL, gsym);
9035 if (model == TLS_MODEL_GLOBAL_DYNAMIC)
9037 tga = rs6000_tls_get_addr ();
9038 emit_library_call_value (tga, dest, LCT_CONST, Pmode,
9039 const0_rtx, Pmode);
9041 r3 = gen_rtx_REG (Pmode, 3);
9042 if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
9044 if (TARGET_64BIT)
9045 insn = gen_tls_gd_aix64 (r3, got, addr, tga, const0_rtx);
9046 else
9047 insn = gen_tls_gd_aix32 (r3, got, addr, tga, const0_rtx);
9049 else if (DEFAULT_ABI == ABI_V4)
9050 insn = gen_tls_gd_sysvsi (r3, got, addr, tga, const0_rtx);
9051 else
9052 gcc_unreachable ();
9053 call_insn = last_call_insn ();
9054 PATTERN (call_insn) = insn;
9055 if (DEFAULT_ABI == ABI_V4 && TARGET_SECURE_PLT && flag_pic)
9056 use_reg (&CALL_INSN_FUNCTION_USAGE (call_insn),
9057 pic_offset_table_rtx);
9059 else if (model == TLS_MODEL_LOCAL_DYNAMIC)
9061 tga = rs6000_tls_get_addr ();
9062 tmp1 = gen_reg_rtx (Pmode);
9063 emit_library_call_value (tga, tmp1, LCT_CONST, Pmode,
9064 const0_rtx, Pmode);
9066 r3 = gen_rtx_REG (Pmode, 3);
9067 if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
9069 if (TARGET_64BIT)
9070 insn = gen_tls_ld_aix64 (r3, got, tga, const0_rtx);
9071 else
9072 insn = gen_tls_ld_aix32 (r3, got, tga, const0_rtx);
9074 else if (DEFAULT_ABI == ABI_V4)
9075 insn = gen_tls_ld_sysvsi (r3, got, tga, const0_rtx);
9076 else
9077 gcc_unreachable ();
9078 call_insn = last_call_insn ();
9079 PATTERN (call_insn) = insn;
9080 if (DEFAULT_ABI == ABI_V4 && TARGET_SECURE_PLT && flag_pic)
9081 use_reg (&CALL_INSN_FUNCTION_USAGE (call_insn),
9082 pic_offset_table_rtx);
9084 if (rs6000_tls_size == 16)
9086 if (TARGET_64BIT)
9087 insn = gen_tls_dtprel_64 (dest, tmp1, addr);
9088 else
9089 insn = gen_tls_dtprel_32 (dest, tmp1, addr);
9091 else if (rs6000_tls_size == 32)
9093 tmp2 = gen_reg_rtx (Pmode);
9094 if (TARGET_64BIT)
9095 insn = gen_tls_dtprel_ha_64 (tmp2, tmp1, addr);
9096 else
9097 insn = gen_tls_dtprel_ha_32 (tmp2, tmp1, addr);
9098 emit_insn (insn);
9099 if (TARGET_64BIT)
9100 insn = gen_tls_dtprel_lo_64 (dest, tmp2, addr);
9101 else
9102 insn = gen_tls_dtprel_lo_32 (dest, tmp2, addr);
9104 else
9106 tmp2 = gen_reg_rtx (Pmode);
9107 if (TARGET_64BIT)
9108 insn = gen_tls_got_dtprel_64 (tmp2, got, addr);
9109 else
9110 insn = gen_tls_got_dtprel_32 (tmp2, got, addr);
9111 emit_insn (insn);
9112 insn = gen_rtx_SET (dest, gen_rtx_PLUS (Pmode, tmp2, tmp1));
9114 emit_insn (insn);
9116 else
9118 /* IE, or 64-bit offset LE. */
9119 tmp2 = gen_reg_rtx (Pmode);
9120 if (TARGET_64BIT)
9121 insn = gen_tls_got_tprel_64 (tmp2, got, addr);
9122 else
9123 insn = gen_tls_got_tprel_32 (tmp2, got, addr);
9124 emit_insn (insn);
9125 if (TARGET_64BIT)
9126 insn = gen_tls_tls_64 (dest, tmp2, addr);
9127 else
9128 insn = gen_tls_tls_32 (dest, tmp2, addr);
9129 emit_insn (insn);
9133 return dest;
9136 /* Only create the global variable for the stack protect guard if we are using
9137 the global flavor of that guard. */
9138 static tree
9139 rs6000_init_stack_protect_guard (void)
9141 if (rs6000_stack_protector_guard == SSP_GLOBAL)
9142 return default_stack_protect_guard ();
9144 return NULL_TREE;
9147 /* Implement TARGET_CANNOT_FORCE_CONST_MEM. */
9149 static bool
9150 rs6000_cannot_force_const_mem (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
9152 if (GET_CODE (x) == HIGH
9153 && GET_CODE (XEXP (x, 0)) == UNSPEC)
9154 return true;
9156 /* A TLS symbol in the TOC cannot contain a sum. */
9157 if (GET_CODE (x) == CONST
9158 && GET_CODE (XEXP (x, 0)) == PLUS
9159 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
9160 && SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x, 0), 0)) != 0)
9161 return true;
9163 /* Do not place an ELF TLS symbol in the constant pool. */
9164 return TARGET_ELF && tls_referenced_p (x);
9167 /* Return true iff the given SYMBOL_REF refers to a constant pool entry
9168 that we have put in the TOC, or for cmodel=medium, if the SYMBOL_REF
9169 can be addressed relative to the toc pointer. */
9171 static bool
9172 use_toc_relative_ref (rtx sym, machine_mode mode)
9174 return ((constant_pool_expr_p (sym)
9175 && ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (get_pool_constant (sym),
9176 get_pool_mode (sym)))
9177 || (TARGET_CMODEL == CMODEL_MEDIUM
9178 && SYMBOL_REF_LOCAL_P (sym)
9179 && GET_MODE_SIZE (mode) <= POWERPC64_TOC_POINTER_ALIGNMENT));
9182 /* Our implementation of LEGITIMIZE_RELOAD_ADDRESS. Returns a value to
9183 replace the input X, or the original X if no replacement is called for.
9184 The output parameter *WIN is 1 if the calling macro should goto WIN,
9185 0 if it should not.
9187 For RS/6000, we wish to handle large displacements off a base
9188 register by splitting the addend across an addi/addis pair and the mem insn.
9189 This cuts the number of extra insns needed from 3 to 1.
9191 On Darwin, we use this to generate code for floating point constants.
9192 A movsf_low is generated so we wind up with 2 instructions rather than 3.
9193 The Darwin code is inside #if TARGET_MACHO because only then are the
9194 machopic_* functions defined. */
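/* A worked example of the displacement split performed below (a sketch,
   assuming 64-bit HOST_WIDE_INT arithmetic; the variables here are
   illustrative, not GCC code):

     long long val  = 0x12345;
     long long low  = ((val & 0xffff) ^ 0x8000) - 0x8000;      // 0x2345
     long long high = (((val - low) & 0xffffffff) ^ 0x80000000)
                      - 0x80000000;                            // 0x10000

   LOW is the sign-extended bottom 16 bits and HIGH the remainder, so
   high + low == val, and the address can be rewritten as
   (plus (plus (reg) (const_int 0x10000)) (const_int 0x2345)), with the
   inner PLUS reloaded into a base register via a single addis.  */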
9195 static rtx
9196 rs6000_legitimize_reload_address (rtx x, machine_mode mode,
9197 int opnum, int type,
9198 int ind_levels ATTRIBUTE_UNUSED, int *win)
9200 bool reg_offset_p = reg_offset_addressing_ok_p (mode);
9201 bool quad_offset_p = mode_supports_dq_form (mode);
9203 /* Nasty hack for vsx_splat_v2df/v2di load from mem, which takes a
9204 DFmode/DImode MEM. Ditto for ISA 3.0 vsx_splat_v4sf/v4si. */
9205 if (reg_offset_p
9206 && opnum == 1
9207 && ((mode == DFmode && recog_data.operand_mode[0] == V2DFmode)
9208 || (mode == DImode && recog_data.operand_mode[0] == V2DImode)
9209 || (mode == SFmode && recog_data.operand_mode[0] == V4SFmode
9210 && TARGET_P9_VECTOR)
9211 || (mode == SImode && recog_data.operand_mode[0] == V4SImode
9212 && TARGET_P9_VECTOR)))
9213 reg_offset_p = false;
9215 /* We must recognize output that we have already generated ourselves. */
9216 if (GET_CODE (x) == PLUS
9217 && GET_CODE (XEXP (x, 0)) == PLUS
9218 && GET_CODE (XEXP (XEXP (x, 0), 0)) == REG
9219 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
9220 && GET_CODE (XEXP (x, 1)) == CONST_INT)
9222 if (TARGET_DEBUG_ADDR)
9224 fprintf (stderr, "\nlegitimize_reload_address push_reload #1:\n");
9225 debug_rtx (x);
9227 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
9228 BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
9229 opnum, (enum reload_type) type);
9230 *win = 1;
9231 return x;
9234 /* Likewise for (lo_sum (high ...) ...) output we have generated. */
9235 if (GET_CODE (x) == LO_SUM
9236 && GET_CODE (XEXP (x, 0)) == HIGH)
9238 if (TARGET_DEBUG_ADDR)
9240 fprintf (stderr, "\nlegitimize_reload_address push_reload #2:\n");
9241 debug_rtx (x);
9243 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
9244 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0,
9245 opnum, (enum reload_type) type);
9246 *win = 1;
9247 return x;
9250 #if TARGET_MACHO
9251 if (DEFAULT_ABI == ABI_DARWIN && flag_pic
9252 && GET_CODE (x) == LO_SUM
9253 && GET_CODE (XEXP (x, 0)) == PLUS
9254 && XEXP (XEXP (x, 0), 0) == pic_offset_table_rtx
9255 && GET_CODE (XEXP (XEXP (x, 0), 1)) == HIGH
9256 && XEXP (XEXP (XEXP (x, 0), 1), 0) == XEXP (x, 1)
9257 && machopic_operand_p (XEXP (x, 1)))
9259 /* Result of a previous invocation of this function on a Darwin
9260 floating-point constant. */
9261 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
9262 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0,
9263 opnum, (enum reload_type) type);
9264 *win = 1;
9265 return x;
9267 #endif
9269 if (TARGET_CMODEL != CMODEL_SMALL
9270 && reg_offset_p
9271 && !quad_offset_p
9272 && small_toc_ref (x, VOIDmode))
9274 rtx hi = gen_rtx_HIGH (Pmode, copy_rtx (x));
9275 x = gen_rtx_LO_SUM (Pmode, hi, x);
9276 if (TARGET_DEBUG_ADDR)
9278 fprintf (stderr, "\nlegitimize_reload_address push_reload #3:\n");
9279 debug_rtx (x);
9281 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
9282 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0,
9283 opnum, (enum reload_type) type);
9284 *win = 1;
9285 return x;
9288 if (GET_CODE (x) == PLUS
9289 && REG_P (XEXP (x, 0))
9290 && REGNO (XEXP (x, 0)) < FIRST_PSEUDO_REGISTER
9291 && INT_REG_OK_FOR_BASE_P (XEXP (x, 0), 1)
9292 && CONST_INT_P (XEXP (x, 1))
9293 && reg_offset_p
9294 && (quad_offset_p || !VECTOR_MODE_P (mode) || VECTOR_MEM_NONE_P (mode)))
9296 HOST_WIDE_INT val = INTVAL (XEXP (x, 1));
9297 HOST_WIDE_INT low = ((val & 0xffff) ^ 0x8000) - 0x8000;
9298 HOST_WIDE_INT high
9299 = (((val - low) & 0xffffffff) ^ 0x80000000) - 0x80000000;
9301 /* Check for 32-bit overflow or quad addresses with one of the
9302 four least significant bits set. */
9303 if (high + low != val
9304 || (quad_offset_p && (low & 0xf)))
9306 *win = 0;
9307 return x;
9310 /* Reload the high part into a base reg; leave the low part
9311 in the mem directly. */
9313 x = gen_rtx_PLUS (GET_MODE (x),
9314 gen_rtx_PLUS (GET_MODE (x), XEXP (x, 0),
9315 GEN_INT (high)),
9316 GEN_INT (low));
9318 if (TARGET_DEBUG_ADDR)
9320 fprintf (stderr, "\nlegitimize_reload_address push_reload #4:\n");
9321 debug_rtx (x);
9323 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
9324 BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
9325 opnum, (enum reload_type) type);
9326 *win = 1;
9327 return x;
9330 if (GET_CODE (x) == SYMBOL_REF
9331 && reg_offset_p
9332 && !quad_offset_p
9333 && (!VECTOR_MODE_P (mode) || VECTOR_MEM_NONE_P (mode))
9334 #if TARGET_MACHO
9335 && DEFAULT_ABI == ABI_DARWIN
9336 && (flag_pic || MACHO_DYNAMIC_NO_PIC_P)
9337 && machopic_symbol_defined_p (x)
9338 #else
9339 && DEFAULT_ABI == ABI_V4
9340 && !flag_pic
9341 #endif
9342 /* Don't do this for TFmode or TDmode, since the result isn't offsettable.
9343 The same goes for DImode without 64-bit gprs and DFmode and DDmode
9344 without fprs.
9345 ??? Assume floating point reg based on mode? This assumption is
9346 violated by e.g. powerpc-linux -m32 compile of gcc.dg/pr28796-2.c
9347 where reload ends up doing a DFmode load of a constant from
9348 mem using two gprs. Unfortunately, at this point reload
9349 hasn't yet selected regs so poking around in reload data
9350 won't help and even if we could figure out the regs reliably,
9351 we'd still want to allow this transformation when the mem is
9352 naturally aligned. Since we say the address is good here, we
9353 can't disable offsets from LO_SUMs in mem_operand_gpr.
9354 FIXME: Allow offset from lo_sum for other modes too, when
9355 mem is sufficiently aligned.
9357 Also disallow this if the type can go in VMX/Altivec registers, since
9358 those registers do not have d-form (reg+offset) address modes. */
9359 && !reg_addr[mode].scalar_in_vmx_p
9360 && mode != TFmode
9361 && mode != TDmode
9362 && mode != IFmode
9363 && mode != KFmode
9364 && (mode != TImode || !TARGET_VSX)
9365 && mode != PTImode
9366 && (mode != DImode || TARGET_POWERPC64)
9367 && ((mode != DFmode && mode != DDmode) || TARGET_POWERPC64
9368 || TARGET_HARD_FLOAT))
9370 #if TARGET_MACHO
9371 if (flag_pic)
9373 rtx offset = machopic_gen_offset (x);
9374 x = gen_rtx_LO_SUM (GET_MODE (x),
9375 gen_rtx_PLUS (Pmode, pic_offset_table_rtx,
9376 gen_rtx_HIGH (Pmode, offset)), offset);
9378 else
9379 #endif
9380 x = gen_rtx_LO_SUM (GET_MODE (x),
9381 gen_rtx_HIGH (Pmode, x), x);
9383 if (TARGET_DEBUG_ADDR)
9385 fprintf (stderr, "\nlegitimize_reload_address push_reload #5:\n");
9386 debug_rtx (x);
9388 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
9389 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0,
9390 opnum, (enum reload_type) type);
9391 *win = 1;
9392 return x;
9395 /* Reload an offset address wrapped by an AND that represents the
9396 masking of the lower bits. Strip the outer AND and let reload
9397 convert the offset address into an indirect address. For VSX,
9398 force reload to create the address with an AND in a separate
9399 register, because we can't guarantee an altivec register will
9400 be used. */
9401 if (VECTOR_MEM_ALTIVEC_P (mode)
9402 && GET_CODE (x) == AND
9403 && GET_CODE (XEXP (x, 0)) == PLUS
9404 && GET_CODE (XEXP (XEXP (x, 0), 0)) == REG
9405 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
9406 && GET_CODE (XEXP (x, 1)) == CONST_INT
9407 && INTVAL (XEXP (x, 1)) == -16)
9409 x = XEXP (x, 0);
9410 *win = 1;
9411 return x;
9414 if (TARGET_TOC
9415 && reg_offset_p
9416 && !quad_offset_p
9417 && GET_CODE (x) == SYMBOL_REF
9418 && use_toc_relative_ref (x, mode))
9420 x = create_TOC_reference (x, NULL_RTX);
9421 if (TARGET_CMODEL != CMODEL_SMALL)
9423 if (TARGET_DEBUG_ADDR)
9425 fprintf (stderr, "\nlegitimize_reload_address push_reload #6:\n");
9426 debug_rtx (x);
9428 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
9429 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0,
9430 opnum, (enum reload_type) type);
9432 *win = 1;
9433 return x;
9435 *win = 0;
9436 return x;
9439 /* Debug version of rs6000_legitimize_reload_address. */
9440 static rtx
9441 rs6000_debug_legitimize_reload_address (rtx x, machine_mode mode,
9442 int opnum, int type,
9443 int ind_levels, int *win)
9445 rtx ret = rs6000_legitimize_reload_address (x, mode, opnum, type,
9446 ind_levels, win);
9447 fprintf (stderr,
9448 "\nrs6000_legitimize_reload_address: mode = %s, opnum = %d, "
9449 "type = %d, ind_levels = %d, win = %d, original addr:\n",
9450 GET_MODE_NAME (mode), opnum, type, ind_levels, *win);
9451 debug_rtx (x);
9453 if (x == ret)
9454 fprintf (stderr, "Same address returned\n");
9455 else if (!ret)
9456 fprintf (stderr, "NULL returned\n");
9457 else
9459 fprintf (stderr, "New address:\n");
9460 debug_rtx (ret);
9463 return ret;
9466 /* TARGET_LEGITIMATE_ADDRESS_P recognizes an RTL expression
9467 that is a valid memory address for an instruction.
9468 The MODE argument is the machine mode for the MEM expression
9469 that wants to use this address.
9471 On the RS/6000, there are four valid addresses: a SYMBOL_REF that
9472 refers to a constant pool entry of an address (or the sum of it
9473 plus a constant), a short (16-bit signed) constant plus a register,
9474 the sum of two registers, or a register indirect, possibly with an
9475 auto-increment. For DFmode, DDmode and DImode with a constant plus
9476 register, we must ensure that both words are addressable or PowerPC64
9477 with offset word aligned.
9479 For modes spanning multiple registers (DFmode and DDmode in 32-bit GPRs,
9480 32-bit DImode, TImode, TFmode, TDmode), indexed addressing cannot be used
9481 because adjacent memory cells are accessed by adding word-sized offsets
9482 during assembly output. */
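/* For illustration, typical forms accepted below for an unrestricted
   integer mode such as SImode (register numbers are hypothetical):

     (mem:SI (reg:SI 3))                          -- register indirect
     (mem:SI (plus:SI (reg:SI 3) (const_int 8)))  -- reg + 16-bit offset
     (mem:SI (plus:SI (reg:SI 3) (reg:SI 4)))     -- indexed, reg + reg
     (mem:SI (pre_inc:SI (reg:SI 3)))             -- auto-increment

   AltiVec modes reject the constant-offset form, and TImode with VSX
   allows only register indirect; see the function body.  */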
9483 static bool
9484 rs6000_legitimate_address_p (machine_mode mode, rtx x, bool reg_ok_strict)
9486 bool reg_offset_p = reg_offset_addressing_ok_p (mode);
9487 bool quad_offset_p = mode_supports_dq_form (mode);
9489 /* If this is an unaligned lvx/stvx type address, discard the outer AND. */
9490 if (VECTOR_MEM_ALTIVEC_P (mode)
9491 && GET_CODE (x) == AND
9492 && GET_CODE (XEXP (x, 1)) == CONST_INT
9493 && INTVAL (XEXP (x, 1)) == -16)
9494 x = XEXP (x, 0);
9496 if (TARGET_ELF && RS6000_SYMBOL_REF_TLS_P (x))
9497 return 0;
9498 if (legitimate_indirect_address_p (x, reg_ok_strict))
9499 return 1;
9500 if (TARGET_UPDATE
9501 && (GET_CODE (x) == PRE_INC || GET_CODE (x) == PRE_DEC)
9502 && mode_supports_pre_incdec_p (mode)
9503 && legitimate_indirect_address_p (XEXP (x, 0), reg_ok_strict))
9504 return 1;
9505 /* Handle restricted vector d-form offsets in ISA 3.0. */
9506 if (quad_offset_p)
9508 if (quad_address_p (x, mode, reg_ok_strict))
9509 return 1;
9511 else if (virtual_stack_registers_memory_p (x))
9512 return 1;
9514 else if (reg_offset_p)
9516 if (legitimate_small_data_p (mode, x))
9517 return 1;
9518 if (legitimate_constant_pool_address_p (x, mode,
9519 reg_ok_strict || lra_in_progress))
9520 return 1;
9521 if (reg_addr[mode].fused_toc && GET_CODE (x) == UNSPEC
9522 && XINT (x, 1) == UNSPEC_FUSION_ADDIS)
9523 return 1;
9526 /* For TImode, if we have TImode in VSX registers, only allow register
9527 indirect addresses. This will allow the values to go in either GPRs
9528 or VSX registers without reloading. The vector types would tend to
9529 go into VSX registers, so we allow REG+REG, while TImode seems
9530 somewhat split, in that some uses are GPR based, and some VSX based. */
9531 /* FIXME: We could loosen this by changing the following to
9532 if (mode == TImode && TARGET_QUAD_MEMORY && TARGET_VSX)
9533 but currently we cannot allow REG+REG addressing for TImode. See
9534 PR72827 for complete details on how this ends up hoodwinking DSE. */
9535 if (mode == TImode && TARGET_VSX)
9536 return 0;
9537 /* If not REG_OK_STRICT (before reload), allow any stack offset. */
9538 if (! reg_ok_strict
9539 && reg_offset_p
9540 && GET_CODE (x) == PLUS
9541 && GET_CODE (XEXP (x, 0)) == REG
9542 && (XEXP (x, 0) == virtual_stack_vars_rtx
9543 || XEXP (x, 0) == arg_pointer_rtx)
9544 && GET_CODE (XEXP (x, 1)) == CONST_INT)
9545 return 1;
9546 if (rs6000_legitimate_offset_address_p (mode, x, reg_ok_strict, false))
9547 return 1;
9548 if (!FLOAT128_2REG_P (mode)
9549 && (TARGET_HARD_FLOAT
9550 || TARGET_POWERPC64
9551 || (mode != DFmode && mode != DDmode))
9552 && (TARGET_POWERPC64 || mode != DImode)
9553 && (mode != TImode || VECTOR_MEM_VSX_P (TImode))
9554 && mode != PTImode
9555 && !avoiding_indexed_address_p (mode)
9556 && legitimate_indexed_address_p (x, reg_ok_strict))
9557 return 1;
9558 if (TARGET_UPDATE && GET_CODE (x) == PRE_MODIFY
9559 && mode_supports_pre_modify_p (mode)
9560 && legitimate_indirect_address_p (XEXP (x, 0), reg_ok_strict)
9561 && (rs6000_legitimate_offset_address_p (mode, XEXP (x, 1),
9562 reg_ok_strict, false)
9563 || (!avoiding_indexed_address_p (mode)
9564 && legitimate_indexed_address_p (XEXP (x, 1), reg_ok_strict)))
9565 && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
9566 return 1;
9567 if (reg_offset_p && !quad_offset_p
9568 && legitimate_lo_sum_address_p (mode, x, reg_ok_strict))
9569 return 1;
9570 return 0;
9573 /* Debug version of rs6000_legitimate_address_p. */
9574 static bool
9575 rs6000_debug_legitimate_address_p (machine_mode mode, rtx x,
9576 bool reg_ok_strict)
9578 bool ret = rs6000_legitimate_address_p (mode, x, reg_ok_strict);
9579 fprintf (stderr,
9580 "\nrs6000_legitimate_address_p: return = %s, mode = %s, "
9581 "strict = %d, reload = %s, code = %s\n",
9582 ret ? "true" : "false",
9583 GET_MODE_NAME (mode),
9584 reg_ok_strict,
9585 (reload_completed ? "after" : "before"),
9586 GET_RTX_NAME (GET_CODE (x)));
9587 debug_rtx (x);
9589 return ret;
9592 /* Implement TARGET_MODE_DEPENDENT_ADDRESS_P. */
9594 static bool
9595 rs6000_mode_dependent_address_p (const_rtx addr,
9596 addr_space_t as ATTRIBUTE_UNUSED)
9598 return rs6000_mode_dependent_address_ptr (addr);
9601 /* Go to LABEL if ADDR (a legitimate address expression)
9602 has an effect that depends on the machine mode it is used for.
9604 On the RS/6000 this is true of all integral offsets (since AltiVec
9605 and VSX modes don't allow them) or is a pre-increment or decrement.
9607 ??? Except that due to conceptual problems in offsettable_address_p
9608 we can't really report the problems of integral offsets. So leave
9609 this assuming that the adjustable offset must be valid for the
9610 sub-words of a TFmode operand, which is what we had before. */
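/* A worked example for the PLUS case below: with TARGET_POWERPC64 an
   offset VAL is flagged when VAL + 0x8000 >= 0x10000 - 8, i.e. when VAL
   lies outside [-0x8000, 0x7ff7].  So (plus (reg) (const_int 32760)) is
   mode-dependent: 32760 (0x7ff8) is fine for one doubleword, but the
   second doubleword of a 16-byte value would need offset 32768, which
   no longer fits the signed 16-bit displacement field.  */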
9612 static bool
9613 rs6000_mode_dependent_address (const_rtx addr)
9615 switch (GET_CODE (addr))
9617 case PLUS:
9618 /* Any offset from virtual_stack_vars_rtx and arg_pointer_rtx
9619 is considered a legitimate address before reload, so there
9620 are no offset restrictions in that case. Note that this
9621 condition is safe in strict mode because any address involving
9622 virtual_stack_vars_rtx or arg_pointer_rtx would already have
9623 been rejected as illegitimate. */
9624 if (XEXP (addr, 0) != virtual_stack_vars_rtx
9625 && XEXP (addr, 0) != arg_pointer_rtx
9626 && GET_CODE (XEXP (addr, 1)) == CONST_INT)
9628 unsigned HOST_WIDE_INT val = INTVAL (XEXP (addr, 1));
9629 return val + 0x8000 >= 0x10000 - (TARGET_POWERPC64 ? 8 : 12);
9631 break;
9633 case LO_SUM:
9634 /* Anything in the constant pool is sufficiently aligned that
9635 all bytes have the same high part address. */
9636 return !legitimate_constant_pool_address_p (addr, QImode, false);
9638 /* Auto-increment cases are now treated generically in recog.c. */
9639 case PRE_MODIFY:
9640 return TARGET_UPDATE;
9642 /* AND is only allowed in Altivec loads. */
9643 case AND:
9644 return true;
9646 default:
9647 break;
9650 return false;
9653 /* Debug version of rs6000_mode_dependent_address. */
9654 static bool
9655 rs6000_debug_mode_dependent_address (const_rtx addr)
9657 bool ret = rs6000_mode_dependent_address (addr);
9659 fprintf (stderr, "\nrs6000_mode_dependent_address: ret = %s\n",
9660 ret ? "true" : "false");
9661 debug_rtx (addr);
9663 return ret;
9666 /* Implement FIND_BASE_TERM. */
9669 rs6000_find_base_term (rtx op)
9671 rtx base;
9673 base = op;
9674 if (GET_CODE (base) == CONST)
9675 base = XEXP (base, 0);
9676 if (GET_CODE (base) == PLUS)
9677 base = XEXP (base, 0);
9678 if (GET_CODE (base) == UNSPEC)
9679 switch (XINT (base, 1))
9681 case UNSPEC_TOCREL:
9682 case UNSPEC_MACHOPIC_OFFSET:
9683 /* OP represents SYM [+ OFFSET] - ANCHOR. SYM is the base term
9684 for aliasing purposes. */
9685 return XVECEXP (base, 0, 0);
9688 return op;
9691 /* More elaborate version of recog's offsettable_memref_p predicate
9692 that works around the ??? note of rs6000_mode_dependent_address.
9693 In particular it accepts
9695 (mem:DI (plus:SI (reg/f:SI 31 31) (const_int 32760 [0x7ff8])))
9697 in 32-bit mode, which the recog predicate rejects. */
9699 static bool
9700 rs6000_offsettable_memref_p (rtx op, machine_mode reg_mode, bool strict)
9702 bool worst_case;
9704 if (!MEM_P (op))
9705 return false;
9707 /* First mimic offsettable_memref_p. */
9708 if (offsettable_address_p (strict, GET_MODE (op), XEXP (op, 0)))
9709 return true;
9711 /* offsettable_address_p invokes rs6000_mode_dependent_address, but
9712 the latter predicate knows nothing about the mode of the memory
9713 reference and, therefore, assumes that it is the largest supported
9714 mode (TFmode). As a consequence, legitimate offsettable memory
9715 references are rejected. rs6000_legitimate_offset_address_p contains
9716 the correct logic for the PLUS case of rs6000_mode_dependent_address,
9717 at least with a little bit of help here given that we know the
9718 actual registers used. */
9719 worst_case = ((TARGET_POWERPC64 && GET_MODE_CLASS (reg_mode) == MODE_INT)
9720 || GET_MODE_SIZE (reg_mode) == 4);
9721 return rs6000_legitimate_offset_address_p (GET_MODE (op), XEXP (op, 0),
9722 strict, worst_case);
9725 /* Determine the reassociation width to be used in reassociate_bb.
9726 This takes into account how many parallel operations we
9727 can actually do of a given type, and also the latency.
9729 int add/sub 6/cycle
9730 mul 2/cycle
9731 vect add/sub/mul 2/cycle
9732 fp add/sub/mul 2/cycle
9733 dfp 1/cycle. */
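/* For example, with a width of 4 the reassociation pass can rebalance
   a serial chain such as ((((((a+b)+c)+d)+e)+f)+g)+h into independent
   partial sums like ((a+b)+(c+d)) + ((e+f)+(g+h)), keeping the parallel
   pipelines listed above busy.  */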
9736 static int
9737 rs6000_reassociation_width (unsigned int opc ATTRIBUTE_UNUSED,
9738 machine_mode mode)
9740 switch (rs6000_tune)
9742 case PROCESSOR_POWER8:
9743 case PROCESSOR_POWER9:
9744 if (DECIMAL_FLOAT_MODE_P (mode))
9745 return 1;
9746 if (VECTOR_MODE_P (mode))
9747 return 4;
9748 if (INTEGRAL_MODE_P (mode))
9749 return 1;
9750 if (FLOAT_MODE_P (mode))
9751 return 4;
9752 break;
9753 default:
9754 break;
9756 return 1;
9759 /* Change register usage conditional on target flags. */
9760 static void
9761 rs6000_conditional_register_usage (void)
9763 int i;
9765 if (TARGET_DEBUG_TARGET)
9766 fprintf (stderr, "rs6000_conditional_register_usage called\n");
9768 /* Set MQ register fixed (already call_used) so that it will not be
9769 allocated. */
9770 fixed_regs[64] = 1;
9772 /* 64-bit AIX and Linux reserve GPR13 for thread-private data. */
9773 if (TARGET_64BIT)
9774 fixed_regs[13] = call_used_regs[13]
9775 = call_really_used_regs[13] = 1;
9777 /* Conditionally disable FPRs. */
9778 if (TARGET_SOFT_FLOAT)
9779 for (i = 32; i < 64; i++)
9780 fixed_regs[i] = call_used_regs[i]
9781 = call_really_used_regs[i] = 1;
9783 /* The TOC register is not killed across calls in a way that is
9784 visible to the compiler. */
9785 if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
9786 call_really_used_regs[2] = 0;
9788 if (DEFAULT_ABI == ABI_V4 && flag_pic == 2)
9789 fixed_regs[RS6000_PIC_OFFSET_TABLE_REGNUM] = 1;
9791 if (DEFAULT_ABI == ABI_V4 && flag_pic == 1)
9792 fixed_regs[RS6000_PIC_OFFSET_TABLE_REGNUM]
9793 = call_used_regs[RS6000_PIC_OFFSET_TABLE_REGNUM]
9794 = call_really_used_regs[RS6000_PIC_OFFSET_TABLE_REGNUM] = 1;
9796 if (DEFAULT_ABI == ABI_DARWIN && flag_pic)
9797 fixed_regs[RS6000_PIC_OFFSET_TABLE_REGNUM]
9798 = call_used_regs[RS6000_PIC_OFFSET_TABLE_REGNUM]
9799 = call_really_used_regs[RS6000_PIC_OFFSET_TABLE_REGNUM] = 1;
9801 if (TARGET_TOC && TARGET_MINIMAL_TOC)
9802 fixed_regs[RS6000_PIC_OFFSET_TABLE_REGNUM]
9803 = call_used_regs[RS6000_PIC_OFFSET_TABLE_REGNUM] = 1;
9805 if (!TARGET_ALTIVEC && !TARGET_VSX)
9807 for (i = FIRST_ALTIVEC_REGNO; i <= LAST_ALTIVEC_REGNO; ++i)
9808 fixed_regs[i] = call_used_regs[i] = call_really_used_regs[i] = 1;
9809 call_really_used_regs[VRSAVE_REGNO] = 1;
9812 if (TARGET_ALTIVEC || TARGET_VSX)
9813 global_regs[VSCR_REGNO] = 1;
9815 if (TARGET_ALTIVEC_ABI)
9817 for (i = FIRST_ALTIVEC_REGNO; i < FIRST_ALTIVEC_REGNO + 20; ++i)
9818 call_used_regs[i] = call_really_used_regs[i] = 1;
9820 /* AIX reserves VR20:31 in non-extended ABI mode. */
9821 if (TARGET_XCOFF)
9822 for (i = FIRST_ALTIVEC_REGNO + 20; i < FIRST_ALTIVEC_REGNO + 32; ++i)
9823 fixed_regs[i] = call_used_regs[i] = call_really_used_regs[i] = 1;
9828 /* Output insns to set DEST equal to the constant SOURCE as a series of
9829 lis, ori and shl instructions and return TRUE. */
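/* For example (a sketch; rD stands for whatever register is chosen),
   the SImode arm below loads 0x12345678 as:

     lis  rD, 0x1234        # rD = 0x12340000
     ori  rD, rD, 0x5678    # rD = 0x12345678  */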
9831 bool
9832 rs6000_emit_set_const (rtx dest, rtx source)
9834 machine_mode mode = GET_MODE (dest);
9835 rtx temp, set;
9836 rtx_insn *insn;
9837 HOST_WIDE_INT c;
9839 gcc_checking_assert (CONST_INT_P (source));
9840 c = INTVAL (source);
9841 switch (mode)
9843 case E_QImode:
9844 case E_HImode:
9845 emit_insn (gen_rtx_SET (dest, source));
9846 return true;
9848 case E_SImode:
9849 temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (SImode);
9851 emit_insn (gen_rtx_SET (copy_rtx (temp),
9852 GEN_INT (c & ~(HOST_WIDE_INT) 0xffff)));
9853 emit_insn (gen_rtx_SET (dest,
9854 gen_rtx_IOR (SImode, copy_rtx (temp),
9855 GEN_INT (c & 0xffff))));
9856 break;
9858 case E_DImode:
9859 if (!TARGET_POWERPC64)
9861 rtx hi, lo;
9863 hi = operand_subword_force (copy_rtx (dest), WORDS_BIG_ENDIAN == 0,
9864 DImode);
9865 lo = operand_subword_force (dest, WORDS_BIG_ENDIAN != 0,
9866 DImode);
9867 emit_move_insn (hi, GEN_INT (c >> 32));
9868 c = ((c & 0xffffffff) ^ 0x80000000) - 0x80000000;
9869 emit_move_insn (lo, GEN_INT (c));
9871 else
9872 rs6000_emit_set_long_const (dest, c);
9873 break;
9875 default:
9876 gcc_unreachable ();
9879 insn = get_last_insn ();
9880 set = single_set (insn);
9881 if (! CONSTANT_P (SET_SRC (set)))
9882 set_unique_reg_note (insn, REG_EQUAL, GEN_INT (c));
9884 return true;
9887 /* Subroutine of rs6000_emit_set_const, handling PowerPC64 DImode.
9888 Output insns to set DEST equal to the constant C as a series of
9889 lis, ori and shl instructions. */
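/* A worked example of the general (final else) case below, for
   c = 0x123456789abcdef0, i.e. ud4 = 0x1234, ud3 = 0x5678, ud2 = 0x9abc,
   ud1 = 0xdef0 (rD is illustrative):

     lis  rD, 0x1234        # load ud4, sign-adjusted
     ori  rD, rD, 0x5678    # OR in ud3
     sldi rD, rD, 32        # shift into the high doubleword
     oris rD, rD, 0x9abc    # OR in ud2 << 16
     ori  rD, rD, 0xdef0    # OR in ud1

   The earlier arms peel off constants that need fewer insns.  */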
9891 static void
9892 rs6000_emit_set_long_const (rtx dest, HOST_WIDE_INT c)
9894 rtx temp;
9895 HOST_WIDE_INT ud1, ud2, ud3, ud4;
9897 ud1 = c & 0xffff;
9898 c = c >> 16;
9899 ud2 = c & 0xffff;
9900 c = c >> 16;
9901 ud3 = c & 0xffff;
9902 c = c >> 16;
9903 ud4 = c & 0xffff;
9905 if ((ud4 == 0xffff && ud3 == 0xffff && ud2 == 0xffff && (ud1 & 0x8000))
9906 || (ud4 == 0 && ud3 == 0 && ud2 == 0 && ! (ud1 & 0x8000)))
9907 emit_move_insn (dest, GEN_INT ((ud1 ^ 0x8000) - 0x8000));
9909 else if ((ud4 == 0xffff && ud3 == 0xffff && (ud2 & 0x8000))
9910 || (ud4 == 0 && ud3 == 0 && ! (ud2 & 0x8000)))
9912 temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode);
9914 emit_move_insn (ud1 != 0 ? copy_rtx (temp) : dest,
9915 GEN_INT (((ud2 << 16) ^ 0x80000000) - 0x80000000));
9916 if (ud1 != 0)
9917 emit_move_insn (dest,
9918 gen_rtx_IOR (DImode, copy_rtx (temp),
9919 GEN_INT (ud1)));
9921 else if (ud3 == 0 && ud4 == 0)
9923 temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode);
9925 gcc_assert (ud2 & 0x8000);
9926 emit_move_insn (copy_rtx (temp),
9927 GEN_INT (((ud2 << 16) ^ 0x80000000) - 0x80000000));
9928 if (ud1 != 0)
9929 emit_move_insn (copy_rtx (temp),
9930 gen_rtx_IOR (DImode, copy_rtx (temp),
9931 GEN_INT (ud1)));
9932 emit_move_insn (dest,
9933 gen_rtx_ZERO_EXTEND (DImode,
9934 gen_lowpart (SImode,
9935 copy_rtx (temp))));
9937 else if ((ud4 == 0xffff && (ud3 & 0x8000))
9938 || (ud4 == 0 && ! (ud3 & 0x8000)))
9940 temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode);
9942 emit_move_insn (copy_rtx (temp),
9943 GEN_INT (((ud3 << 16) ^ 0x80000000) - 0x80000000));
9944 if (ud2 != 0)
9945 emit_move_insn (copy_rtx (temp),
9946 gen_rtx_IOR (DImode, copy_rtx (temp),
9947 GEN_INT (ud2)));
9948 emit_move_insn (ud1 != 0 ? copy_rtx (temp) : dest,
9949 gen_rtx_ASHIFT (DImode, copy_rtx (temp),
9950 GEN_INT (16)));
9951 if (ud1 != 0)
9952 emit_move_insn (dest,
9953 gen_rtx_IOR (DImode, copy_rtx (temp),
9954 GEN_INT (ud1)));
9956 else
9958 temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode);
9960 emit_move_insn (copy_rtx (temp),
9961 GEN_INT (((ud4 << 16) ^ 0x80000000) - 0x80000000));
9962 if (ud3 != 0)
9963 emit_move_insn (copy_rtx (temp),
9964 gen_rtx_IOR (DImode, copy_rtx (temp),
9965 GEN_INT (ud3)));
9967 emit_move_insn (ud2 != 0 || ud1 != 0 ? copy_rtx (temp) : dest,
9968 gen_rtx_ASHIFT (DImode, copy_rtx (temp),
9969 GEN_INT (32)));
9970 if (ud2 != 0)
9971 emit_move_insn (ud1 != 0 ? copy_rtx (temp) : dest,
9972 gen_rtx_IOR (DImode, copy_rtx (temp),
9973 GEN_INT (ud2 << 16)));
9974 if (ud1 != 0)
9975 emit_move_insn (dest,
9976 gen_rtx_IOR (DImode, copy_rtx (temp),
9977 GEN_INT (ud1)));
9981 /* Helper for the following. Get rid of [r+r] memory refs
9982 in cases where it won't work (TImode, TFmode, TDmode, PTImode). */
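/* For illustration: given an operand such as
   (mem:TI (plus:DI (reg:DI 3) (reg:DI 4))), the helper below copies the
   address into a fresh pseudo (register number hypothetical) and
   rewrites the operand as (mem:TI (reg:DI 125)), since moves of these
   modes are split into word-sized pieces and cannot use [r+r].  */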
9984 static void
9985 rs6000_eliminate_indexed_memrefs (rtx operands[2])
9987 if (GET_CODE (operands[0]) == MEM
9988 && GET_CODE (XEXP (operands[0], 0)) != REG
9989 && ! legitimate_constant_pool_address_p (XEXP (operands[0], 0),
9990 GET_MODE (operands[0]), false))
9991 operands[0]
9992 = replace_equiv_address (operands[0],
9993 copy_addr_to_reg (XEXP (operands[0], 0)));
9995 if (GET_CODE (operands[1]) == MEM
9996 && GET_CODE (XEXP (operands[1], 0)) != REG
9997 && ! legitimate_constant_pool_address_p (XEXP (operands[1], 0),
9998 GET_MODE (operands[1]), false))
9999 operands[1]
10000 = replace_equiv_address (operands[1],
10001 copy_addr_to_reg (XEXP (operands[1], 0)));
10004 /* Generate a vector of constants to permute MODE for a little-endian
10005 storage operation by swapping the two halves of a vector. */
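/* For example, for V4SImode the selector built below is { 2, 3, 0, 1 },
   i.e. the two 64-bit halves swapped, matching the element order that
   lxvd2x/stxvd2x produce on a little-endian target.  */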
10006 static rtvec
10007 rs6000_const_vec (machine_mode mode)
10009 int i, subparts;
10010 rtvec v;
10012 switch (mode)
10014 case E_V1TImode:
10015 subparts = 1;
10016 break;
10017 case E_V2DFmode:
10018 case E_V2DImode:
10019 subparts = 2;
10020 break;
10021 case E_V4SFmode:
10022 case E_V4SImode:
10023 subparts = 4;
10024 break;
10025 case E_V8HImode:
10026 subparts = 8;
10027 break;
10028 case E_V16QImode:
10029 subparts = 16;
10030 break;
10031 default:
10032 gcc_unreachable ();
10035 v = rtvec_alloc (subparts);
10037 for (i = 0; i < subparts / 2; ++i)
10038 RTVEC_ELT (v, i) = gen_rtx_CONST_INT (DImode, i + subparts / 2);
10039 for (i = subparts / 2; i < subparts; ++i)
10040 RTVEC_ELT (v, i) = gen_rtx_CONST_INT (DImode, i - subparts / 2);
10042 return v;
10045 /* Emit an lxvd2x, stxvd2x, or xxpermdi instruction for a VSX load or
10046 store operation. */
10047 void
10048 rs6000_emit_le_vsx_permute (rtx dest, rtx source, machine_mode mode)
10050 /* Scalar permutations are easier to express in integer modes than in
10051 floating-point modes, so cast them here. We use V1TImode instead
10052 of TImode to ensure that the values don't go through GPRs. */
10053 if (FLOAT128_VECTOR_P (mode))
10055 dest = gen_lowpart (V1TImode, dest);
10056 source = gen_lowpart (V1TImode, source);
10057 mode = V1TImode;
10060 /* Use ROTATE instead of VEC_SELECT if the mode contains only a single
10061 scalar. */
10062 if (mode == TImode || mode == V1TImode)
10063 emit_insn (gen_rtx_SET (dest, gen_rtx_ROTATE (mode, source,
10064 GEN_INT (64))));
10065 else
10067 rtx par = gen_rtx_PARALLEL (VOIDmode, rs6000_const_vec (mode));
10068 emit_insn (gen_rtx_SET (dest, gen_rtx_VEC_SELECT (mode, source, par)));
10072 /* Emit a little-endian load from vector memory location SOURCE to VSX
10073 register DEST in mode MODE. The load is done with two permuting
10074 insns that represent an lxvd2x and an xxpermdi. */
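/* A sketch of the sequence emitted below for a V4SImode load (register
   names are illustrative):

     lxvd2x   vsX, rA, rB       # doubleword-swapped load
     xxpermdi vsX, vsX, vsX, 2  # swap the halves back

   Adjacent swap pairs produced this way can be optimized away after
   expand; see rs6000_emit_le_vsx_move.  */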
10075 void
10076 rs6000_emit_le_vsx_load (rtx dest, rtx source, machine_mode mode)
10078 /* Use V2DImode to do swaps of types with 128-bit scalar parts (TImode,
10079 V1TImode). */
10080 if (mode == TImode || mode == V1TImode)
10082 mode = V2DImode;
10083 dest = gen_lowpart (V2DImode, dest);
10084 source = adjust_address (source, V2DImode, 0);
10087 rtx tmp = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (dest) : dest;
10088 rs6000_emit_le_vsx_permute (tmp, source, mode);
10089 rs6000_emit_le_vsx_permute (dest, tmp, mode);
10092 /* Emit a little-endian store to vector memory location DEST from VSX
10093 register SOURCE in mode MODE. The store is done with two permuting
10094 insns that represent an xxpermdi and an stxvd2x. */
10095 void
10096 rs6000_emit_le_vsx_store (rtx dest, rtx source, machine_mode mode)
10098 /* This should never be called during or after LRA, because it does
10099 not re-permute the source register. It is intended only for use
10100 during expand. */
10101 gcc_assert (!lra_in_progress && !reload_completed);
10103 /* Use V2DImode to do swaps of types with 128-bit scalar parts (TImode,
10104 V1TImode). */
10105 if (mode == TImode || mode == V1TImode)
10107 mode = V2DImode;
10108 dest = adjust_address (dest, V2DImode, 0);
10109 source = gen_lowpart (V2DImode, source);
10112 rtx tmp = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (source) : source;
10113 rs6000_emit_le_vsx_permute (tmp, source, mode);
10114 rs6000_emit_le_vsx_permute (dest, tmp, mode);
10117 /* Emit a sequence representing a little-endian VSX load or store,
10118 moving data from SOURCE to DEST in mode MODE. This is done
10119 separately from rs6000_emit_move to ensure it is called only
10120 during expand. LE VSX loads and stores introduced later are
10121 handled with a split. The expand-time RTL generation allows
10122 us to optimize away redundant pairs of register-permutes. */
10123 void
10124 rs6000_emit_le_vsx_move (rtx dest, rtx source, machine_mode mode)
10126 gcc_assert (!BYTES_BIG_ENDIAN
10127 && VECTOR_MEM_VSX_P (mode)
10128 && !TARGET_P9_VECTOR
10129 && !gpr_or_gpr_p (dest, source)
10130 && (MEM_P (source) ^ MEM_P (dest)));
10132 if (MEM_P (source))
10134 gcc_assert (REG_P (dest) || GET_CODE (dest) == SUBREG);
10135 rs6000_emit_le_vsx_load (dest, source, mode);
10137 else
10139 if (!REG_P (source))
10140 source = force_reg (mode, source);
10141 rs6000_emit_le_vsx_store (dest, source, mode);
10145 /* Return whether an SFmode or SImode move can be done without converting one
10146 mode to another. This arises when we have:
10148 (SUBREG:SF (REG:SI ...))
10149 (SUBREG:SI (REG:SF ...))
10151 and one of the values is in a floating point/vector register, where SFmode
10152 scalars are stored in DFmode format. */
10154 bool
10155 valid_sf_si_move (rtx dest, rtx src, machine_mode mode)
10157 if (TARGET_ALLOW_SF_SUBREG)
10158 return true;
10160 if (mode != SFmode && GET_MODE_CLASS (mode) != MODE_INT)
10161 return true;
10163 if (!SUBREG_P (src) || !sf_subreg_operand (src, mode))
10164 return true;
10166 /* Allow (set (SUBREG:SI (REG:SF)) (SUBREG:SI (REG:SF))). */
10167 if (SUBREG_P (dest))
10169 rtx dest_subreg = SUBREG_REG (dest);
10170 rtx src_subreg = SUBREG_REG (src);
10171 return GET_MODE (dest_subreg) == GET_MODE (src_subreg);
10174 return false;
10178 /* Helper function to change moves with:
10180 (SUBREG:SF (REG:SI)) and
10181 (SUBREG:SI (REG:SF))
10183 into separate UNSPEC insns. In the PowerPC architecture, scalar SFmode
10184 values are stored as DFmode values in the VSX registers. We need to convert
10185 the bits before we can use a direct move or operate on the bits in the
10186 vector register as an integer type.
10188 Skip things like (set (SUBREG:SI (...)) (SUBREG:SI (...))). */
10190 static bool
10191 rs6000_emit_move_si_sf_subreg (rtx dest, rtx source, machine_mode mode)
10193 if (TARGET_DIRECT_MOVE_64BIT && !lra_in_progress && !reload_completed
10194 && (!SUBREG_P (dest) || !sf_subreg_operand (dest, mode))
10195 && SUBREG_P (source) && sf_subreg_operand (source, mode))
10197 rtx inner_source = SUBREG_REG (source);
10198 machine_mode inner_mode = GET_MODE (inner_source);
10200 if (mode == SImode && inner_mode == SFmode)
10202 emit_insn (gen_movsi_from_sf (dest, inner_source));
10203 return true;
10206 if (mode == SFmode && inner_mode == SImode)
10208 emit_insn (gen_movsf_from_si (dest, inner_source));
10209 return true;
10213 return false;
10216 /* Emit a move from SOURCE to DEST in mode MODE. */
10217 void
10218 rs6000_emit_move (rtx dest, rtx source, machine_mode mode)
10220 rtx operands[2];
10221 operands[0] = dest;
10222 operands[1] = source;
10224 if (TARGET_DEBUG_ADDR)
10226 fprintf (stderr,
10227 "\nrs6000_emit_move: mode = %s, lra_in_progress = %d, "
10228 "reload_completed = %d, can_create_pseudos = %d.\ndest:\n",
10229 GET_MODE_NAME (mode),
10230 lra_in_progress,
10231 reload_completed,
10232 can_create_pseudo_p ());
10233 debug_rtx (dest);
10234 fprintf (stderr, "source:\n");
10235 debug_rtx (source);
10238 /* Sanity checks. Check that we get CONST_DOUBLE only when we should. */
10239 if (CONST_WIDE_INT_P (operands[1])
10240 && GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT)
10242 /* This should be fixed with the introduction of CONST_WIDE_INT. */
10243 gcc_unreachable ();
10246 #ifdef HAVE_AS_GNU_ATTRIBUTE
10247 /* If we use a long double type, set the flags in .gnu_attribute that say
10248 what the long double type is. This is to allow the linker's warning
10249 message for the wrong long double to be useful, even if the function does
10250 not do a call (for example, doing a 128-bit add on power9 if the long
10251 double type is IEEE 128-bit). Do not set this if __ibm128 or __float128
10252 is used and it is not the default long double type. */
10253 if (rs6000_gnu_attr && (HAVE_LD_PPC_GNU_ATTR_LONG_DOUBLE || TARGET_64BIT))
10255 if (TARGET_LONG_DOUBLE_128 && (mode == TFmode || mode == TCmode))
10256 rs6000_passes_float = rs6000_passes_long_double = true;
10258 else if (!TARGET_LONG_DOUBLE_128 && (mode == DFmode || mode == DCmode))
10259 rs6000_passes_float = rs6000_passes_long_double = true;
10261 #endif
10263 /* See if we need to special case SImode/SFmode SUBREG moves. */
10264 if ((mode == SImode || mode == SFmode) && SUBREG_P (source)
10265 && rs6000_emit_move_si_sf_subreg (dest, source, mode))
10266 return;
10268 /* Check if GCC is setting up a block move that will end up using FP
10269 registers as temporaries. We must make sure this is acceptable. */
10270 if (GET_CODE (operands[0]) == MEM
10271 && GET_CODE (operands[1]) == MEM
10272 && mode == DImode
10273 && (rs6000_slow_unaligned_access (DImode, MEM_ALIGN (operands[0]))
10274 || rs6000_slow_unaligned_access (DImode, MEM_ALIGN (operands[1])))
10275 && ! (rs6000_slow_unaligned_access (SImode,
10276 (MEM_ALIGN (operands[0]) > 32
10277 ? 32 : MEM_ALIGN (operands[0])))
10278 || rs6000_slow_unaligned_access (SImode,
10279 (MEM_ALIGN (operands[1]) > 32
10280 ? 32 : MEM_ALIGN (operands[1]))))
10281 && ! MEM_VOLATILE_P (operands [0])
10282 && ! MEM_VOLATILE_P (operands [1]))
10284 emit_move_insn (adjust_address (operands[0], SImode, 0),
10285 adjust_address (operands[1], SImode, 0));
10286 emit_move_insn (adjust_address (copy_rtx (operands[0]), SImode, 4),
10287 adjust_address (copy_rtx (operands[1]), SImode, 4));
10288 return;
10291 if (can_create_pseudo_p () && GET_CODE (operands[0]) == MEM
10292 && !gpc_reg_operand (operands[1], mode))
10293 operands[1] = force_reg (mode, operands[1]);
10295 /* Recognize the case where operand[1] is a reference to thread-local
10296 data and load its address to a register. */
10297 if (tls_referenced_p (operands[1]))
10299 enum tls_model model;
10300 rtx tmp = operands[1];
10301 rtx addend = NULL;
10303 if (GET_CODE (tmp) == CONST && GET_CODE (XEXP (tmp, 0)) == PLUS)
10305 addend = XEXP (XEXP (tmp, 0), 1);
10306 tmp = XEXP (XEXP (tmp, 0), 0);
10309 gcc_assert (GET_CODE (tmp) == SYMBOL_REF);
10310 model = SYMBOL_REF_TLS_MODEL (tmp);
10311 gcc_assert (model != 0);
10313 tmp = rs6000_legitimize_tls_address (tmp, model);
10314 if (addend)
10316 tmp = gen_rtx_PLUS (mode, tmp, addend);
10317 tmp = force_operand (tmp, operands[0]);
10319 operands[1] = tmp;
10322 /* 128-bit constant floating-point values on Darwin should really be loaded
10323 as two parts. However, this premature splitting is a problem when DFmode
10324 values can go into Altivec registers. */
10325 if (FLOAT128_IBM_P (mode) && !reg_addr[DFmode].scalar_in_vmx_p
10326 && GET_CODE (operands[1]) == CONST_DOUBLE)
10328 rs6000_emit_move (simplify_gen_subreg (DFmode, operands[0], mode, 0),
10329 simplify_gen_subreg (DFmode, operands[1], mode, 0),
10330 DFmode);
10331 rs6000_emit_move (simplify_gen_subreg (DFmode, operands[0], mode,
10332 GET_MODE_SIZE (DFmode)),
10333 simplify_gen_subreg (DFmode, operands[1], mode,
10334 GET_MODE_SIZE (DFmode)),
10335 DFmode);
10336 return;
10339 /* Transform (p0:DD, (SUBREG:DD p1:SD)) to ((SUBREG:SD p0:DD),
10340 p1:SD) if p1 is not of floating-point class and p0 is spilled, since
10341 there is no analogous movsd_store for this case. */
10342 if (lra_in_progress && mode == DDmode
10343 && REG_P (operands[0]) && REGNO (operands[0]) >= FIRST_PSEUDO_REGISTER
10344 && reg_preferred_class (REGNO (operands[0])) == NO_REGS
10345 && GET_CODE (operands[1]) == SUBREG && REG_P (SUBREG_REG (operands[1]))
10346 && GET_MODE (SUBREG_REG (operands[1])) == SDmode)
10348 enum reg_class cl;
10349 int regno = REGNO (SUBREG_REG (operands[1]));
10351 if (regno >= FIRST_PSEUDO_REGISTER)
10353 cl = reg_preferred_class (regno);
10354 regno = reg_renumber[regno];
10355 if (regno < 0)
10356 regno = cl == NO_REGS ? -1 : ira_class_hard_regs[cl][1];
10358 if (regno >= 0 && ! FP_REGNO_P (regno))
10360 mode = SDmode;
10361 operands[0] = gen_lowpart_SUBREG (SDmode, operands[0]);
10362 operands[1] = SUBREG_REG (operands[1]);
10365 if (lra_in_progress
10366 && mode == SDmode
10367 && REG_P (operands[0]) && REGNO (operands[0]) >= FIRST_PSEUDO_REGISTER
10368 && reg_preferred_class (REGNO (operands[0])) == NO_REGS
10369 && (REG_P (operands[1])
10370 || (GET_CODE (operands[1]) == SUBREG
10371 && REG_P (SUBREG_REG (operands[1])))))
10373 int regno = REGNO (GET_CODE (operands[1]) == SUBREG
10374 ? SUBREG_REG (operands[1]) : operands[1]);
10375 enum reg_class cl;
10377 if (regno >= FIRST_PSEUDO_REGISTER)
10379 cl = reg_preferred_class (regno);
10380 gcc_assert (cl != NO_REGS);
10381 regno = reg_renumber[regno];
10382 if (regno < 0)
10383 regno = ira_class_hard_regs[cl][0];
10385 if (FP_REGNO_P (regno))
10387 if (GET_MODE (operands[0]) != DDmode)
10388 operands[0] = gen_rtx_SUBREG (DDmode, operands[0], 0);
10389 emit_insn (gen_movsd_store (operands[0], operands[1]));
10391 else if (INT_REGNO_P (regno))
10392 emit_insn (gen_movsd_hardfloat (operands[0], operands[1]));
10393 else
10394 gcc_unreachable ();
10395 return;
10397 /* Transform ((SUBREG:DD p0:SD), p1:DD) to (p0:SD, (SUBREG:SD
10398 p1:DD)) if p0 is not of floating-point class and p1 is spilled, since
10399 there is no analogous movsd_load for this case. */
10400 if (lra_in_progress && mode == DDmode
10401 && GET_CODE (operands[0]) == SUBREG && REG_P (SUBREG_REG (operands[0]))
10402 && GET_MODE (SUBREG_REG (operands[0])) == SDmode
10403 && REG_P (operands[1]) && REGNO (operands[1]) >= FIRST_PSEUDO_REGISTER
10404 && reg_preferred_class (REGNO (operands[1])) == NO_REGS)
10406 enum reg_class cl;
10407 int regno = REGNO (SUBREG_REG (operands[0]));
10409 if (regno >= FIRST_PSEUDO_REGISTER)
10411 cl = reg_preferred_class (regno);
10412 regno = reg_renumber[regno];
10413 if (regno < 0)
10414 regno = cl == NO_REGS ? -1 : ira_class_hard_regs[cl][0];
10416 if (regno >= 0 && ! FP_REGNO_P (regno))
10418 mode = SDmode;
10419 operands[0] = SUBREG_REG (operands[0]);
10420 operands[1] = gen_lowpart_SUBREG (SDmode, operands[1]);
10423 if (lra_in_progress
10424 && mode == SDmode
10425 && (REG_P (operands[0])
10426 || (GET_CODE (operands[0]) == SUBREG
10427 && REG_P (SUBREG_REG (operands[0]))))
10428 && REG_P (operands[1]) && REGNO (operands[1]) >= FIRST_PSEUDO_REGISTER
10429 && reg_preferred_class (REGNO (operands[1])) == NO_REGS)
10431 int regno = REGNO (GET_CODE (operands[0]) == SUBREG
10432 ? SUBREG_REG (operands[0]) : operands[0]);
10433 enum reg_class cl;
10435 if (regno >= FIRST_PSEUDO_REGISTER)
10437 cl = reg_preferred_class (regno);
10438 gcc_assert (cl != NO_REGS);
10439 regno = reg_renumber[regno];
10440 if (regno < 0)
10441 regno = ira_class_hard_regs[cl][0];
10443 if (FP_REGNO_P (regno))
10445 if (GET_MODE (operands[1]) != DDmode)
10446 operands[1] = gen_rtx_SUBREG (DDmode, operands[1], 0);
10447 emit_insn (gen_movsd_load (operands[0], operands[1]));
10449 else if (INT_REGNO_P (regno))
10450 emit_insn (gen_movsd_hardfloat (operands[0], operands[1]));
10451 else
10452 gcc_unreachable ();
10453 return;
10456 /* FIXME: In the long term, this switch statement should go away
10457 and be replaced by a sequence of tests based on things like
10458 mode == Pmode. */
10459 switch (mode)
10461 case E_HImode:
10462 case E_QImode:
10463 if (CONSTANT_P (operands[1])
10464 && GET_CODE (operands[1]) != CONST_INT)
10465 operands[1] = force_const_mem (mode, operands[1]);
10466 break;
10468 case E_TFmode:
10469 case E_TDmode:
10470 case E_IFmode:
10471 case E_KFmode:
10472 if (FLOAT128_2REG_P (mode))
10473 rs6000_eliminate_indexed_memrefs (operands);
10474 /* fall through */
10476 case E_DFmode:
10477 case E_DDmode:
10478 case E_SFmode:
10479 case E_SDmode:
10480 if (CONSTANT_P (operands[1])
10481 && ! easy_fp_constant (operands[1], mode))
10482 operands[1] = force_const_mem (mode, operands[1]);
10483 break;
10485 case E_V16QImode:
10486 case E_V8HImode:
10487 case E_V4SFmode:
10488 case E_V4SImode:
10489 case E_V2DFmode:
10490 case E_V2DImode:
10491 case E_V1TImode:
10492 if (CONSTANT_P (operands[1])
10493 && !easy_vector_constant (operands[1], mode))
10494 operands[1] = force_const_mem (mode, operands[1]);
10495 break;
10497 case E_SImode:
10498 case E_DImode:
10499 /* Use the default pattern for the address of ELF small data. */
10500 if (TARGET_ELF
10501 && mode == Pmode
10502 && DEFAULT_ABI == ABI_V4
10503 && (GET_CODE (operands[1]) == SYMBOL_REF
10504 || GET_CODE (operands[1]) == CONST)
10505 && small_data_operand (operands[1], mode))
10507 emit_insn (gen_rtx_SET (operands[0], operands[1]));
10508 return;
10511 if (DEFAULT_ABI == ABI_V4
10512 && mode == Pmode && mode == SImode
10513 && flag_pic == 1 && got_operand (operands[1], mode))
10515 emit_insn (gen_movsi_got (operands[0], operands[1]));
10516 return;
10519 if ((TARGET_ELF || DEFAULT_ABI == ABI_DARWIN)
10520 && TARGET_NO_TOC
10521 && ! flag_pic
10522 && mode == Pmode
10523 && CONSTANT_P (operands[1])
10524 && GET_CODE (operands[1]) != HIGH
10525 && GET_CODE (operands[1]) != CONST_INT)
10527 rtx target = (!can_create_pseudo_p ()
10528 ? operands[0]
10529 : gen_reg_rtx (mode));
10531 /* If this is a function address on -mcall-aixdesc,
10532 convert it to the address of the descriptor. */
10533 if (DEFAULT_ABI == ABI_AIX
10534 && GET_CODE (operands[1]) == SYMBOL_REF
10535 && XSTR (operands[1], 0)[0] == '.')
10537 const char *name = XSTR (operands[1], 0);
10538 rtx new_ref;
10539 while (*name == '.')
10540 name++;
10541 new_ref = gen_rtx_SYMBOL_REF (Pmode, name);
10542 CONSTANT_POOL_ADDRESS_P (new_ref)
10543 = CONSTANT_POOL_ADDRESS_P (operands[1]);
10544 SYMBOL_REF_FLAGS (new_ref) = SYMBOL_REF_FLAGS (operands[1]);
10545 SYMBOL_REF_USED (new_ref) = SYMBOL_REF_USED (operands[1]);
10546 SYMBOL_REF_DATA (new_ref) = SYMBOL_REF_DATA (operands[1]);
10547 operands[1] = new_ref;
10550 if (DEFAULT_ABI == ABI_DARWIN)
10552 #if TARGET_MACHO
10553 if (MACHO_DYNAMIC_NO_PIC_P)
10555 /* Take care of any required data indirection. */
10556 operands[1] = rs6000_machopic_legitimize_pic_address (
10557 operands[1], mode, operands[0]);
10558 if (operands[0] != operands[1])
10559 emit_insn (gen_rtx_SET (operands[0], operands[1]));
10560 return;
10562 #endif
10563 emit_insn (gen_macho_high (target, operands[1]));
10564 emit_insn (gen_macho_low (operands[0], target, operands[1]));
10565 return;
10568 emit_insn (gen_elf_high (target, operands[1]));
10569 emit_insn (gen_elf_low (operands[0], target, operands[1]));
10570 return;
10573 /* If this is a SYMBOL_REF that refers to a constant pool entry,
10574 and we have put it in the TOC, we just need to make a TOC-relative
10575 reference to it. */
10576 if (TARGET_TOC
10577 && GET_CODE (operands[1]) == SYMBOL_REF
10578 && use_toc_relative_ref (operands[1], mode))
10579 operands[1] = create_TOC_reference (operands[1], operands[0]);
10580 else if (mode == Pmode
10581 && CONSTANT_P (operands[1])
10582 && GET_CODE (operands[1]) != HIGH
10583 && ((GET_CODE (operands[1]) != CONST_INT
10584 && ! easy_fp_constant (operands[1], mode))
10585 || (GET_CODE (operands[1]) == CONST_INT
10586 && (num_insns_constant (operands[1], mode)
10587 > (TARGET_CMODEL != CMODEL_SMALL ? 3 : 2)))
10588 || (GET_CODE (operands[0]) == REG
10589 && FP_REGNO_P (REGNO (operands[0]))))
10590 && !toc_relative_expr_p (operands[1], false, NULL, NULL)
10591 && (TARGET_CMODEL == CMODEL_SMALL
10592 || can_create_pseudo_p ()
10593 || (REG_P (operands[0])
10594 && INT_REG_OK_FOR_BASE_P (operands[0], true))))
10597 #if TARGET_MACHO
10598 /* Darwin uses a special PIC legitimizer. */
10599 if (DEFAULT_ABI == ABI_DARWIN && MACHOPIC_INDIRECT)
10601 operands[1] =
10602 rs6000_machopic_legitimize_pic_address (operands[1], mode,
10603 operands[0]);
10604 if (operands[0] != operands[1])
10605 emit_insn (gen_rtx_SET (operands[0], operands[1]));
10606 return;
10608 #endif
10610 /* If we are to limit the number of things we put in the TOC and
10611 this is a symbol plus a constant we can add in one insn,
10612 just put the symbol in the TOC and add the constant. */
10613 if (GET_CODE (operands[1]) == CONST
10614 && TARGET_NO_SUM_IN_TOC
10615 && GET_CODE (XEXP (operands[1], 0)) == PLUS
10616 && add_operand (XEXP (XEXP (operands[1], 0), 1), mode)
10617 && (GET_CODE (XEXP (XEXP (operands[1], 0), 0)) == LABEL_REF
10618 || GET_CODE (XEXP (XEXP (operands[1], 0), 0)) == SYMBOL_REF)
10619 && ! side_effects_p (operands[0]))
10621 rtx sym =
10622 force_const_mem (mode, XEXP (XEXP (operands[1], 0), 0));
10623 rtx other = XEXP (XEXP (operands[1], 0), 1);
10625 sym = force_reg (mode, sym);
10626 emit_insn (gen_add3_insn (operands[0], sym, other));
10627 return;
10630 operands[1] = force_const_mem (mode, operands[1]);
10632 if (TARGET_TOC
10633 && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
10634 && use_toc_relative_ref (XEXP (operands[1], 0), mode))
10636 rtx tocref = create_TOC_reference (XEXP (operands[1], 0),
10637 operands[0]);
10638 operands[1] = gen_const_mem (mode, tocref);
10639 set_mem_alias_set (operands[1], get_TOC_alias_set ());
10642 break;
10644 case E_TImode:
10645 if (!VECTOR_MEM_VSX_P (TImode))
10646 rs6000_eliminate_indexed_memrefs (operands);
10647 break;
10649 case E_PTImode:
10650 rs6000_eliminate_indexed_memrefs (operands);
10651 break;
10653 default:
10654 fatal_insn ("bad move", gen_rtx_SET (dest, source));
10657 /* Above, we may have called force_const_mem which may have returned
10658 an invalid address. If we can, fix this up; otherwise, reload will
10659 have to deal with it. */
10660 if (GET_CODE (operands[1]) == MEM)
10661 operands[1] = validize_mem (operands[1]);
10663 emit_insn (gen_rtx_SET (operands[0], operands[1]));
10666 /* Nonzero if we can use a floating-point register to pass this arg. */
10667 #define USE_FP_FOR_ARG_P(CUM,MODE) \
10668 (SCALAR_FLOAT_MODE_NOT_VECTOR_P (MODE) \
10669 && (CUM)->fregno <= FP_ARG_MAX_REG \
10670 && TARGET_HARD_FLOAT)
10672 /* Nonzero if we can use an AltiVec register to pass this arg. */
10673 #define USE_ALTIVEC_FOR_ARG_P(CUM,MODE,NAMED) \
10674 (ALTIVEC_OR_VSX_VECTOR_MODE (MODE) \
10675 && (CUM)->vregno <= ALTIVEC_ARG_MAX_REG \
10676 && TARGET_ALTIVEC_ABI \
10677 && (NAMED))
10679 /* Walk down the type tree of TYPE counting consecutive base elements.
10680 If *MODEP is VOIDmode, then set it to the first valid floating point
10681 or vector type. If a non-floating point or vector type is found, or
10682 if a floating point or vector type that doesn't match a non-VOIDmode
10683 *MODEP is found, then return -1, otherwise return the count in the
10684 sub-tree. */
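/* Some illustrative (hypothetical) types and what the walk below
   returns for them:

     struct a { double x, y; };        -- 2, *MODEP == DFmode
     struct b { _Complex double z; };  -- 2, *MODEP == DFmode
     struct c { double x[3]; };        -- 3, *MODEP == DFmode
     struct d { double x; float y; };  -- -1, element modes differ
     struct e { double x; int y; };    -- -1, int is not FP or vector  */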
10686 static int
10687 rs6000_aggregate_candidate (const_tree type, machine_mode *modep)
10689 machine_mode mode;
10690 HOST_WIDE_INT size;
10692 switch (TREE_CODE (type))
10694 case REAL_TYPE:
10695 mode = TYPE_MODE (type);
10696 if (!SCALAR_FLOAT_MODE_P (mode))
10697 return -1;
10699 if (*modep == VOIDmode)
10700 *modep = mode;
10702 if (*modep == mode)
10703 return 1;
10705 break;
10707 case COMPLEX_TYPE:
10708 mode = TYPE_MODE (TREE_TYPE (type));
10709 if (!SCALAR_FLOAT_MODE_P (mode))
10710 return -1;
10712 if (*modep == VOIDmode)
10713 *modep = mode;
10715 if (*modep == mode)
10716 return 2;
10718 break;
10720 case VECTOR_TYPE:
10721 if (!TARGET_ALTIVEC_ABI || !TARGET_ALTIVEC)
10722 return -1;
10724 /* Use V4SImode as representative of all 128-bit vector types. */
10725 size = int_size_in_bytes (type);
10726 switch (size)
10728 case 16:
10729 mode = V4SImode;
10730 break;
10731 default:
10732 return -1;
10735 if (*modep == VOIDmode)
10736 *modep = mode;
10738 /* Vector modes are considered to be opaque: two vectors are
10739 equivalent for the purposes of being homogeneous aggregates
10740 if they are the same size. */
10741 if (*modep == mode)
10742 return 1;
10744 break;
10746 case ARRAY_TYPE:
10748 int count;
10749 tree index = TYPE_DOMAIN (type);
10751 /* Can't handle incomplete types nor sizes that are not
10752 fixed. */
10753 if (!COMPLETE_TYPE_P (type)
10754 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
10755 return -1;
10757 count = rs6000_aggregate_candidate (TREE_TYPE (type), modep);
10758 if (count == -1
10759 || !index
10760 || !TYPE_MAX_VALUE (index)
10761 || !tree_fits_uhwi_p (TYPE_MAX_VALUE (index))
10762 || !TYPE_MIN_VALUE (index)
10763 || !tree_fits_uhwi_p (TYPE_MIN_VALUE (index))
10764 || count < 0)
10765 return -1;
10767 count *= (1 + tree_to_uhwi (TYPE_MAX_VALUE (index))
10768 - tree_to_uhwi (TYPE_MIN_VALUE (index)));
10770 /* There must be no padding. */
10771 if (wi::to_wide (TYPE_SIZE (type))
10772 != count * GET_MODE_BITSIZE (*modep))
10773 return -1;
10775 return count;
10778 case RECORD_TYPE:
10780 int count = 0;
10781 int sub_count;
10782 tree field;
10784 /* Can't handle incomplete types nor sizes that are not
10785 fixed. */
10786 if (!COMPLETE_TYPE_P (type)
10787 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
10788 return -1;
10790 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
10792 if (TREE_CODE (field) != FIELD_DECL)
10793 continue;
10795 sub_count = rs6000_aggregate_candidate (TREE_TYPE (field), modep);
10796 if (sub_count < 0)
10797 return -1;
10798 count += sub_count;
10801 /* There must be no padding. */
10802 if (wi::to_wide (TYPE_SIZE (type))
10803 != count * GET_MODE_BITSIZE (*modep))
10804 return -1;
10806 return count;
10809 case UNION_TYPE:
10810 case QUAL_UNION_TYPE:
10812 /* These aren't very interesting except in a degenerate case. */
10813 int count = 0;
10814 int sub_count;
10815 tree field;
10817 /* Can't handle incomplete types nor sizes that are not
10818 fixed. */
10819 if (!COMPLETE_TYPE_P (type)
10820 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
10821 return -1;
10823 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
10825 if (TREE_CODE (field) != FIELD_DECL)
10826 continue;
10828 sub_count = rs6000_aggregate_candidate (TREE_TYPE (field), modep);
10829 if (sub_count < 0)
10830 return -1;
10831 count = count > sub_count ? count : sub_count;
10834 /* There must be no padding. */
10835 if (wi::to_wide (TYPE_SIZE (type))
10836 != count * GET_MODE_BITSIZE (*modep))
10837 return -1;
10839 return count;
10842 default:
10843 break;
10846 return -1;
10849 /* If an argument, whose type is described by TYPE and MODE, is a homogeneous
10850 float or vector aggregate that shall be passed in FP/vector registers
10851 according to the ELFv2 ABI, return the homogeneous element mode in
10852 *ELT_MODE and the number of elements in *N_ELTS, and return TRUE.
10854 Otherwise, set *ELT_MODE to MODE and *N_ELTS to 1, and return FALSE. */
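/* Illustrative (hypothetical) examples, assuming ELFv2 with hard float
   and the ABI limit AGGR_ARG_NUM_REG of 8 registers:

     struct v { vector int a, b; };  -- true, elt_mode V4SImode, n_elts 2
     struct f { double d[8]; };      -- true, elt_mode DFmode, n_elts 8
     struct g { double d[9]; };      -- false, would need 9 > 8 registers  */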
10856 static bool
10857 rs6000_discover_homogeneous_aggregate (machine_mode mode, const_tree type,
10858 machine_mode *elt_mode,
10859 int *n_elts)
10861 /* Note that we do not accept complex types at the top level as
10862 homogeneous aggregates; these types are handled via the
10863 targetm.calls.split_complex_arg mechanism. Complex types
10864 can be elements of homogeneous aggregates, however. */
10865 if (TARGET_HARD_FLOAT && DEFAULT_ABI == ABI_ELFv2 && type
10866 && AGGREGATE_TYPE_P (type))
10868 machine_mode field_mode = VOIDmode;
10869 int field_count = rs6000_aggregate_candidate (type, &field_mode);
10871 if (field_count > 0)
10873 int n_regs = (SCALAR_FLOAT_MODE_P (field_mode) ?
10874 (GET_MODE_SIZE (field_mode) + 7) >> 3 : 1);
10876 /* The ELFv2 ABI allows homogeneous aggregates to occupy
10877 up to AGGR_ARG_NUM_REG registers. */
10878 if (field_count * n_regs <= AGGR_ARG_NUM_REG)
10880 if (elt_mode)
10881 *elt_mode = field_mode;
10882 if (n_elts)
10883 *n_elts = field_count;
10884 return true;
10889 if (elt_mode)
10890 *elt_mode = mode;
10891 if (n_elts)
10892 *n_elts = 1;
10893 return false;
10896 /* Return a nonzero value to say to return the function value in
10897 memory, just as large structures are always returned. TYPE will be
10898 the data type of the value, and FNTYPE will be the type of the
10899 function doing the returning, or @code{NULL} for libcalls.
10901 The AIX ABI for the RS/6000 specifies that all structures are
10902 returned in memory. The Darwin ABI does the same.
10904 For the Darwin 64 Bit ABI, a function result can be returned in
10905 registers or in memory, depending on the size of the return data
10906 type. If it is returned in registers, the value occupies the same
10907 registers as it would if it were the first and only function
10908 argument. Otherwise, the function places its result in memory at
10909 the location pointed to by GPR3.
10911 The SVR4 ABI specifies that structures <= 8 bytes are returned in r3/r4,
10912 but a draft put them in memory, and GCC used to implement the draft
10913 instead of the final standard. Therefore, aix_struct_return
10914 controls this instead of DEFAULT_ABI; V.4 targets needing backward
10915 compatibility can change DRAFT_V4_STRUCT_RET to override the
10916 default, and -m switches get the final word. See
10917 rs6000_option_override_internal for more details.
10919 The PPC32 SVR4 ABI uses IEEE double extended for long double, if 128-bit
10920 long double support is enabled. These values are returned in memory.
10922 int_size_in_bytes returns -1 for variable size objects, which go in
10923 memory always. The cast to unsigned makes -1 > 8. */
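/* Illustrative (hypothetical) cases for the rules below:

     struct s4  { int i; };       -- AIX/ELFv1: memory (aix_struct_return);
                                     ELFv2: registers, being <= 16 bytes
     struct s24 { int i[6]; };    -- memory everywhere: over 16 bytes and
                                     not a homogeneous FP/vector aggregate
     struct hd  { double d[2]; }; -- ELFv2: FP registers (homogeneous)  */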
10925 static bool
10926 rs6000_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
10928 /* For the Darwin64 ABI, test if we can fit the return value in regs. */
10929 if (TARGET_MACHO
10930 && rs6000_darwin64_abi
10931 && TREE_CODE (type) == RECORD_TYPE
10932 && int_size_in_bytes (type) > 0)
10934 CUMULATIVE_ARGS valcum;
10935 rtx valret;
10937 valcum.words = 0;
10938 valcum.fregno = FP_ARG_MIN_REG;
10939 valcum.vregno = ALTIVEC_ARG_MIN_REG;
10940 /* Do a trial code generation as if this were going to be passed
10941 as an argument; if any part goes in memory, we return NULL. */
10942 valret = rs6000_darwin64_record_arg (&valcum, type, true, true);
10943 if (valret)
10944 return false;
10945 /* Otherwise fall through to more conventional ABI rules. */
10948 /* The ELFv2 ABI returns homogeneous FP/vector aggregates in registers. */
10949 if (rs6000_discover_homogeneous_aggregate (TYPE_MODE (type), type,
10950 NULL, NULL))
10951 return false;
10953 /* The ELFv2 ABI returns aggregates up to 16 bytes in registers. */
10954 if (DEFAULT_ABI == ABI_ELFv2 && AGGREGATE_TYPE_P (type)
10955 && (unsigned HOST_WIDE_INT) int_size_in_bytes (type) <= 16)
10956 return false;
10958 if (AGGREGATE_TYPE_P (type)
10959 && (aix_struct_return
10960 || (unsigned HOST_WIDE_INT) int_size_in_bytes (type) > 8))
10961 return true;
10963 /* Allow -maltivec -mabi=no-altivec without warning. Altivec vector
10964 modes only exist for GCC vector types if -maltivec. */
10965 if (TARGET_32BIT && !TARGET_ALTIVEC_ABI
10966 && ALTIVEC_VECTOR_MODE (TYPE_MODE (type)))
10967 return false;
10969 /* Return synthetic vectors in memory. */
10970 if (TREE_CODE (type) == VECTOR_TYPE
10971 && int_size_in_bytes (type) > (TARGET_ALTIVEC_ABI ? 16 : 8))
10973 static bool warned_for_return_big_vectors = false;
10974 if (!warned_for_return_big_vectors)
10976 warning (OPT_Wpsabi, "GCC vector returned by reference: "
10977 "non-standard ABI extension with no compatibility "
10978 "guarantee");
10979 warned_for_return_big_vectors = true;
10981 return true;
10984 if (DEFAULT_ABI == ABI_V4 && TARGET_IEEEQUAD
10985 && FLOAT128_IEEE_P (TYPE_MODE (type)))
10986 return true;
10988 return false;
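/* To illustrate the rules above with hypothetical types: under the
   ELFv2 ABI a 16-byte aggregate such as

       struct pair { long a, b; };

   would be returned in registers (the <= 16 byte case), while a
   24-byte struct that is not a homogeneous FP/vector aggregate would
   be returned in memory through a buffer supplied by the caller.  */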
10991 /* Specify whether values returned in registers should be at the most
10992 significant end of a register. We want aggregates returned by
10993 value to match the way aggregates are passed to functions. */
10995 static bool
10996 rs6000_return_in_msb (const_tree valtype)
10998 return (DEFAULT_ABI == ABI_ELFv2
10999 && BYTES_BIG_ENDIAN
11000 && AGGREGATE_TYPE_P (valtype)
11001 && (rs6000_function_arg_padding (TYPE_MODE (valtype), valtype)
11002 == PAD_UPWARD));
11005 #ifdef HAVE_AS_GNU_ATTRIBUTE
11006 /* Return TRUE if a call to function FNDECL may be one that
11007 potentially affects the function calling ABI of the object file. */
11009 static bool
11010 call_ABI_of_interest (tree fndecl)
11012 if (rs6000_gnu_attr && symtab->state == EXPANSION)
11014 struct cgraph_node *c_node;
11016 /* Libcalls are always interesting. */
11017 if (fndecl == NULL_TREE)
11018 return true;
11020 /* Any call to an external function is interesting. */
11021 if (DECL_EXTERNAL (fndecl))
11022 return true;
11024 /* Interesting functions that we are emitting in this object file. */
11025 c_node = cgraph_node::get (fndecl);
11026 c_node = c_node->ultimate_alias_target ();
11027 return !c_node->only_called_directly_p ();
11029 return false;
11031 #endif
11033 /* Initialize a variable CUM of type CUMULATIVE_ARGS
11034 for a call to a function whose data type is FNTYPE.
11035 For a library call, FNTYPE is 0 and RETURN_MODE the return value mode.
11037 For incoming args we set the number of arguments in the prototype large
11038 so we never return a PARALLEL. */
11040 void
11041 init_cumulative_args (CUMULATIVE_ARGS *cum, tree fntype,
11042 rtx libname ATTRIBUTE_UNUSED, int incoming,
11043 int libcall, int n_named_args,
11044 tree fndecl ATTRIBUTE_UNUSED,
11045 machine_mode return_mode ATTRIBUTE_UNUSED)
11047 static CUMULATIVE_ARGS zero_cumulative;
11049 *cum = zero_cumulative;
11050 cum->words = 0;
11051 cum->fregno = FP_ARG_MIN_REG;
11052 cum->vregno = ALTIVEC_ARG_MIN_REG;
11053 cum->prototype = (fntype && prototype_p (fntype));
11054 cum->call_cookie = ((DEFAULT_ABI == ABI_V4 && libcall)
11055 ? CALL_LIBCALL : CALL_NORMAL);
11056 cum->sysv_gregno = GP_ARG_MIN_REG;
11057 cum->stdarg = stdarg_p (fntype);
11058 cum->libcall = libcall;
11060 cum->nargs_prototype = 0;
11061 if (incoming || cum->prototype)
11062 cum->nargs_prototype = n_named_args;
11064 /* Check for a longcall attribute. */
11065 if ((!fntype && rs6000_default_long_calls)
11066 || (fntype
11067 && lookup_attribute ("longcall", TYPE_ATTRIBUTES (fntype))
11068 && !lookup_attribute ("shortcall", TYPE_ATTRIBUTES (fntype))))
11069 cum->call_cookie |= CALL_LONG;
11071 if (TARGET_DEBUG_ARG)
11073 fprintf (stderr, "\ninit_cumulative_args:");
11074 if (fntype)
11076 tree ret_type = TREE_TYPE (fntype);
11077 fprintf (stderr, " ret code = %s,",
11078 get_tree_code_name (TREE_CODE (ret_type)));
11081 if (cum->call_cookie & CALL_LONG)
11082 fprintf (stderr, " longcall,");
11084 fprintf (stderr, " proto = %d, nargs = %d\n",
11085 cum->prototype, cum->nargs_prototype);
11088 #ifdef HAVE_AS_GNU_ATTRIBUTE
11089 if (TARGET_ELF && (TARGET_64BIT || DEFAULT_ABI == ABI_V4))
11091 cum->escapes = call_ABI_of_interest (fndecl);
11092 if (cum->escapes)
11094 tree return_type;
11096 if (fntype)
11098 return_type = TREE_TYPE (fntype);
11099 return_mode = TYPE_MODE (return_type);
11101 else
11102 return_type = lang_hooks.types.type_for_mode (return_mode, 0);
11104 if (return_type != NULL)
11106 if (TREE_CODE (return_type) == RECORD_TYPE
11107 && TYPE_TRANSPARENT_AGGR (return_type))
11109 return_type = TREE_TYPE (first_field (return_type));
11110 return_mode = TYPE_MODE (return_type);
11112 if (AGGREGATE_TYPE_P (return_type)
11113 && ((unsigned HOST_WIDE_INT) int_size_in_bytes (return_type)
11114 <= 8))
11115 rs6000_returns_struct = true;
11117 if (SCALAR_FLOAT_MODE_P (return_mode))
11119 rs6000_passes_float = true;
11120 if ((HAVE_LD_PPC_GNU_ATTR_LONG_DOUBLE || TARGET_64BIT)
11121 && (FLOAT128_IBM_P (return_mode)
11122 || FLOAT128_IEEE_P (return_mode)
11123 || (return_type != NULL
11124 && (TYPE_MAIN_VARIANT (return_type)
11125 == long_double_type_node))))
11126 rs6000_passes_long_double = true;
11128 /* Note if we pass or return an IEEE 128-bit type. We changed
11129 the mangling for these types, and we may need to make an alias
11130 with the old mangling. */
11131 if (FLOAT128_IEEE_P (return_mode))
11132 rs6000_passes_ieee128 = true;
11134 if (ALTIVEC_OR_VSX_VECTOR_MODE (return_mode))
11135 rs6000_passes_vector = true;
11138 #endif
11140 if (fntype
11141 && !TARGET_ALTIVEC
11142 && TARGET_ALTIVEC_ABI
11143 && ALTIVEC_VECTOR_MODE (TYPE_MODE (TREE_TYPE (fntype))))
11145 error ("cannot return value in vector register because"
11146 " altivec instructions are disabled, use %qs"
11147 " to enable them", "-maltivec");
11151 /* The mode the ABI uses for a word. This is not the same as word_mode
11152 for -m32 -mpowerpc64. This is used to implement various target hooks. */
11154 static scalar_int_mode
11155 rs6000_abi_word_mode (void)
11157 return TARGET_32BIT ? SImode : DImode;
11160 /* Implement the TARGET_OFFLOAD_OPTIONS hook. */
11161 static char *
11162 rs6000_offload_options (void)
11164 if (TARGET_64BIT)
11165 return xstrdup ("-foffload-abi=lp64");
11166 else
11167 return xstrdup ("-foffload-abi=ilp32");
11170 /* On rs6000, function arguments are promoted, as are function return
11171 values. */
11173 static machine_mode
11174 rs6000_promote_function_mode (const_tree type ATTRIBUTE_UNUSED,
11175 machine_mode mode,
11176 int *punsignedp ATTRIBUTE_UNUSED,
11177 const_tree, int)
11179 PROMOTE_MODE (mode, *punsignedp, type);
11181 return mode;
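/* E.g. on a 64-bit target a sub-word integer parameter or return
   value such as

       short f (short x);

   is expected to travel widened to a full 64-bit register, with the
   extension following the signedness of the type.  */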
11184 /* Return true if TYPE must be passed on the stack and not in registers. */
11186 static bool
11187 rs6000_must_pass_in_stack (machine_mode mode, const_tree type)
11189 if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2 || TARGET_64BIT)
11190 return must_pass_in_stack_var_size (mode, type);
11191 else
11192 return must_pass_in_stack_var_size_or_pad (mode, type);
11195 static inline bool
11196 is_complex_IBM_long_double (machine_mode mode)
11198 return mode == ICmode || (mode == TCmode && FLOAT128_IBM_P (TCmode));
11201 /* Whether ABI_V4 passes MODE args to a function in floating point
11202 registers. */
11204 static bool
11205 abi_v4_pass_in_fpr (machine_mode mode, bool named)
11207 if (!TARGET_HARD_FLOAT)
11208 return false;
11209 if (mode == DFmode)
11210 return true;
11211 if (mode == SFmode && named)
11212 return true;
11213 /* ABI_V4 passes complex IBM long double in 8 gprs.
11214 Stupid, but we can't change the ABI now. */
11215 if (is_complex_IBM_long_double (mode))
11216 return false;
11217 if (FLOAT128_2REG_P (mode))
11218 return true;
11219 if (DECIMAL_FLOAT_MODE_P (mode))
11220 return true;
11221 return false;
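/* Summarizing the cases above with a hypothetical prototype: in

       int f (int n, double d, float s);

   both D and S would be passed in FPRs (S only because it is a named
   argument; an anonymous float in a varargs call would not be), while
   complex IBM long double always falls through to the GPRs.  */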
11224 /* Implement TARGET_FUNCTION_ARG_PADDING.
11226 For the AIX ABI structs are always stored left shifted in their
11227 argument slot. */
11229 static pad_direction
11230 rs6000_function_arg_padding (machine_mode mode, const_tree type)
11232 #ifndef AGGREGATE_PADDING_FIXED
11233 #define AGGREGATE_PADDING_FIXED 0
11234 #endif
11235 #ifndef AGGREGATES_PAD_UPWARD_ALWAYS
11236 #define AGGREGATES_PAD_UPWARD_ALWAYS 0
11237 #endif
11239 if (!AGGREGATE_PADDING_FIXED)
11241 /* GCC used to pass structures of the same size as integer types as
11242 if they were in fact integers, ignoring TARGET_FUNCTION_ARG_PADDING.
11243 i.e., structures of size 1 or 2 (or 4 when TARGET_64BIT) were
11244 passed padded downward, except that -mstrict-align further
11245 muddied the water in that multi-component structures of 2 and 4
11246 bytes in size were passed padded upward.
11248 The following arranges for best compatibility with previous
11249 versions of gcc, but removes the -mstrict-align dependency. */
11250 if (BYTES_BIG_ENDIAN)
11252 HOST_WIDE_INT size = 0;
11254 if (mode == BLKmode)
11256 if (type && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST)
11257 size = int_size_in_bytes (type);
11259 else
11260 size = GET_MODE_SIZE (mode);
11262 if (size == 1 || size == 2 || size == 4)
11263 return PAD_DOWNWARD;
11265 return PAD_UPWARD;
11268 if (AGGREGATES_PAD_UPWARD_ALWAYS)
11270 if (type != 0 && AGGREGATE_TYPE_P (type))
11271 return PAD_UPWARD;
11274 /* Fall back to the default. */
11275 return default_function_arg_padding (mode, type);
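/* Concretely, on a big-endian target a 2-byte struct like

       struct s2 { char a, b; };

   is padded downward, i.e. placed at the high-address end of its
   argument slot exactly where a 2-byte integer would sit, while e.g.
   a 3-byte struct is padded upward and starts at the low-address
   end of the slot.  */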
11278 /* If defined, a C expression that gives the alignment boundary, in bits,
11279 of an argument with the specified mode and type. If it is not defined,
11280 PARM_BOUNDARY is used for all arguments.
11282 V.4 wants long longs and doubles to be double word aligned. Just
11283 testing the mode size is a boneheaded way to do this as it means
11284 that other types such as complex int are also double word aligned.
11285 However, we're stuck with this because changing the ABI might break
11286 existing library interfaces.
11288 Quadword align Altivec/VSX vectors.
11289 Quadword align large synthetic vector types. */
11291 static unsigned int
11292 rs6000_function_arg_boundary (machine_mode mode, const_tree type)
11294 machine_mode elt_mode;
11295 int n_elts;
11297 rs6000_discover_homogeneous_aggregate (mode, type, &elt_mode, &n_elts);
11299 if (DEFAULT_ABI == ABI_V4
11300 && (GET_MODE_SIZE (mode) == 8
11301 || (TARGET_HARD_FLOAT
11302 && !is_complex_IBM_long_double (mode)
11303 && FLOAT128_2REG_P (mode))))
11304 return 64;
11305 else if (FLOAT128_VECTOR_P (mode))
11306 return 128;
11307 else if (type && TREE_CODE (type) == VECTOR_TYPE
11308 && int_size_in_bytes (type) >= 8
11309 && int_size_in_bytes (type) < 16)
11310 return 64;
11311 else if (ALTIVEC_OR_VSX_VECTOR_MODE (elt_mode)
11312 || (type && TREE_CODE (type) == VECTOR_TYPE
11313 && int_size_in_bytes (type) >= 16))
11314 return 128;
11316 /* Aggregate types that need > 8 byte alignment are quadword-aligned
11317 in the parameter area in the ELFv2 ABI, and in the AIX ABI unless
11318 -mcompat-align-parm is used. */
11319 if (((DEFAULT_ABI == ABI_AIX && !rs6000_compat_align_parm)
11320 || DEFAULT_ABI == ABI_ELFv2)
11321 && type && TYPE_ALIGN (type) > 64)
11323 /* "Aggregate" means any AGGREGATE_TYPE except for single-element
11324 or homogeneous float/vector aggregates here. We already handled
11325 vector aggregates above, but still need to check for float here. */
11326 bool aggregate_p = (AGGREGATE_TYPE_P (type)
11327 && !SCALAR_FLOAT_MODE_P (elt_mode));
11329 /* We used to check for BLKmode instead of the above aggregate type
11330 check. Warn when this results in any difference to the ABI. */
11331 if (aggregate_p != (mode == BLKmode))
11333 static bool warned;
11334 if (!warned && warn_psabi)
11336 warned = true;
11337 inform (input_location,
11338 "the ABI of passing aggregates with %d-byte alignment"
11339 " has changed in GCC 5",
11340 (int) TYPE_ALIGN (type) / BITS_PER_UNIT);
11344 if (aggregate_p)
11345 return 128;
11348 /* Similar for the Darwin64 ABI. Note that for historical reasons we
11349 implement the "aggregate type" check as a BLKmode check here; this
11350 means certain aggregate types are in fact not aligned. */
11351 if (TARGET_MACHO && rs6000_darwin64_abi
11352 && mode == BLKmode
11353 && type && TYPE_ALIGN (type) > 64)
11354 return 128;
11356 return PARM_BOUNDARY;
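/* A few boundary cases this computes: under ABI_V4 a double or long
   long gets 64-bit alignment; Altivec/VSX vectors get 128 bits; and
   under ELFv2 an over-aligned aggregate such as

       struct q { long x, y; } __attribute__ ((aligned (16)));

   (TYPE_ALIGN > 64, not a homogeneous float/vector aggregate) is
   placed on a quadword boundary in the parameter area.  */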
11359 /* The offset in words to the start of the parameter save area. */
11361 static unsigned int
11362 rs6000_parm_offset (void)
11364 return (DEFAULT_ABI == ABI_V4 ? 2
11365 : DEFAULT_ABI == ABI_ELFv2 ? 4
11366 : 6);
11369 /* For a function parm of MODE and TYPE, return the starting word in
11370 the parameter area. NWORDS of the parameter area are already used. */
11372 static unsigned int
11373 rs6000_parm_start (machine_mode mode, const_tree type,
11374 unsigned int nwords)
11376 unsigned int align;
11378 align = rs6000_function_arg_boundary (mode, type) / PARM_BOUNDARY - 1;
11379 return nwords + (-(rs6000_parm_offset () + nwords) & align);
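/* Worked example, assuming 64-bit ELFv2 (parameter save area offset
   of 4 words) with NWORDS = 1 already used and an argument whose
   boundary is 128 bits: align = 128 / 64 - 1 = 1, and the result is
   1 + (-(4 + 1) & 1) = 2, i.e. one doubleword of padding is skipped
   so that the argument starts on an even word of the save area.  */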
11382 /* Compute the size (in words) of a function argument. */
11384 static unsigned long
11385 rs6000_arg_size (machine_mode mode, const_tree type)
11387 unsigned long size;
11389 if (mode != BLKmode)
11390 size = GET_MODE_SIZE (mode);
11391 else
11392 size = int_size_in_bytes (type);
11394 if (TARGET_32BIT)
11395 return (size + 3) >> 2;
11396 else
11397 return (size + 7) >> 3;
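/* E.g. a 10-byte BLKmode struct occupies (10 + 7) >> 3 = 2
   doublewords on a 64-bit target, or (10 + 3) >> 2 = 3 words
   with -m32.  */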
11400 /* Use this to flush pending int fields. */
11402 static void
11403 rs6000_darwin64_record_arg_advance_flush (CUMULATIVE_ARGS *cum,
11404 HOST_WIDE_INT bitpos, int final)
11406 unsigned int startbit, endbit;
11407 int intregs, intoffset;
11409 /* Handle the situations where a float is taking up the first half
11410 of the GPR, and the other half is empty (typically due to
11411 alignment restrictions). We can detect this by an 8-byte-aligned
11412 int field, or by seeing that this is the final flush for this
11413 argument. Count the word and continue on. */
11414 if (cum->floats_in_gpr == 1
11415 && (cum->intoffset % 64 == 0
11416 || (cum->intoffset == -1 && final)))
11418 cum->words++;
11419 cum->floats_in_gpr = 0;
11422 if (cum->intoffset == -1)
11423 return;
11425 intoffset = cum->intoffset;
11426 cum->intoffset = -1;
11427 cum->floats_in_gpr = 0;
11429 if (intoffset % BITS_PER_WORD != 0)
11431 unsigned int bits = BITS_PER_WORD - intoffset % BITS_PER_WORD;
11432 if (!int_mode_for_size (bits, 0).exists ())
11434 /* We couldn't find an appropriate mode, which happens,
11435 e.g., in packed structs when there are 3 bytes to load.
11436 Move intoffset back to the beginning of the word in this
11437 case. */
11438 intoffset = ROUND_DOWN (intoffset, BITS_PER_WORD);
11442 startbit = ROUND_DOWN (intoffset, BITS_PER_WORD);
11443 endbit = ROUND_UP (bitpos, BITS_PER_WORD);
11444 intregs = (endbit - startbit) / BITS_PER_WORD;
11445 cum->words += intregs;
11446 /* words should be unsigned. */
11447 if ((unsigned)cum->words < (endbit/BITS_PER_WORD))
11449 int pad = (endbit/BITS_PER_WORD) - cum->words;
11450 cum->words += pad;
11454 /* The darwin64 ABI calls for us to recurse down through structs,
11455 looking for elements passed in registers. Unfortunately, we have
11456 to track int register count here also because of misalignments
11457 in powerpc alignment mode. */
11459 static void
11460 rs6000_darwin64_record_arg_advance_recurse (CUMULATIVE_ARGS *cum,
11461 const_tree type,
11462 HOST_WIDE_INT startbitpos)
11464 tree f;
11466 for (f = TYPE_FIELDS (type); f ; f = DECL_CHAIN (f))
11467 if (TREE_CODE (f) == FIELD_DECL)
11469 HOST_WIDE_INT bitpos = startbitpos;
11470 tree ftype = TREE_TYPE (f);
11471 machine_mode mode;
11472 if (ftype == error_mark_node)
11473 continue;
11474 mode = TYPE_MODE (ftype);
11476 if (DECL_SIZE (f) != 0
11477 && tree_fits_uhwi_p (bit_position (f)))
11478 bitpos += int_bit_position (f);
11480 /* ??? FIXME: else assume zero offset. */
11482 if (TREE_CODE (ftype) == RECORD_TYPE)
11483 rs6000_darwin64_record_arg_advance_recurse (cum, ftype, bitpos);
11484 else if (USE_FP_FOR_ARG_P (cum, mode))
11486 unsigned n_fpregs = (GET_MODE_SIZE (mode) + 7) >> 3;
11487 rs6000_darwin64_record_arg_advance_flush (cum, bitpos, 0);
11488 cum->fregno += n_fpregs;
11489 /* Single-precision floats present a special problem for
11490 us, because they are smaller than an 8-byte GPR, and so
11491 the structure-packing rules combined with the standard
11492 varargs behavior mean that we want to pack float/float
11493 and float/int combinations into a single register's
11494 space. This is complicated by the arg advance flushing,
11495 which works on arbitrarily large groups of int-type
11496 fields. */
11497 if (mode == SFmode)
11499 if (cum->floats_in_gpr == 1)
11501 /* Two floats in a word; count the word and reset
11502 the float count. */
11503 cum->words++;
11504 cum->floats_in_gpr = 0;
11506 else if (bitpos % 64 == 0)
11508 /* A float at the beginning of an 8-byte word;
11509 count it and put off adjusting cum->words until
11510 we see if an arg advance flush is going to do it
11511 for us. */
11512 cum->floats_in_gpr++;
11514 else
11516 /* The float is at the end of a word, preceded
11517 by integer fields, so the arg advance flush
11518 just above has already set cum->words and
11519 everything is taken care of. */
11522 else
11523 cum->words += n_fpregs;
11525 else if (USE_ALTIVEC_FOR_ARG_P (cum, mode, 1))
11527 rs6000_darwin64_record_arg_advance_flush (cum, bitpos, 0);
11528 cum->vregno++;
11529 cum->words += 2;
11531 else if (cum->intoffset == -1)
11532 cum->intoffset = bitpos;
11536 /* Check for an item that needs to be considered specially under the darwin 64
11537 bit ABI. These are record types where the mode is BLK or the structure is
11538 8 bytes in size. */
11539 static int
11540 rs6000_darwin64_struct_check_p (machine_mode mode, const_tree type)
11542 return rs6000_darwin64_abi
11543 && ((mode == BLKmode
11544 && TREE_CODE (type) == RECORD_TYPE
11545 && int_size_in_bytes (type) > 0)
11546 || (type && TREE_CODE (type) == RECORD_TYPE
11547 && int_size_in_bytes (type) == 8)) ? 1 : 0;
11550 /* Update the data in CUM to advance over an argument
11551 of mode MODE and data type TYPE.
11552 (TYPE is null for libcalls where that information may not be available.)
11554 Note that for args passed by reference, function_arg will be called
11555 with MODE and TYPE set to that of the pointer to the arg, not the arg
11556 itself. */
11558 static void
11559 rs6000_function_arg_advance_1 (CUMULATIVE_ARGS *cum, machine_mode mode,
11560 const_tree type, bool named, int depth)
11562 machine_mode elt_mode;
11563 int n_elts;
11565 rs6000_discover_homogeneous_aggregate (mode, type, &elt_mode, &n_elts);
11567 /* Only tick off an argument if we're not recursing. */
11568 if (depth == 0)
11569 cum->nargs_prototype--;
11571 #ifdef HAVE_AS_GNU_ATTRIBUTE
11572 if (TARGET_ELF && (TARGET_64BIT || DEFAULT_ABI == ABI_V4)
11573 && cum->escapes)
11575 if (SCALAR_FLOAT_MODE_P (mode))
11577 rs6000_passes_float = true;
11578 if ((HAVE_LD_PPC_GNU_ATTR_LONG_DOUBLE || TARGET_64BIT)
11579 && (FLOAT128_IBM_P (mode)
11580 || FLOAT128_IEEE_P (mode)
11581 || (type != NULL
11582 && TYPE_MAIN_VARIANT (type) == long_double_type_node)))
11583 rs6000_passes_long_double = true;
11585 /* Note if we pass or return an IEEE 128-bit type. We changed the
11586 mangling for these types, and we may need to make an alias with
11587 the old mangling. */
11588 if (FLOAT128_IEEE_P (mode))
11589 rs6000_passes_ieee128 = true;
11591 if (named && ALTIVEC_OR_VSX_VECTOR_MODE (mode))
11592 rs6000_passes_vector = true;
11594 #endif
11596 if (TARGET_ALTIVEC_ABI
11597 && (ALTIVEC_OR_VSX_VECTOR_MODE (elt_mode)
11598 || (type && TREE_CODE (type) == VECTOR_TYPE
11599 && int_size_in_bytes (type) == 16)))
11601 bool stack = false;
11603 if (USE_ALTIVEC_FOR_ARG_P (cum, elt_mode, named))
11605 cum->vregno += n_elts;
11607 if (!TARGET_ALTIVEC)
11608 error ("cannot pass argument in vector register because"
11609 " altivec instructions are disabled, use %qs"
11610 " to enable them", "-maltivec");
11612 /* PowerPC64 Linux and AIX allocate GPRs for a vector argument
11613 even if it is going to be passed in a vector register.
11614 Darwin does the same for variable-argument functions. */
11615 if (((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
11616 && TARGET_64BIT)
11617 || (cum->stdarg && DEFAULT_ABI != ABI_V4))
11618 stack = true;
11620 else
11621 stack = true;
11623 if (stack)
11625 int align;
11627 /* Vector parameters must be 16-byte aligned. In 32-bit
11628 mode this means we need to take into account the offset
11629 to the parameter save area. In 64-bit mode, they just
11630 have to start on an even word, since the parameter save
11631 area is 16-byte aligned. */
11632 if (TARGET_32BIT)
11633 align = -(rs6000_parm_offset () + cum->words) & 3;
11634 else
11635 align = cum->words & 1;
11636 cum->words += align + rs6000_arg_size (mode, type);
11638 if (TARGET_DEBUG_ARG)
11640 fprintf (stderr, "function_adv: words = %2d, align=%d, ",
11641 cum->words, align);
11642 fprintf (stderr, "nargs = %4d, proto = %d, mode = %4s\n",
11643 cum->nargs_prototype, cum->prototype,
11644 GET_MODE_NAME (mode));
11648 else if (TARGET_MACHO && rs6000_darwin64_struct_check_p (mode, type))
11650 int size = int_size_in_bytes (type);
11651 /* Variable sized types have size == -1 and are
11652 treated as if consisting entirely of ints.
11653 Pad to 16 byte boundary if needed. */
11654 if (TYPE_ALIGN (type) >= 2 * BITS_PER_WORD
11655 && (cum->words % 2) != 0)
11656 cum->words++;
11657 /* For varargs, we can just go up by the size of the struct. */
11658 if (!named)
11659 cum->words += (size + 7) / 8;
11660 else
11662 /* It is tempting to say int register count just goes up by
11663 sizeof(type)/8, but this is wrong in a case such as
11664 { int; double; int; } [powerpc alignment]. We have to
11665 grovel through the fields for these too. */
11666 cum->intoffset = 0;
11667 cum->floats_in_gpr = 0;
11668 rs6000_darwin64_record_arg_advance_recurse (cum, type, 0);
11669 rs6000_darwin64_record_arg_advance_flush (cum,
11670 size * BITS_PER_UNIT, 1);
11672 if (TARGET_DEBUG_ARG)
11674 fprintf (stderr, "function_adv: words = %2d, align=%d, size=%d",
11675 cum->words, TYPE_ALIGN (type), size);
11676 fprintf (stderr,
11677 "nargs = %4d, proto = %d, mode = %4s (darwin64 abi)\n",
11678 cum->nargs_prototype, cum->prototype,
11679 GET_MODE_NAME (mode));
11682 else if (DEFAULT_ABI == ABI_V4)
11684 if (abi_v4_pass_in_fpr (mode, named))
11686 /* _Decimal128 must use an even/odd register pair. This assumes
11687 that the register number is odd when fregno is odd. */
11688 if (mode == TDmode && (cum->fregno % 2) == 1)
11689 cum->fregno++;
11691 if (cum->fregno + (FLOAT128_2REG_P (mode) ? 1 : 0)
11692 <= FP_ARG_V4_MAX_REG)
11693 cum->fregno += (GET_MODE_SIZE (mode) + 7) >> 3;
11694 else
11696 cum->fregno = FP_ARG_V4_MAX_REG + 1;
11697 if (mode == DFmode || FLOAT128_IBM_P (mode)
11698 || mode == DDmode || mode == TDmode)
11699 cum->words += cum->words & 1;
11700 cum->words += rs6000_arg_size (mode, type);
11703 else
11705 int n_words = rs6000_arg_size (mode, type);
11706 int gregno = cum->sysv_gregno;
11708 /* Long long is put in (r3,r4), (r5,r6), (r7,r8) or (r9,r10).
11709 So is any other 2-word item such as complex int, due to a
11710 historical mistake. */
11711 if (n_words == 2)
11712 gregno += (1 - gregno) & 1;
11714 /* Multi-reg args are not split between registers and stack. */
11715 if (gregno + n_words - 1 > GP_ARG_MAX_REG)
11717 /* Long long is aligned on the stack. So are other 2-word
11718 items such as complex int due to a historical mistake. */
11719 if (n_words == 2)
11720 cum->words += cum->words & 1;
11721 cum->words += n_words;
11724 /* Note: we keep accumulating gregno even after we have started
11725 spilling to the stack; this is how expand_builtin_saveregs
11726 learns that spilling has started. */
11727 cum->sysv_gregno = gregno + n_words;
11730 if (TARGET_DEBUG_ARG)
11732 fprintf (stderr, "function_adv: words = %2d, fregno = %2d, ",
11733 cum->words, cum->fregno);
11734 fprintf (stderr, "gregno = %2d, nargs = %4d, proto = %d, ",
11735 cum->sysv_gregno, cum->nargs_prototype, cum->prototype);
11736 fprintf (stderr, "mode = %4s, named = %d\n",
11737 GET_MODE_NAME (mode), named);
11740 else
11742 int n_words = rs6000_arg_size (mode, type);
11743 int start_words = cum->words;
11744 int align_words = rs6000_parm_start (mode, type, start_words);
11746 cum->words = align_words + n_words;
11748 if (SCALAR_FLOAT_MODE_P (elt_mode) && TARGET_HARD_FLOAT)
11750 /* _Decimal128 must be passed in an even/odd float register pair.
11751 This assumes that the register number is odd when fregno is
11752 odd. */
11753 if (elt_mode == TDmode && (cum->fregno % 2) == 1)
11754 cum->fregno++;
11755 cum->fregno += n_elts * ((GET_MODE_SIZE (elt_mode) + 7) >> 3);
11758 if (TARGET_DEBUG_ARG)
11760 fprintf (stderr, "function_adv: words = %2d, fregno = %2d, ",
11761 cum->words, cum->fregno);
11762 fprintf (stderr, "nargs = %4d, proto = %d, mode = %4s, ",
11763 cum->nargs_prototype, cum->prototype, GET_MODE_NAME (mode));
11764 fprintf (stderr, "named = %d, align = %d, depth = %d\n",
11765 named, align_words - start_words, depth);
11770 static void
11771 rs6000_function_arg_advance (cumulative_args_t cum, machine_mode mode,
11772 const_tree type, bool named)
11774 rs6000_function_arg_advance_1 (get_cumulative_args (cum), mode, type, named,
				 0);
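/* To sketch the bookkeeping for a hypothetical 64-bit ELFv2 call

       void f (double a, int b);

   advancing over A consumes the first FPR (cum->fregno moves past
   f1) and sets cum->words to 1; advancing over B leaves the FPRs
   alone and moves cum->words to 2.  The parameter save area
   doublewords are reserved even for the FPR argument.  */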
11778 /* A subroutine of rs6000_darwin64_record_arg. Assign the bits of the
11779 structure between cum->intoffset and bitpos to integer registers. */
11781 static void
11782 rs6000_darwin64_record_arg_flush (CUMULATIVE_ARGS *cum,
11783 HOST_WIDE_INT bitpos, rtx rvec[], int *k)
11785 machine_mode mode;
11786 unsigned int regno;
11787 unsigned int startbit, endbit;
11788 int this_regno, intregs, intoffset;
11789 rtx reg;
11791 if (cum->intoffset == -1)
11792 return;
11794 intoffset = cum->intoffset;
11795 cum->intoffset = -1;
11797 /* If this is the trailing part of a word, try to only load that
11798 much into the register. Otherwise load the whole register. Note
11799 that in the latter case we may pick up unwanted bits. It's not a
11800 problem at the moment, but we may wish to revisit it. */
11802 if (intoffset % BITS_PER_WORD != 0)
11804 unsigned int bits = BITS_PER_WORD - intoffset % BITS_PER_WORD;
11805 if (!int_mode_for_size (bits, 0).exists (&mode))
11807 /* We couldn't find an appropriate mode, which happens,
11808 e.g., in packed structs when there are 3 bytes to load.
11809 Move intoffset back to the beginning of the word in this
11810 case. */
11811 intoffset = ROUND_DOWN (intoffset, BITS_PER_WORD);
11812 mode = word_mode;
11815 else
11816 mode = word_mode;
11818 startbit = ROUND_DOWN (intoffset, BITS_PER_WORD);
11819 endbit = ROUND_UP (bitpos, BITS_PER_WORD);
11820 intregs = (endbit - startbit) / BITS_PER_WORD;
11821 this_regno = cum->words + intoffset / BITS_PER_WORD;
11823 if (intregs > 0 && intregs > GP_ARG_NUM_REG - this_regno)
11824 cum->use_stack = 1;
11826 intregs = MIN (intregs, GP_ARG_NUM_REG - this_regno);
11827 if (intregs <= 0)
11828 return;
11830 intoffset /= BITS_PER_UNIT;
11833 regno = GP_ARG_MIN_REG + this_regno;
11834 reg = gen_rtx_REG (mode, regno);
11835 rvec[(*k)++] =
11836 gen_rtx_EXPR_LIST (VOIDmode, reg, GEN_INT (intoffset));
11838 this_regno += 1;
11839 intoffset = (intoffset | (UNITS_PER_WORD-1)) + 1;
11840 mode = word_mode;
11841 intregs -= 1;
11843 while (intregs > 0);
11846 /* Recursive workhorse for the following. */
11848 static void
11849 rs6000_darwin64_record_arg_recurse (CUMULATIVE_ARGS *cum, const_tree type,
11850 HOST_WIDE_INT startbitpos, rtx rvec[],
11851 int *k)
11853 tree f;
11855 for (f = TYPE_FIELDS (type); f ; f = DECL_CHAIN (f))
11856 if (TREE_CODE (f) == FIELD_DECL)
11858 HOST_WIDE_INT bitpos = startbitpos;
11859 tree ftype = TREE_TYPE (f);
11860 machine_mode mode;
11861 if (ftype == error_mark_node)
11862 continue;
11863 mode = TYPE_MODE (ftype);
11865 if (DECL_SIZE (f) != 0
11866 && tree_fits_uhwi_p (bit_position (f)))
11867 bitpos += int_bit_position (f);
11869 /* ??? FIXME: else assume zero offset. */
11871 if (TREE_CODE (ftype) == RECORD_TYPE)
11872 rs6000_darwin64_record_arg_recurse (cum, ftype, bitpos, rvec, k);
11873 else if (cum->named && USE_FP_FOR_ARG_P (cum, mode))
11875 unsigned n_fpreg = (GET_MODE_SIZE (mode) + 7) >> 3;
11876 #if 0
11877 switch (mode)
11879 case E_SCmode: mode = SFmode; break;
11880 case E_DCmode: mode = DFmode; break;
11881 case E_TCmode: mode = TFmode; break;
11882 default: break;
11884 #endif
11885 rs6000_darwin64_record_arg_flush (cum, bitpos, rvec, k);
11886 if (cum->fregno + n_fpreg > FP_ARG_MAX_REG + 1)
11888 gcc_assert (cum->fregno == FP_ARG_MAX_REG
11889 && (mode == TFmode || mode == TDmode));
11890 /* Long double or _Decimal128 split over regs and memory. */
11891 mode = DECIMAL_FLOAT_MODE_P (mode) ? DDmode : DFmode;
11892 cum->use_stack = 1;
11894 rvec[(*k)++]
11895 = gen_rtx_EXPR_LIST (VOIDmode,
11896 gen_rtx_REG (mode, cum->fregno++),
11897 GEN_INT (bitpos / BITS_PER_UNIT));
11898 if (FLOAT128_2REG_P (mode))
11899 cum->fregno++;
11901 else if (cum->named && USE_ALTIVEC_FOR_ARG_P (cum, mode, 1))
11903 rs6000_darwin64_record_arg_flush (cum, bitpos, rvec, k);
11904 rvec[(*k)++]
11905 = gen_rtx_EXPR_LIST (VOIDmode,
11906 gen_rtx_REG (mode, cum->vregno++),
11907 GEN_INT (bitpos / BITS_PER_UNIT));
11909 else if (cum->intoffset == -1)
11910 cum->intoffset = bitpos;
11914 /* For the darwin64 ABI, we want to construct a PARALLEL consisting of
11915 the register(s) to be used for each field and subfield of a struct
11916 being passed by value, along with the offset of where the
11917 register's value may be found in the block. FP fields go in FP
11918 registers, vector fields go in vector registers, and everything
11919 else goes in int registers, packed as in memory.
11921 This code is also used for function return values. RETVAL indicates
11922 whether this is the case.
11924 Much of this is taken from the SPARC V9 port, which has a similar
11925 calling convention. */
11927 static rtx
11928 rs6000_darwin64_record_arg (CUMULATIVE_ARGS *orig_cum, const_tree type,
11929 bool named, bool retval)
11931 rtx rvec[FIRST_PSEUDO_REGISTER];
11932 int k = 1, kbase = 1;
11933 HOST_WIDE_INT typesize = int_size_in_bytes (type);
11934 /* This is a copy; modifications are not visible to our caller. */
11935 CUMULATIVE_ARGS copy_cum = *orig_cum;
11936 CUMULATIVE_ARGS *cum = &copy_cum;
11938 /* Pad to 16 byte boundary if needed. */
11939 if (!retval && TYPE_ALIGN (type) >= 2 * BITS_PER_WORD
11940 && (cum->words % 2) != 0)
11941 cum->words++;
11943 cum->intoffset = 0;
11944 cum->use_stack = 0;
11945 cum->named = named;
11947 /* Put entries into rvec[] for individual FP and vector fields, and
11948 for the chunks of memory that go in int regs. Note we start at
11949 element 1; 0 is reserved for an indication of using memory, and
11950 may or may not be filled in below. */
11951 rs6000_darwin64_record_arg_recurse (cum, type, /* startbit pos= */ 0, rvec, &k);
11952 rs6000_darwin64_record_arg_flush (cum, typesize * BITS_PER_UNIT, rvec, &k);
11954 /* If any part of the struct went on the stack put all of it there.
11955 This hack is because the generic code for
11956 FUNCTION_ARG_PARTIAL_NREGS cannot handle cases where the register
11957 parts of the struct are not at the beginning. */
11958 if (cum->use_stack)
11960 if (retval)
11961 return NULL_RTX; /* doesn't go in registers at all */
11962 kbase = 0;
11963 rvec[0] = gen_rtx_EXPR_LIST (VOIDmode, NULL_RTX, const0_rtx);
11965 if (k > 1 || cum->use_stack)
11966 return gen_rtx_PARALLEL (BLKmode, gen_rtvec_v (k - kbase, &rvec[kbase]));
11967 else
11968 return NULL_RTX;
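/* Rough sketch of the PARALLEL built for a hypothetical first
   argument under the Darwin64 ABI:

       struct s { double d; int i; };

   D should be described as an FPR at byte offset 0 and the chunk
   holding I flushed into a GPR at offset 8, roughly

       (parallel [(expr_list (reg:DF f1) (const_int 0))
                  (expr_list (reg:DI r4) (const_int 8))])

   with element 0 holding NULL_RTX only if part of the struct
   ends up on the stack.  */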
11971 /* Determine where to place an argument in 64-bit mode with 32-bit ABI. */
11973 static rtx
11974 rs6000_mixed_function_arg (machine_mode mode, const_tree type,
11975 int align_words)
11977 int n_units;
11978 int i, k;
11979 rtx rvec[GP_ARG_NUM_REG + 1];
11981 if (align_words >= GP_ARG_NUM_REG)
11982 return NULL_RTX;
11984 n_units = rs6000_arg_size (mode, type);
11986 /* Optimize the simple case where the arg fits in one gpr, except in
11987 the case of BLKmode due to assign_parms assuming that registers are
11988 BITS_PER_WORD wide. */
11989 if (n_units == 0
11990 || (n_units == 1 && mode != BLKmode))
11991 return gen_rtx_REG (mode, GP_ARG_MIN_REG + align_words);
11993 k = 0;
11994 if (align_words + n_units > GP_ARG_NUM_REG)
11995 /* Not all of the arg fits in gprs. Say that it goes in memory too,
11996 using a magic NULL_RTX component.
11997 This is not strictly correct. Only some of the arg belongs in
11998 memory, not all of it. However, the normal scheme using
11999 function_arg_partial_nregs can result in unusual subregs, e.g.
12000 (subreg:SI (reg:DF) 4), which are not handled well. The code to
12001 store the whole arg to memory is often more efficient than code
12002 to store pieces, and we know that space is available in the right
12003 place for the whole arg. */
12004 rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, NULL_RTX, const0_rtx);
12006 i = 0;
12009 rtx r = gen_rtx_REG (SImode, GP_ARG_MIN_REG + align_words);
12010 rtx off = GEN_INT (i++ * 4);
12011 rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, r, off);
12013 while (++align_words < GP_ARG_NUM_REG && --n_units != 0);
12015 return gen_rtx_PARALLEL (mode, gen_rtvec_v (k, rvec));
12018 /* We have an argument of MODE and TYPE that goes into FPRs or VRs,
12019 but must also be copied into the parameter save area starting at
12020 offset ALIGN_WORDS. Fill in RVEC with the elements corresponding
12021 to the GPRs and/or memory. Return the number of elements used. */
12023 static int
12024 rs6000_psave_function_arg (machine_mode mode, const_tree type,
12025 int align_words, rtx *rvec)
12027 int k = 0;
12029 if (align_words < GP_ARG_NUM_REG)
12031 int n_words = rs6000_arg_size (mode, type);
12033 if (align_words + n_words > GP_ARG_NUM_REG
12034 || mode == BLKmode
12035 || (TARGET_32BIT && TARGET_POWERPC64))
12037 /* If this is partially on the stack, then we only
12038 include the portion actually in registers here. */
12039 machine_mode rmode = TARGET_32BIT ? SImode : DImode;
12040 int i = 0;
12042 if (align_words + n_words > GP_ARG_NUM_REG)
12044 /* Not all of the arg fits in gprs. Say that it goes in memory
12045 too, using a magic NULL_RTX component. Also see comment in
12046 rs6000_mixed_function_arg for why the normal
12047 function_arg_partial_nregs scheme doesn't work in this case. */
12048 rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, NULL_RTX, const0_rtx);
12053 rtx r = gen_rtx_REG (rmode, GP_ARG_MIN_REG + align_words);
12054 rtx off = GEN_INT (i++ * GET_MODE_SIZE (rmode));
12055 rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, r, off);
12057 while (++align_words < GP_ARG_NUM_REG && --n_words != 0);
12059 else
12061 /* The whole arg fits in gprs. */
12062 rtx r = gen_rtx_REG (mode, GP_ARG_MIN_REG + align_words);
12063 rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, r, const0_rtx);
12066 else
12068 /* It's entirely in memory. */
12069 rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, NULL_RTX, const0_rtx);
12072 return k;
12075 /* RVEC is a vector of K components of an argument of mode MODE.
12076 Construct the final function_arg return value from it. */
12078 static rtx
12079 rs6000_finish_function_arg (machine_mode mode, rtx *rvec, int k)
12081 gcc_assert (k >= 1);
12083 /* Avoid returning a PARALLEL in the trivial cases. */
12084 if (k == 1)
12086 if (XEXP (rvec[0], 0) == NULL_RTX)
12087 return NULL_RTX;
12089 if (GET_MODE (XEXP (rvec[0], 0)) == mode)
12090 return XEXP (rvec[0], 0);
12093 return gen_rtx_PARALLEL (mode, gen_rtvec_v (k, rvec));
12096 /* Determine where to put an argument to a function.
12097 Value is zero to push the argument on the stack,
12098 or a hard register in which to store the argument.
12100 MODE is the argument's machine mode.
12101 TYPE is the data type of the argument (as a tree).
12102 This is null for libcalls where that information may
12103 not be available.
12104 CUM is a variable of type CUMULATIVE_ARGS which gives info about
12105 the preceding args and about the function being called. It is
12106 not modified in this routine.
12107 NAMED is nonzero if this argument is a named parameter
12108 (otherwise it is an extra parameter matching an ellipsis).
12110 On RS/6000 the first eight words of non-FP are normally in registers
12111 and the rest are pushed. Under AIX, the first 13 FP args are in registers.
12112 Under V.4, the first 8 FP args are in registers.
12114 If this is floating-point and no prototype is specified, we use
12115 both an FP and integer register (or possibly FP reg and stack). Library
12116 functions (when CALL_LIBCALL is set) always have the proper types for args,
12117 so we can pass the FP value just in one register. emit_library_call
12118 doesn't support PARALLEL anyway.
12120 Note that for args passed by reference, function_arg will be called
12121 with MODE and TYPE set to that of the pointer to the arg, not the arg
12122 itself. */
12124 static rtx
12125 rs6000_function_arg (cumulative_args_t cum_v, machine_mode mode,
12126 const_tree type, bool named)
12128 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
12129 enum rs6000_abi abi = DEFAULT_ABI;
12130 machine_mode elt_mode;
12131 int n_elts;
12133 /* Return a marker to indicate whether CR1 needs to set or clear the
12134 bit that V.4 uses to say fp args were passed in registers.
12135 Assume that we don't need the marker for software floating point,
12136 or compiler generated library calls. */
12137 if (mode == VOIDmode)
12139 if (abi == ABI_V4
12140 && (cum->call_cookie & CALL_LIBCALL) == 0
12141 && (cum->stdarg
12142 || (cum->nargs_prototype < 0
12143 && (cum->prototype || TARGET_NO_PROTOTYPE)))
12144 && TARGET_HARD_FLOAT)
12145 return GEN_INT (cum->call_cookie
12146 | ((cum->fregno == FP_ARG_MIN_REG)
12147 ? CALL_V4_SET_FP_ARGS
12148 : CALL_V4_CLEAR_FP_ARGS));
12150 return GEN_INT (cum->call_cookie & ~CALL_LIBCALL);
12153 rs6000_discover_homogeneous_aggregate (mode, type, &elt_mode, &n_elts);
12155 if (TARGET_MACHO && rs6000_darwin64_struct_check_p (mode, type))
12157 rtx rslt = rs6000_darwin64_record_arg (cum, type, named, /*retval= */false);
12158 if (rslt != NULL_RTX)
12159 return rslt;
12160 /* Else fall through to usual handling. */
12163 if (USE_ALTIVEC_FOR_ARG_P (cum, elt_mode, named))
12165 rtx rvec[GP_ARG_NUM_REG + AGGR_ARG_NUM_REG + 1];
12166 rtx r, off;
12167 int i, k = 0;
12169 /* Do we also need to pass this argument in the parameter save area?
12170 Library support functions for IEEE 128-bit are assumed to not need the
12171 value passed both in GPRs and in vector registers. */
12172 if (TARGET_64BIT && !cum->prototype
12173 && (!cum->libcall || !FLOAT128_VECTOR_P (elt_mode)))
12175 int align_words = ROUND_UP (cum->words, 2);
12176 k = rs6000_psave_function_arg (mode, type, align_words, rvec);
12179 /* Describe where this argument goes in the vector registers. */
12180 for (i = 0; i < n_elts && cum->vregno + i <= ALTIVEC_ARG_MAX_REG; i++)
12182 r = gen_rtx_REG (elt_mode, cum->vregno + i);
12183 off = GEN_INT (i * GET_MODE_SIZE (elt_mode));
12184 rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, r, off);
12187 return rs6000_finish_function_arg (mode, rvec, k);
12189 else if (TARGET_ALTIVEC_ABI
12190 && (ALTIVEC_OR_VSX_VECTOR_MODE (mode)
12191 || (type && TREE_CODE (type) == VECTOR_TYPE
12192 && int_size_in_bytes (type) == 16)))
12194 if (named || abi == ABI_V4)
12195 return NULL_RTX;
12196 else
12198 /* Vector parameters to varargs functions under AIX or Darwin
12199 get passed in memory and possibly also in GPRs. */
12200 int align, align_words, n_words;
12201 machine_mode part_mode;
12203 /* Vector parameters must be 16-byte aligned. In 32-bit
12204 mode this means we need to take into account the offset
12205 to the parameter save area. In 64-bit mode, they just
12206 have to start on an even word, since the parameter save
12207 area is 16-byte aligned. */
12208 if (TARGET_32BIT)
12209 align = -(rs6000_parm_offset () + cum->words) & 3;
12210 else
12211 align = cum->words & 1;
12212 align_words = cum->words + align;
12214 /* Out of registers? Memory, then. */
12215 if (align_words >= GP_ARG_NUM_REG)
12216 return NULL_RTX;
12218 if (TARGET_32BIT && TARGET_POWERPC64)
12219 return rs6000_mixed_function_arg (mode, type, align_words);
12221 /* The vector value goes in GPRs. Only the part of the
12222 value in GPRs is reported here. */
12223 part_mode = mode;
12224 n_words = rs6000_arg_size (mode, type);
12225 if (align_words + n_words > GP_ARG_NUM_REG)
12226 /* Fortunately, there are only two possibilities, the value
12227 is either wholly in GPRs or half in GPRs and half not. */
12228 part_mode = DImode;
12230 return gen_rtx_REG (part_mode, GP_ARG_MIN_REG + align_words);
12234 else if (abi == ABI_V4)
12236 if (abi_v4_pass_in_fpr (mode, named))
12238 /* _Decimal128 must use an even/odd register pair. This assumes
12239 that the register number is odd when fregno is odd. */
12240 if (mode == TDmode && (cum->fregno % 2) == 1)
12241 cum->fregno++;
12243 if (cum->fregno + (FLOAT128_2REG_P (mode) ? 1 : 0)
12244 <= FP_ARG_V4_MAX_REG)
12245 return gen_rtx_REG (mode, cum->fregno);
12246 else
12247 return NULL_RTX;
12249 else
12251 int n_words = rs6000_arg_size (mode, type);
12252 int gregno = cum->sysv_gregno;
12254 /* Long long is put in (r3,r4), (r5,r6), (r7,r8) or (r9,r10).
12255 So is any other 2-word item such as complex int, due to a
12256 historical mistake. */
12257 if (n_words == 2)
12258 gregno += (1 - gregno) & 1;
12260 /* Multi-reg args are not split between registers and stack. */
12261 if (gregno + n_words - 1 > GP_ARG_MAX_REG)
12262 return NULL_RTX;
12264 if (TARGET_32BIT && TARGET_POWERPC64)
12265 return rs6000_mixed_function_arg (mode, type,
12266 gregno - GP_ARG_MIN_REG);
12267 return gen_rtx_REG (mode, gregno);
12270 else
12272 int align_words = rs6000_parm_start (mode, type, cum->words);
12274 /* _Decimal128 must be passed in an even/odd float register pair.
12275 This assumes that the register number is odd when fregno is odd. */
12276 if (elt_mode == TDmode && (cum->fregno % 2) == 1)
12277 cum->fregno++;
12279 if (USE_FP_FOR_ARG_P (cum, elt_mode))
12281 rtx rvec[GP_ARG_NUM_REG + AGGR_ARG_NUM_REG + 1];
12282 rtx r, off;
12283 int i, k = 0;
12284 unsigned long n_fpreg = (GET_MODE_SIZE (elt_mode) + 7) >> 3;
12285 int fpr_words;
12287 /* Do we also need to pass this argument in the parameter
12288 save area? */
12289 if (type && (cum->nargs_prototype <= 0
12290 || ((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
12291 && TARGET_XL_COMPAT
12292 && align_words >= GP_ARG_NUM_REG)))
12293 k = rs6000_psave_function_arg (mode, type, align_words, rvec);
12295 /* Describe where this argument goes in the fprs. */
12296 for (i = 0; i < n_elts
12297 && cum->fregno + i * n_fpreg <= FP_ARG_MAX_REG; i++)
12299 /* Check if the argument is split over registers and memory.
12300 This can only ever happen for long double or _Decimal128;
12301 complex types are handled via split_complex_arg. */
12302 machine_mode fmode = elt_mode;
12303 if (cum->fregno + (i + 1) * n_fpreg > FP_ARG_MAX_REG + 1)
12305 gcc_assert (FLOAT128_2REG_P (fmode));
12306 fmode = DECIMAL_FLOAT_MODE_P (fmode) ? DDmode : DFmode;
12309 r = gen_rtx_REG (fmode, cum->fregno + i * n_fpreg);
12310 off = GEN_INT (i * GET_MODE_SIZE (elt_mode));
12311 rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, r, off);
12314 /* If there were not enough FPRs to hold the argument, the rest
12315 usually goes into memory. However, if the current position
12316 is still within the register parameter area, a portion may
12317 actually have to go into GPRs.
12319 Note that it may happen that the portion of the argument
12320 passed in the first "half" of the first GPR was already
12321 passed in the last FPR as well.
12323 For unnamed arguments, we already set up GPRs to cover the
12324 whole argument in rs6000_psave_function_arg, so there is
12325 nothing further to do at this point. */
12326 fpr_words = (i * GET_MODE_SIZE (elt_mode)) / (TARGET_32BIT ? 4 : 8);
12327 if (i < n_elts && align_words + fpr_words < GP_ARG_NUM_REG
12328 && cum->nargs_prototype > 0)
12330 static bool warned;
12332 machine_mode rmode = TARGET_32BIT ? SImode : DImode;
12333 int n_words = rs6000_arg_size (mode, type);
12335 align_words += fpr_words;
12336 n_words -= fpr_words;
12340 r = gen_rtx_REG (rmode, GP_ARG_MIN_REG + align_words);
12341 off = GEN_INT (fpr_words++ * GET_MODE_SIZE (rmode));
12342 rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, r, off);
12344 while (++align_words < GP_ARG_NUM_REG && --n_words != 0);
12346 if (!warned && warn_psabi)
12348 warned = true;
12349 inform (input_location,
12350 "the ABI of passing homogeneous float aggregates"
12351 " has changed in GCC 5");
12355 return rs6000_finish_function_arg (mode, rvec, k);
12357 else if (align_words < GP_ARG_NUM_REG)
12359 if (TARGET_32BIT && TARGET_POWERPC64)
12360 return rs6000_mixed_function_arg (mode, type, align_words);
12362 return gen_rtx_REG (mode, GP_ARG_MIN_REG + align_words);
12364 else
12365 return NULL_RTX;
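/* Putting the ELFv2 pieces together for a hypothetical prototyped
   call whose first argument is

       struct hfa { double x, y; };

   this should return a PARALLEL placing X in f1 and Y in f2, each
   EXPR_LIST carrying the element's byte offset (0 and 8); the
   matching parameter save area doublewords are accounted for by the
   advance hook above.  */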
12369 /* For an arg passed partly in registers and partly in memory, this is
12370 the number of bytes passed in registers. For args passed entirely in
12371 registers or entirely in memory, zero. When an arg is described by a
12372 PARALLEL, perhaps using more than one register type, this function
12373 returns the number of bytes used by the first element of the PARALLEL. */
12375 static int
12376 rs6000_arg_partial_bytes (cumulative_args_t cum_v, machine_mode mode,
12377 tree type, bool named)
12379 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
12380 bool passed_in_gprs = true;
12381 int ret = 0;
12382 int align_words;
12383 machine_mode elt_mode;
12384 int n_elts;
12386 rs6000_discover_homogeneous_aggregate (mode, type, &elt_mode, &n_elts);
12388 if (DEFAULT_ABI == ABI_V4)
12389 return 0;
12391 if (USE_ALTIVEC_FOR_ARG_P (cum, elt_mode, named))
12393 /* If we are passing this arg in the fixed parameter save area (gprs or
12394 memory) as well as VRs, we do not use the partial bytes mechanism;
12395 instead, rs6000_function_arg will return a PARALLEL including a memory
12396 element as necessary. Library support functions for IEEE 128-bit are
12397 assumed to not need the value passed both in GPRs and in vector
12398 registers. */
12399 if (TARGET_64BIT && !cum->prototype
12400 && (!cum->libcall || !FLOAT128_VECTOR_P (elt_mode)))
12401 return 0;
12403 /* Otherwise, we pass in VRs only. Check for partial copies. */
12404 passed_in_gprs = false;
12405 if (cum->vregno + n_elts > ALTIVEC_ARG_MAX_REG + 1)
12406 ret = (ALTIVEC_ARG_MAX_REG + 1 - cum->vregno) * 16;
12409 /* In this complicated case we just disable the partial_nregs code. */
12410 if (TARGET_MACHO && rs6000_darwin64_struct_check_p (mode, type))
12411 return 0;
12413 align_words = rs6000_parm_start (mode, type, cum->words);
12415 if (USE_FP_FOR_ARG_P (cum, elt_mode))
12417 unsigned long n_fpreg = (GET_MODE_SIZE (elt_mode) + 7) >> 3;
12419 /* If we are passing this arg in the fixed parameter save area
12420 (gprs or memory) as well as FPRs, we do not use the partial
12421 bytes mechanism; instead, rs6000_function_arg will return a
12422 PARALLEL including a memory element as necessary. */
12423 if (type
12424 && (cum->nargs_prototype <= 0
12425 || ((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
12426 && TARGET_XL_COMPAT
12427 && align_words >= GP_ARG_NUM_REG)))
12428 return 0;
12430 /* Otherwise, we pass in FPRs only. Check for partial copies. */
12431 passed_in_gprs = false;
12432 if (cum->fregno + n_elts * n_fpreg > FP_ARG_MAX_REG + 1)
12434 /* Compute number of bytes / words passed in FPRs. If there
12435 is still space available in the register parameter area
12436 *after* that amount, a part of the argument will be passed
12437 in GPRs. In that case, the total amount passed in any
12438 registers is equal to the amount that would have been passed
12439 in GPRs if everything were passed there, so we fall back to
12440 the GPR code below to compute the appropriate value. */
12441 int fpr = ((FP_ARG_MAX_REG + 1 - cum->fregno)
12442 * MIN (8, GET_MODE_SIZE (elt_mode)));
12443 int fpr_words = fpr / (TARGET_32BIT ? 4 : 8);
12445 if (align_words + fpr_words < GP_ARG_NUM_REG)
12446 passed_in_gprs = true;
12447 else
12448 ret = fpr;
12452 if (passed_in_gprs
12453 && align_words < GP_ARG_NUM_REG
12454 && GP_ARG_NUM_REG < align_words + rs6000_arg_size (mode, type))
12455 ret = (GP_ARG_NUM_REG - align_words) * (TARGET_32BIT ? 4 : 8);
12457 if (ret != 0 && TARGET_DEBUG_ARG)
12458 fprintf (stderr, "rs6000_arg_partial_bytes: %d\n", ret);
12460 return ret;
12463 /* A C expression that indicates when an argument must be passed by
12464 reference. If nonzero for an argument, a copy of that argument is
12465 made in memory and a pointer to the argument is passed instead of
12466 the argument itself. The pointer is passed in whatever way is
12467 appropriate for passing a pointer to that type.
12469 Under V.4, aggregates and long double are passed by reference.
12471 As an extension to all 32-bit ABIs, AltiVec vectors are passed by
12472 reference unless the AltiVec vector extension ABI is in force.
12474 As an extension to all ABIs, variable sized types are passed by
12475 reference. */
12477 static bool
12478 rs6000_pass_by_reference (cumulative_args_t cum ATTRIBUTE_UNUSED,
12479 machine_mode mode, const_tree type,
12480 bool named ATTRIBUTE_UNUSED)
12482 if (!type)
12483 return 0;
12485 if (DEFAULT_ABI == ABI_V4 && TARGET_IEEEQUAD
12486 && FLOAT128_IEEE_P (TYPE_MODE (type)))
12488 if (TARGET_DEBUG_ARG)
12489 fprintf (stderr, "function_arg_pass_by_reference: V4 IEEE 128-bit\n");
12490 return 1;
12493 if (DEFAULT_ABI == ABI_V4 && AGGREGATE_TYPE_P (type))
12495 if (TARGET_DEBUG_ARG)
12496 fprintf (stderr, "function_arg_pass_by_reference: V4 aggregate\n");
12497 return 1;
12500 if (int_size_in_bytes (type) < 0)
12502 if (TARGET_DEBUG_ARG)
12503 fprintf (stderr, "function_arg_pass_by_reference: variable size\n");
12504 return 1;
12507 /* Allow -maltivec -mabi=no-altivec without warning. Altivec vector
12508 modes only exist for GCC vector types if -maltivec. */
12509 if (TARGET_32BIT && !TARGET_ALTIVEC_ABI && ALTIVEC_VECTOR_MODE (mode))
12511 if (TARGET_DEBUG_ARG)
12512 fprintf (stderr, "function_arg_pass_by_reference: AltiVec\n");
12513 return 1;
12516 /* Pass synthetic vectors in memory. */
12517 if (TREE_CODE (type) == VECTOR_TYPE
12518 && int_size_in_bytes (type) > (TARGET_ALTIVEC_ABI ? 16 : 8))
12520 static bool warned_for_pass_big_vectors = false;
12521 if (TARGET_DEBUG_ARG)
12522 fprintf (stderr, "function_arg_pass_by_reference: synthetic vector\n");
12523 if (!warned_for_pass_big_vectors)
12525 warning (OPT_Wpsabi, "GCC vector passed by reference: "
12526 "non-standard ABI extension with no compatibility "
12527 "guarantee");
12528 warned_for_pass_big_vectors = true;
12530 return 1;
12533 return 0;
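/* For instance, under ABI_V4 every aggregate goes by reference, so a
   hypothetical

       struct big { char buf[64]; };
       void f (struct big b);

   is compiled as if F received a pointer to a caller-made copy; the
   same treatment applies to any variable-size type on all ABIs.  */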
12536 /* Process parameter of type TYPE after ARGS_SO_FAR parameters were
12537 already processed. Return true if the parameter must be passed
12538 (fully or partially) on the stack. */
12540 static bool
12541 rs6000_parm_needs_stack (cumulative_args_t args_so_far, tree type)
12543 machine_mode mode;
12544 int unsignedp;
12545 rtx entry_parm;
12547 /* Catch errors. */
12548 if (type == NULL || type == error_mark_node)
12549 return true;
12551 /* Handle types with no storage requirement. */
12552 if (TYPE_MODE (type) == VOIDmode)
12553 return false;
12555 /* Handle complex types.  The two identical recursive calls advance
ARGS_SO_FAR over both the real and the imaginary half. */
12556 if (TREE_CODE (type) == COMPLEX_TYPE)
12557 return (rs6000_parm_needs_stack (args_so_far, TREE_TYPE (type))
12558 || rs6000_parm_needs_stack (args_so_far, TREE_TYPE (type)));
12560 /* Handle transparent aggregates. */
12561 if ((TREE_CODE (type) == UNION_TYPE || TREE_CODE (type) == RECORD_TYPE)
12562 && TYPE_TRANSPARENT_AGGR (type))
12563 type = TREE_TYPE (first_field (type));
12565 /* See if this arg was passed by invisible reference. */
12566 if (pass_by_reference (get_cumulative_args (args_so_far),
12567 TYPE_MODE (type), type, true))
12568 type = build_pointer_type (type);
12570 /* Find mode as it is passed by the ABI. */
12571 unsignedp = TYPE_UNSIGNED (type);
12572 mode = promote_mode (type, TYPE_MODE (type), &unsignedp);
12574 /* If we must pass in stack, we need a stack. */
12575 if (rs6000_must_pass_in_stack (mode, type))
12576 return true;
12578 /* If there is no incoming register, we need a stack. */
12579 entry_parm = rs6000_function_arg (args_so_far, mode, type, true);
12580 if (entry_parm == NULL)
12581 return true;
12583 /* Likewise if we need to pass both in registers and on the stack. */
12584 if (GET_CODE (entry_parm) == PARALLEL
12585 && XEXP (XVECEXP (entry_parm, 0, 0), 0) == NULL_RTX)
12586 return true;
12588 /* Also true if we're partially in registers and partially not. */
12589 if (rs6000_arg_partial_bytes (args_so_far, mode, type, true) != 0)
12590 return true;
12592 /* Update info on where next arg arrives in registers. */
12593 rs6000_function_arg_advance (args_so_far, mode, type, true);
12594 return false;
12597 /* Return true if FUN has no prototype, has a variable argument
12598 list, or passes any parameter in memory. */
12600 static bool
12601 rs6000_function_parms_need_stack (tree fun, bool incoming)
12603 tree fntype, result;
12604 CUMULATIVE_ARGS args_so_far_v;
12605 cumulative_args_t args_so_far;
12607 if (!fun)
12608 /* Must be a libcall, all of which only use reg parms. */
12609 return false;
12611 fntype = fun;
12612 if (!TYPE_P (fun))
12613 fntype = TREE_TYPE (fun);
12615 /* Varargs functions need the parameter save area. */
12616 if ((!incoming && !prototype_p (fntype)) || stdarg_p (fntype))
12617 return true;
12619 INIT_CUMULATIVE_INCOMING_ARGS (args_so_far_v, fntype, NULL_RTX);
12620 args_so_far = pack_cumulative_args (&args_so_far_v);
12622 /* When incoming, we will have been passed the function decl.
12623 It is necessary to use the decl to handle K&R style functions,
12624 where TYPE_ARG_TYPES may not be available. */
12625 if (incoming)
12627 gcc_assert (DECL_P (fun));
12628 result = DECL_RESULT (fun);
12630 else
12631 result = TREE_TYPE (fntype);
12633 if (result && aggregate_value_p (result, fntype))
12635 if (!TYPE_P (result))
12636 result = TREE_TYPE (result);
12637 result = build_pointer_type (result);
12638 rs6000_parm_needs_stack (args_so_far, result);
12641 if (incoming)
12643 tree parm;
12645 for (parm = DECL_ARGUMENTS (fun);
12646 parm && parm != void_list_node;
12647 parm = TREE_CHAIN (parm))
12648 if (rs6000_parm_needs_stack (args_so_far, TREE_TYPE (parm)))
12649 return true;
12651 else
12653 function_args_iterator args_iter;
12654 tree arg_type;
12656 FOREACH_FUNCTION_ARGS (fntype, arg_type, args_iter)
12657 if (rs6000_parm_needs_stack (args_so_far, arg_type))
12658 return true;
12661 return false;
12664 /* Return the size of the REG_PARM_STACK_SPACE area for FUN. This is
12665 usually a constant depending on the ABI. However, in the ELFv2 ABI
12666 the register parameter area is optional when calling a function that
12667 has a prototype in scope, has no variable argument list, and passes
12668 all parameters in registers. */
int
12671 rs6000_reg_parm_stack_space (tree fun, bool incoming)
12673 int reg_parm_stack_space;
12675 switch (DEFAULT_ABI)
12677 default:
12678 reg_parm_stack_space = 0;
12679 break;
12681 case ABI_AIX:
12682 case ABI_DARWIN:
12683 reg_parm_stack_space = TARGET_64BIT ? 64 : 32;
12684 break;
12686 case ABI_ELFv2:
12687 /* ??? Recomputing this every time is a bit expensive. Is there
12688 a place to cache this information? */
12689 if (rs6000_function_parms_need_stack (fun, incoming))
12690 reg_parm_stack_space = TARGET_64BIT ? 64 : 32;
12691 else
12692 reg_parm_stack_space = 0;
12693 break;
12696 return reg_parm_stack_space;
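/* So under ELFv2 a call to a prototyped function such as

       int f (int a, int b);

   needs no register parameter save area at all, while a varargs or
   unprototyped callee still gets the full 64-byte area.  */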
12699 static void
12700 rs6000_move_block_from_reg (int regno, rtx x, int nregs)
12702 int i;
12703 machine_mode reg_mode = TARGET_32BIT ? SImode : DImode;
12705 if (nregs == 0)
12706 return;
12708 for (i = 0; i < nregs; i++)
12710 rtx tem = adjust_address_nv (x, reg_mode, i * GET_MODE_SIZE (reg_mode));
12711 if (reload_completed)
12713 if (! strict_memory_address_p (reg_mode, XEXP (tem, 0)))
12714 tem = NULL_RTX;
12715 else
12716 tem = simplify_gen_subreg (reg_mode, x, BLKmode,
12717 i * GET_MODE_SIZE (reg_mode));
12719 else
12720 tem = replace_equiv_address (tem, XEXP (tem, 0));
12722 gcc_assert (tem);
12724 emit_move_insn (tem, gen_rtx_REG (reg_mode, regno + i));
12728 /* Perform any actions needed for a function that is receiving a
12729 variable number of arguments.
12731 CUM is as above.
12733 MODE and TYPE are the mode and type of the current parameter.
12735 PRETEND_SIZE is a variable that should be set to the amount of stack
12736 that must be pushed by the prolog to pretend that our caller pushed
12737 it.
12739 Normally, this macro will push all remaining incoming registers on the
12740 stack and set PRETEND_SIZE to the length of the registers pushed. */
12742 static void
12743 setup_incoming_varargs (cumulative_args_t cum, machine_mode mode,
12744 tree type, int *pretend_size ATTRIBUTE_UNUSED,
12745 int no_rtl)
12747 CUMULATIVE_ARGS next_cum;
12748 int reg_size = TARGET_32BIT ? 4 : 8;
12749 rtx save_area = NULL_RTX, mem;
12750 int first_reg_offset;
12751 alias_set_type set;
12753 /* Skip the last named argument. */
12754 next_cum = *get_cumulative_args (cum);
12755 rs6000_function_arg_advance_1 (&next_cum, mode, type, true, 0);
12757 if (DEFAULT_ABI == ABI_V4)
12759 first_reg_offset = next_cum.sysv_gregno - GP_ARG_MIN_REG;
12761 if (! no_rtl)
12763 int gpr_reg_num = 0, gpr_size = 0, fpr_size = 0;
12764 HOST_WIDE_INT offset = 0;
12766 /* Try to optimize the size of the varargs save area.
12767 The ABI requires that ap.reg_save_area is doubleword
12768 aligned, but we don't need to allocate space for all
12769 the bytes, only for those in which we will actually
12770 save anything. */
12771 if (cfun->va_list_gpr_size && first_reg_offset < GP_ARG_NUM_REG)
12772 gpr_reg_num = GP_ARG_NUM_REG - first_reg_offset;
12773 if (TARGET_HARD_FLOAT
12774 && next_cum.fregno <= FP_ARG_V4_MAX_REG
12775 && cfun->va_list_fpr_size)
12777 if (gpr_reg_num)
12778 fpr_size = (next_cum.fregno - FP_ARG_MIN_REG)
12779 * UNITS_PER_FP_WORD;
12780 if (cfun->va_list_fpr_size
12781 < FP_ARG_V4_MAX_REG + 1 - next_cum.fregno)
12782 fpr_size += cfun->va_list_fpr_size * UNITS_PER_FP_WORD;
12783 else
12784 fpr_size += (FP_ARG_V4_MAX_REG + 1 - next_cum.fregno)
12785 * UNITS_PER_FP_WORD;
12787 if (gpr_reg_num)
12789 offset = -((first_reg_offset * reg_size) & ~7);
12790 if (!fpr_size && gpr_reg_num > cfun->va_list_gpr_size)
12792 gpr_reg_num = cfun->va_list_gpr_size;
12793 if (reg_size == 4 && (first_reg_offset & 1))
12794 gpr_reg_num++;
12796 gpr_size = (gpr_reg_num * reg_size + 7) & ~7;
12798 else if (fpr_size)
12799 offset = - (int) (next_cum.fregno - FP_ARG_MIN_REG)
12800 * UNITS_PER_FP_WORD
12801 - (int) (GP_ARG_NUM_REG * reg_size);
12803 if (gpr_size + fpr_size)
12805 rtx reg_save_area
12806 = assign_stack_local (BLKmode, gpr_size + fpr_size, 64);
12807 gcc_assert (GET_CODE (reg_save_area) == MEM);
12808 reg_save_area = XEXP (reg_save_area, 0);
12809 if (GET_CODE (reg_save_area) == PLUS)
12811 gcc_assert (XEXP (reg_save_area, 0)
12812 == virtual_stack_vars_rtx);
12813 gcc_assert (GET_CODE (XEXP (reg_save_area, 1)) == CONST_INT);
12814 offset += INTVAL (XEXP (reg_save_area, 1));
12816 else
12817 gcc_assert (reg_save_area == virtual_stack_vars_rtx);
12820 cfun->machine->varargs_save_offset = offset;
12821 save_area = plus_constant (Pmode, virtual_stack_vars_rtx, offset);
12824 else
12826 first_reg_offset = next_cum.words;
12827 save_area = crtl->args.internal_arg_pointer;
12829 if (targetm.calls.must_pass_in_stack (mode, type))
12830 first_reg_offset += rs6000_arg_size (TYPE_MODE (type), type);
12833 set = get_varargs_alias_set ();
12834 if (! no_rtl && first_reg_offset < GP_ARG_NUM_REG
12835 && cfun->va_list_gpr_size)
12837 int n_gpr, nregs = GP_ARG_NUM_REG - first_reg_offset;
12839 if (va_list_gpr_counter_field)
12840 /* V4 va_list_gpr_size counts number of registers needed. */
12841 n_gpr = cfun->va_list_gpr_size;
12842 else
12843 /* char * va_list instead counts number of bytes needed. */
12844 n_gpr = (cfun->va_list_gpr_size + reg_size - 1) / reg_size;
12846 if (nregs > n_gpr)
12847 nregs = n_gpr;
12849 mem = gen_rtx_MEM (BLKmode,
12850 plus_constant (Pmode, save_area,
12851 first_reg_offset * reg_size));
12852 MEM_NOTRAP_P (mem) = 1;
12853 set_mem_alias_set (mem, set);
12854 set_mem_align (mem, BITS_PER_WORD);
12856 rs6000_move_block_from_reg (GP_ARG_MIN_REG + first_reg_offset, mem,
12857 nregs);
12860 /* Save FP registers if needed. */
12861 if (DEFAULT_ABI == ABI_V4
12862 && TARGET_HARD_FLOAT
12863 && ! no_rtl
12864 && next_cum.fregno <= FP_ARG_V4_MAX_REG
12865 && cfun->va_list_fpr_size)
12867 int fregno = next_cum.fregno, nregs;
12868 rtx cr1 = gen_rtx_REG (CCmode, CR1_REGNO);
12869 rtx lab = gen_label_rtx ();
12870 int off = (GP_ARG_NUM_REG * reg_size) + ((fregno - FP_ARG_MIN_REG)
12871 * UNITS_PER_FP_WORD);
12873 emit_jump_insn
12874 (gen_rtx_SET (pc_rtx,
12875 gen_rtx_IF_THEN_ELSE (VOIDmode,
12876 gen_rtx_NE (VOIDmode, cr1,
12877 const0_rtx),
12878 gen_rtx_LABEL_REF (VOIDmode, lab),
12879 pc_rtx)));
12881 for (nregs = 0;
12882 fregno <= FP_ARG_V4_MAX_REG && nregs < cfun->va_list_fpr_size;
12883 fregno++, off += UNITS_PER_FP_WORD, nregs++)
12885 mem = gen_rtx_MEM (TARGET_HARD_FLOAT ? DFmode : SFmode,
12886 plus_constant (Pmode, save_area, off));
12887 MEM_NOTRAP_P (mem) = 1;
12888 set_mem_alias_set (mem, set);
12889 set_mem_align (mem, GET_MODE_ALIGNMENT (
12890 TARGET_HARD_FLOAT ? DFmode : SFmode));
12891 emit_move_insn (mem, gen_rtx_REG (
12892 TARGET_HARD_FLOAT ? DFmode : SFmode, fregno));
12895 emit_label (lab);
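/* Worked example for the code above (a sketch, assuming the V4 ABI with
   hard float): for

       void log_ints (const char *fmt, ...);

   the format pointer consumes r3, so first_reg_offset is 1 and the code
   spills (subject to the va_list_gpr_size trimming above) r4..r10 into
   the GPR part of the save area.  The conditional branch on CR1
   implements the SysV convention that a caller sets CR bit 6 when it
   passes floating-point arguments to a varargs function, so the f1..f8
   stores are skipped when no FP values arrived in registers.  */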
12899 /* Create the va_list data type. */
12901 static tree
12902 rs6000_build_builtin_va_list (void)
12904 tree f_gpr, f_fpr, f_res, f_ovf, f_sav, record, type_decl;
12906 /* For AIX, prefer 'char *' because that's what the system
12907 header files like. */
12908 if (DEFAULT_ABI != ABI_V4)
12909 return build_pointer_type (char_type_node);
12911 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
12912 type_decl = build_decl (BUILTINS_LOCATION, TYPE_DECL,
12913 get_identifier ("__va_list_tag"), record);
12915 f_gpr = build_decl (BUILTINS_LOCATION, FIELD_DECL, get_identifier ("gpr"),
12916 unsigned_char_type_node);
12917 f_fpr = build_decl (BUILTINS_LOCATION, FIELD_DECL, get_identifier ("fpr"),
12918 unsigned_char_type_node);
12919 /* Give the two bytes of padding a name, so that -Wpadded won't warn on
12920 every user file. */
12921 f_res = build_decl (BUILTINS_LOCATION, FIELD_DECL,
12922 get_identifier ("reserved"), short_unsigned_type_node);
12923 f_ovf = build_decl (BUILTINS_LOCATION, FIELD_DECL,
12924 get_identifier ("overflow_arg_area"),
12925 ptr_type_node);
12926 f_sav = build_decl (BUILTINS_LOCATION, FIELD_DECL,
12927 get_identifier ("reg_save_area"),
12928 ptr_type_node);
12930 va_list_gpr_counter_field = f_gpr;
12931 va_list_fpr_counter_field = f_fpr;
12933 DECL_FIELD_CONTEXT (f_gpr) = record;
12934 DECL_FIELD_CONTEXT (f_fpr) = record;
12935 DECL_FIELD_CONTEXT (f_res) = record;
12936 DECL_FIELD_CONTEXT (f_ovf) = record;
12937 DECL_FIELD_CONTEXT (f_sav) = record;
12939 TYPE_STUB_DECL (record) = type_decl;
12940 TYPE_NAME (record) = type_decl;
12941 TYPE_FIELDS (record) = f_gpr;
12942 DECL_CHAIN (f_gpr) = f_fpr;
12943 DECL_CHAIN (f_fpr) = f_res;
12944 DECL_CHAIN (f_res) = f_ovf;
12945 DECL_CHAIN (f_ovf) = f_sav;
12947 layout_type (record);
12949 /* The correct type is an array type of one element. */
12950 return build_array_type (record, build_index_type (size_zero_node));
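/* For reference, the record built above corresponds to this user-visible
   layout from the SVR4 PowerPC ABI (a sketch; the real type is created
   through the tree machinery above):

       typedef struct __va_list_tag
       {
         unsigned char gpr;          // GPR arg registers consumed so far
         unsigned char fpr;          // FPR arg registers consumed so far
         unsigned short reserved;    // the named padding mentioned above
         void *overflow_arg_area;    // next argument passed on the stack
         void *reg_save_area;        // block spilled by the varargs prologue
       } __va_list_tag;

       typedef __va_list_tag __builtin_va_list[1];
*/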
12953 /* Implement va_start. */
12955 static void
12956 rs6000_va_start (tree valist, rtx nextarg)
12958 HOST_WIDE_INT words, n_gpr, n_fpr;
12959 tree f_gpr, f_fpr, f_res, f_ovf, f_sav;
12960 tree gpr, fpr, ovf, sav, t;
12962 /* Only SVR4 needs something special. */
12963 if (DEFAULT_ABI != ABI_V4)
12965 std_expand_builtin_va_start (valist, nextarg);
12966 return;
12969 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
12970 f_fpr = DECL_CHAIN (f_gpr);
12971 f_res = DECL_CHAIN (f_fpr);
12972 f_ovf = DECL_CHAIN (f_res);
12973 f_sav = DECL_CHAIN (f_ovf);
12975 valist = build_simple_mem_ref (valist);
12976 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
12977 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), unshare_expr (valist),
12978 f_fpr, NULL_TREE);
12979 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), unshare_expr (valist),
12980 f_ovf, NULL_TREE);
12981 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), unshare_expr (valist),
12982 f_sav, NULL_TREE);
12984 /* Count number of gp and fp argument registers used. */
12985 words = crtl->args.info.words;
12986 n_gpr = MIN (crtl->args.info.sysv_gregno - GP_ARG_MIN_REG,
12987 GP_ARG_NUM_REG);
12988 n_fpr = MIN (crtl->args.info.fregno - FP_ARG_MIN_REG,
12989 FP_ARG_NUM_REG);
12991 if (TARGET_DEBUG_ARG)
12992 fprintf (stderr, "va_start: words = " HOST_WIDE_INT_PRINT_DEC", n_gpr = "
12993 HOST_WIDE_INT_PRINT_DEC", n_fpr = " HOST_WIDE_INT_PRINT_DEC"\n",
12994 words, n_gpr, n_fpr);
12996 if (cfun->va_list_gpr_size)
12998 t = build2 (MODIFY_EXPR, TREE_TYPE (gpr), gpr,
12999 build_int_cst (NULL_TREE, n_gpr));
13000 TREE_SIDE_EFFECTS (t) = 1;
13001 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
13004 if (cfun->va_list_fpr_size)
13006 t = build2 (MODIFY_EXPR, TREE_TYPE (fpr), fpr,
13007 build_int_cst (NULL_TREE, n_fpr));
13008 TREE_SIDE_EFFECTS (t) = 1;
13009 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
13011 #ifdef HAVE_AS_GNU_ATTRIBUTE
13012 if (call_ABI_of_interest (cfun->decl))
13013 rs6000_passes_float = true;
13014 #endif
13017 /* Find the overflow area. */
13018 t = make_tree (TREE_TYPE (ovf), crtl->args.internal_arg_pointer);
13019 if (words != 0)
13020 t = fold_build_pointer_plus_hwi (t, words * MIN_UNITS_PER_WORD);
13021 t = build2 (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
13022 TREE_SIDE_EFFECTS (t) = 1;
13023 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
13025 /* If there were no va_arg invocations, don't set up the register
13026 save area. */
13027 if (!cfun->va_list_gpr_size
13028 && !cfun->va_list_fpr_size
13029 && n_gpr < GP_ARG_NUM_REG
13030 && n_fpr < FP_ARG_V4_MAX_REG)
13031 return;
13033 /* Find the register save area. */
13034 t = make_tree (TREE_TYPE (sav), virtual_stack_vars_rtx);
13035 if (cfun->machine->varargs_save_offset)
13036 t = fold_build_pointer_plus_hwi (t, cfun->machine->varargs_save_offset);
13037 t = build2 (MODIFY_EXPR, TREE_TYPE (sav), sav, t);
13038 TREE_SIDE_EFFECTS (t) = 1;
13039 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
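/* Usage sketch (user level): for a V4 variadic callee such as

       int sum (int n, ...)
       {
         va_list ap;
         va_start (ap, n);     // expanded by rs6000_va_start above
         ...
         va_end (ap);
       }

   va_start records how many GPRs/FPRs the named arguments consumed
   (the gpr and fpr counters), points overflow_arg_area just past the
   named stack words, and points reg_save_area at the block spilled by
   setup_incoming_varargs.  */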
13042 /* Implement va_arg. */
13044 static tree
13045 rs6000_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p,
13046 gimple_seq *post_p)
13048 tree f_gpr, f_fpr, f_res, f_ovf, f_sav;
13049 tree gpr, fpr, ovf, sav, reg, t, u;
13050 int size, rsize, n_reg, sav_ofs, sav_scale;
13051 tree lab_false, lab_over, addr;
13052 int align;
13053 tree ptrtype = build_pointer_type_for_mode (type, ptr_mode, true);
13054 int regalign = 0;
13055 gimple *stmt;
13057 if (pass_by_reference (NULL, TYPE_MODE (type), type, false))
13059 t = rs6000_gimplify_va_arg (valist, ptrtype, pre_p, post_p);
13060 return build_va_arg_indirect_ref (t);
13063 /* We need to deal with the fact that the darwin ppc64 ABI is defined by an
13064 earlier version of gcc, with the property that it always applied alignment
13065 adjustments to the va-args (even for zero-sized types).  The cheapest way
13066 to deal with this is to replicate the effect of the part of
13067 std_gimplify_va_arg_expr that carries out the align adjust, for the
13068 relevant case.
13069 We don't need to check for pass-by-reference because of the test above.
13070 We can return a simplified answer, since we know there's no offset to add. */
13072 if (((TARGET_MACHO
13073 && rs6000_darwin64_abi)
13074 || DEFAULT_ABI == ABI_ELFv2
13075 || (DEFAULT_ABI == ABI_AIX && !rs6000_compat_align_parm))
13076 && integer_zerop (TYPE_SIZE (type)))
13078 unsigned HOST_WIDE_INT align, boundary;
13079 tree valist_tmp = get_initialized_tmp_var (valist, pre_p, NULL);
13080 align = PARM_BOUNDARY / BITS_PER_UNIT;
13081 boundary = rs6000_function_arg_boundary (TYPE_MODE (type), type);
13082 if (boundary > MAX_SUPPORTED_STACK_ALIGNMENT)
13083 boundary = MAX_SUPPORTED_STACK_ALIGNMENT;
13084 boundary /= BITS_PER_UNIT;
13085 if (boundary > align)
13087 tree t;
13088 /* This updates arg ptr by the amount that would be necessary
13089 to align the zero-sized (but not zero-alignment) item. */
13090 t = build2 (MODIFY_EXPR, TREE_TYPE (valist), valist_tmp,
13091 fold_build_pointer_plus_hwi (valist_tmp, boundary - 1));
13092 gimplify_and_add (t, pre_p);
13094 t = fold_convert (sizetype, valist_tmp);
13095 t = build2 (MODIFY_EXPR, TREE_TYPE (valist), valist_tmp,
13096 fold_convert (TREE_TYPE (valist),
13097 fold_build2 (BIT_AND_EXPR, sizetype, t,
13098 size_int (-boundary))));
13099 t = build2 (MODIFY_EXPR, TREE_TYPE (valist), valist, t);
13100 gimplify_and_add (t, pre_p);
13102 /* Since it is zero-sized there's no increment for the item itself. */
13103 valist_tmp = fold_convert (build_pointer_type (type), valist_tmp);
13104 return build_va_arg_indirect_ref (valist_tmp);
13107 if (DEFAULT_ABI != ABI_V4)
13109 if (targetm.calls.split_complex_arg && TREE_CODE (type) == COMPLEX_TYPE)
13111 tree elem_type = TREE_TYPE (type);
13112 machine_mode elem_mode = TYPE_MODE (elem_type);
13113 int elem_size = GET_MODE_SIZE (elem_mode);
13115 if (elem_size < UNITS_PER_WORD)
13117 tree real_part, imag_part;
13118 gimple_seq post = NULL;
13120 real_part = rs6000_gimplify_va_arg (valist, elem_type, pre_p,
13121 &post);
13122 /* Copy the value into a temporary, lest the formal temporary
13123 be reused out from under us. */
13124 real_part = get_initialized_tmp_var (real_part, pre_p, &post);
13125 gimple_seq_add_seq (pre_p, post);
13127 imag_part = rs6000_gimplify_va_arg (valist, elem_type, pre_p,
13128 post_p);
13130 return build2 (COMPLEX_EXPR, type, real_part, imag_part);
13134 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
13137 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
13138 f_fpr = DECL_CHAIN (f_gpr);
13139 f_res = DECL_CHAIN (f_fpr);
13140 f_ovf = DECL_CHAIN (f_res);
13141 f_sav = DECL_CHAIN (f_ovf);
13143 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
13144 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), unshare_expr (valist),
13145 f_fpr, NULL_TREE);
13146 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), unshare_expr (valist),
13147 f_ovf, NULL_TREE);
13148 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), unshare_expr (valist),
13149 f_sav, NULL_TREE);
13151 size = int_size_in_bytes (type);
13152 rsize = (size + 3) / 4;
13153 int pad = 4 * rsize - size;
13154 align = 1;
13156 machine_mode mode = TYPE_MODE (type);
13157 if (abi_v4_pass_in_fpr (mode, false))
13159 /* FP args go in FP registers, if present. */
13160 reg = fpr;
13161 n_reg = (size + 7) / 8;
13162 sav_ofs = (TARGET_HARD_FLOAT ? 8 : 4) * 4;
13163 sav_scale = (TARGET_HARD_FLOAT ? 8 : 4);
13164 if (mode != SFmode && mode != SDmode)
13165 align = 8;
13167 else
13169 /* Otherwise into GP registers. */
13170 reg = gpr;
13171 n_reg = rsize;
13172 sav_ofs = 0;
13173 sav_scale = 4;
13174 if (n_reg == 2)
13175 align = 8;
13178 /* Pull the value out of the saved registers.... */
13180 lab_over = NULL;
13181 addr = create_tmp_var (ptr_type_node, "addr");
13183 /* AltiVec vectors never go in registers when -mabi=altivec. */
13184 if (TARGET_ALTIVEC_ABI && ALTIVEC_VECTOR_MODE (mode))
13185 align = 16;
13186 else
13188 lab_false = create_artificial_label (input_location);
13189 lab_over = create_artificial_label (input_location);
13191 /* Long long is aligned in the registers.  So is any other two-GPR
13192 item, such as complex int, due to a historical mistake. */
13193 u = reg;
13194 if (n_reg == 2 && reg == gpr)
13196 regalign = 1;
13197 u = build2 (BIT_AND_EXPR, TREE_TYPE (reg), unshare_expr (reg),
13198 build_int_cst (TREE_TYPE (reg), n_reg - 1));
13199 u = build2 (POSTINCREMENT_EXPR, TREE_TYPE (reg),
13200 unshare_expr (reg), u);
13202 /* _Decimal128 is passed in even/odd fpr pairs; the stored
13203 reg number is 0 for f1, so we want to make it odd. */
13204 else if (reg == fpr && mode == TDmode)
13206 t = build2 (BIT_IOR_EXPR, TREE_TYPE (reg), unshare_expr (reg),
13207 build_int_cst (TREE_TYPE (reg), 1));
13208 u = build2 (MODIFY_EXPR, void_type_node, unshare_expr (reg), t);
13211 t = fold_convert (TREE_TYPE (reg), size_int (8 - n_reg + 1));
13212 t = build2 (GE_EXPR, boolean_type_node, u, t);
13213 u = build1 (GOTO_EXPR, void_type_node, lab_false);
13214 t = build3 (COND_EXPR, void_type_node, t, u, NULL_TREE);
13215 gimplify_and_add (t, pre_p);
13217 t = sav;
13218 if (sav_ofs)
13219 t = fold_build_pointer_plus_hwi (sav, sav_ofs);
13221 u = build2 (POSTINCREMENT_EXPR, TREE_TYPE (reg), unshare_expr (reg),
13222 build_int_cst (TREE_TYPE (reg), n_reg));
13223 u = fold_convert (sizetype, u);
13224 u = build2 (MULT_EXPR, sizetype, u, size_int (sav_scale));
13225 t = fold_build_pointer_plus (t, u);
13227 /* _Decimal32 varargs are located in the second word of the 64-bit
13228 FP register for 32-bit binaries. */
13229 if (TARGET_32BIT && TARGET_HARD_FLOAT && mode == SDmode)
13230 t = fold_build_pointer_plus_hwi (t, size);
13232 /* Args are passed right-aligned. */
13233 if (BYTES_BIG_ENDIAN)
13234 t = fold_build_pointer_plus_hwi (t, pad);
13236 gimplify_assign (addr, t, pre_p);
13238 gimple_seq_add_stmt (pre_p, gimple_build_goto (lab_over));
13240 stmt = gimple_build_label (lab_false);
13241 gimple_seq_add_stmt (pre_p, stmt);
13243 if ((n_reg == 2 && !regalign) || n_reg > 2)
13245 /* Ensure that we don't find any more args in regs.
13246 Alignment has taken care of the special cases. */
13247 gimplify_assign (reg, build_int_cst (TREE_TYPE (reg), 8), pre_p);
13251 /* ... otherwise out of the overflow area. */
13253 /* Care for on-stack alignment if needed. */
13254 t = ovf;
13255 if (align != 1)
13257 t = fold_build_pointer_plus_hwi (t, align - 1);
13258 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
13259 build_int_cst (TREE_TYPE (t), -align));
13262 /* Args are passed right-aligned. */
13263 if (BYTES_BIG_ENDIAN)
13264 t = fold_build_pointer_plus_hwi (t, pad);
13266 gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue);
13268 gimplify_assign (unshare_expr (addr), t, pre_p);
13270 t = fold_build_pointer_plus_hwi (t, size);
13271 gimplify_assign (unshare_expr (ovf), t, pre_p);
13273 if (lab_over)
13275 stmt = gimple_build_label (lab_over);
13276 gimple_seq_add_stmt (pre_p, stmt);
13279 if (STRICT_ALIGNMENT
13280 && (TYPE_ALIGN (type)
13281 > (unsigned) BITS_PER_UNIT * (align < 4 ? 4 : align)))
13283 /* The value (of type complex double, for example) may not be
13284 aligned in memory in the saved registers, so copy via a
13285 temporary. (This is the same code as used for SPARC.) */
13286 tree tmp = create_tmp_var (type, "va_arg_tmp");
13287 tree dest_addr = build_fold_addr_expr (tmp);
13289 tree copy = build_call_expr (builtin_decl_implicit (BUILT_IN_MEMCPY),
13290 3, dest_addr, addr, size_int (rsize * 4));
13291 TREE_ADDRESSABLE (tmp) = 1;
13293 gimplify_and_add (copy, pre_p);
13294 addr = dest_addr;
13297 addr = fold_convert (ptrtype, addr);
13298 return build_va_arg_indirect_ref (addr);
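/* Illustrative example of the register/overflow split implemented above
   (V4, 32-bit): fetching a long long with va_arg uses n_reg == 2 and
   align == 8, so the code first rounds the gpr counter up to an even
   register (matching the caller's even/odd GPR pair, the "historical
   mistake" noted above) and, once fewer than two registers remain,
   takes the lab_false path and reads the value from the 8-byte-aligned
   overflow area instead.  */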
13301 /* Builtins. */
13303 static void
13304 def_builtin (const char *name, tree type, enum rs6000_builtins code)
13306 tree t;
13307 unsigned classify = rs6000_builtin_info[(int)code].attr;
13308 const char *attr_string = "";
13310 gcc_assert (name != NULL);
13311 gcc_assert (IN_RANGE ((int)code, 0, (int)RS6000_BUILTIN_COUNT));
13313 if (rs6000_builtin_decls[(int)code])
13314 fatal_error (input_location,
13315 "internal error: builtin function %qs already processed",
13316 name);
13318 rs6000_builtin_decls[(int)code] = t =
13319 add_builtin_function (name, type, (int)code, BUILT_IN_MD, NULL, NULL_TREE);
13321 /* Set any special attributes. */
13322 if ((classify & RS6000_BTC_CONST) != 0)
13324 /* const function, function only depends on the inputs. */
13325 TREE_READONLY (t) = 1;
13326 TREE_NOTHROW (t) = 1;
13327 attr_string = ", const";
13329 else if ((classify & RS6000_BTC_PURE) != 0)
13331 /* pure function, function can read global memory, but does not set any
13332 external state. */
13333 DECL_PURE_P (t) = 1;
13334 TREE_NOTHROW (t) = 1;
13335 attr_string = ", pure";
13337 else if ((classify & RS6000_BTC_FP) != 0)
13339 /* Function is a math function.  If -frounding-math is in effect, treat
13340 the function as not reading global memory, but it can have arbitrary
13341 side effects.  If it is off, assume the function is a const function.
13342 This mimics the ATTR_MATHFN_FPROUNDING attribute in
13343 builtin-attribute.def that is used for the math functions. */
13344 TREE_NOTHROW (t) = 1;
13345 if (flag_rounding_math)
13347 DECL_PURE_P (t) = 1;
13348 DECL_IS_NOVOPS (t) = 1;
13349 attr_string = ", fp, pure";
13351 else
13353 TREE_READONLY (t) = 1;
13354 attr_string = ", fp, const";
13357 else if ((classify & RS6000_BTC_ATTR_MASK) != 0)
13358 gcc_unreachable ();
13360 if (TARGET_DEBUG_BUILTIN)
13361 fprintf (stderr, "rs6000_builtin, code = %4d, %s%s\n",
13362 (int)code, name, attr_string);
13365 /* Simple ternary operations: VECd = foo (VECa, VECb, VECc). */
13367 #undef RS6000_BUILTIN_0
13368 #undef RS6000_BUILTIN_1
13369 #undef RS6000_BUILTIN_2
13370 #undef RS6000_BUILTIN_3
13371 #undef RS6000_BUILTIN_A
13372 #undef RS6000_BUILTIN_D
13373 #undef RS6000_BUILTIN_H
13374 #undef RS6000_BUILTIN_P
13375 #undef RS6000_BUILTIN_X
13377 #define RS6000_BUILTIN_0(ENUM, NAME, MASK, ATTR, ICODE)
13378 #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE)
13379 #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE)
13380 #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE) \
13381 { MASK, ICODE, NAME, ENUM },
13383 #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE)
13384 #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE)
13385 #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE)
13386 #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE)
13387 #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)
13389 static const struct builtin_description bdesc_3arg[] =
13391 #include "rs6000-builtin.def"
13394 /* DST operations: void foo (void *, const int, const char). */
13396 #undef RS6000_BUILTIN_0
13397 #undef RS6000_BUILTIN_1
13398 #undef RS6000_BUILTIN_2
13399 #undef RS6000_BUILTIN_3
13400 #undef RS6000_BUILTIN_A
13401 #undef RS6000_BUILTIN_D
13402 #undef RS6000_BUILTIN_H
13403 #undef RS6000_BUILTIN_P
13404 #undef RS6000_BUILTIN_X
13406 #define RS6000_BUILTIN_0(ENUM, NAME, MASK, ATTR, ICODE)
13407 #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE)
13408 #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE)
13409 #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE)
13410 #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE)
13411 #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE) \
13412 { MASK, ICODE, NAME, ENUM },
13414 #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE)
13415 #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE)
13416 #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)
13418 static const struct builtin_description bdesc_dst[] =
13420 #include "rs6000-builtin.def"
13423 /* Simple binary operations: VECc = foo (VECa, VECb). */
13425 #undef RS6000_BUILTIN_0
13426 #undef RS6000_BUILTIN_1
13427 #undef RS6000_BUILTIN_2
13428 #undef RS6000_BUILTIN_3
13429 #undef RS6000_BUILTIN_A
13430 #undef RS6000_BUILTIN_D
13431 #undef RS6000_BUILTIN_H
13432 #undef RS6000_BUILTIN_P
13433 #undef RS6000_BUILTIN_X
13435 #define RS6000_BUILTIN_0(ENUM, NAME, MASK, ATTR, ICODE)
13436 #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE)
13437 #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE) \
13438 { MASK, ICODE, NAME, ENUM },
13440 #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE)
13441 #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE)
13442 #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE)
13443 #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE)
13444 #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE)
13445 #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)
13447 static const struct builtin_description bdesc_2arg[] =
13449 #include "rs6000-builtin.def"
13452 #undef RS6000_BUILTIN_0
13453 #undef RS6000_BUILTIN_1
13454 #undef RS6000_BUILTIN_2
13455 #undef RS6000_BUILTIN_3
13456 #undef RS6000_BUILTIN_A
13457 #undef RS6000_BUILTIN_D
13458 #undef RS6000_BUILTIN_H
13459 #undef RS6000_BUILTIN_P
13460 #undef RS6000_BUILTIN_X
13462 #define RS6000_BUILTIN_0(ENUM, NAME, MASK, ATTR, ICODE)
13463 #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE)
13464 #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE)
13465 #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE)
13466 #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE)
13467 #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE)
13468 #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE)
13469 #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE) \
13470 { MASK, ICODE, NAME, ENUM },
13472 #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)
13474 /* AltiVec predicates. */
13476 static const struct builtin_description bdesc_altivec_preds[] =
13478 #include "rs6000-builtin.def"
13481 /* ABS* operations. */
13483 #undef RS6000_BUILTIN_0
13484 #undef RS6000_BUILTIN_1
13485 #undef RS6000_BUILTIN_2
13486 #undef RS6000_BUILTIN_3
13487 #undef RS6000_BUILTIN_A
13488 #undef RS6000_BUILTIN_D
13489 #undef RS6000_BUILTIN_H
13490 #undef RS6000_BUILTIN_P
13491 #undef RS6000_BUILTIN_X
13493 #define RS6000_BUILTIN_0(ENUM, NAME, MASK, ATTR, ICODE)
13494 #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE)
13495 #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE)
13496 #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE)
13497 #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE) \
13498 { MASK, ICODE, NAME, ENUM },
13500 #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE)
13501 #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE)
13502 #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE)
13503 #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)
13505 static const struct builtin_description bdesc_abs[] =
13507 #include "rs6000-builtin.def"
13510 /* Simple unary operations: VECb = foo (unsigned literal) or VECb =
13511 foo (VECa). */
13513 #undef RS6000_BUILTIN_0
13514 #undef RS6000_BUILTIN_1
13515 #undef RS6000_BUILTIN_2
13516 #undef RS6000_BUILTIN_3
13517 #undef RS6000_BUILTIN_A
13518 #undef RS6000_BUILTIN_D
13519 #undef RS6000_BUILTIN_H
13520 #undef RS6000_BUILTIN_P
13521 #undef RS6000_BUILTIN_X
13523 #define RS6000_BUILTIN_0(ENUM, NAME, MASK, ATTR, ICODE)
13524 #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE) \
13525 { MASK, ICODE, NAME, ENUM },
13527 #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE)
13528 #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE)
13529 #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE)
13530 #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE)
13531 #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE)
13532 #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE)
13533 #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)
13535 static const struct builtin_description bdesc_1arg[] =
13537 #include "rs6000-builtin.def"
13540 /* Simple no-argument operations: result = __builtin_darn_32 ().  */
13542 #undef RS6000_BUILTIN_0
13543 #undef RS6000_BUILTIN_1
13544 #undef RS6000_BUILTIN_2
13545 #undef RS6000_BUILTIN_3
13546 #undef RS6000_BUILTIN_A
13547 #undef RS6000_BUILTIN_D
13548 #undef RS6000_BUILTIN_H
13549 #undef RS6000_BUILTIN_P
13550 #undef RS6000_BUILTIN_X
13552 #define RS6000_BUILTIN_0(ENUM, NAME, MASK, ATTR, ICODE) \
13553 { MASK, ICODE, NAME, ENUM },
13555 #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE)
13556 #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE)
13557 #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE)
13558 #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE)
13559 #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE)
13560 #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE)
13561 #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE)
13562 #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)
13564 static const struct builtin_description bdesc_0arg[] =
13566 #include "rs6000-builtin.def"
13569 /* HTM builtins. */
13570 #undef RS6000_BUILTIN_0
13571 #undef RS6000_BUILTIN_1
13572 #undef RS6000_BUILTIN_2
13573 #undef RS6000_BUILTIN_3
13574 #undef RS6000_BUILTIN_A
13575 #undef RS6000_BUILTIN_D
13576 #undef RS6000_BUILTIN_H
13577 #undef RS6000_BUILTIN_P
13578 #undef RS6000_BUILTIN_X
13580 #define RS6000_BUILTIN_0(ENUM, NAME, MASK, ATTR, ICODE)
13581 #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE)
13582 #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE)
13583 #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE)
13584 #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE)
13585 #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE)
13586 #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE) \
13587 { MASK, ICODE, NAME, ENUM },
13589 #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE)
13590 #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)
13592 static const struct builtin_description bdesc_htm[] =
13594 #include "rs6000-builtin.def"
13597 #undef RS6000_BUILTIN_0
13598 #undef RS6000_BUILTIN_1
13599 #undef RS6000_BUILTIN_2
13600 #undef RS6000_BUILTIN_3
13601 #undef RS6000_BUILTIN_A
13602 #undef RS6000_BUILTIN_D
13603 #undef RS6000_BUILTIN_H
13604 #undef RS6000_BUILTIN_P
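/* A condensed sketch of the X-macro pattern used above: rs6000-builtin.def
   invokes one RS6000_BUILTIN_<class> macro per builtin, and each table is
   produced by defining exactly one of those macros to emit an initializer
   row while the rest expand to nothing.  Given a hypothetical entry

       RS6000_BUILTIN_2 (MY_ENUM, "my_name", MY_MASK, MY_ATTR,
                         CODE_FOR_my_insn)

   the #include under bdesc_2arg expands it to

       { MY_MASK, CODE_FOR_my_insn, "my_name", MY_ENUM },

   and every other table's #include of the same file drops it.  */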
13606 /* Return true if a builtin function is overloaded. */
13607 bool
13608 rs6000_overloaded_builtin_p (enum rs6000_builtins fncode)
13610 return (rs6000_builtin_info[(int)fncode].attr & RS6000_BTC_OVERLOADED) != 0;
13613 const char *
13614 rs6000_overloaded_builtin_name (enum rs6000_builtins fncode)
13616 return rs6000_builtin_info[(int)fncode].name;
13619 /* Expand an expression EXP that calls a builtin without arguments. */
13620 static rtx
13621 rs6000_expand_zeroop_builtin (enum insn_code icode, rtx target)
13623 rtx pat;
13624 machine_mode tmode = insn_data[icode].operand[0].mode;
13626 if (icode == CODE_FOR_nothing)
13627 /* Builtin not supported on this processor. */
13628 return 0;
13630 if (target == 0
13631 || GET_MODE (target) != tmode
13632 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13633 target = gen_reg_rtx (tmode);
13635 pat = GEN_FCN (icode) (target);
13636 if (! pat)
13637 return 0;
13638 emit_insn (pat);
13640 return target;
13644 static rtx
13645 rs6000_expand_mtfsf_builtin (enum insn_code icode, tree exp)
13647 rtx pat;
13648 tree arg0 = CALL_EXPR_ARG (exp, 0);
13649 tree arg1 = CALL_EXPR_ARG (exp, 1);
13650 rtx op0 = expand_normal (arg0);
13651 rtx op1 = expand_normal (arg1);
13652 machine_mode mode0 = insn_data[icode].operand[0].mode;
13653 machine_mode mode1 = insn_data[icode].operand[1].mode;
13655 if (icode == CODE_FOR_nothing)
13656 /* Builtin not supported on this processor. */
13657 return 0;
13659 /* If we got invalid arguments bail out before generating bad rtl. */
13660 if (arg0 == error_mark_node || arg1 == error_mark_node)
13661 return const0_rtx;
13663 if (GET_CODE (op0) != CONST_INT
13664 || INTVAL (op0) > 255
13665 || INTVAL (op0) < 0)
13667 error ("argument 1 must be an 8-bit field value");
13668 return const0_rtx;
13671 if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
13672 op0 = copy_to_mode_reg (mode0, op0);
13674 if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
13675 op1 = copy_to_mode_reg (mode1, op1);
13677 pat = GEN_FCN (icode) (op0, op1);
13678 if (! pat)
13679 return const0_rtx;
13680 emit_insn (pat);
13682 return NULL_RTX;
13685 static rtx
13686 rs6000_expand_unop_builtin (enum insn_code icode, tree exp, rtx target)
13688 rtx pat;
13689 tree arg0 = CALL_EXPR_ARG (exp, 0);
13690 rtx op0 = expand_normal (arg0);
13691 machine_mode tmode = insn_data[icode].operand[0].mode;
13692 machine_mode mode0 = insn_data[icode].operand[1].mode;
13694 if (icode == CODE_FOR_nothing)
13695 /* Builtin not supported on this processor. */
13696 return 0;
13698 /* If we got invalid arguments bail out before generating bad rtl. */
13699 if (arg0 == error_mark_node)
13700 return const0_rtx;
13702 if (icode == CODE_FOR_altivec_vspltisb
13703 || icode == CODE_FOR_altivec_vspltish
13704 || icode == CODE_FOR_altivec_vspltisw)
13706 /* Only allow 5-bit *signed* literals. */
13707 if (GET_CODE (op0) != CONST_INT
13708 || INTVAL (op0) > 15
13709 || INTVAL (op0) < -16)
13711 error ("argument 1 must be a 5-bit signed literal");
13712 return CONST0_RTX (tmode);
13716 if (target == 0
13717 || GET_MODE (target) != tmode
13718 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13719 target = gen_reg_rtx (tmode);
13721 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13722 op0 = copy_to_mode_reg (mode0, op0);
13724 pat = GEN_FCN (icode) (target, op0);
13725 if (! pat)
13726 return 0;
13727 emit_insn (pat);
13729 return target;
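/* E.g. (illustrative): __builtin_altivec_vspltisb (-16) satisfies the
   range check above, while __builtin_altivec_vspltisb (31) is rejected
   with "argument 1 must be a 5-bit signed literal" before any RTL is
   generated.  */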
13732 static rtx
13733 altivec_expand_abs_builtin (enum insn_code icode, tree exp, rtx target)
13735 rtx pat, scratch1, scratch2;
13736 tree arg0 = CALL_EXPR_ARG (exp, 0);
13737 rtx op0 = expand_normal (arg0);
13738 machine_mode tmode = insn_data[icode].operand[0].mode;
13739 machine_mode mode0 = insn_data[icode].operand[1].mode;
13741 /* If we have invalid arguments, bail out before generating bad rtl. */
13742 if (arg0 == error_mark_node)
13743 return const0_rtx;
13745 if (target == 0
13746 || GET_MODE (target) != tmode
13747 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13748 target = gen_reg_rtx (tmode);
13750 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13751 op0 = copy_to_mode_reg (mode0, op0);
13753 scratch1 = gen_reg_rtx (mode0);
13754 scratch2 = gen_reg_rtx (mode0);
13756 pat = GEN_FCN (icode) (target, op0, scratch1, scratch2);
13757 if (! pat)
13758 return 0;
13759 emit_insn (pat);
13761 return target;
13764 static rtx
13765 rs6000_expand_binop_builtin (enum insn_code icode, tree exp, rtx target)
13767 rtx pat;
13768 tree arg0 = CALL_EXPR_ARG (exp, 0);
13769 tree arg1 = CALL_EXPR_ARG (exp, 1);
13770 rtx op0 = expand_normal (arg0);
13771 rtx op1 = expand_normal (arg1);
13772 machine_mode tmode = insn_data[icode].operand[0].mode;
13773 machine_mode mode0 = insn_data[icode].operand[1].mode;
13774 machine_mode mode1 = insn_data[icode].operand[2].mode;
13776 if (icode == CODE_FOR_nothing)
13777 /* Builtin not supported on this processor. */
13778 return 0;
13780 /* If we got invalid arguments bail out before generating bad rtl. */
13781 if (arg0 == error_mark_node || arg1 == error_mark_node)
13782 return const0_rtx;
13784 if (icode == CODE_FOR_altivec_vcfux
13785 || icode == CODE_FOR_altivec_vcfsx
13786 || icode == CODE_FOR_altivec_vctsxs
13787 || icode == CODE_FOR_altivec_vctuxs
13788 || icode == CODE_FOR_altivec_vspltb
13789 || icode == CODE_FOR_altivec_vsplth
13790 || icode == CODE_FOR_altivec_vspltw)
13792 /* Only allow 5-bit unsigned literals. */
13793 STRIP_NOPS (arg1);
13794 if (TREE_CODE (arg1) != INTEGER_CST
13795 || TREE_INT_CST_LOW (arg1) & ~0x1f)
13797 error ("argument 2 must be a 5-bit unsigned literal");
13798 return CONST0_RTX (tmode);
13801 else if (icode == CODE_FOR_dfptstsfi_eq_dd
13802 || icode == CODE_FOR_dfptstsfi_lt_dd
13803 || icode == CODE_FOR_dfptstsfi_gt_dd
13804 || icode == CODE_FOR_dfptstsfi_unordered_dd
13805 || icode == CODE_FOR_dfptstsfi_eq_td
13806 || icode == CODE_FOR_dfptstsfi_lt_td
13807 || icode == CODE_FOR_dfptstsfi_gt_td
13808 || icode == CODE_FOR_dfptstsfi_unordered_td)
13810 /* Only allow 6-bit unsigned literals. */
13811 STRIP_NOPS (arg0);
13812 if (TREE_CODE (arg0) != INTEGER_CST
13813 || !IN_RANGE (TREE_INT_CST_LOW (arg0), 0, 63))
13815 error ("argument 1 must be a 6-bit unsigned literal");
13816 return CONST0_RTX (tmode);
13819 else if (icode == CODE_FOR_xststdcqp_kf
13820 || icode == CODE_FOR_xststdcqp_tf
13821 || icode == CODE_FOR_xststdcdp
13822 || icode == CODE_FOR_xststdcsp
13823 || icode == CODE_FOR_xvtstdcdp
13824 || icode == CODE_FOR_xvtstdcsp)
13826 /* Only allow 7-bit unsigned literals. */
13827 STRIP_NOPS (arg1);
13828 if (TREE_CODE (arg1) != INTEGER_CST
13829 || !IN_RANGE (TREE_INT_CST_LOW (arg1), 0, 127))
13831 error ("argument 2 must be a 7-bit unsigned literal");
13832 return CONST0_RTX (tmode);
13835 else if (icode == CODE_FOR_unpackv1ti
13836 || icode == CODE_FOR_unpackkf
13837 || icode == CODE_FOR_unpacktf
13838 || icode == CODE_FOR_unpackif
13839 || icode == CODE_FOR_unpacktd)
13841 /* Only allow 1-bit unsigned literals. */
13842 STRIP_NOPS (arg1);
13843 if (TREE_CODE (arg1) != INTEGER_CST
13844 || !IN_RANGE (TREE_INT_CST_LOW (arg1), 0, 1))
13846 error ("argument 2 must be a 1-bit unsigned literal");
13847 return CONST0_RTX (tmode);
13851 if (target == 0
13852 || GET_MODE (target) != tmode
13853 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13854 target = gen_reg_rtx (tmode);
13856 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13857 op0 = copy_to_mode_reg (mode0, op0);
13858 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
13859 op1 = copy_to_mode_reg (mode1, op1);
13861 pat = GEN_FCN (icode) (target, op0, op1);
13862 if (! pat)
13863 return 0;
13864 emit_insn (pat);
13866 return target;
13869 static rtx
13870 altivec_expand_predicate_builtin (enum insn_code icode, tree exp, rtx target)
13872 rtx pat, scratch;
13873 tree cr6_form = CALL_EXPR_ARG (exp, 0);
13874 tree arg0 = CALL_EXPR_ARG (exp, 1);
13875 tree arg1 = CALL_EXPR_ARG (exp, 2);
13876 rtx op0 = expand_normal (arg0);
13877 rtx op1 = expand_normal (arg1);
13878 machine_mode tmode = SImode;
13879 machine_mode mode0 = insn_data[icode].operand[1].mode;
13880 machine_mode mode1 = insn_data[icode].operand[2].mode;
13881 int cr6_form_int;
13883 if (TREE_CODE (cr6_form) != INTEGER_CST)
13885 error ("argument 1 of %qs must be a constant",
13886 "__builtin_altivec_predicate");
13887 return const0_rtx;
13889 else
13890 cr6_form_int = TREE_INT_CST_LOW (cr6_form);
13892 gcc_assert (mode0 == mode1);
13894 /* If we have invalid arguments, bail out before generating bad rtl. */
13895 if (arg0 == error_mark_node || arg1 == error_mark_node)
13896 return const0_rtx;
13898 if (target == 0
13899 || GET_MODE (target) != tmode
13900 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13901 target = gen_reg_rtx (tmode);
13903 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13904 op0 = copy_to_mode_reg (mode0, op0);
13905 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
13906 op1 = copy_to_mode_reg (mode1, op1);
13908 /* Note that for many of the relevant operations (e.g. cmpne or
13909 cmpeq) with float or double operands, it would make more sense for
13910 the mode of the allocated scratch register to be a vector of
13911 integers.  But the choice to copy the mode of operand 0 was made
13912 long ago and there are no plans to change it. */
13913 scratch = gen_reg_rtx (mode0);
13915 pat = GEN_FCN (icode) (scratch, op0, op1);
13916 if (! pat)
13917 return 0;
13918 emit_insn (pat);
13920 /* The vec_any* and vec_all* predicates use the same opcodes for two
13921 different operations, but the bits in CR6 will be different
13922 depending on what information we want. So we have to play tricks
13923 with CR6 to get the right bits out.
13925 If you think this is disgusting, look at the specs for the
13926 AltiVec predicates. */
13928 switch (cr6_form_int)
13930 case 0:
13931 emit_insn (gen_cr6_test_for_zero (target));
13932 break;
13933 case 1:
13934 emit_insn (gen_cr6_test_for_zero_reverse (target));
13935 break;
13936 case 2:
13937 emit_insn (gen_cr6_test_for_lt (target));
13938 break;
13939 case 3:
13940 emit_insn (gen_cr6_test_for_lt_reverse (target));
13941 break;
13942 default:
13943 error ("argument 1 of %qs is out of range",
13944 "__builtin_altivec_predicate");
13945 break;
13948 return target;
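/* Usage sketch: the vec_all_* and vec_any_* intrinsics from <altivec.h>
   funnel into this expander with different leading cr6_form constants,
   e.g. (user level)

       vector signed int a, b;
       int all_eq = vec_all_eq (a, b);
       int any_eq = vec_any_eq (a, b);

   both expand to the same vcmpequw. comparison; only the CR6 test
   selected by the switch above differs.  */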
13951 rtx
13952 swap_endian_selector_for_mode (machine_mode mode)
13954 unsigned int swap1[16] = {15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0};
13955 unsigned int swap2[16] = {7,6,5,4,3,2,1,0,15,14,13,12,11,10,9,8};
13956 unsigned int swap4[16] = {3,2,1,0,7,6,5,4,11,10,9,8,15,14,13,12};
13957 unsigned int swap8[16] = {1,0,3,2,5,4,7,6,9,8,11,10,13,12,15,14};
13959 unsigned int *swaparray, i;
13960 rtx perm[16];
13962 switch (mode)
13964 case E_V1TImode:
13965 swaparray = swap1;
13966 break;
13967 case E_V2DFmode:
13968 case E_V2DImode:
13969 swaparray = swap2;
13970 break;
13971 case E_V4SFmode:
13972 case E_V4SImode:
13973 swaparray = swap4;
13974 break;
13975 case E_V8HImode:
13976 swaparray = swap8;
13977 break;
13978 default:
13979 gcc_unreachable ();
13982 for (i = 0; i < 16; ++i)
13983 perm[i] = GEN_INT (swaparray[i]);
13985 return force_reg (V16QImode, gen_rtx_CONST_VECTOR (V16QImode,
13986 gen_rtvec_v (16, perm)));
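/* Worked example: for V4SImode the constant built above is the byte
   permutation { 3,2,1,0, 7,6,5,4, 11,10,9,8, 15,14,13,12 }, so using it
   as a vperm selector reverses the bytes within each 32-bit element,
   i.e. it converts every element between big- and little-endian order
   in one shot.  */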
13989 static rtx
13990 altivec_expand_lv_builtin (enum insn_code icode, tree exp, rtx target, bool blk)
13992 rtx pat, addr;
13993 tree arg0 = CALL_EXPR_ARG (exp, 0);
13994 tree arg1 = CALL_EXPR_ARG (exp, 1);
13995 machine_mode tmode = insn_data[icode].operand[0].mode;
13996 machine_mode mode0 = Pmode;
13997 machine_mode mode1 = Pmode;
13998 rtx op0 = expand_normal (arg0);
13999 rtx op1 = expand_normal (arg1);
14001 if (icode == CODE_FOR_nothing)
14002 /* Builtin not supported on this processor. */
14003 return 0;
14005 /* If we got invalid arguments bail out before generating bad rtl. */
14006 if (arg0 == error_mark_node || arg1 == error_mark_node)
14007 return const0_rtx;
14009 if (target == 0
14010 || GET_MODE (target) != tmode
14011 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
14012 target = gen_reg_rtx (tmode);
14014 op1 = copy_to_mode_reg (mode1, op1);
14016 /* For LVX, express the RTL accurately by ANDing the address with -16.
14017 LVXL and LVE*X expand to use UNSPECs to hide their special behavior,
14018 so the raw address is fine. */
14019 if (icode == CODE_FOR_altivec_lvx_v1ti
14020 || icode == CODE_FOR_altivec_lvx_v2df
14021 || icode == CODE_FOR_altivec_lvx_v2di
14022 || icode == CODE_FOR_altivec_lvx_v4sf
14023 || icode == CODE_FOR_altivec_lvx_v4si
14024 || icode == CODE_FOR_altivec_lvx_v8hi
14025 || icode == CODE_FOR_altivec_lvx_v16qi)
14027 rtx rawaddr;
14028 if (op0 == const0_rtx)
14029 rawaddr = op1;
14030 else
14032 op0 = copy_to_mode_reg (mode0, op0);
14033 rawaddr = gen_rtx_PLUS (Pmode, op1, op0);
14035 addr = gen_rtx_AND (Pmode, rawaddr, gen_rtx_CONST_INT (Pmode, -16));
14036 addr = gen_rtx_MEM (blk ? BLKmode : tmode, addr);
14038 emit_insn (gen_rtx_SET (target, addr));
14040 else
14042 if (op0 == const0_rtx)
14043 addr = gen_rtx_MEM (blk ? BLKmode : tmode, op1);
14044 else
14046 op0 = copy_to_mode_reg (mode0, op0);
14047 addr = gen_rtx_MEM (blk ? BLKmode : tmode,
14048 gen_rtx_PLUS (Pmode, op1, op0));
14051 pat = GEN_FCN (icode) (target, addr);
14052 if (! pat)
14053 return 0;
14054 emit_insn (pat);
14057 return target;
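/* The AND with -16 above mirrors the hardware: lvx ignores the low four
   bits of the effective address.  Illustrative user-level consequence:

       vector unsigned char v = vec_ld (3, ptr);

   loads from (ptr + 3) & ~(uintptr_t) 15, i.e. from the containing
   16-byte block, not from the unaligned address itself.  */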
14060 static rtx
14061 altivec_expand_stxvl_builtin (enum insn_code icode, tree exp)
14063 rtx pat;
14064 tree arg0 = CALL_EXPR_ARG (exp, 0);
14065 tree arg1 = CALL_EXPR_ARG (exp, 1);
14066 tree arg2 = CALL_EXPR_ARG (exp, 2);
14067 rtx op0 = expand_normal (arg0);
14068 rtx op1 = expand_normal (arg1);
14069 rtx op2 = expand_normal (arg2);
14070 machine_mode mode0 = insn_data[icode].operand[0].mode;
14071 machine_mode mode1 = insn_data[icode].operand[1].mode;
14072 machine_mode mode2 = insn_data[icode].operand[2].mode;
14074 if (icode == CODE_FOR_nothing)
14075 /* Builtin not supported on this processor. */
14076 return NULL_RTX;
14078 /* If we got invalid arguments bail out before generating bad rtl. */
14079 if (arg0 == error_mark_node
14080 || arg1 == error_mark_node
14081 || arg2 == error_mark_node)
14082 return NULL_RTX;
14084 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
14085 op0 = copy_to_mode_reg (mode0, op0);
14086 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
14087 op1 = copy_to_mode_reg (mode1, op1);
14088 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
14089 op2 = copy_to_mode_reg (mode2, op2);
14091 pat = GEN_FCN (icode) (op0, op1, op2);
14092 if (pat)
14093 emit_insn (pat);
14095 return NULL_RTX;
14098 static rtx
14099 altivec_expand_stv_builtin (enum insn_code icode, tree exp)
14101 tree arg0 = CALL_EXPR_ARG (exp, 0);
14102 tree arg1 = CALL_EXPR_ARG (exp, 1);
14103 tree arg2 = CALL_EXPR_ARG (exp, 2);
14104 rtx op0 = expand_normal (arg0);
14105 rtx op1 = expand_normal (arg1);
14106 rtx op2 = expand_normal (arg2);
14107 rtx pat, addr, rawaddr;
14108 machine_mode tmode = insn_data[icode].operand[0].mode;
14109 machine_mode smode = insn_data[icode].operand[1].mode;
14110 machine_mode mode1 = Pmode;
14111 machine_mode mode2 = Pmode;
14113 /* Invalid arguments.  Bail out before doing anything stupid!  */
14114 if (arg0 == error_mark_node
14115 || arg1 == error_mark_node
14116 || arg2 == error_mark_node)
14117 return const0_rtx;
14119 op2 = copy_to_mode_reg (mode2, op2);
14121 /* For STVX, express the RTL accurately by ANDing the address with -16.
14122 STVXL and STVE*X expand to use UNSPECs to hide their special behavior,
14123 so the raw address is fine. */
14124 if (icode == CODE_FOR_altivec_stvx_v2df
14125 || icode == CODE_FOR_altivec_stvx_v2di
14126 || icode == CODE_FOR_altivec_stvx_v4sf
14127 || icode == CODE_FOR_altivec_stvx_v4si
14128 || icode == CODE_FOR_altivec_stvx_v8hi
14129 || icode == CODE_FOR_altivec_stvx_v16qi)
14131 if (op1 == const0_rtx)
14132 rawaddr = op2;
14133 else
14135 op1 = copy_to_mode_reg (mode1, op1);
14136 rawaddr = gen_rtx_PLUS (Pmode, op2, op1);
14139 addr = gen_rtx_AND (Pmode, rawaddr, gen_rtx_CONST_INT (Pmode, -16));
14140 addr = gen_rtx_MEM (tmode, addr);
14142 op0 = copy_to_mode_reg (tmode, op0);
14144 emit_insn (gen_rtx_SET (addr, op0));
14146 else
14148 if (! (*insn_data[icode].operand[1].predicate) (op0, smode))
14149 op0 = copy_to_mode_reg (smode, op0);
14151 if (op1 == const0_rtx)
14152 addr = gen_rtx_MEM (tmode, op2);
14153 else
14155 op1 = copy_to_mode_reg (mode1, op1);
14156 addr = gen_rtx_MEM (tmode, gen_rtx_PLUS (Pmode, op2, op1));
14159 pat = GEN_FCN (icode) (addr, op0);
14160 if (pat)
14161 emit_insn (pat);
14164 return NULL_RTX;
14167 /* Return the appropriate SPR number associated with the given builtin. */
14168 static inline HOST_WIDE_INT
14169 htm_spr_num (enum rs6000_builtins code)
14171 if (code == HTM_BUILTIN_GET_TFHAR
14172 || code == HTM_BUILTIN_SET_TFHAR)
14173 return TFHAR_SPR;
14174 else if (code == HTM_BUILTIN_GET_TFIAR
14175 || code == HTM_BUILTIN_SET_TFIAR)
14176 return TFIAR_SPR;
14177 else if (code == HTM_BUILTIN_GET_TEXASR
14178 || code == HTM_BUILTIN_SET_TEXASR)
14179 return TEXASR_SPR;
14180 gcc_assert (code == HTM_BUILTIN_GET_TEXASRU
14181 || code == HTM_BUILTIN_SET_TEXASRU);
14182 return TEXASRU_SPR;
14185 /* Return the appropriate SPR regno associated with the given builtin. */
14186 static inline HOST_WIDE_INT
14187 htm_spr_regno (enum rs6000_builtins code)
14189 if (code == HTM_BUILTIN_GET_TFHAR
14190 || code == HTM_BUILTIN_SET_TFHAR)
14191 return TFHAR_REGNO;
14192 else if (code == HTM_BUILTIN_GET_TFIAR
14193 || code == HTM_BUILTIN_SET_TFIAR)
14194 return TFIAR_REGNO;
14195 gcc_assert (code == HTM_BUILTIN_GET_TEXASR
14196 || code == HTM_BUILTIN_SET_TEXASR
14197 || code == HTM_BUILTIN_GET_TEXASRU
14198 || code == HTM_BUILTIN_SET_TEXASRU);
14199 return TEXASR_REGNO;
14202 /* Return the correct ICODE value depending on whether we are
14203 setting or reading the HTM SPRs. */
14204 static inline enum insn_code
14205 rs6000_htm_spr_icode (bool nonvoid)
14207 if (nonvoid)
14208 return (TARGET_POWERPC64) ? CODE_FOR_htm_mfspr_di : CODE_FOR_htm_mfspr_si;
14209 else
14210 return (TARGET_POWERPC64) ? CODE_FOR_htm_mtspr_di : CODE_FOR_htm_mtspr_si;
14213 /* Expand the HTM builtin in EXP and store the result in TARGET.
14214 Store true in *EXPANDEDP if we found a builtin to expand. */
14215 static rtx
14216 htm_expand_builtin (tree exp, rtx target, bool * expandedp)
14218 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
14219 bool nonvoid = TREE_TYPE (TREE_TYPE (fndecl)) != void_type_node;
14220 enum rs6000_builtins fcode = (enum rs6000_builtins) DECL_FUNCTION_CODE (fndecl);
14221 const struct builtin_description *d;
14222 size_t i;
14224 *expandedp = true;
14226 if (!TARGET_POWERPC64
14227 && (fcode == HTM_BUILTIN_TABORTDC
14228 || fcode == HTM_BUILTIN_TABORTDCI))
14230 size_t uns_fcode = (size_t)fcode;
14231 const char *name = rs6000_builtin_info[uns_fcode].name;
14232 error ("builtin %qs is only valid in 64-bit mode", name);
14233 return const0_rtx;
14236 /* Expand the HTM builtins. */
14237 d = bdesc_htm;
14238 for (i = 0; i < ARRAY_SIZE (bdesc_htm); i++, d++)
14239 if (d->code == fcode)
14241 rtx op[MAX_HTM_OPERANDS], pat;
14242 int nopnds = 0;
14243 tree arg;
14244 call_expr_arg_iterator iter;
14245 unsigned attr = rs6000_builtin_info[fcode].attr;
14246 enum insn_code icode = d->icode;
14247 const struct insn_operand_data *insn_op;
14248 bool uses_spr = (attr & RS6000_BTC_SPR);
14249 rtx cr = NULL_RTX;
14251 if (uses_spr)
14252 icode = rs6000_htm_spr_icode (nonvoid);
14253 insn_op = &insn_data[icode].operand[0];
14255 if (nonvoid)
14257 machine_mode tmode = (uses_spr) ? insn_op->mode : E_SImode;
14258 if (!target
14259 || GET_MODE (target) != tmode
14260 || (uses_spr && !(*insn_op->predicate) (target, tmode)))
14261 target = gen_reg_rtx (tmode);
14262 if (uses_spr)
14263 op[nopnds++] = target;
14266 FOR_EACH_CALL_EXPR_ARG (arg, iter, exp)
14268 if (arg == error_mark_node || nopnds >= MAX_HTM_OPERANDS)
14269 return const0_rtx;
14271 insn_op = &insn_data[icode].operand[nopnds];
14273 op[nopnds] = expand_normal (arg);
14275 if (!(*insn_op->predicate) (op[nopnds], insn_op->mode))
14277 if (!strcmp (insn_op->constraint, "n"))
14279 int arg_num = (nonvoid) ? nopnds : nopnds + 1;
14280 if (!CONST_INT_P (op[nopnds]))
14281 error ("argument %d must be an unsigned literal", arg_num);
14282 else
14283 error ("argument %d is an unsigned literal that is "
14284 "out of range", arg_num);
14285 return const0_rtx;
14287 op[nopnds] = copy_to_mode_reg (insn_op->mode, op[nopnds]);
14290 nopnds++;
14293 /* Handle the builtins for extended mnemonics. These accept
14294 no arguments, but map to builtins that take arguments. */
14295 switch (fcode)
14297 case HTM_BUILTIN_TENDALL: /* Alias for: tend. 1 */
14298 case HTM_BUILTIN_TRESUME: /* Alias for: tsr. 1 */
14299 op[nopnds++] = GEN_INT (1);
14300 if (flag_checking)
14301 attr |= RS6000_BTC_UNARY;
14302 break;
14303 case HTM_BUILTIN_TSUSPEND: /* Alias for: tsr. 0 */
14304 op[nopnds++] = GEN_INT (0);
14305 if (flag_checking)
14306 attr |= RS6000_BTC_UNARY;
14307 break;
14308 default:
14309 break;
14312 /* If this builtin accesses SPRs, then pass in the appropriate
14313 SPR number and SPR regno as the last two operands. */
14314 if (uses_spr)
14316 machine_mode mode = (TARGET_POWERPC64) ? DImode : SImode;
14317 op[nopnds++] = gen_rtx_CONST_INT (mode, htm_spr_num (fcode));
14318 op[nopnds++] = gen_rtx_REG (mode, htm_spr_regno (fcode));
14320 /* If this builtin accesses a CR, then pass in a scratch
14321 CR as the last operand. */
14322 else if (attr & RS6000_BTC_CR)
14323 { cr = gen_reg_rtx (CCmode);
14324 op[nopnds++] = cr;
14327 if (flag_checking)
14329 int expected_nopnds = 0;
14330 if ((attr & RS6000_BTC_TYPE_MASK) == RS6000_BTC_UNARY)
14331 expected_nopnds = 1;
14332 else if ((attr & RS6000_BTC_TYPE_MASK) == RS6000_BTC_BINARY)
14333 expected_nopnds = 2;
14334 else if ((attr & RS6000_BTC_TYPE_MASK) == RS6000_BTC_TERNARY)
14335 expected_nopnds = 3;
14336 if (!(attr & RS6000_BTC_VOID))
14337 expected_nopnds += 1;
14338 if (uses_spr)
14339 expected_nopnds += 2;
14341 gcc_assert (nopnds == expected_nopnds
14342 && nopnds <= MAX_HTM_OPERANDS);
14345 switch (nopnds)
14347 case 1:
14348 pat = GEN_FCN (icode) (op[0]);
14349 break;
14350 case 2:
14351 pat = GEN_FCN (icode) (op[0], op[1]);
14352 break;
14353 case 3:
14354 pat = GEN_FCN (icode) (op[0], op[1], op[2]);
14355 break;
14356 case 4:
14357 pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3]);
14358 break;
14359 default:
14360 gcc_unreachable ();
14362 if (!pat)
14363 return NULL_RTX;
14364 emit_insn (pat);
14366 if (attr & RS6000_BTC_CR)
14368 if (fcode == HTM_BUILTIN_TBEGIN)
14370 /* Emit code to set TARGET to true or false depending on
14371 whether the tbegin. instruction succeeded or failed
14372 to start a transaction.  We do this by placing the 1's
14373 complement of CR's EQ bit into TARGET. */
14374 rtx scratch = gen_reg_rtx (SImode);
14375 emit_insn (gen_rtx_SET (scratch,
14376 gen_rtx_EQ (SImode, cr,
14377 const0_rtx)));
14378 emit_insn (gen_rtx_SET (target,
14379 gen_rtx_XOR (SImode, scratch,
14380 GEN_INT (1))));
14382 else
14384 /* Emit code to copy the 4-bit condition register field
14385 CR into the least significant end of register TARGET. */
14386 rtx scratch1 = gen_reg_rtx (SImode);
14387 rtx scratch2 = gen_reg_rtx (SImode);
14388 rtx subreg = simplify_gen_subreg (CCmode, scratch1, SImode, 0);
14389 emit_insn (gen_movcc (subreg, cr));
14390 emit_insn (gen_lshrsi3 (scratch2, scratch1, GEN_INT (28)));
14391 emit_insn (gen_andsi3 (target, scratch2, GEN_INT (0xf)));
14395 if (nonvoid)
14396 return target;
14397 return const0_rtx;
14400 *expandedp = false;
14401 return NULL_RTX;
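/* Usage sketch for the tbegin handling above (user level, -mhtm
   assumed; handle_failure is a placeholder name):

       if (__builtin_tbegin (0))
         {
           // transactional body
           __builtin_tend (0);
         }
       else
         handle_failure ();

   The XOR against CR's EQ bit above is what makes the builtin read as
   "returns nonzero when a transaction was successfully started".  */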
14404 /* Expand the CPU builtin in FCODE and store the result in TARGET. */
14406 static rtx
14407 cpu_expand_builtin (enum rs6000_builtins fcode, tree exp ATTRIBUTE_UNUSED,
14408 rtx target)
14410 /* __builtin_cpu_init () is a nop, so expand to nothing. */
14411 if (fcode == RS6000_BUILTIN_CPU_INIT)
14412 return const0_rtx;
14414 if (target == 0 || GET_MODE (target) != SImode)
14415 target = gen_reg_rtx (SImode);
14417 #ifdef TARGET_LIBC_PROVIDES_HWCAP_IN_TCB
14418 tree arg = TREE_OPERAND (CALL_EXPR_ARG (exp, 0), 0);
14419 /* The target_clones pass creates an ARRAY_REF instead of a STRING_CST;
14420 convert it back to a STRING_CST. */
14421 if (TREE_CODE (arg) == ARRAY_REF
14422 && TREE_CODE (TREE_OPERAND (arg, 0)) == STRING_CST
14423 && TREE_CODE (TREE_OPERAND (arg, 1)) == INTEGER_CST
14424 && compare_tree_int (TREE_OPERAND (arg, 1), 0) == 0)
14425 arg = TREE_OPERAND (arg, 0);
14427 if (TREE_CODE (arg) != STRING_CST)
14429 error ("builtin %qs only accepts a string argument",
14430 rs6000_builtin_info[(size_t) fcode].name);
14431 return const0_rtx;
14434 if (fcode == RS6000_BUILTIN_CPU_IS)
14436 const char *cpu = TREE_STRING_POINTER (arg);
14437 rtx cpuid = NULL_RTX;
14438 for (size_t i = 0; i < ARRAY_SIZE (cpu_is_info); i++)
14439 if (strcmp (cpu, cpu_is_info[i].cpu) == 0)
14441 /* The CPUID value in the TCB is offset by _DL_FIRST_PLATFORM. */
14442 cpuid = GEN_INT (cpu_is_info[i].cpuid + _DL_FIRST_PLATFORM);
14443 break;
14445 if (cpuid == NULL_RTX)
14447 /* Invalid CPU argument. */
14448 error ("cpu %qs is an invalid argument to builtin %qs",
14449 cpu, rs6000_builtin_info[(size_t) fcode].name);
14450 return const0_rtx;
14453 rtx platform = gen_reg_rtx (SImode);
14454 rtx tcbmem = gen_const_mem (SImode,
14455 gen_rtx_PLUS (Pmode,
14456 gen_rtx_REG (Pmode, TLS_REGNUM),
14457 GEN_INT (TCB_PLATFORM_OFFSET)));
14458 emit_move_insn (platform, tcbmem);
14459 emit_insn (gen_eqsi3 (target, platform, cpuid));
14461 else if (fcode == RS6000_BUILTIN_CPU_SUPPORTS)
14463 const char *hwcap = TREE_STRING_POINTER (arg);
14464 rtx mask = NULL_RTX;
14465 int hwcap_offset;
14466 for (size_t i = 0; i < ARRAY_SIZE (cpu_supports_info); i++)
14467 if (strcmp (hwcap, cpu_supports_info[i].hwcap) == 0)
14469 mask = GEN_INT (cpu_supports_info[i].mask);
14470 hwcap_offset = TCB_HWCAP_OFFSET (cpu_supports_info[i].id);
14471 break;
14473 if (mask == NULL_RTX)
14475 /* Invalid HWCAP argument. */
14476 error ("%s %qs is an invalid argument to builtin %qs",
14477 "hwcap", hwcap, rs6000_builtin_info[(size_t) fcode].name);
14478 return const0_rtx;
14481 rtx tcb_hwcap = gen_reg_rtx (SImode);
14482 rtx tcbmem = gen_const_mem (SImode,
14483 gen_rtx_PLUS (Pmode,
14484 gen_rtx_REG (Pmode, TLS_REGNUM),
14485 GEN_INT (hwcap_offset)));
14486 emit_move_insn (tcb_hwcap, tcbmem);
14487 rtx scratch1 = gen_reg_rtx (SImode);
14488 emit_insn (gen_rtx_SET (scratch1, gen_rtx_AND (SImode, tcb_hwcap, mask)));
14489 rtx scratch2 = gen_reg_rtx (SImode);
14490 emit_insn (gen_eqsi3 (scratch2, scratch1, const0_rtx));
14491 emit_insn (gen_rtx_SET (target, gen_rtx_XOR (SImode, scratch2, const1_rtx)));
14493 else
14494 gcc_unreachable ();
14496 /* Record that we have expanded a CPU builtin, so that we can later
14497 emit a reference to the special symbol exported by LIBC to ensure we
14498 do not link against an old LIBC that doesn't support this feature. */
14499 cpu_builtin_p = true;
14501 #else
14502 warning (0, "builtin %qs needs GLIBC (2.23 and newer) that exports hardware "
14503 "capability bits", rs6000_builtin_info[(size_t) fcode].name);
14505 /* For old LIBCs, always return FALSE. */
14506 emit_move_insn (target, GEN_INT (0));
14507 #endif /* TARGET_LIBC_PROVIDES_HWCAP_IN_TCB */
14509 return target;
14512 static rtx
14513 rs6000_expand_ternop_builtin (enum insn_code icode, tree exp, rtx target)
14515 rtx pat;
14516 tree arg0 = CALL_EXPR_ARG (exp, 0);
14517 tree arg1 = CALL_EXPR_ARG (exp, 1);
14518 tree arg2 = CALL_EXPR_ARG (exp, 2);
14519 rtx op0 = expand_normal (arg0);
14520 rtx op1 = expand_normal (arg1);
14521 rtx op2 = expand_normal (arg2);
14522 machine_mode tmode = insn_data[icode].operand[0].mode;
14523 machine_mode mode0 = insn_data[icode].operand[1].mode;
14524 machine_mode mode1 = insn_data[icode].operand[2].mode;
14525 machine_mode mode2 = insn_data[icode].operand[3].mode;
14527 if (icode == CODE_FOR_nothing)
14528 /* Builtin not supported on this processor. */
14529 return 0;
14531 /* If we got invalid arguments, bail out before generating bad rtl. */
14532 if (arg0 == error_mark_node
14533 || arg1 == error_mark_node
14534 || arg2 == error_mark_node)
14535 return const0_rtx;
14537 /* Check and prepare the argument depending on the instruction code.
14539 Note that a switch statement instead of this sequence of tests
14540 would be incorrect: many of the CODE_FOR_* values could be
14541 CODE_FOR_nothing, and that would yield multiple case labels
14542 with identical values.  We would never reach here at runtime in
14543 that case. */
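/* If several of the CODE_FOR_* values tested below were CODE_FOR_nothing,
   a switch would contain duplicate case labels and fail to compile; the
   if chain has no such problem.  */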
14544 if (icode == CODE_FOR_altivec_vsldoi_v4sf
14545 || icode == CODE_FOR_altivec_vsldoi_v2df
14546 || icode == CODE_FOR_altivec_vsldoi_v4si
14547 || icode == CODE_FOR_altivec_vsldoi_v8hi
14548 || icode == CODE_FOR_altivec_vsldoi_v16qi)
14550 /* Only allow 4-bit unsigned literals. */
14551 STRIP_NOPS (arg2);
14552 if (TREE_CODE (arg2) != INTEGER_CST
14553 || TREE_INT_CST_LOW (arg2) & ~0xf)
14555 error ("argument 3 must be a 4-bit unsigned literal");
14556 return CONST0_RTX (tmode);
14559 else if (icode == CODE_FOR_vsx_xxpermdi_v2df
14560 || icode == CODE_FOR_vsx_xxpermdi_v2di
14561 || icode == CODE_FOR_vsx_xxpermdi_v2df_be
14562 || icode == CODE_FOR_vsx_xxpermdi_v2di_be
14563 || icode == CODE_FOR_vsx_xxpermdi_v1ti
14564 || icode == CODE_FOR_vsx_xxpermdi_v4sf
14565 || icode == CODE_FOR_vsx_xxpermdi_v4si
14566 || icode == CODE_FOR_vsx_xxpermdi_v8hi
14567 || icode == CODE_FOR_vsx_xxpermdi_v16qi
14568 || icode == CODE_FOR_vsx_xxsldwi_v16qi
14569 || icode == CODE_FOR_vsx_xxsldwi_v8hi
14570 || icode == CODE_FOR_vsx_xxsldwi_v4si
14571 || icode == CODE_FOR_vsx_xxsldwi_v4sf
14572 || icode == CODE_FOR_vsx_xxsldwi_v2di
14573 || icode == CODE_FOR_vsx_xxsldwi_v2df)
14575 /* Only allow 2-bit unsigned literals. */
14576 STRIP_NOPS (arg2);
14577 if (TREE_CODE (arg2) != INTEGER_CST
14578 || TREE_INT_CST_LOW (arg2) & ~0x3)
14580 error ("argument 3 must be a 2-bit unsigned literal");
14581 return CONST0_RTX (tmode);
14584 else if (icode == CODE_FOR_vsx_set_v2df
14585 || icode == CODE_FOR_vsx_set_v2di
14586 || icode == CODE_FOR_bcdadd
14587 || icode == CODE_FOR_bcdadd_lt
14588 || icode == CODE_FOR_bcdadd_eq
14589 || icode == CODE_FOR_bcdadd_gt
14590 || icode == CODE_FOR_bcdsub
14591 || icode == CODE_FOR_bcdsub_lt
14592 || icode == CODE_FOR_bcdsub_eq
14593 || icode == CODE_FOR_bcdsub_gt)
14595 /* Only allow 1-bit unsigned literals. */
14596 STRIP_NOPS (arg2);
14597 if (TREE_CODE (arg2) != INTEGER_CST
14598 || TREE_INT_CST_LOW (arg2) & ~0x1)
14600 error ("argument 3 must be a 1-bit unsigned literal");
14601 return CONST0_RTX (tmode);
14604 else if (icode == CODE_FOR_dfp_ddedpd_dd
14605 || icode == CODE_FOR_dfp_ddedpd_td)
14607 /* Only allow 2-bit unsigned literals where the value is 0 or 2. */
14608 STRIP_NOPS (arg0);
14609 if (TREE_CODE (arg0) != INTEGER_CST
14610 || TREE_INT_CST_LOW (arg0) & ~0x3)
14612 error ("argument 1 must be 0 or 2");
14613 return CONST0_RTX (tmode);
14616 else if (icode == CODE_FOR_dfp_denbcd_dd
14617 || icode == CODE_FOR_dfp_denbcd_td)
14619 /* Only allow 1-bit unsigned literals. */
14620 STRIP_NOPS (arg0);
14621 if (TREE_CODE (arg0) != INTEGER_CST
14622 || TREE_INT_CST_LOW (arg0) & ~0x1)
14624 error ("argument 1 must be a 1-bit unsigned literal");
14625 return CONST0_RTX (tmode);
14628 else if (icode == CODE_FOR_dfp_dscli_dd
14629 || icode == CODE_FOR_dfp_dscli_td
14630 || icode == CODE_FOR_dfp_dscri_dd
14631 || icode == CODE_FOR_dfp_dscri_td)
14633 /* Only allow 6-bit unsigned literals. */
14634 STRIP_NOPS (arg1);
14635 if (TREE_CODE (arg1) != INTEGER_CST
14636 || TREE_INT_CST_LOW (arg1) & ~0x3f)
14638 error ("argument 2 must be a 6-bit unsigned literal");
14639 return CONST0_RTX (tmode);
14642 else if (icode == CODE_FOR_crypto_vshasigmaw
14643 || icode == CODE_FOR_crypto_vshasigmad)
14645 /* Check whether the 2nd and 3rd arguments are integer constants and in
14646 range and prepare arguments. */
14647 STRIP_NOPS (arg1);
14648 if (TREE_CODE (arg1) != INTEGER_CST || wi::geu_p (wi::to_wide (arg1), 2))
14650 error ("argument 2 must be 0 or 1");
14651 return CONST0_RTX (tmode);
14654 STRIP_NOPS (arg2);
14655 if (TREE_CODE (arg2) != INTEGER_CST
14656 || wi::geu_p (wi::to_wide (arg2), 16))
14658 error ("argument 3 must be in the range 0..15");
14659 return CONST0_RTX (tmode);
14663 if (target == 0
14664 || GET_MODE (target) != tmode
14665 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
14666 target = gen_reg_rtx (tmode);
14668 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
14669 op0 = copy_to_mode_reg (mode0, op0);
14670 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
14671 op1 = copy_to_mode_reg (mode1, op1);
14672 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
14673 op2 = copy_to_mode_reg (mode2, op2);
14675 pat = GEN_FCN (icode) (target, op0, op1, op2);
14676 if (! pat)
14677 return 0;
14678 emit_insn (pat);
14680 return target;
14684 /* Expand the dst builtins. */
14685 static rtx
14686 altivec_expand_dst_builtin (tree exp, rtx target ATTRIBUTE_UNUSED,
14687 bool *expandedp)
14689 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
14690 enum rs6000_builtins fcode = (enum rs6000_builtins) DECL_FUNCTION_CODE (fndecl);
14691 tree arg0, arg1, arg2;
14692 machine_mode mode0, mode1;
14693 rtx pat, op0, op1, op2;
14694 const struct builtin_description *d;
14695 size_t i;
14697 *expandedp = false;
14699 /* Handle DST variants. */
14700 d = bdesc_dst;
14701 for (i = 0; i < ARRAY_SIZE (bdesc_dst); i++, d++)
14702 if (d->code == fcode)
14704 arg0 = CALL_EXPR_ARG (exp, 0);
14705 arg1 = CALL_EXPR_ARG (exp, 1);
14706 arg2 = CALL_EXPR_ARG (exp, 2);
14707 op0 = expand_normal (arg0);
14708 op1 = expand_normal (arg1);
14709 op2 = expand_normal (arg2);
14710 mode0 = insn_data[d->icode].operand[0].mode;
14711 mode1 = insn_data[d->icode].operand[1].mode;
14713 /* Invalid arguments; bail out before generating bad rtl. */
14714 if (arg0 == error_mark_node
14715 || arg1 == error_mark_node
14716 || arg2 == error_mark_node)
14717 return const0_rtx;
14719 *expandedp = true;
14720 STRIP_NOPS (arg2);
14721 if (TREE_CODE (arg2) != INTEGER_CST
14722 || TREE_INT_CST_LOW (arg2) & ~0x3)
14724 error ("argument to %qs must be a 2-bit unsigned literal", d->name);
14725 return const0_rtx;
14728 if (! (*insn_data[d->icode].operand[0].predicate) (op0, mode0))
14729 op0 = copy_to_mode_reg (Pmode, op0);
14730 if (! (*insn_data[d->icode].operand[1].predicate) (op1, mode1))
14731 op1 = copy_to_mode_reg (mode1, op1);
14733 pat = GEN_FCN (d->icode) (op0, op1, op2);
14734 if (pat != 0)
14735 emit_insn (pat);
14737 return NULL_RTX;
14740 return NULL_RTX;
14743 /* Expand vec_init builtin. */
14744 static rtx
14745 altivec_expand_vec_init_builtin (tree type, tree exp, rtx target)
14747 machine_mode tmode = TYPE_MODE (type);
14748 machine_mode inner_mode = GET_MODE_INNER (tmode);
14749 int i, n_elt = GET_MODE_NUNITS (tmode);
14751 gcc_assert (VECTOR_MODE_P (tmode));
14752 gcc_assert (n_elt == call_expr_nargs (exp));
14754 if (!target || !register_operand (target, tmode))
14755 target = gen_reg_rtx (tmode);
14757 /* If we have a vector composed of a single element, such as V1TImode, do
14758 the initialization directly. */
14759 if (n_elt == 1 && GET_MODE_SIZE (tmode) == GET_MODE_SIZE (inner_mode))
14761 rtx x = expand_normal (CALL_EXPR_ARG (exp, 0));
14762 emit_move_insn (target, gen_lowpart (tmode, x));
14764 else
14766 rtvec v = rtvec_alloc (n_elt);
14768 for (i = 0; i < n_elt; ++i)
14770 rtx x = expand_normal (CALL_EXPR_ARG (exp, i));
14771 RTVEC_ELT (v, i) = gen_lowpart (inner_mode, x);
14774 rs6000_expand_vector_init (target, gen_rtx_PARALLEL (tmode, v));
14777 return target;
14780 /* Return the integer constant in ARG. Constrain it to be in the range
14781 of the subparts of VEC_TYPE; issue an error if not. */
14783 static int
14784 get_element_number (tree vec_type, tree arg)
14786 unsigned HOST_WIDE_INT elt, max = TYPE_VECTOR_SUBPARTS (vec_type) - 1;
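/* For a V4SI vector type, for instance, TYPE_VECTOR_SUBPARTS is 4, so MAX
   is 3 and only selectors 0..3 are accepted; anything else is diagnosed
   below, with 0 returned as a safe fallback.  */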
14788 if (!tree_fits_uhwi_p (arg)
14789 || (elt = tree_to_uhwi (arg), elt > max))
14791 error ("selector must be an integer constant in the range 0..%wi", max);
14792 return 0;
14795 return elt;
14798 /* Expand vec_set builtin. */
14799 static rtx
14800 altivec_expand_vec_set_builtin (tree exp)
14802 machine_mode tmode, mode1;
14803 tree arg0, arg1, arg2;
14804 int elt;
14805 rtx op0, op1;
14807 arg0 = CALL_EXPR_ARG (exp, 0);
14808 arg1 = CALL_EXPR_ARG (exp, 1);
14809 arg2 = CALL_EXPR_ARG (exp, 2);
14811 tmode = TYPE_MODE (TREE_TYPE (arg0));
14812 mode1 = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
14813 gcc_assert (VECTOR_MODE_P (tmode));
14815 op0 = expand_expr (arg0, NULL_RTX, tmode, EXPAND_NORMAL);
14816 op1 = expand_expr (arg1, NULL_RTX, mode1, EXPAND_NORMAL);
14817 elt = get_element_number (TREE_TYPE (arg0), arg2);
14819 if (GET_MODE (op1) != mode1 && GET_MODE (op1) != VOIDmode)
14820 op1 = convert_modes (mode1, GET_MODE (op1), op1, true);
14822 op0 = force_reg (tmode, op0);
14823 op1 = force_reg (mode1, op1);
14825 rs6000_expand_vector_set (op0, op1, elt);
14827 return op0;
14830 /* Expand vec_ext builtin. */
14831 static rtx
14832 altivec_expand_vec_ext_builtin (tree exp, rtx target)
14834 machine_mode tmode, mode0;
14835 tree arg0, arg1;
14836 rtx op0;
14837 rtx op1;
14839 arg0 = CALL_EXPR_ARG (exp, 0);
14840 arg1 = CALL_EXPR_ARG (exp, 1);
14842 op0 = expand_normal (arg0);
14843 op1 = expand_normal (arg1);
14845 /* Call get_element_number to validate arg1 if it is a constant. */
14846 if (TREE_CODE (arg1) == INTEGER_CST)
14847 (void) get_element_number (TREE_TYPE (arg0), arg1);
14849 tmode = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
14850 mode0 = TYPE_MODE (TREE_TYPE (arg0));
14851 gcc_assert (VECTOR_MODE_P (mode0));
14853 op0 = force_reg (mode0, op0);
14855 if (optimize || !target || !register_operand (target, tmode))
14856 target = gen_reg_rtx (tmode);
14858 rs6000_expand_vector_extract (target, op0, op1);
14860 return target;
14863 /* Expand the builtin in EXP and store the result in TARGET. Store
14864 true in *EXPANDEDP if we found a builtin to expand. */
14865 static rtx
14866 altivec_expand_builtin (tree exp, rtx target, bool *expandedp)
14868 const struct builtin_description *d;
14869 size_t i;
14870 enum insn_code icode;
14871 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
14872 tree arg0, arg1, arg2;
14873 rtx op0, pat;
14874 machine_mode tmode, mode0;
14875 enum rs6000_builtins fcode
14876 = (enum rs6000_builtins) DECL_FUNCTION_CODE (fndecl);
14878 if (rs6000_overloaded_builtin_p (fcode))
14880 *expandedp = true;
14881 error ("unresolved overload for Altivec builtin %qF", fndecl);
14883 /* Given it is invalid, just generate a normal call. */
14884 return expand_call (exp, target, false);
14887 target = altivec_expand_dst_builtin (exp, target, expandedp);
14888 if (*expandedp)
14889 return target;
14891 *expandedp = true;
14893 switch (fcode)
14895 case ALTIVEC_BUILTIN_STVX_V2DF:
14896 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvx_v2df, exp);
14897 case ALTIVEC_BUILTIN_STVX_V2DI:
14898 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvx_v2di, exp);
14899 case ALTIVEC_BUILTIN_STVX_V4SF:
14900 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvx_v4sf, exp);
14901 case ALTIVEC_BUILTIN_STVX:
14902 case ALTIVEC_BUILTIN_STVX_V4SI:
14903 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvx_v4si, exp);
14904 case ALTIVEC_BUILTIN_STVX_V8HI:
14905 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvx_v8hi, exp);
14906 case ALTIVEC_BUILTIN_STVX_V16QI:
14907 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvx_v16qi, exp);
14908 case ALTIVEC_BUILTIN_STVEBX:
14909 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvebx, exp);
14910 case ALTIVEC_BUILTIN_STVEHX:
14911 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvehx, exp);
14912 case ALTIVEC_BUILTIN_STVEWX:
14913 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvewx, exp);
14914 case ALTIVEC_BUILTIN_STVXL_V2DF:
14915 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvxl_v2df, exp);
14916 case ALTIVEC_BUILTIN_STVXL_V2DI:
14917 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvxl_v2di, exp);
14918 case ALTIVEC_BUILTIN_STVXL_V4SF:
14919 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvxl_v4sf, exp);
14920 case ALTIVEC_BUILTIN_STVXL:
14921 case ALTIVEC_BUILTIN_STVXL_V4SI:
14922 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvxl_v4si, exp);
14923 case ALTIVEC_BUILTIN_STVXL_V8HI:
14924 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvxl_v8hi, exp);
14925 case ALTIVEC_BUILTIN_STVXL_V16QI:
14926 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvxl_v16qi, exp);
14928 case ALTIVEC_BUILTIN_STVLX:
14929 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvlx, exp);
14930 case ALTIVEC_BUILTIN_STVLXL:
14931 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvlxl, exp);
14932 case ALTIVEC_BUILTIN_STVRX:
14933 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvrx, exp);
14934 case ALTIVEC_BUILTIN_STVRXL:
14935 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvrxl, exp);
14937 case P9V_BUILTIN_STXVL:
14938 return altivec_expand_stxvl_builtin (CODE_FOR_stxvl, exp);
14940 case P9V_BUILTIN_XST_LEN_R:
14941 return altivec_expand_stxvl_builtin (CODE_FOR_xst_len_r, exp);
14943 case VSX_BUILTIN_STXVD2X_V1TI:
14944 return altivec_expand_stv_builtin (CODE_FOR_vsx_store_v1ti, exp);
14945 case VSX_BUILTIN_STXVD2X_V2DF:
14946 return altivec_expand_stv_builtin (CODE_FOR_vsx_store_v2df, exp);
14947 case VSX_BUILTIN_STXVD2X_V2DI:
14948 return altivec_expand_stv_builtin (CODE_FOR_vsx_store_v2di, exp);
14949 case VSX_BUILTIN_STXVW4X_V4SF:
14950 return altivec_expand_stv_builtin (CODE_FOR_vsx_store_v4sf, exp);
14951 case VSX_BUILTIN_STXVW4X_V4SI:
14952 return altivec_expand_stv_builtin (CODE_FOR_vsx_store_v4si, exp);
14953 case VSX_BUILTIN_STXVW4X_V8HI:
14954 return altivec_expand_stv_builtin (CODE_FOR_vsx_store_v8hi, exp);
14955 case VSX_BUILTIN_STXVW4X_V16QI:
14956 return altivec_expand_stv_builtin (CODE_FOR_vsx_store_v16qi, exp);
14958 /* For the following on big endian, it's ok to use any appropriate
14959 unaligned-supporting store, so use a generic expander. For
14960 little-endian, the exact element-reversing instruction must
14961 be used. */
14962 case VSX_BUILTIN_ST_ELEMREV_V1TI:
14964 enum insn_code code = (BYTES_BIG_ENDIAN ? CODE_FOR_vsx_store_v1ti
14965 : CODE_FOR_vsx_st_elemrev_v1ti);
14966 return altivec_expand_stv_builtin (code, exp);
14968 case VSX_BUILTIN_ST_ELEMREV_V2DF:
14970 enum insn_code code = (BYTES_BIG_ENDIAN ? CODE_FOR_vsx_store_v2df
14971 : CODE_FOR_vsx_st_elemrev_v2df);
14972 return altivec_expand_stv_builtin (code, exp);
14974 case VSX_BUILTIN_ST_ELEMREV_V2DI:
14976 enum insn_code code = (BYTES_BIG_ENDIAN ? CODE_FOR_vsx_store_v2di
14977 : CODE_FOR_vsx_st_elemrev_v2di);
14978 return altivec_expand_stv_builtin (code, exp);
14980 case VSX_BUILTIN_ST_ELEMREV_V4SF:
14982 enum insn_code code = (BYTES_BIG_ENDIAN ? CODE_FOR_vsx_store_v4sf
14983 : CODE_FOR_vsx_st_elemrev_v4sf);
14984 return altivec_expand_stv_builtin (code, exp);
14986 case VSX_BUILTIN_ST_ELEMREV_V4SI:
14988 enum insn_code code = (BYTES_BIG_ENDIAN ? CODE_FOR_vsx_store_v4si
14989 : CODE_FOR_vsx_st_elemrev_v4si);
14990 return altivec_expand_stv_builtin (code, exp);
14992 case VSX_BUILTIN_ST_ELEMREV_V8HI:
14994 enum insn_code code = (BYTES_BIG_ENDIAN ? CODE_FOR_vsx_store_v8hi
14995 : CODE_FOR_vsx_st_elemrev_v8hi);
14996 return altivec_expand_stv_builtin (code, exp);
14998 case VSX_BUILTIN_ST_ELEMREV_V16QI:
15000 enum insn_code code = (BYTES_BIG_ENDIAN ? CODE_FOR_vsx_store_v16qi
15001 : CODE_FOR_vsx_st_elemrev_v16qi);
15002 return altivec_expand_stv_builtin (code, exp);
15005 case ALTIVEC_BUILTIN_MFVSCR:
15006 icode = CODE_FOR_altivec_mfvscr;
15007 tmode = insn_data[icode].operand[0].mode;
15009 if (target == 0
15010 || GET_MODE (target) != tmode
15011 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
15012 target = gen_reg_rtx (tmode);
15014 pat = GEN_FCN (icode) (target);
15015 if (! pat)
15016 return 0;
15017 emit_insn (pat);
15018 return target;
15020 case ALTIVEC_BUILTIN_MTVSCR:
15021 icode = CODE_FOR_altivec_mtvscr;
15022 arg0 = CALL_EXPR_ARG (exp, 0);
15023 op0 = expand_normal (arg0);
15024 mode0 = insn_data[icode].operand[0].mode;
15026 /* If we got invalid arguments, bail out before generating bad rtl. */
15027 if (arg0 == error_mark_node)
15028 return const0_rtx;
15030 if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
15031 op0 = copy_to_mode_reg (mode0, op0);
15033 pat = GEN_FCN (icode) (op0);
15034 if (pat)
15035 emit_insn (pat);
15036 return NULL_RTX;
15038 case ALTIVEC_BUILTIN_DSSALL:
15039 emit_insn (gen_altivec_dssall ());
15040 return NULL_RTX;
15042 case ALTIVEC_BUILTIN_DSS:
15043 icode = CODE_FOR_altivec_dss;
15044 arg0 = CALL_EXPR_ARG (exp, 0);
15045 STRIP_NOPS (arg0);
15046 op0 = expand_normal (arg0);
15047 mode0 = insn_data[icode].operand[0].mode;
15049 /* If we got invalid arguments, bail out before generating bad rtl. */
15050 if (arg0 == error_mark_node)
15051 return const0_rtx;
15053 if (TREE_CODE (arg0) != INTEGER_CST
15054 || TREE_INT_CST_LOW (arg0) & ~0x3)
15056 error ("argument to %qs must be a 2-bit unsigned literal", "dss");
15057 return const0_rtx;
15060 if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
15061 op0 = copy_to_mode_reg (mode0, op0);
15063 emit_insn (gen_altivec_dss (op0));
15064 return NULL_RTX;
15066 case ALTIVEC_BUILTIN_VEC_INIT_V4SI:
15067 case ALTIVEC_BUILTIN_VEC_INIT_V8HI:
15068 case ALTIVEC_BUILTIN_VEC_INIT_V16QI:
15069 case ALTIVEC_BUILTIN_VEC_INIT_V4SF:
15070 case VSX_BUILTIN_VEC_INIT_V2DF:
15071 case VSX_BUILTIN_VEC_INIT_V2DI:
15072 case VSX_BUILTIN_VEC_INIT_V1TI:
15073 return altivec_expand_vec_init_builtin (TREE_TYPE (exp), exp, target);
15075 case ALTIVEC_BUILTIN_VEC_SET_V4SI:
15076 case ALTIVEC_BUILTIN_VEC_SET_V8HI:
15077 case ALTIVEC_BUILTIN_VEC_SET_V16QI:
15078 case ALTIVEC_BUILTIN_VEC_SET_V4SF:
15079 case VSX_BUILTIN_VEC_SET_V2DF:
15080 case VSX_BUILTIN_VEC_SET_V2DI:
15081 case VSX_BUILTIN_VEC_SET_V1TI:
15082 return altivec_expand_vec_set_builtin (exp);
15084 case ALTIVEC_BUILTIN_VEC_EXT_V4SI:
15085 case ALTIVEC_BUILTIN_VEC_EXT_V8HI:
15086 case ALTIVEC_BUILTIN_VEC_EXT_V16QI:
15087 case ALTIVEC_BUILTIN_VEC_EXT_V4SF:
15088 case VSX_BUILTIN_VEC_EXT_V2DF:
15089 case VSX_BUILTIN_VEC_EXT_V2DI:
15090 case VSX_BUILTIN_VEC_EXT_V1TI:
15091 return altivec_expand_vec_ext_builtin (exp, target);
15093 case P9V_BUILTIN_VEC_EXTRACT4B:
15094 arg1 = CALL_EXPR_ARG (exp, 1);
15095 STRIP_NOPS (arg1);
15097 /* Generate a normal call if it is invalid. */
15098 if (arg1 == error_mark_node)
15099 return expand_call (exp, target, false);
15101 if (TREE_CODE (arg1) != INTEGER_CST || TREE_INT_CST_LOW (arg1) > 12)
15103 error ("second argument to %qs must be 0..12", "vec_vextract4b");
15104 return expand_call (exp, target, false);
15106 break;
15108 case P9V_BUILTIN_VEC_INSERT4B:
15109 arg2 = CALL_EXPR_ARG (exp, 2);
15110 STRIP_NOPS (arg2);
15112 /* Generate a normal call if it is invalid. */
15113 if (arg2 == error_mark_node)
15114 return expand_call (exp, target, false);
15116 if (TREE_CODE (arg2) != INTEGER_CST || TREE_INT_CST_LOW (arg2) > 12)
15118 error ("third argument to %qs must be 0..12", "vec_vinsert4b");
15119 return expand_call (exp, target, false);
15121 break;
15123 default:
15124 /* Fall through to the table-driven expanders below. */
15125 break;
15128 /* Expand abs* operations. */
15129 d = bdesc_abs;
15130 for (i = 0; i < ARRAY_SIZE (bdesc_abs); i++, d++)
15131 if (d->code == fcode)
15132 return altivec_expand_abs_builtin (d->icode, exp, target);
15134 /* Expand the AltiVec predicates. */
15135 d = bdesc_altivec_preds;
15136 for (i = 0; i < ARRAY_SIZE (bdesc_altivec_preds); i++, d++)
15137 if (d->code == fcode)
15138 return altivec_expand_predicate_builtin (d->icode, exp, target);
15140 /* The LV* built-ins are funky; we initialized them differently, so they are handled here. */
15141 switch (fcode)
15143 case ALTIVEC_BUILTIN_LVSL:
15144 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvsl,
15145 exp, target, false);
15146 case ALTIVEC_BUILTIN_LVSR:
15147 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvsr,
15148 exp, target, false);
15149 case ALTIVEC_BUILTIN_LVEBX:
15150 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvebx,
15151 exp, target, false);
15152 case ALTIVEC_BUILTIN_LVEHX:
15153 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvehx,
15154 exp, target, false);
15155 case ALTIVEC_BUILTIN_LVEWX:
15156 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvewx,
15157 exp, target, false);
15158 case ALTIVEC_BUILTIN_LVXL_V2DF:
15159 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvxl_v2df,
15160 exp, target, false);
15161 case ALTIVEC_BUILTIN_LVXL_V2DI:
15162 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvxl_v2di,
15163 exp, target, false);
15164 case ALTIVEC_BUILTIN_LVXL_V4SF:
15165 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvxl_v4sf,
15166 exp, target, false);
15167 case ALTIVEC_BUILTIN_LVXL:
15168 case ALTIVEC_BUILTIN_LVXL_V4SI:
15169 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvxl_v4si,
15170 exp, target, false);
15171 case ALTIVEC_BUILTIN_LVXL_V8HI:
15172 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvxl_v8hi,
15173 exp, target, false);
15174 case ALTIVEC_BUILTIN_LVXL_V16QI:
15175 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvxl_v16qi,
15176 exp, target, false);
15177 case ALTIVEC_BUILTIN_LVX_V1TI:
15178 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvx_v1ti,
15179 exp, target, false);
15180 case ALTIVEC_BUILTIN_LVX_V2DF:
15181 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvx_v2df,
15182 exp, target, false);
15183 case ALTIVEC_BUILTIN_LVX_V2DI:
15184 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvx_v2di,
15185 exp, target, false);
15186 case ALTIVEC_BUILTIN_LVX_V4SF:
15187 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvx_v4sf,
15188 exp, target, false);
15189 case ALTIVEC_BUILTIN_LVX:
15190 case ALTIVEC_BUILTIN_LVX_V4SI:
15191 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvx_v4si,
15192 exp, target, false);
15193 case ALTIVEC_BUILTIN_LVX_V8HI:
15194 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvx_v8hi,
15195 exp, target, false);
15196 case ALTIVEC_BUILTIN_LVX_V16QI:
15197 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvx_v16qi,
15198 exp, target, false);
15199 case ALTIVEC_BUILTIN_LVLX:
15200 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvlx,
15201 exp, target, true);
15202 case ALTIVEC_BUILTIN_LVLXL:
15203 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvlxl,
15204 exp, target, true);
15205 case ALTIVEC_BUILTIN_LVRX:
15206 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvrx,
15207 exp, target, true);
15208 case ALTIVEC_BUILTIN_LVRXL:
15209 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvrxl,
15210 exp, target, true);
15211 case VSX_BUILTIN_LXVD2X_V1TI:
15212 return altivec_expand_lv_builtin (CODE_FOR_vsx_load_v1ti,
15213 exp, target, false);
15214 case VSX_BUILTIN_LXVD2X_V2DF:
15215 return altivec_expand_lv_builtin (CODE_FOR_vsx_load_v2df,
15216 exp, target, false);
15217 case VSX_BUILTIN_LXVD2X_V2DI:
15218 return altivec_expand_lv_builtin (CODE_FOR_vsx_load_v2di,
15219 exp, target, false);
15220 case VSX_BUILTIN_LXVW4X_V4SF:
15221 return altivec_expand_lv_builtin (CODE_FOR_vsx_load_v4sf,
15222 exp, target, false);
15223 case VSX_BUILTIN_LXVW4X_V4SI:
15224 return altivec_expand_lv_builtin (CODE_FOR_vsx_load_v4si,
15225 exp, target, false);
15226 case VSX_BUILTIN_LXVW4X_V8HI:
15227 return altivec_expand_lv_builtin (CODE_FOR_vsx_load_v8hi,
15228 exp, target, false);
15229 case VSX_BUILTIN_LXVW4X_V16QI:
15230 return altivec_expand_lv_builtin (CODE_FOR_vsx_load_v16qi,
15231 exp, target, false);
15232 /* For the following on big endian, it's ok to use any appropriate
15233 unaligned-supporting load, so use a generic expander. For
15234 little-endian, the exact element-reversing instruction must
15235 be used. */
15236 case VSX_BUILTIN_LD_ELEMREV_V2DF:
15238 enum insn_code code = (BYTES_BIG_ENDIAN ? CODE_FOR_vsx_load_v2df
15239 : CODE_FOR_vsx_ld_elemrev_v2df);
15240 return altivec_expand_lv_builtin (code, exp, target, false);
15242 case VSX_BUILTIN_LD_ELEMREV_V1TI:
15244 enum insn_code code = (BYTES_BIG_ENDIAN ? CODE_FOR_vsx_load_v1ti
15245 : CODE_FOR_vsx_ld_elemrev_v1ti);
15246 return altivec_expand_lv_builtin (code, exp, target, false);
15248 case VSX_BUILTIN_LD_ELEMREV_V2DI:
15250 enum insn_code code = (BYTES_BIG_ENDIAN ? CODE_FOR_vsx_load_v2di
15251 : CODE_FOR_vsx_ld_elemrev_v2di);
15252 return altivec_expand_lv_builtin (code, exp, target, false);
15254 case VSX_BUILTIN_LD_ELEMREV_V4SF:
15256 enum insn_code code = (BYTES_BIG_ENDIAN ? CODE_FOR_vsx_load_v4sf
15257 : CODE_FOR_vsx_ld_elemrev_v4sf);
15258 return altivec_expand_lv_builtin (code, exp, target, false);
15260 case VSX_BUILTIN_LD_ELEMREV_V4SI:
15262 enum insn_code code = (BYTES_BIG_ENDIAN ? CODE_FOR_vsx_load_v4si
15263 : CODE_FOR_vsx_ld_elemrev_v4si);
15264 return altivec_expand_lv_builtin (code, exp, target, false);
15266 case VSX_BUILTIN_LD_ELEMREV_V8HI:
15268 enum insn_code code = (BYTES_BIG_ENDIAN ? CODE_FOR_vsx_load_v8hi
15269 : CODE_FOR_vsx_ld_elemrev_v8hi);
15270 return altivec_expand_lv_builtin (code, exp, target, false);
15272 case VSX_BUILTIN_LD_ELEMREV_V16QI:
15274 enum insn_code code = (BYTES_BIG_ENDIAN ? CODE_FOR_vsx_load_v16qi
15275 : CODE_FOR_vsx_ld_elemrev_v16qi);
15276 return altivec_expand_lv_builtin (code, exp, target, false);
15278 break;
15279 default:
15280 /* Fall through and report that nothing was expanded. */
15281 break;
15284 *expandedp = false;
15285 return NULL_RTX;
15288 /* Check whether a builtin function is supported in this target
15289 configuration. */
15290 bool
15291 rs6000_builtin_is_supported_p (enum rs6000_builtins fncode)
15293 HOST_WIDE_INT fnmask = rs6000_builtin_info[fncode].mask;
15294 return (fnmask & rs6000_builtin_mask) == fnmask;
15300 /* Raise an error message for a builtin function that is called without the
15301 appropriate target options being set. */
15303 static void
15304 rs6000_invalid_builtin (enum rs6000_builtins fncode)
15306 size_t uns_fncode = (size_t) fncode;
15307 const char *name = rs6000_builtin_info[uns_fncode].name;
15308 HOST_WIDE_INT fnmask = rs6000_builtin_info[uns_fncode].mask;
15310 gcc_assert (name != NULL);
15311 if ((fnmask & RS6000_BTM_CELL) != 0)
15312 error ("builtin function %qs is only valid for the cell processor", name);
15313 else if ((fnmask & RS6000_BTM_VSX) != 0)
15314 error ("builtin function %qs requires the %qs option", name, "-mvsx");
15315 else if ((fnmask & RS6000_BTM_HTM) != 0)
15316 error ("builtin function %qs requires the %qs option", name, "-mhtm");
15317 else if ((fnmask & RS6000_BTM_ALTIVEC) != 0)
15318 error ("builtin function %qs requires the %qs option", name, "-maltivec");
15319 else if ((fnmask & (RS6000_BTM_DFP | RS6000_BTM_P8_VECTOR))
15320 == (RS6000_BTM_DFP | RS6000_BTM_P8_VECTOR))
15321 error ("builtin function %qs requires the %qs and %qs options",
15322 name, "-mhard-dfp", "-mpower8-vector");
15323 else if ((fnmask & RS6000_BTM_DFP) != 0)
15324 error ("builtin function %qs requires the %qs option", name, "-mhard-dfp");
15325 else if ((fnmask & RS6000_BTM_P8_VECTOR) != 0)
15326 error ("builtin function %qs requires the %qs option", name,
15327 "-mpower8-vector");
15328 else if ((fnmask & (RS6000_BTM_P9_VECTOR | RS6000_BTM_64BIT))
15329 == (RS6000_BTM_P9_VECTOR | RS6000_BTM_64BIT))
15330 error ("builtin function %qs requires the %qs and %qs options",
15331 name, "-mcpu=power9", "-m64");
15332 else if ((fnmask & RS6000_BTM_P9_VECTOR) != 0)
15333 error ("builtin function %qs requires the %qs option", name,
15334 "-mcpu=power9");
15335 else if ((fnmask & (RS6000_BTM_P9_MISC | RS6000_BTM_64BIT))
15336 == (RS6000_BTM_P9_MISC | RS6000_BTM_64BIT))
15337 error ("builtin function %qs requires the %qs and %qs options",
15338 name, "-mcpu=power9", "-m64");
15339 else if ((fnmask & RS6000_BTM_P9_MISC) == RS6000_BTM_P9_MISC)
15340 error ("builtin function %qs requires the %qs option", name,
15341 "-mcpu=power9");
15342 else if ((fnmask & (RS6000_BTM_HARD_FLOAT | RS6000_BTM_LDBL128))
15343 == (RS6000_BTM_HARD_FLOAT | RS6000_BTM_LDBL128))
15344 error ("builtin function %qs requires the %qs and %qs options",
15345 name, "-mhard-float", "-mlong-double-128");
15346 else if ((fnmask & RS6000_BTM_HARD_FLOAT) != 0)
15347 error ("builtin function %qs requires the %qs option", name,
15348 "-mhard-float");
15349 else if ((fnmask & RS6000_BTM_FLOAT128_HW) != 0)
15350 error ("builtin function %qs requires ISA 3.0 IEEE 128-bit floating point",
15351 name);
15352 else if ((fnmask & RS6000_BTM_FLOAT128) != 0)
15353 error ("builtin function %qs requires the %qs option", name, "-mfloat128");
15354 else if ((fnmask & (RS6000_BTM_POPCNTD | RS6000_BTM_POWERPC64))
15355 == (RS6000_BTM_POPCNTD | RS6000_BTM_POWERPC64))
15356 error ("builtin function %qs requires the %qs (or newer), and "
15357 "%qs or %qs options",
15358 name, "-mcpu=power7", "-m64", "-mpowerpc64");
15359 else
15360 error ("builtin function %qs is not supported with the current options",
15361 name);
15364 /* Target hook for early folding of built-ins, shamelessly stolen
15365 from ia64.c. */
15367 static tree
15368 rs6000_fold_builtin (tree fndecl ATTRIBUTE_UNUSED,
15369 int n_args ATTRIBUTE_UNUSED,
15370 tree *args ATTRIBUTE_UNUSED,
15371 bool ignore ATTRIBUTE_UNUSED)
15373 #ifdef SUBTARGET_FOLD_BUILTIN
15374 return SUBTARGET_FOLD_BUILTIN (fndecl, n_args, args, ignore);
15375 #else
15376 return NULL_TREE;
15377 #endif
15380 /* Helper function to sort out which built-ins may be valid without having
15381 a LHS. */
15382 static bool
15383 rs6000_builtin_valid_without_lhs (enum rs6000_builtins fn_code)
15385 switch (fn_code)
15387 case ALTIVEC_BUILTIN_STVX_V16QI:
15388 case ALTIVEC_BUILTIN_STVX_V8HI:
15389 case ALTIVEC_BUILTIN_STVX_V4SI:
15390 case ALTIVEC_BUILTIN_STVX_V4SF:
15391 case ALTIVEC_BUILTIN_STVX_V2DI:
15392 case ALTIVEC_BUILTIN_STVX_V2DF:
15393 return true;
15394 default:
15395 return false;
15399 /* Helper function to handle the gimple folding of a vector compare
15400 operation. This sets up true/false vectors, and uses the
15401 VEC_COND_EXPR operation.
15402 CODE indicates which comparison is to be made. (EQ, GT, ...).
15403 TYPE indicates the type of the result. */
15404 static tree
15405 fold_build_vec_cmp (tree_code code, tree type,
15406 tree arg0, tree arg1)
15408 tree cmp_type = build_same_sized_truth_vector_type (type);
15409 tree zero_vec = build_zero_cst (type);
15410 tree minus_one_vec = build_minus_one_cst (type);
15411 tree cmp = fold_build2 (code, cmp_type, arg0, arg1);
15412 return fold_build3 (VEC_COND_EXPR, type, cmp, minus_one_vec, zero_vec);
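/* E.g., for CODE == EQ_EXPR on two V4SI operands this builds
   VEC_COND_EXPR <arg0 == arg1, { -1, -1, -1, -1 }, { 0, 0, 0, 0 }>,
   matching the per-element all-ones/all-zeros masks that the AltiVec
   compare instructions produce.  */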
15415 /* Helper function to handle the in-between steps for the
15416 vector compare built-ins. */
15417 static void
15418 fold_compare_helper (gimple_stmt_iterator *gsi, tree_code code, gimple *stmt)
15420 tree arg0 = gimple_call_arg (stmt, 0);
15421 tree arg1 = gimple_call_arg (stmt, 1);
15422 tree lhs = gimple_call_lhs (stmt);
15423 tree cmp = fold_build_vec_cmp (code, TREE_TYPE (lhs), arg0, arg1);
15424 gimple *g = gimple_build_assign (lhs, cmp);
15425 gimple_set_location (g, gimple_location (stmt));
15426 gsi_replace (gsi, g, true);
15429 /* Helper function to handle the vector merge[hl] built-ins. The
15430 implementation difference between the h and l versions lies in the
15431 values used when building the permute vector for a high-word versus a
15432 low-word merge; the variance is keyed off the use_high parameter. */
15433 static void
15434 fold_mergehl_helper (gimple_stmt_iterator *gsi, gimple *stmt, int use_high)
15436 tree arg0 = gimple_call_arg (stmt, 0);
15437 tree arg1 = gimple_call_arg (stmt, 1);
15438 tree lhs = gimple_call_lhs (stmt);
15439 tree lhs_type = TREE_TYPE (lhs);
15440 tree lhs_type_type = TREE_TYPE (lhs_type);
15441 int n_elts = TYPE_VECTOR_SUBPARTS (lhs_type);
15442 int midpoint = n_elts / 2;
15443 int offset = 0;
15445 if (use_high == 1)
15446 offset = midpoint;
15448 tree_vector_builder elts (lhs_type, VECTOR_CST_NELTS (arg0), 1);
15450 for (int i = 0; i < midpoint; i++)
15452 elts.safe_push (build_int_cst (lhs_type_type, offset + i));
15453 elts.safe_push (build_int_cst (lhs_type_type, offset + n_elts + i));
15456 tree permute = elts.build ();
15458 gimple *g = gimple_build_assign (lhs, VEC_PERM_EXPR, arg0, arg1, permute);
15459 gimple_set_location (g, gimple_location (stmt));
15460 gsi_replace (gsi, g, true);
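/* For a V4SI merge, MIDPOINT is 2, so the selector built above is
   { 0, 4, 1, 5 } when USE_HIGH is 0 and { 2, 6, 3, 7 } when USE_HIGH is 1,
   interleaving one half of each input vector.  */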
15463 /* Fold a machine-dependent built-in in GIMPLE. (For folding into
15464 a constant, use rs6000_fold_builtin.) */
15466 bool
15467 rs6000_gimple_fold_builtin (gimple_stmt_iterator *gsi)
15469 gimple *stmt = gsi_stmt (*gsi);
15470 tree fndecl = gimple_call_fndecl (stmt);
15471 gcc_checking_assert (fndecl && DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_MD);
15472 enum rs6000_builtins fn_code
15473 = (enum rs6000_builtins) DECL_FUNCTION_CODE (fndecl);
15474 tree arg0, arg1, lhs, temp;
15475 gimple *g;
15477 size_t uns_fncode = (size_t) fn_code;
15478 enum insn_code icode = rs6000_builtin_info[uns_fncode].icode;
15479 const char *fn_name1 = rs6000_builtin_info[uns_fncode].name;
15480 const char *fn_name2 = (icode != CODE_FOR_nothing)
15481 ? get_insn_name ((int) icode)
15482 : "nothing";
15484 if (TARGET_DEBUG_BUILTIN)
15485 fprintf (stderr, "rs6000_gimple_fold_builtin %d %s %s\n",
15486 fn_code, fn_name1, fn_name2);
15488 if (!rs6000_fold_gimple)
15489 return false;
15491 /* Prevent gimple folding for code that does not have a LHS, unless it is
15492 allowed per the rs6000_builtin_valid_without_lhs helper function. */
15493 if (!gimple_call_lhs (stmt) && !rs6000_builtin_valid_without_lhs (fn_code))
15494 return false;
15496 /* Don't fold invalid builtins, let rs6000_expand_builtin diagnose it. */
15497 HOST_WIDE_INT mask = rs6000_builtin_info[uns_fncode].mask;
15498 bool func_valid_p = (rs6000_builtin_mask & mask) == mask;
15499 if (!func_valid_p)
15500 return false;
15502 switch (fn_code)
15504 /* Flavors of vec_add. We deliberately don't expand
15505 P8V_BUILTIN_VADDUQM as it gets lowered from V1TImode to
15506 TImode, resulting in much poorer code generation. */
15507 case ALTIVEC_BUILTIN_VADDUBM:
15508 case ALTIVEC_BUILTIN_VADDUHM:
15509 case ALTIVEC_BUILTIN_VADDUWM:
15510 case P8V_BUILTIN_VADDUDM:
15511 case ALTIVEC_BUILTIN_VADDFP:
15512 case VSX_BUILTIN_XVADDDP:
15513 arg0 = gimple_call_arg (stmt, 0);
15514 arg1 = gimple_call_arg (stmt, 1);
15515 lhs = gimple_call_lhs (stmt);
15516 g = gimple_build_assign (lhs, PLUS_EXPR, arg0, arg1);
15517 gimple_set_location (g, gimple_location (stmt));
15518 gsi_replace (gsi, g, true);
15519 return true;
15520 /* Flavors of vec_sub. We deliberately don't expand
15521 P8V_BUILTIN_VSUBUQM. */
15522 case ALTIVEC_BUILTIN_VSUBUBM:
15523 case ALTIVEC_BUILTIN_VSUBUHM:
15524 case ALTIVEC_BUILTIN_VSUBUWM:
15525 case P8V_BUILTIN_VSUBUDM:
15526 case ALTIVEC_BUILTIN_VSUBFP:
15527 case VSX_BUILTIN_XVSUBDP:
15528 arg0 = gimple_call_arg (stmt, 0);
15529 arg1 = gimple_call_arg (stmt, 1);
15530 lhs = gimple_call_lhs (stmt);
15531 g = gimple_build_assign (lhs, MINUS_EXPR, arg0, arg1);
15532 gimple_set_location (g, gimple_location (stmt));
15533 gsi_replace (gsi, g, true);
15534 return true;
15535 case VSX_BUILTIN_XVMULSP:
15536 case VSX_BUILTIN_XVMULDP:
15537 arg0 = gimple_call_arg (stmt, 0);
15538 arg1 = gimple_call_arg (stmt, 1);
15539 lhs = gimple_call_lhs (stmt);
15540 g = gimple_build_assign (lhs, MULT_EXPR, arg0, arg1);
15541 gimple_set_location (g, gimple_location (stmt));
15542 gsi_replace (gsi, g, true);
15543 return true;
15544 /* Even element flavors of vec_mul (signed). */
15545 case ALTIVEC_BUILTIN_VMULESB:
15546 case ALTIVEC_BUILTIN_VMULESH:
15547 case P8V_BUILTIN_VMULESW:
15548 /* Even element flavors of vec_mul (unsigned). */
15549 case ALTIVEC_BUILTIN_VMULEUB:
15550 case ALTIVEC_BUILTIN_VMULEUH:
15551 case P8V_BUILTIN_VMULEUW:
15552 arg0 = gimple_call_arg (stmt, 0);
15553 arg1 = gimple_call_arg (stmt, 1);
15554 lhs = gimple_call_lhs (stmt);
15555 g = gimple_build_assign (lhs, VEC_WIDEN_MULT_EVEN_EXPR, arg0, arg1);
15556 gimple_set_location (g, gimple_location (stmt));
15557 gsi_replace (gsi, g, true);
15558 return true;
15559 /* Odd element flavors of vec_mul (signed). */
15560 case ALTIVEC_BUILTIN_VMULOSB:
15561 case ALTIVEC_BUILTIN_VMULOSH:
15562 case P8V_BUILTIN_VMULOSW:
15563 /* Odd element flavors of vec_mul (unsigned). */
15564 case ALTIVEC_BUILTIN_VMULOUB:
15565 case ALTIVEC_BUILTIN_VMULOUH:
15566 case P8V_BUILTIN_VMULOUW:
15567 arg0 = gimple_call_arg (stmt, 0);
15568 arg1 = gimple_call_arg (stmt, 1);
15569 lhs = gimple_call_lhs (stmt);
15570 g = gimple_build_assign (lhs, VEC_WIDEN_MULT_ODD_EXPR, arg0, arg1);
15571 gimple_set_location (g, gimple_location (stmt));
15572 gsi_replace (gsi, g, true);
15573 return true;
15574 /* Flavors of vec_div (Integer). */
15575 case VSX_BUILTIN_DIV_V2DI:
15576 case VSX_BUILTIN_UDIV_V2DI:
15577 arg0 = gimple_call_arg (stmt, 0);
15578 arg1 = gimple_call_arg (stmt, 1);
15579 lhs = gimple_call_lhs (stmt);
15580 g = gimple_build_assign (lhs, TRUNC_DIV_EXPR, arg0, arg1);
15581 gimple_set_location (g, gimple_location (stmt));
15582 gsi_replace (gsi, g, true);
15583 return true;
15584 /* Flavors of vec_div (Float). */
15585 case VSX_BUILTIN_XVDIVSP:
15586 case VSX_BUILTIN_XVDIVDP:
15587 arg0 = gimple_call_arg (stmt, 0);
15588 arg1 = gimple_call_arg (stmt, 1);
15589 lhs = gimple_call_lhs (stmt);
15590 g = gimple_build_assign (lhs, RDIV_EXPR, arg0, arg1);
15591 gimple_set_location (g, gimple_location (stmt));
15592 gsi_replace (gsi, g, true);
15593 return true;
15594 /* Flavors of vec_and. */
15595 case ALTIVEC_BUILTIN_VAND:
15596 arg0 = gimple_call_arg (stmt, 0);
15597 arg1 = gimple_call_arg (stmt, 1);
15598 lhs = gimple_call_lhs (stmt);
15599 g = gimple_build_assign (lhs, BIT_AND_EXPR, arg0, arg1);
15600 gimple_set_location (g, gimple_location (stmt));
15601 gsi_replace (gsi, g, true);
15602 return true;
15603 /* Flavors of vec_andc. */
15604 case ALTIVEC_BUILTIN_VANDC:
15605 arg0 = gimple_call_arg (stmt, 0);
15606 arg1 = gimple_call_arg (stmt, 1);
15607 lhs = gimple_call_lhs (stmt);
15608 temp = create_tmp_reg_or_ssa_name (TREE_TYPE (arg1));
15609 g = gimple_build_assign (temp, BIT_NOT_EXPR, arg1);
15610 gimple_set_location (g, gimple_location (stmt));
15611 gsi_insert_before (gsi, g, GSI_SAME_STMT);
15612 g = gimple_build_assign (lhs, BIT_AND_EXPR, arg0, temp);
15613 gimple_set_location (g, gimple_location (stmt));
15614 gsi_replace (gsi, g, true);
15615 return true;
15616 /* Flavors of vec_nand. */
15617 case P8V_BUILTIN_VEC_NAND:
15618 case P8V_BUILTIN_NAND_V16QI:
15619 case P8V_BUILTIN_NAND_V8HI:
15620 case P8V_BUILTIN_NAND_V4SI:
15621 case P8V_BUILTIN_NAND_V4SF:
15622 case P8V_BUILTIN_NAND_V2DF:
15623 case P8V_BUILTIN_NAND_V2DI:
15624 arg0 = gimple_call_arg (stmt, 0);
15625 arg1 = gimple_call_arg (stmt, 1);
15626 lhs = gimple_call_lhs (stmt);
15627 temp = create_tmp_reg_or_ssa_name (TREE_TYPE (arg1));
15628 g = gimple_build_assign (temp, BIT_AND_EXPR, arg0, arg1);
15629 gimple_set_location (g, gimple_location (stmt));
15630 gsi_insert_before (gsi, g, GSI_SAME_STMT);
15631 g = gimple_build_assign (lhs, BIT_NOT_EXPR, temp);
15632 gimple_set_location (g, gimple_location (stmt));
15633 gsi_replace (gsi, g, true);
15634 return true;
15635 /* Flavors of vec_or. */
15636 case ALTIVEC_BUILTIN_VOR:
15637 arg0 = gimple_call_arg (stmt, 0);
15638 arg1 = gimple_call_arg (stmt, 1);
15639 lhs = gimple_call_lhs (stmt);
15640 g = gimple_build_assign (lhs, BIT_IOR_EXPR, arg0, arg1);
15641 gimple_set_location (g, gimple_location (stmt));
15642 gsi_replace (gsi, g, true);
15643 return true;
15644 /* flavors of vec_orc. */
15645 case P8V_BUILTIN_ORC_V16QI:
15646 case P8V_BUILTIN_ORC_V8HI:
15647 case P8V_BUILTIN_ORC_V4SI:
15648 case P8V_BUILTIN_ORC_V4SF:
15649 case P8V_BUILTIN_ORC_V2DF:
15650 case P8V_BUILTIN_ORC_V2DI:
15651 arg0 = gimple_call_arg (stmt, 0);
15652 arg1 = gimple_call_arg (stmt, 1);
15653 lhs = gimple_call_lhs (stmt);
15654 temp = create_tmp_reg_or_ssa_name (TREE_TYPE (arg1));
15655 g = gimple_build_assign (temp, BIT_NOT_EXPR, arg1);
15656 gimple_set_location (g, gimple_location (stmt));
15657 gsi_insert_before (gsi, g, GSI_SAME_STMT);
15658 g = gimple_build_assign (lhs, BIT_IOR_EXPR, arg0, temp);
15659 gimple_set_location (g, gimple_location (stmt));
15660 gsi_replace (gsi, g, true);
15661 return true;
15662 /* Flavors of vec_xor. */
15663 case ALTIVEC_BUILTIN_VXOR:
15664 arg0 = gimple_call_arg (stmt, 0);
15665 arg1 = gimple_call_arg (stmt, 1);
15666 lhs = gimple_call_lhs (stmt);
15667 g = gimple_build_assign (lhs, BIT_XOR_EXPR, arg0, arg1);
15668 gimple_set_location (g, gimple_location (stmt));
15669 gsi_replace (gsi, g, true);
15670 return true;
15671 /* Flavors of vec_nor. */
15672 case ALTIVEC_BUILTIN_VNOR:
15673 arg0 = gimple_call_arg (stmt, 0);
15674 arg1 = gimple_call_arg (stmt, 1);
15675 lhs = gimple_call_lhs (stmt);
15676 temp = create_tmp_reg_or_ssa_name (TREE_TYPE (arg1));
15677 g = gimple_build_assign (temp, BIT_IOR_EXPR, arg0, arg1);
15678 gimple_set_location (g, gimple_location (stmt));
15679 gsi_insert_before (gsi, g, GSI_SAME_STMT);
15680 g = gimple_build_assign (lhs, BIT_NOT_EXPR, temp);
15681 gimple_set_location (g, gimple_location (stmt));
15682 gsi_replace (gsi, g, true);
15683 return true;
15684 /* flavors of vec_abs. */
15685 case ALTIVEC_BUILTIN_ABS_V16QI:
15686 case ALTIVEC_BUILTIN_ABS_V8HI:
15687 case ALTIVEC_BUILTIN_ABS_V4SI:
15688 case ALTIVEC_BUILTIN_ABS_V4SF:
15689 case P8V_BUILTIN_ABS_V2DI:
15690 case VSX_BUILTIN_XVABSDP:
15691 arg0 = gimple_call_arg (stmt, 0);
15692 if (INTEGRAL_TYPE_P (TREE_TYPE (TREE_TYPE (arg0)))
15693 && !TYPE_OVERFLOW_WRAPS (TREE_TYPE (TREE_TYPE (arg0))))
15694 return false;
15695 lhs = gimple_call_lhs (stmt);
15696 g = gimple_build_assign (lhs, ABS_EXPR, arg0);
15697 gimple_set_location (g, gimple_location (stmt));
15698 gsi_replace (gsi, g, true);
15699 return true;
15700 /* flavors of vec_min. */
15701 case VSX_BUILTIN_XVMINDP:
15702 case P8V_BUILTIN_VMINSD:
15703 case P8V_BUILTIN_VMINUD:
15704 case ALTIVEC_BUILTIN_VMINSB:
15705 case ALTIVEC_BUILTIN_VMINSH:
15706 case ALTIVEC_BUILTIN_VMINSW:
15707 case ALTIVEC_BUILTIN_VMINUB:
15708 case ALTIVEC_BUILTIN_VMINUH:
15709 case ALTIVEC_BUILTIN_VMINUW:
15710 case ALTIVEC_BUILTIN_VMINFP:
15711 arg0 = gimple_call_arg (stmt, 0);
15712 arg1 = gimple_call_arg (stmt, 1);
15713 lhs = gimple_call_lhs (stmt);
15714 g = gimple_build_assign (lhs, MIN_EXPR, arg0, arg1);
15715 gimple_set_location (g, gimple_location (stmt));
15716 gsi_replace (gsi, g, true);
15717 return true;
15718 /* flavors of vec_max. */
15719 case VSX_BUILTIN_XVMAXDP:
15720 case P8V_BUILTIN_VMAXSD:
15721 case P8V_BUILTIN_VMAXUD:
15722 case ALTIVEC_BUILTIN_VMAXSB:
15723 case ALTIVEC_BUILTIN_VMAXSH:
15724 case ALTIVEC_BUILTIN_VMAXSW:
15725 case ALTIVEC_BUILTIN_VMAXUB:
15726 case ALTIVEC_BUILTIN_VMAXUH:
15727 case ALTIVEC_BUILTIN_VMAXUW:
15728 case ALTIVEC_BUILTIN_VMAXFP:
15729 arg0 = gimple_call_arg (stmt, 0);
15730 arg1 = gimple_call_arg (stmt, 1);
15731 lhs = gimple_call_lhs (stmt);
15732 g = gimple_build_assign (lhs, MAX_EXPR, arg0, arg1);
15733 gimple_set_location (g, gimple_location (stmt));
15734 gsi_replace (gsi, g, true);
15735 return true;
15736 /* Flavors of vec_eqv. */
15737 case P8V_BUILTIN_EQV_V16QI:
15738 case P8V_BUILTIN_EQV_V8HI:
15739 case P8V_BUILTIN_EQV_V4SI:
15740 case P8V_BUILTIN_EQV_V4SF:
15741 case P8V_BUILTIN_EQV_V2DF:
15742 case P8V_BUILTIN_EQV_V2DI:
15743 arg0 = gimple_call_arg (stmt, 0);
15744 arg1 = gimple_call_arg (stmt, 1);
15745 lhs = gimple_call_lhs (stmt);
15746 temp = create_tmp_reg_or_ssa_name (TREE_TYPE (arg1));
15747 g = gimple_build_assign (temp, BIT_XOR_EXPR, arg0, arg1);
15748 gimple_set_location (g, gimple_location (stmt));
15749 gsi_insert_before (gsi, g, GSI_SAME_STMT);
15750 g = gimple_build_assign (lhs, BIT_NOT_EXPR, temp);
15751 gimple_set_location (g, gimple_location (stmt));
15752 gsi_replace (gsi, g, true);
15753 return true;
15754 /* Flavors of vec_rotate_left. */
15755 case ALTIVEC_BUILTIN_VRLB:
15756 case ALTIVEC_BUILTIN_VRLH:
15757 case ALTIVEC_BUILTIN_VRLW:
15758 case P8V_BUILTIN_VRLD:
15759 arg0 = gimple_call_arg (stmt, 0);
15760 arg1 = gimple_call_arg (stmt, 1);
15761 lhs = gimple_call_lhs (stmt);
15762 g = gimple_build_assign (lhs, LROTATE_EXPR, arg0, arg1);
15763 gimple_set_location (g, gimple_location (stmt));
15764 gsi_replace (gsi, g, true);
15765 return true;
15766 /* Flavors of vector shift right algebraic.
15767 vec_sra{b,h,w} -> vsra{b,h,w}. */
15768 case ALTIVEC_BUILTIN_VSRAB:
15769 case ALTIVEC_BUILTIN_VSRAH:
15770 case ALTIVEC_BUILTIN_VSRAW:
15771 case P8V_BUILTIN_VSRAD:
15772 arg0 = gimple_call_arg (stmt, 0);
15773 arg1 = gimple_call_arg (stmt, 1);
15774 lhs = gimple_call_lhs (stmt);
15775 g = gimple_build_assign (lhs, RSHIFT_EXPR, arg0, arg1);
15776 gimple_set_location (g, gimple_location (stmt));
15777 gsi_replace (gsi, g, true);
15778 return true;
15779 /* Flavors of vector shift left.
15780 builtin_altivec_vsl{b,h,w} -> vsl{b,h,w}. */
15781 case ALTIVEC_BUILTIN_VSLB:
15782 case ALTIVEC_BUILTIN_VSLH:
15783 case ALTIVEC_BUILTIN_VSLW:
15784 case P8V_BUILTIN_VSLD:
15785 arg0 = gimple_call_arg (stmt, 0);
15786 if (INTEGRAL_TYPE_P (TREE_TYPE (TREE_TYPE (arg0)))
15787 && !TYPE_OVERFLOW_WRAPS (TREE_TYPE (TREE_TYPE (arg0))))
15788 return false;
15789 arg1 = gimple_call_arg (stmt, 1);
15790 lhs = gimple_call_lhs (stmt);
15791 g = gimple_build_assign (lhs, LSHIFT_EXPR, arg0, arg1);
15792 gimple_set_location (g, gimple_location (stmt));
15793 gsi_replace (gsi, g, true);
15794 return true;
15795 /* Flavors of vector shift right. */
15796 case ALTIVEC_BUILTIN_VSRB:
15797 case ALTIVEC_BUILTIN_VSRH:
15798 case ALTIVEC_BUILTIN_VSRW:
15799 case P8V_BUILTIN_VSRD:
15801 arg0 = gimple_call_arg (stmt, 0);
15802 arg1 = gimple_call_arg (stmt, 1);
15803 lhs = gimple_call_lhs (stmt);
15804 gimple_seq stmts = NULL;
15805 /* Convert arg0 to unsigned. */
15806 tree arg0_unsigned
15807 = gimple_build (&stmts, VIEW_CONVERT_EXPR,
15808 unsigned_type_for (TREE_TYPE (arg0)), arg0);
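/* The detour through the unsigned type is necessary because RSHIFT_EXPR
   on a signed operand is an arithmetic shift in GIMPLE, while vsr{b,h,w,d}
   perform a logical shift.  */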
15809 tree res
15810 = gimple_build (&stmts, RSHIFT_EXPR,
15811 TREE_TYPE (arg0_unsigned), arg0_unsigned, arg1);
15812 /* Convert result back to the lhs type. */
15813 res = gimple_build (&stmts, VIEW_CONVERT_EXPR, TREE_TYPE (lhs), res);
15814 gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
15815 update_call_from_tree (gsi, res);
15816 return true;
15818 /* Vector loads. */
15819 case ALTIVEC_BUILTIN_LVX_V16QI:
15820 case ALTIVEC_BUILTIN_LVX_V8HI:
15821 case ALTIVEC_BUILTIN_LVX_V4SI:
15822 case ALTIVEC_BUILTIN_LVX_V4SF:
15823 case ALTIVEC_BUILTIN_LVX_V2DI:
15824 case ALTIVEC_BUILTIN_LVX_V2DF:
15825 case ALTIVEC_BUILTIN_LVX_V1TI:
15827 arg0 = gimple_call_arg (stmt, 0); // offset
15828 arg1 = gimple_call_arg (stmt, 1); // address
15829 lhs = gimple_call_lhs (stmt);
15830 location_t loc = gimple_location (stmt);
15831 /* Since arg1 may be cast to a different type, just use ptr_type_node
15832 here instead of trying to enforce TBAA on pointer types. */
15833 tree arg1_type = ptr_type_node;
15834 tree lhs_type = TREE_TYPE (lhs);
15835 /* POINTER_PLUS_EXPR wants the offset to be of type 'sizetype'. Create
15836 the tree using the value from arg0. The resulting type will match
15837 the type of arg1. */
15838 gimple_seq stmts = NULL;
15839 tree temp_offset = gimple_convert (&stmts, loc, sizetype, arg0);
15840 tree temp_addr = gimple_build (&stmts, loc, POINTER_PLUS_EXPR,
15841 arg1_type, arg1, temp_offset);
15842 /* Mask off any lower bits from the address. */
15843 tree aligned_addr = gimple_build (&stmts, loc, BIT_AND_EXPR,
15844 arg1_type, temp_addr,
15845 build_int_cst (arg1_type, -16));
15846 gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
15847 /* Use the build2 helper to set up the mem_ref. The MEM_REF could also
15848 take an offset, but since we've already incorporated the offset
15849 above, here we just pass in a zero. */
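/* The desired gimple result should be similar to:
   vf1D.2697 = MEM[(__vector floatD.1407 *)_1];  */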
15850 gimple *g
15851 = gimple_build_assign (lhs, build2 (MEM_REF, lhs_type, aligned_addr,
15852 build_int_cst (arg1_type, 0)));
15853 gimple_set_location (g, loc);
15854 gsi_replace (gsi, g, true);
15855 return true;
15857 /* Vector stores. */
15858 case ALTIVEC_BUILTIN_STVX_V16QI:
15859 case ALTIVEC_BUILTIN_STVX_V8HI:
15860 case ALTIVEC_BUILTIN_STVX_V4SI:
15861 case ALTIVEC_BUILTIN_STVX_V4SF:
15862 case ALTIVEC_BUILTIN_STVX_V2DI:
15863 case ALTIVEC_BUILTIN_STVX_V2DF:
15865 arg0 = gimple_call_arg (stmt, 0); /* Value to be stored. */
15866 arg1 = gimple_call_arg (stmt, 1); /* Offset. */
15867 tree arg2 = gimple_call_arg (stmt, 2); /* Store-to address. */
15868 location_t loc = gimple_location (stmt);
15869 tree arg0_type = TREE_TYPE (arg0);
15870 /* Use ptr_type_node (no TBAA) for the arg2_type.
15871 FIXME: (Richard) "A proper fix would be to transition this type as
15872 seen from the frontend to GIMPLE, for example in a similar way we
15873 do for MEM_REFs by piggy-backing that on an extra argument, a
15874 constant zero pointer of the alias pointer type to use (which would
15875 also serve as a type indicator of the store itself). I'd use a
15876 target specific internal function for this (not sure if we can have
15877 those target specific, but I guess if it's folded away then that's
15878 fine) and get away with the overload set." */
15879 tree arg2_type = ptr_type_node;
15880 /* POINTER_PLUS_EXPR wants the offset to be of type 'sizetype'. Create
15881 the tree using the value from arg1. The resulting type will match
15882 the type of arg2. */
15883 gimple_seq stmts = NULL;
15884 tree temp_offset = gimple_convert (&stmts, loc, sizetype, arg1);
15885 tree temp_addr = gimple_build (&stmts, loc, POINTER_PLUS_EXPR,
15886 arg2_type, arg2, temp_offset);
15887 /* Mask off any lower bits from the address. */
15888 tree aligned_addr = gimple_build (&stmts, loc, BIT_AND_EXPR,
15889 arg2_type, temp_addr,
15890 build_int_cst (arg2_type, -16));
15891 gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
15892 /* The desired gimple result should be similar to:
15893 MEM[(__vector floatD.1407 *)_1] = vf1D.2697; */
15894 gimple *g
15895 = gimple_build_assign (build2 (MEM_REF, arg0_type, aligned_addr,
15896 build_int_cst (arg2_type, 0)), arg0);
15897 gimple_set_location (g, loc);
15898 gsi_replace (gsi, g, true);
15899 return true;
15902 /* Vector Fused multiply-add (fma). */
15903 case ALTIVEC_BUILTIN_VMADDFP:
15904 case VSX_BUILTIN_XVMADDDP:
15905 case ALTIVEC_BUILTIN_VMLADDUHM:
15907 arg0 = gimple_call_arg (stmt, 0);
15908 arg1 = gimple_call_arg (stmt, 1);
15909 tree arg2 = gimple_call_arg (stmt, 2);
15910 lhs = gimple_call_lhs (stmt);
15911 gcall *g = gimple_build_call_internal (IFN_FMA, 3, arg0, arg1, arg2);
15912 gimple_call_set_lhs (g, lhs);
15913 gimple_call_set_nothrow (g, true);
15914 gimple_set_location (g, gimple_location (stmt));
15915 gsi_replace (gsi, g, true);
15916 return true;
15919 /* Vector compares; EQ, NE, GE, GT, LE. */
15920 case ALTIVEC_BUILTIN_VCMPEQUB:
15921 case ALTIVEC_BUILTIN_VCMPEQUH:
15922 case ALTIVEC_BUILTIN_VCMPEQUW:
15923 case P8V_BUILTIN_VCMPEQUD:
15924 fold_compare_helper (gsi, EQ_EXPR, stmt);
15925 return true;
15927 case P9V_BUILTIN_CMPNEB:
15928 case P9V_BUILTIN_CMPNEH:
15929 case P9V_BUILTIN_CMPNEW:
15930 fold_compare_helper (gsi, NE_EXPR, stmt);
15931 return true;
15933 case VSX_BUILTIN_CMPGE_16QI:
15934 case VSX_BUILTIN_CMPGE_U16QI:
15935 case VSX_BUILTIN_CMPGE_8HI:
15936 case VSX_BUILTIN_CMPGE_U8HI:
15937 case VSX_BUILTIN_CMPGE_4SI:
15938 case VSX_BUILTIN_CMPGE_U4SI:
15939 case VSX_BUILTIN_CMPGE_2DI:
15940 case VSX_BUILTIN_CMPGE_U2DI:
15941 fold_compare_helper (gsi, GE_EXPR, stmt);
15942 return true;
15944 case ALTIVEC_BUILTIN_VCMPGTSB:
15945 case ALTIVEC_BUILTIN_VCMPGTUB:
15946 case ALTIVEC_BUILTIN_VCMPGTSH:
15947 case ALTIVEC_BUILTIN_VCMPGTUH:
15948 case ALTIVEC_BUILTIN_VCMPGTSW:
15949 case ALTIVEC_BUILTIN_VCMPGTUW:
15950 case P8V_BUILTIN_VCMPGTUD:
15951 case P8V_BUILTIN_VCMPGTSD:
15952 fold_compare_helper (gsi, GT_EXPR, stmt);
15953 return true;
15955 case VSX_BUILTIN_CMPLE_16QI:
15956 case VSX_BUILTIN_CMPLE_U16QI:
15957 case VSX_BUILTIN_CMPLE_8HI:
15958 case VSX_BUILTIN_CMPLE_U8HI:
15959 case VSX_BUILTIN_CMPLE_4SI:
15960 case VSX_BUILTIN_CMPLE_U4SI:
15961 case VSX_BUILTIN_CMPLE_2DI:
15962 case VSX_BUILTIN_CMPLE_U2DI:
15963 fold_compare_helper (gsi, LE_EXPR, stmt);
15964 return true;
15966 /* flavors of vec_splat_[us]{8,16,32}. */
15967 case ALTIVEC_BUILTIN_VSPLTISB:
15968 case ALTIVEC_BUILTIN_VSPLTISH:
15969 case ALTIVEC_BUILTIN_VSPLTISW:
15971 int size;
15973 if (fn_code == ALTIVEC_BUILTIN_VSPLTISB)
15974 size = 8;
15975 else if (fn_code == ALTIVEC_BUILTIN_VSPLTISH)
15976 size = 16;
15977 else
15978 size = 32;
15980 arg0 = gimple_call_arg (stmt, 0);
15981 lhs = gimple_call_lhs (stmt);
15983 /* Only fold the vec_splat_*() if the lower bits of arg 0 form a
15984 5-bit signed constant in the range -16 to +15. */
15985 if (TREE_CODE (arg0) != INTEGER_CST
15986 || !IN_RANGE (sext_hwi (TREE_INT_CST_LOW (arg0), size),
15987 -16, 15))
15988 return false;
15989 gimple_seq stmts = NULL;
15990 location_t loc = gimple_location (stmt);
15991 tree splat_value = gimple_convert (&stmts, loc,
15992 TREE_TYPE (TREE_TYPE (lhs)), arg0);
15993 gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
15994 tree splat_tree = build_vector_from_val (TREE_TYPE (lhs), splat_value);
15995 g = gimple_build_assign (lhs, splat_tree);
15996 gimple_set_location (g, gimple_location (stmt));
15997 gsi_replace (gsi, g, true);
15998 return true;
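/* E.g., vec_splat_s32 (5) folds here to the VECTOR_CST { 5, 5, 5, 5 },
   which later passes can treat as an ordinary constant.  */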
16001 /* vec_mergel (integrals). */
16002 case ALTIVEC_BUILTIN_VMRGLH:
16003 case ALTIVEC_BUILTIN_VMRGLW:
16004 case VSX_BUILTIN_XXMRGLW_4SI:
16005 case ALTIVEC_BUILTIN_VMRGLB:
16006 case VSX_BUILTIN_VEC_MERGEL_V2DI:
16007 fold_mergehl_helper (gsi, stmt, 1);
16008 return true;
16009 /* vec_mergeh (integrals). */
16010 case ALTIVEC_BUILTIN_VMRGHH:
16011 case ALTIVEC_BUILTIN_VMRGHW:
16012 case VSX_BUILTIN_XXMRGHW_4SI:
16013 case ALTIVEC_BUILTIN_VMRGHB:
16014 case VSX_BUILTIN_VEC_MERGEH_V2DI:
16015 fold_mergehl_helper (gsi, stmt, 0);
16016 return true;
16017 default:
16018 if (TARGET_DEBUG_BUILTIN)
16019 fprintf (stderr, "gimple builtin intrinsic not matched: %d %s %s\n",
16020 fn_code, fn_name1, fn_name2);
16021 break;
16024 return false;
16027 /* Expand an expression EXP that calls a built-in function,
16028 with result going to TARGET if that's convenient
16029 (and in mode MODE if that's convenient).
16030 SUBTARGET may be used as the target for computing one of EXP's operands.
16031 IGNORE is nonzero if the value is to be ignored. */
16033 static rtx
16034 rs6000_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
16035 machine_mode mode ATTRIBUTE_UNUSED,
16036 int ignore ATTRIBUTE_UNUSED)
16038 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
16039 enum rs6000_builtins fcode
16040 = (enum rs6000_builtins)DECL_FUNCTION_CODE (fndecl);
16041 size_t uns_fcode = (size_t)fcode;
16042 const struct builtin_description *d;
16043 size_t i;
16044 rtx ret;
16045 bool success;
16046 HOST_WIDE_INT mask = rs6000_builtin_info[uns_fcode].mask;
16047 bool func_valid_p = ((rs6000_builtin_mask & mask) == mask);
16048 enum insn_code icode = rs6000_builtin_info[uns_fcode].icode;
16050 /* We have two different modes (KFmode, TFmode) that are the IEEE 128-bit
16051 floating point type: KFmode when long double is IBM extended double, and
16052 TFmode when long double is IEEE 128-bit.  It is simpler if
16053 we only define one variant of the built-in function, and switch the code
16054 when defining it, rather than defining two built-ins and using the
16055 overload table in rs6000-c.c to switch between the two. If we don't have
16056 the proper assembler, don't do this switch because CODE_FOR_*kf* and
16057 CODE_FOR_*tf* will be CODE_FOR_nothing. */
16058 #ifdef HAVE_AS_POWER9
16059 if (FLOAT128_IEEE_P (TFmode))
16060 switch (icode)
16062 default:
16063 break;
16065 case CODE_FOR_sqrtkf2_odd: icode = CODE_FOR_sqrttf2_odd; break;
16066 case CODE_FOR_trunckfdf2_odd: icode = CODE_FOR_trunctfdf2_odd; break;
16067 case CODE_FOR_addkf3_odd: icode = CODE_FOR_addtf3_odd; break;
16068 case CODE_FOR_subkf3_odd: icode = CODE_FOR_subtf3_odd; break;
16069 case CODE_FOR_mulkf3_odd: icode = CODE_FOR_multf3_odd; break;
16070 case CODE_FOR_divkf3_odd: icode = CODE_FOR_divtf3_odd; break;
16071 case CODE_FOR_fmakf4_odd: icode = CODE_FOR_fmatf4_odd; break;
16072 case CODE_FOR_xsxexpqp_kf: icode = CODE_FOR_xsxexpqp_tf; break;
16073 case CODE_FOR_xsxsigqp_kf: icode = CODE_FOR_xsxsigqp_tf; break;
16074 case CODE_FOR_xststdcnegqp_kf: icode = CODE_FOR_xststdcnegqp_tf; break;
16075 case CODE_FOR_xsiexpqp_kf: icode = CODE_FOR_xsiexpqp_tf; break;
16076 case CODE_FOR_xsiexpqpf_kf: icode = CODE_FOR_xsiexpqpf_tf; break;
16077 case CODE_FOR_xststdcqp_kf: icode = CODE_FOR_xststdcqp_tf; break;
16079 #endif
16081 if (TARGET_DEBUG_BUILTIN)
16083 const char *name1 = rs6000_builtin_info[uns_fcode].name;
16084 const char *name2 = (icode != CODE_FOR_nothing)
16085 ? get_insn_name ((int) icode)
16086 : "nothing";
16087 const char *name3;
16089 switch (rs6000_builtin_info[uns_fcode].attr & RS6000_BTC_TYPE_MASK)
16091 default: name3 = "unknown"; break;
16092 case RS6000_BTC_SPECIAL: name3 = "special"; break;
16093 case RS6000_BTC_UNARY: name3 = "unary"; break;
16094 case RS6000_BTC_BINARY: name3 = "binary"; break;
16095 case RS6000_BTC_TERNARY: name3 = "ternary"; break;
16096 case RS6000_BTC_PREDICATE: name3 = "predicate"; break;
16097 case RS6000_BTC_ABS: name3 = "abs"; break;
16098 case RS6000_BTC_DST: name3 = "dst"; break;
16102 fprintf (stderr,
16103 "rs6000_expand_builtin, %s (%d), insn = %s (%d), type=%s%s\n",
16104 (name1) ? name1 : "---", fcode,
16105 (name2) ? name2 : "---", (int) icode,
16106 name3,
16107 func_valid_p ? "" : ", not valid");
16110 if (!func_valid_p)
16112 rs6000_invalid_builtin (fcode);
16114 /* Since the builtin is invalid, just expand it as a normal call. */
16115 return expand_call (exp, target, ignore);
16118 switch (fcode)
16120 case RS6000_BUILTIN_RECIP:
16121 return rs6000_expand_binop_builtin (CODE_FOR_recipdf3, exp, target);
16123 case RS6000_BUILTIN_RECIPF:
16124 return rs6000_expand_binop_builtin (CODE_FOR_recipsf3, exp, target);
16126 case RS6000_BUILTIN_RSQRTF:
16127 return rs6000_expand_unop_builtin (CODE_FOR_rsqrtsf2, exp, target);
16129 case RS6000_BUILTIN_RSQRT:
16130 return rs6000_expand_unop_builtin (CODE_FOR_rsqrtdf2, exp, target);
16132 case POWER7_BUILTIN_BPERMD:
16133 return rs6000_expand_binop_builtin (((TARGET_64BIT)
16134 ? CODE_FOR_bpermd_di
16135 : CODE_FOR_bpermd_si), exp, target);
16137 case RS6000_BUILTIN_GET_TB:
16138 return rs6000_expand_zeroop_builtin (CODE_FOR_rs6000_get_timebase,
16139 target);
16141 case RS6000_BUILTIN_MFTB:
16142 return rs6000_expand_zeroop_builtin (((TARGET_64BIT)
16143 ? CODE_FOR_rs6000_mftb_di
16144 : CODE_FOR_rs6000_mftb_si),
16145 target);
16147 case RS6000_BUILTIN_MFFS:
16148 return rs6000_expand_zeroop_builtin (CODE_FOR_rs6000_mffs, target);
16150 case RS6000_BUILTIN_MTFSF:
16151 return rs6000_expand_mtfsf_builtin (CODE_FOR_rs6000_mtfsf, exp);
16153 case RS6000_BUILTIN_CPU_INIT:
16154 case RS6000_BUILTIN_CPU_IS:
16155 case RS6000_BUILTIN_CPU_SUPPORTS:
16156 return cpu_expand_builtin (fcode, exp, target);
16158 case MISC_BUILTIN_SPEC_BARRIER:
16160 emit_insn (gen_rs6000_speculation_barrier ());
16161 return NULL_RTX;
16164 case ALTIVEC_BUILTIN_MASK_FOR_LOAD:
16165 case ALTIVEC_BUILTIN_MASK_FOR_STORE:
16167 int icode2 = (BYTES_BIG_ENDIAN ? (int) CODE_FOR_altivec_lvsr_direct
16168 : (int) CODE_FOR_altivec_lvsl_direct);
16169 machine_mode tmode = insn_data[icode2].operand[0].mode;
16170 machine_mode mode = insn_data[icode2].operand[1].mode;
16171 tree arg;
16172 rtx op, addr, pat;
16174 gcc_assert (TARGET_ALTIVEC);
16176 arg = CALL_EXPR_ARG (exp, 0);
16177 gcc_assert (POINTER_TYPE_P (TREE_TYPE (arg)));
16178 op = expand_expr (arg, NULL_RTX, Pmode, EXPAND_NORMAL);
16179 addr = memory_address (mode, op);
16180 if (fcode == ALTIVEC_BUILTIN_MASK_FOR_STORE)
16181 op = addr;
16182 else
16184 /* For the load case, we need to negate the address. */
16185 op = gen_reg_rtx (GET_MODE (addr));
16186 emit_insn (gen_rtx_SET (op, gen_rtx_NEG (GET_MODE (addr), addr)));
16188 op = gen_rtx_MEM (mode, op);
16190 if (target == 0
16191 || GET_MODE (target) != tmode
16192 || ! (*insn_data[icode2].operand[0].predicate) (target, tmode))
16193 target = gen_reg_rtx (tmode);
16195 pat = GEN_FCN (icode2) (target, op);
16196 if (!pat)
16197 return 0;
16198 emit_insn (pat);
16200 return target;
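      /* Sketch of the rtl produced above (illustrative only): the load
         mask negates the address first,

           tmp  = -addr;
           mask = lvsr (tmp);     // lvsl on little-endian targets

         while the store mask uses the address unchanged.  The vectorizer
         later feeds this mask into its realignment permute (see
         builtin_mask_for_load below).  */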
16203 case ALTIVEC_BUILTIN_VCFUX:
16204 case ALTIVEC_BUILTIN_VCFSX:
16205 case ALTIVEC_BUILTIN_VCTUXS:
16206 case ALTIVEC_BUILTIN_VCTSXS:
16207 /* FIXME: There's got to be a nicer way to handle this case than
16208 constructing a new CALL_EXPR. */
16209 if (call_expr_nargs (exp) == 1)
16211 exp = build_call_nary (TREE_TYPE (exp), CALL_EXPR_FN (exp),
16212 2, CALL_EXPR_ARG (exp, 0), integer_zero_node);
16214 break;
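      /* Illustrative sketch, not part of the original source: the fixup
         above makes the one-argument form behave as if a zero scale
         factor had been written explicitly,

           vec_ctf (v)   is rebuilt as   vec_ctf (v, 0)

         so the expanders only ever see the two-operand shape.  */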
16216 default:
16217 break;
16220 if (TARGET_ALTIVEC)
16222 ret = altivec_expand_builtin (exp, target, &success);
16224 if (success)
16225 return ret;
16227 if (TARGET_HTM)
16229 ret = htm_expand_builtin (exp, target, &success);
16231 if (success)
16232 return ret;
16235 unsigned attr = rs6000_builtin_info[uns_fcode].attr & RS6000_BTC_TYPE_MASK;
16236 /* RS6000_BTC_SPECIAL represents no-operand operators. */
16237 gcc_assert (attr == RS6000_BTC_UNARY
16238 || attr == RS6000_BTC_BINARY
16239 || attr == RS6000_BTC_TERNARY
16240 || attr == RS6000_BTC_SPECIAL);
16242 /* Handle simple unary operations. */
16243 d = bdesc_1arg;
16244 for (i = 0; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
16245 if (d->code == fcode)
16246 return rs6000_expand_unop_builtin (icode, exp, target);
16248 /* Handle simple binary operations. */
16249 d = bdesc_2arg;
16250 for (i = 0; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
16251 if (d->code == fcode)
16252 return rs6000_expand_binop_builtin (icode, exp, target);
16254 /* Handle simple ternary operations. */
16255 d = bdesc_3arg;
16256 for (i = 0; i < ARRAY_SIZE (bdesc_3arg); i++, d++)
16257 if (d->code == fcode)
16258 return rs6000_expand_ternop_builtin (icode, exp, target);
16260 /* Handle simple no-argument operations. */
16261 d = bdesc_0arg;
16262 for (i = 0; i < ARRAY_SIZE (bdesc_0arg); i++, d++)
16263 if (d->code == fcode)
16264 return rs6000_expand_zeroop_builtin (icode, target);
16266 gcc_unreachable ();
16269 /* Create a builtin vector type with a name, taking care not to give
16270 the canonical type a name. */
16272 static tree
16273 rs6000_vector_type (const char *name, tree elt_type, unsigned num_elts)
16275 tree result = build_vector_type (elt_type, num_elts);
16277 /* Copy so we don't give the canonical type a name. */
16278 result = build_variant_type_copy (result);
16280 add_builtin_type (name, result);
16282 return result;
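/* Usage sketch, mirroring the calls in rs6000_init_builtins below:

     V4SF_type_node = rs6000_vector_type ("__vector float",
                                          float_type_node, 4);

   Only the variant copy receives the "__vector float" name; the
   canonical vector type itself stays anonymous.  */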
16285 static void
16286 rs6000_init_builtins (void)
16288 tree tdecl;
16289 tree ftype;
16290 machine_mode mode;
16292 if (TARGET_DEBUG_BUILTIN)
16293 fprintf (stderr, "rs6000_init_builtins%s%s\n",
16294 (TARGET_ALTIVEC) ? ", altivec" : "",
16295 (TARGET_VSX) ? ", vsx" : "");
16297 V2DI_type_node = rs6000_vector_type (TARGET_POWERPC64 ? "__vector long"
16298 : "__vector long long",
16299 intDI_type_node, 2);
16300 V2DF_type_node = rs6000_vector_type ("__vector double", double_type_node, 2);
16301 V4SI_type_node = rs6000_vector_type ("__vector signed int",
16302 intSI_type_node, 4);
16303 V4SF_type_node = rs6000_vector_type ("__vector float", float_type_node, 4);
16304 V8HI_type_node = rs6000_vector_type ("__vector signed short",
16305 intHI_type_node, 8);
16306 V16QI_type_node = rs6000_vector_type ("__vector signed char",
16307 intQI_type_node, 16);
16309 unsigned_V16QI_type_node = rs6000_vector_type ("__vector unsigned char",
16310 unsigned_intQI_type_node, 16);
16311 unsigned_V8HI_type_node = rs6000_vector_type ("__vector unsigned short",
16312 unsigned_intHI_type_node, 8);
16313 unsigned_V4SI_type_node = rs6000_vector_type ("__vector unsigned int",
16314 unsigned_intSI_type_node, 4);
16315 unsigned_V2DI_type_node = rs6000_vector_type (TARGET_POWERPC64
16316 ? "__vector unsigned long"
16317 : "__vector unsigned long long",
16318 unsigned_intDI_type_node, 2);
16320 opaque_V4SI_type_node = build_opaque_vector_type (intSI_type_node, 4);
16322 const_str_type_node
16323 = build_pointer_type (build_qualified_type (char_type_node,
16324 TYPE_QUAL_CONST));
16326 /* We use V1TI mode as a special container to hold __int128_t items that
16327 must live in VSX registers. */
16328 if (intTI_type_node)
16330 V1TI_type_node = rs6000_vector_type ("__vector __int128",
16331 intTI_type_node, 1);
16332 unsigned_V1TI_type_node
16333 = rs6000_vector_type ("__vector unsigned __int128",
16334 unsigned_intTI_type_node, 1);
16337 /* The 'vector bool ...' types must be kept distinct from 'vector unsigned ...'
16338 types, especially in C++ land. Similarly, 'vector pixel' is distinct from
16339 'vector unsigned short'. */
16341 bool_char_type_node = build_distinct_type_copy (unsigned_intQI_type_node);
16342 bool_short_type_node = build_distinct_type_copy (unsigned_intHI_type_node);
16343 bool_int_type_node = build_distinct_type_copy (unsigned_intSI_type_node);
16344 bool_long_long_type_node = build_distinct_type_copy (unsigned_intDI_type_node);
16345 pixel_type_node = build_distinct_type_copy (unsigned_intHI_type_node);
16347 long_integer_type_internal_node = long_integer_type_node;
16348 long_unsigned_type_internal_node = long_unsigned_type_node;
16349 long_long_integer_type_internal_node = long_long_integer_type_node;
16350 long_long_unsigned_type_internal_node = long_long_unsigned_type_node;
16351 intQI_type_internal_node = intQI_type_node;
16352 uintQI_type_internal_node = unsigned_intQI_type_node;
16353 intHI_type_internal_node = intHI_type_node;
16354 uintHI_type_internal_node = unsigned_intHI_type_node;
16355 intSI_type_internal_node = intSI_type_node;
16356 uintSI_type_internal_node = unsigned_intSI_type_node;
16357 intDI_type_internal_node = intDI_type_node;
16358 uintDI_type_internal_node = unsigned_intDI_type_node;
16359 intTI_type_internal_node = intTI_type_node;
16360 uintTI_type_internal_node = unsigned_intTI_type_node;
16361 float_type_internal_node = float_type_node;
16362 double_type_internal_node = double_type_node;
16363 long_double_type_internal_node = long_double_type_node;
16364 dfloat64_type_internal_node = dfloat64_type_node;
16365 dfloat128_type_internal_node = dfloat128_type_node;
16366 void_type_internal_node = void_type_node;
16368 /* 128-bit floating point support. KFmode is IEEE 128-bit floating point.
16369 IFmode is the IBM extended 128-bit format that is a pair of doubles.
16370 TFmode will be either IEEE 128-bit floating point or the IBM double-double
16371 format that uses a pair of doubles, depending on the switches and
16372 defaults.
16374 If we don't support either 128-bit IBM double-double or IEEE 128-bit
16375 floating point, we need to make sure the type is non-zero, or else the
16376 self-test fails during bootstrap.
16378 Always create __ibm128 as a separate type, even if the current long double
16379 format is IBM extended double.
16381 For IEEE 128-bit floating point, always create the type __ieee128. If the
16382 user used -mfloat128, rs6000-c.c will create a define from __float128 to
16383 __ieee128. */
16384 if (TARGET_FLOAT128_TYPE)
16386 if (TARGET_IEEEQUAD || !TARGET_LONG_DOUBLE_128)
16388 ibm128_float_type_node = make_node (REAL_TYPE);
16389 TYPE_PRECISION (ibm128_float_type_node) = 128;
16390 SET_TYPE_MODE (ibm128_float_type_node, IFmode);
16391 layout_type (ibm128_float_type_node);
16393 else
16394 ibm128_float_type_node = long_double_type_node;
16396 lang_hooks.types.register_builtin_type (ibm128_float_type_node,
16397 "__ibm128");
16399 ieee128_float_type_node
16400 = TARGET_IEEEQUAD ? long_double_type_node : float128_type_node;
16401 lang_hooks.types.register_builtin_type (ieee128_float_type_node,
16402 "__ieee128");
16405 else
16406 ieee128_float_type_node = ibm128_float_type_node = long_double_type_node;
16408 /* Initialize the modes for builtin_function_type, mapping a machine mode to
16409 tree type node. */
16410 builtin_mode_to_type[QImode][0] = integer_type_node;
16411 builtin_mode_to_type[HImode][0] = integer_type_node;
16412 builtin_mode_to_type[SImode][0] = intSI_type_node;
16413 builtin_mode_to_type[SImode][1] = unsigned_intSI_type_node;
16414 builtin_mode_to_type[DImode][0] = intDI_type_node;
16415 builtin_mode_to_type[DImode][1] = unsigned_intDI_type_node;
16416 builtin_mode_to_type[TImode][0] = intTI_type_node;
16417 builtin_mode_to_type[TImode][1] = unsigned_intTI_type_node;
16418 builtin_mode_to_type[SFmode][0] = float_type_node;
16419 builtin_mode_to_type[DFmode][0] = double_type_node;
16420 builtin_mode_to_type[IFmode][0] = ibm128_float_type_node;
16421 builtin_mode_to_type[KFmode][0] = ieee128_float_type_node;
16422 builtin_mode_to_type[TFmode][0] = long_double_type_node;
16423 builtin_mode_to_type[DDmode][0] = dfloat64_type_node;
16424 builtin_mode_to_type[TDmode][0] = dfloat128_type_node;
16425 builtin_mode_to_type[V1TImode][0] = V1TI_type_node;
16426 builtin_mode_to_type[V1TImode][1] = unsigned_V1TI_type_node;
16427 builtin_mode_to_type[V2DImode][0] = V2DI_type_node;
16428 builtin_mode_to_type[V2DImode][1] = unsigned_V2DI_type_node;
16429 builtin_mode_to_type[V2DFmode][0] = V2DF_type_node;
16430 builtin_mode_to_type[V4SImode][0] = V4SI_type_node;
16431 builtin_mode_to_type[V4SImode][1] = unsigned_V4SI_type_node;
16432 builtin_mode_to_type[V4SFmode][0] = V4SF_type_node;
16433 builtin_mode_to_type[V8HImode][0] = V8HI_type_node;
16434 builtin_mode_to_type[V8HImode][1] = unsigned_V8HI_type_node;
16435 builtin_mode_to_type[V16QImode][0] = V16QI_type_node;
16436 builtin_mode_to_type[V16QImode][1] = unsigned_V16QI_type_node;
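  /* Lookup sketch (illustrative): builtin_function_type below indexes
     this table with a (machine mode, unsignedness) pair, e.g.

       builtin_mode_to_type[V4SImode][0]   -> __vector signed int
       builtin_mode_to_type[V4SImode][1]   -> __vector unsigned int

     A NULL entry simply means no builtin uses that combination.  */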
16438 tdecl = add_builtin_type ("__bool char", bool_char_type_node);
16439 TYPE_NAME (bool_char_type_node) = tdecl;
16441 tdecl = add_builtin_type ("__bool short", bool_short_type_node);
16442 TYPE_NAME (bool_short_type_node) = tdecl;
16444 tdecl = add_builtin_type ("__bool int", bool_int_type_node);
16445 TYPE_NAME (bool_int_type_node) = tdecl;
16447 tdecl = add_builtin_type ("__pixel", pixel_type_node);
16448 TYPE_NAME (pixel_type_node) = tdecl;
16450 bool_V16QI_type_node = rs6000_vector_type ("__vector __bool char",
16451 bool_char_type_node, 16);
16452 bool_V8HI_type_node = rs6000_vector_type ("__vector __bool short",
16453 bool_short_type_node, 8);
16454 bool_V4SI_type_node = rs6000_vector_type ("__vector __bool int",
16455 bool_int_type_node, 4);
16456 bool_V2DI_type_node = rs6000_vector_type (TARGET_POWERPC64
16457 ? "__vector __bool long"
16458 : "__vector __bool long long",
16459 bool_long_long_type_node, 2);
16460 pixel_V8HI_type_node = rs6000_vector_type ("__vector __pixel",
16461 pixel_type_node, 8);
16463 /* Create Altivec and VSX builtins on machines with at least the
16464 general purpose extensions (970 and newer) to allow the use of
16465 the target attribute. */
16466 if (TARGET_EXTRA_BUILTINS)
16467 altivec_init_builtins ();
16468 if (TARGET_HTM)
16469 htm_init_builtins ();
16471 if (TARGET_EXTRA_BUILTINS)
16472 rs6000_common_init_builtins ();
16474 ftype = builtin_function_type (DFmode, DFmode, DFmode, VOIDmode,
16475 RS6000_BUILTIN_RECIP, "__builtin_recipdiv");
16476 def_builtin ("__builtin_recipdiv", ftype, RS6000_BUILTIN_RECIP);
16478 ftype = builtin_function_type (SFmode, SFmode, SFmode, VOIDmode,
16479 RS6000_BUILTIN_RECIPF, "__builtin_recipdivf");
16480 def_builtin ("__builtin_recipdivf", ftype, RS6000_BUILTIN_RECIPF);
16482 ftype = builtin_function_type (DFmode, DFmode, VOIDmode, VOIDmode,
16483 RS6000_BUILTIN_RSQRT, "__builtin_rsqrt");
16484 def_builtin ("__builtin_rsqrt", ftype, RS6000_BUILTIN_RSQRT);
16486 ftype = builtin_function_type (SFmode, SFmode, VOIDmode, VOIDmode,
16487 RS6000_BUILTIN_RSQRTF, "__builtin_rsqrtf");
16488 def_builtin ("__builtin_rsqrtf", ftype, RS6000_BUILTIN_RSQRTF);
16490 mode = (TARGET_64BIT) ? DImode : SImode;
16491 ftype = builtin_function_type (mode, mode, mode, VOIDmode,
16492 POWER7_BUILTIN_BPERMD, "__builtin_bpermd");
16493 def_builtin ("__builtin_bpermd", ftype, POWER7_BUILTIN_BPERMD);
16495 ftype = build_function_type_list (unsigned_intDI_type_node,
16496 NULL_TREE);
16497 def_builtin ("__builtin_ppc_get_timebase", ftype, RS6000_BUILTIN_GET_TB);
16499 if (TARGET_64BIT)
16500 ftype = build_function_type_list (unsigned_intDI_type_node,
16501 NULL_TREE);
16502 else
16503 ftype = build_function_type_list (unsigned_intSI_type_node,
16504 NULL_TREE);
16505 def_builtin ("__builtin_ppc_mftb", ftype, RS6000_BUILTIN_MFTB);
16507 ftype = build_function_type_list (double_type_node, NULL_TREE);
16508 def_builtin ("__builtin_mffs", ftype, RS6000_BUILTIN_MFFS);
16510 ftype = build_function_type_list (void_type_node,
16511 intSI_type_node, double_type_node,
16512 NULL_TREE);
16513 def_builtin ("__builtin_mtfsf", ftype, RS6000_BUILTIN_MTFSF);
16515 ftype = build_function_type_list (void_type_node, NULL_TREE);
16516 def_builtin ("__builtin_cpu_init", ftype, RS6000_BUILTIN_CPU_INIT);
16517 def_builtin ("__builtin_ppc_speculation_barrier", ftype,
16518 MISC_BUILTIN_SPEC_BARRIER);
16520 ftype = build_function_type_list (bool_int_type_node, const_ptr_type_node,
16521 NULL_TREE);
16522 def_builtin ("__builtin_cpu_is", ftype, RS6000_BUILTIN_CPU_IS);
16523 def_builtin ("__builtin_cpu_supports", ftype, RS6000_BUILTIN_CPU_SUPPORTS);
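  /* Usage sketch for the CPU builtins defined above (the processor and
     feature strings are illustrative):

       __builtin_cpu_init ();
       if (__builtin_cpu_is ("power9") || __builtin_cpu_supports ("vsx"))
         ...

     Both query functions consult platform/capability data recorded at
     program startup; cpu_expand_builtin does the actual expansion.  */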
16525 /* AIX libm provides clog as __clog. */
16526 if (TARGET_XCOFF &&
16527 (tdecl = builtin_decl_explicit (BUILT_IN_CLOG)) != NULL_TREE)
16528 set_user_assembler_name (tdecl, "__clog");
16530 #ifdef SUBTARGET_INIT_BUILTINS
16531 SUBTARGET_INIT_BUILTINS;
16532 #endif
16535 /* Returns the rs6000 builtin decl for CODE. */
16537 static tree
16538 rs6000_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED)
16540 HOST_WIDE_INT fnmask;
16542 if (code >= RS6000_BUILTIN_COUNT)
16543 return error_mark_node;
16545 fnmask = rs6000_builtin_info[code].mask;
16546 if ((fnmask & rs6000_builtin_mask) != fnmask)
16548 rs6000_invalid_builtin ((enum rs6000_builtins)code);
16549 return error_mark_node;
16552 return rs6000_builtin_decls[code];
16555 static void
16556 altivec_init_builtins (void)
16558 const struct builtin_description *d;
16559 size_t i;
16560 tree ftype;
16561 tree decl;
16562 HOST_WIDE_INT builtin_mask = rs6000_builtin_mask;
16564 tree pvoid_type_node = build_pointer_type (void_type_node);
16566 tree pcvoid_type_node
16567 = build_pointer_type (build_qualified_type (void_type_node,
16568 TYPE_QUAL_CONST));
16570 tree int_ftype_opaque
16571 = build_function_type_list (integer_type_node,
16572 opaque_V4SI_type_node, NULL_TREE);
16573 tree opaque_ftype_opaque
16574 = build_function_type_list (integer_type_node, NULL_TREE);
16575 tree opaque_ftype_opaque_int
16576 = build_function_type_list (opaque_V4SI_type_node,
16577 opaque_V4SI_type_node, integer_type_node, NULL_TREE);
16578 tree opaque_ftype_opaque_opaque_int
16579 = build_function_type_list (opaque_V4SI_type_node,
16580 opaque_V4SI_type_node, opaque_V4SI_type_node,
16581 integer_type_node, NULL_TREE);
16582 tree opaque_ftype_opaque_opaque_opaque
16583 = build_function_type_list (opaque_V4SI_type_node,
16584 opaque_V4SI_type_node, opaque_V4SI_type_node,
16585 opaque_V4SI_type_node, NULL_TREE);
16586 tree opaque_ftype_opaque_opaque
16587 = build_function_type_list (opaque_V4SI_type_node,
16588 opaque_V4SI_type_node, opaque_V4SI_type_node,
16589 NULL_TREE);
16590 tree int_ftype_int_opaque_opaque
16591 = build_function_type_list (integer_type_node,
16592 integer_type_node, opaque_V4SI_type_node,
16593 opaque_V4SI_type_node, NULL_TREE);
16594 tree int_ftype_int_v4si_v4si
16595 = build_function_type_list (integer_type_node,
16596 integer_type_node, V4SI_type_node,
16597 V4SI_type_node, NULL_TREE);
16598 tree int_ftype_int_v2di_v2di
16599 = build_function_type_list (integer_type_node,
16600 integer_type_node, V2DI_type_node,
16601 V2DI_type_node, NULL_TREE);
16602 tree void_ftype_v4si
16603 = build_function_type_list (void_type_node, V4SI_type_node, NULL_TREE);
16604 tree v8hi_ftype_void
16605 = build_function_type_list (V8HI_type_node, NULL_TREE);
16606 tree void_ftype_void
16607 = build_function_type_list (void_type_node, NULL_TREE);
16608 tree void_ftype_int
16609 = build_function_type_list (void_type_node, integer_type_node, NULL_TREE);
16611 tree opaque_ftype_long_pcvoid
16612 = build_function_type_list (opaque_V4SI_type_node,
16613 long_integer_type_node, pcvoid_type_node,
16614 NULL_TREE);
16615 tree v16qi_ftype_long_pcvoid
16616 = build_function_type_list (V16QI_type_node,
16617 long_integer_type_node, pcvoid_type_node,
16618 NULL_TREE);
16619 tree v8hi_ftype_long_pcvoid
16620 = build_function_type_list (V8HI_type_node,
16621 long_integer_type_node, pcvoid_type_node,
16622 NULL_TREE);
16623 tree v4si_ftype_long_pcvoid
16624 = build_function_type_list (V4SI_type_node,
16625 long_integer_type_node, pcvoid_type_node,
16626 NULL_TREE);
16627 tree v4sf_ftype_long_pcvoid
16628 = build_function_type_list (V4SF_type_node,
16629 long_integer_type_node, pcvoid_type_node,
16630 NULL_TREE);
16631 tree v2df_ftype_long_pcvoid
16632 = build_function_type_list (V2DF_type_node,
16633 long_integer_type_node, pcvoid_type_node,
16634 NULL_TREE);
16635 tree v2di_ftype_long_pcvoid
16636 = build_function_type_list (V2DI_type_node,
16637 long_integer_type_node, pcvoid_type_node,
16638 NULL_TREE);
16639 tree v1ti_ftype_long_pcvoid
16640 = build_function_type_list (V1TI_type_node,
16641 long_integer_type_node, pcvoid_type_node,
16642 NULL_TREE);
16644 tree void_ftype_opaque_long_pvoid
16645 = build_function_type_list (void_type_node,
16646 opaque_V4SI_type_node, long_integer_type_node,
16647 pvoid_type_node, NULL_TREE);
16648 tree void_ftype_v4si_long_pvoid
16649 = build_function_type_list (void_type_node,
16650 V4SI_type_node, long_integer_type_node,
16651 pvoid_type_node, NULL_TREE);
16652 tree void_ftype_v16qi_long_pvoid
16653 = build_function_type_list (void_type_node,
16654 V16QI_type_node, long_integer_type_node,
16655 pvoid_type_node, NULL_TREE);
16657 tree void_ftype_v16qi_pvoid_long
16658 = build_function_type_list (void_type_node,
16659 V16QI_type_node, pvoid_type_node,
16660 long_integer_type_node, NULL_TREE);
16662 tree void_ftype_v8hi_long_pvoid
16663 = build_function_type_list (void_type_node,
16664 V8HI_type_node, long_integer_type_node,
16665 pvoid_type_node, NULL_TREE);
16666 tree void_ftype_v4sf_long_pvoid
16667 = build_function_type_list (void_type_node,
16668 V4SF_type_node, long_integer_type_node,
16669 pvoid_type_node, NULL_TREE);
16670 tree void_ftype_v2df_long_pvoid
16671 = build_function_type_list (void_type_node,
16672 V2DF_type_node, long_integer_type_node,
16673 pvoid_type_node, NULL_TREE);
16674 tree void_ftype_v1ti_long_pvoid
16675 = build_function_type_list (void_type_node,
16676 V1TI_type_node, long_integer_type_node,
16677 pvoid_type_node, NULL_TREE);
16678 tree void_ftype_v2di_long_pvoid
16679 = build_function_type_list (void_type_node,
16680 V2DI_type_node, long_integer_type_node,
16681 pvoid_type_node, NULL_TREE);
16682 tree int_ftype_int_v8hi_v8hi
16683 = build_function_type_list (integer_type_node,
16684 integer_type_node, V8HI_type_node,
16685 V8HI_type_node, NULL_TREE);
16686 tree int_ftype_int_v16qi_v16qi
16687 = build_function_type_list (integer_type_node,
16688 integer_type_node, V16QI_type_node,
16689 V16QI_type_node, NULL_TREE);
16690 tree int_ftype_int_v4sf_v4sf
16691 = build_function_type_list (integer_type_node,
16692 integer_type_node, V4SF_type_node,
16693 V4SF_type_node, NULL_TREE);
16694 tree int_ftype_int_v2df_v2df
16695 = build_function_type_list (integer_type_node,
16696 integer_type_node, V2DF_type_node,
16697 V2DF_type_node, NULL_TREE);
16698 tree v2di_ftype_v2di
16699 = build_function_type_list (V2DI_type_node, V2DI_type_node, NULL_TREE);
16700 tree v4si_ftype_v4si
16701 = build_function_type_list (V4SI_type_node, V4SI_type_node, NULL_TREE);
16702 tree v8hi_ftype_v8hi
16703 = build_function_type_list (V8HI_type_node, V8HI_type_node, NULL_TREE);
16704 tree v16qi_ftype_v16qi
16705 = build_function_type_list (V16QI_type_node, V16QI_type_node, NULL_TREE);
16706 tree v4sf_ftype_v4sf
16707 = build_function_type_list (V4SF_type_node, V4SF_type_node, NULL_TREE);
16708 tree v2df_ftype_v2df
16709 = build_function_type_list (V2DF_type_node, V2DF_type_node, NULL_TREE);
16710 tree void_ftype_pcvoid_int_int
16711 = build_function_type_list (void_type_node,
16712 pcvoid_type_node, integer_type_node,
16713 integer_type_node, NULL_TREE);
16715 def_builtin ("__builtin_altivec_mtvscr", void_ftype_v4si, ALTIVEC_BUILTIN_MTVSCR);
16716 def_builtin ("__builtin_altivec_mfvscr", v8hi_ftype_void, ALTIVEC_BUILTIN_MFVSCR);
16717 def_builtin ("__builtin_altivec_dssall", void_ftype_void, ALTIVEC_BUILTIN_DSSALL);
16718 def_builtin ("__builtin_altivec_dss", void_ftype_int, ALTIVEC_BUILTIN_DSS);
16719 def_builtin ("__builtin_altivec_lvsl", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVSL);
16720 def_builtin ("__builtin_altivec_lvsr", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVSR);
16721 def_builtin ("__builtin_altivec_lvebx", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVEBX);
16722 def_builtin ("__builtin_altivec_lvehx", v8hi_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVEHX);
16723 def_builtin ("__builtin_altivec_lvewx", v4si_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVEWX);
16724 def_builtin ("__builtin_altivec_lvxl", v4si_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVXL);
16725 def_builtin ("__builtin_altivec_lvxl_v2df", v2df_ftype_long_pcvoid,
16726 ALTIVEC_BUILTIN_LVXL_V2DF);
16727 def_builtin ("__builtin_altivec_lvxl_v2di", v2di_ftype_long_pcvoid,
16728 ALTIVEC_BUILTIN_LVXL_V2DI);
16729 def_builtin ("__builtin_altivec_lvxl_v4sf", v4sf_ftype_long_pcvoid,
16730 ALTIVEC_BUILTIN_LVXL_V4SF);
16731 def_builtin ("__builtin_altivec_lvxl_v4si", v4si_ftype_long_pcvoid,
16732 ALTIVEC_BUILTIN_LVXL_V4SI);
16733 def_builtin ("__builtin_altivec_lvxl_v8hi", v8hi_ftype_long_pcvoid,
16734 ALTIVEC_BUILTIN_LVXL_V8HI);
16735 def_builtin ("__builtin_altivec_lvxl_v16qi", v16qi_ftype_long_pcvoid,
16736 ALTIVEC_BUILTIN_LVXL_V16QI);
16737 def_builtin ("__builtin_altivec_lvx", v4si_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVX);
16738 def_builtin ("__builtin_altivec_lvx_v1ti", v1ti_ftype_long_pcvoid,
16739 ALTIVEC_BUILTIN_LVX_V1TI);
16740 def_builtin ("__builtin_altivec_lvx_v2df", v2df_ftype_long_pcvoid,
16741 ALTIVEC_BUILTIN_LVX_V2DF);
16742 def_builtin ("__builtin_altivec_lvx_v2di", v2di_ftype_long_pcvoid,
16743 ALTIVEC_BUILTIN_LVX_V2DI);
16744 def_builtin ("__builtin_altivec_lvx_v4sf", v4sf_ftype_long_pcvoid,
16745 ALTIVEC_BUILTIN_LVX_V4SF);
16746 def_builtin ("__builtin_altivec_lvx_v4si", v4si_ftype_long_pcvoid,
16747 ALTIVEC_BUILTIN_LVX_V4SI);
16748 def_builtin ("__builtin_altivec_lvx_v8hi", v8hi_ftype_long_pcvoid,
16749 ALTIVEC_BUILTIN_LVX_V8HI);
16750 def_builtin ("__builtin_altivec_lvx_v16qi", v16qi_ftype_long_pcvoid,
16751 ALTIVEC_BUILTIN_LVX_V16QI);
16752 def_builtin ("__builtin_altivec_stvx", void_ftype_v4si_long_pvoid, ALTIVEC_BUILTIN_STVX);
16753 def_builtin ("__builtin_altivec_stvx_v2df", void_ftype_v2df_long_pvoid,
16754 ALTIVEC_BUILTIN_STVX_V2DF);
16755 def_builtin ("__builtin_altivec_stvx_v2di", void_ftype_v2di_long_pvoid,
16756 ALTIVEC_BUILTIN_STVX_V2DI);
16757 def_builtin ("__builtin_altivec_stvx_v4sf", void_ftype_v4sf_long_pvoid,
16758 ALTIVEC_BUILTIN_STVX_V4SF);
16759 def_builtin ("__builtin_altivec_stvx_v4si", void_ftype_v4si_long_pvoid,
16760 ALTIVEC_BUILTIN_STVX_V4SI);
16761 def_builtin ("__builtin_altivec_stvx_v8hi", void_ftype_v8hi_long_pvoid,
16762 ALTIVEC_BUILTIN_STVX_V8HI);
16763 def_builtin ("__builtin_altivec_stvx_v16qi", void_ftype_v16qi_long_pvoid,
16764 ALTIVEC_BUILTIN_STVX_V16QI);
16765 def_builtin ("__builtin_altivec_stvewx", void_ftype_v4si_long_pvoid, ALTIVEC_BUILTIN_STVEWX);
16766 def_builtin ("__builtin_altivec_stvxl", void_ftype_v4si_long_pvoid, ALTIVEC_BUILTIN_STVXL);
16767 def_builtin ("__builtin_altivec_stvxl_v2df", void_ftype_v2df_long_pvoid,
16768 ALTIVEC_BUILTIN_STVXL_V2DF);
16769 def_builtin ("__builtin_altivec_stvxl_v2di", void_ftype_v2di_long_pvoid,
16770 ALTIVEC_BUILTIN_STVXL_V2DI);
16771 def_builtin ("__builtin_altivec_stvxl_v4sf", void_ftype_v4sf_long_pvoid,
16772 ALTIVEC_BUILTIN_STVXL_V4SF);
16773 def_builtin ("__builtin_altivec_stvxl_v4si", void_ftype_v4si_long_pvoid,
16774 ALTIVEC_BUILTIN_STVXL_V4SI);
16775 def_builtin ("__builtin_altivec_stvxl_v8hi", void_ftype_v8hi_long_pvoid,
16776 ALTIVEC_BUILTIN_STVXL_V8HI);
16777 def_builtin ("__builtin_altivec_stvxl_v16qi", void_ftype_v16qi_long_pvoid,
16778 ALTIVEC_BUILTIN_STVXL_V16QI);
16779 def_builtin ("__builtin_altivec_stvebx", void_ftype_v16qi_long_pvoid, ALTIVEC_BUILTIN_STVEBX);
16780 def_builtin ("__builtin_altivec_stvehx", void_ftype_v8hi_long_pvoid, ALTIVEC_BUILTIN_STVEHX);
16781 def_builtin ("__builtin_vec_ld", opaque_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LD);
16782 def_builtin ("__builtin_vec_lde", opaque_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LDE);
16783 def_builtin ("__builtin_vec_ldl", opaque_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LDL);
16784 def_builtin ("__builtin_vec_lvsl", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LVSL);
16785 def_builtin ("__builtin_vec_lvsr", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LVSR);
16786 def_builtin ("__builtin_vec_lvebx", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LVEBX);
16787 def_builtin ("__builtin_vec_lvehx", v8hi_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LVEHX);
16788 def_builtin ("__builtin_vec_lvewx", v4si_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LVEWX);
16789 def_builtin ("__builtin_vec_st", void_ftype_opaque_long_pvoid, ALTIVEC_BUILTIN_VEC_ST);
16790 def_builtin ("__builtin_vec_ste", void_ftype_opaque_long_pvoid, ALTIVEC_BUILTIN_VEC_STE);
16791 def_builtin ("__builtin_vec_stl", void_ftype_opaque_long_pvoid, ALTIVEC_BUILTIN_VEC_STL);
16792 def_builtin ("__builtin_vec_stvewx", void_ftype_opaque_long_pvoid, ALTIVEC_BUILTIN_VEC_STVEWX);
16793 def_builtin ("__builtin_vec_stvebx", void_ftype_opaque_long_pvoid, ALTIVEC_BUILTIN_VEC_STVEBX);
16794 def_builtin ("__builtin_vec_stvehx", void_ftype_opaque_long_pvoid, ALTIVEC_BUILTIN_VEC_STVEHX);
16796 def_builtin ("__builtin_vsx_lxvd2x_v2df", v2df_ftype_long_pcvoid,
16797 VSX_BUILTIN_LXVD2X_V2DF);
16798 def_builtin ("__builtin_vsx_lxvd2x_v2di", v2di_ftype_long_pcvoid,
16799 VSX_BUILTIN_LXVD2X_V2DI);
16800 def_builtin ("__builtin_vsx_lxvw4x_v4sf", v4sf_ftype_long_pcvoid,
16801 VSX_BUILTIN_LXVW4X_V4SF);
16802 def_builtin ("__builtin_vsx_lxvw4x_v4si", v4si_ftype_long_pcvoid,
16803 VSX_BUILTIN_LXVW4X_V4SI);
16804 def_builtin ("__builtin_vsx_lxvw4x_v8hi", v8hi_ftype_long_pcvoid,
16805 VSX_BUILTIN_LXVW4X_V8HI);
16806 def_builtin ("__builtin_vsx_lxvw4x_v16qi", v16qi_ftype_long_pcvoid,
16807 VSX_BUILTIN_LXVW4X_V16QI);
16808 def_builtin ("__builtin_vsx_stxvd2x_v2df", void_ftype_v2df_long_pvoid,
16809 VSX_BUILTIN_STXVD2X_V2DF);
16810 def_builtin ("__builtin_vsx_stxvd2x_v2di", void_ftype_v2di_long_pvoid,
16811 VSX_BUILTIN_STXVD2X_V2DI);
16812 def_builtin ("__builtin_vsx_stxvw4x_v4sf", void_ftype_v4sf_long_pvoid,
16813 VSX_BUILTIN_STXVW4X_V4SF);
16814 def_builtin ("__builtin_vsx_stxvw4x_v4si", void_ftype_v4si_long_pvoid,
16815 VSX_BUILTIN_STXVW4X_V4SI);
16816 def_builtin ("__builtin_vsx_stxvw4x_v8hi", void_ftype_v8hi_long_pvoid,
16817 VSX_BUILTIN_STXVW4X_V8HI);
16818 def_builtin ("__builtin_vsx_stxvw4x_v16qi", void_ftype_v16qi_long_pvoid,
16819 VSX_BUILTIN_STXVW4X_V16QI);
16821 def_builtin ("__builtin_vsx_ld_elemrev_v2df", v2df_ftype_long_pcvoid,
16822 VSX_BUILTIN_LD_ELEMREV_V2DF);
16823 def_builtin ("__builtin_vsx_ld_elemrev_v2di", v2di_ftype_long_pcvoid,
16824 VSX_BUILTIN_LD_ELEMREV_V2DI);
16825 def_builtin ("__builtin_vsx_ld_elemrev_v4sf", v4sf_ftype_long_pcvoid,
16826 VSX_BUILTIN_LD_ELEMREV_V4SF);
16827 def_builtin ("__builtin_vsx_ld_elemrev_v4si", v4si_ftype_long_pcvoid,
16828 VSX_BUILTIN_LD_ELEMREV_V4SI);
16829 def_builtin ("__builtin_vsx_ld_elemrev_v8hi", v8hi_ftype_long_pcvoid,
16830 VSX_BUILTIN_LD_ELEMREV_V8HI);
16831 def_builtin ("__builtin_vsx_ld_elemrev_v16qi", v16qi_ftype_long_pcvoid,
16832 VSX_BUILTIN_LD_ELEMREV_V16QI);
16833 def_builtin ("__builtin_vsx_st_elemrev_v2df", void_ftype_v2df_long_pvoid,
16834 VSX_BUILTIN_ST_ELEMREV_V2DF);
16835 def_builtin ("__builtin_vsx_st_elemrev_v1ti", void_ftype_v1ti_long_pvoid,
16836 VSX_BUILTIN_ST_ELEMREV_V1TI);
16837 def_builtin ("__builtin_vsx_st_elemrev_v2di", void_ftype_v2di_long_pvoid,
16838 VSX_BUILTIN_ST_ELEMREV_V2DI);
16839 def_builtin ("__builtin_vsx_st_elemrev_v4sf", void_ftype_v4sf_long_pvoid,
16840 VSX_BUILTIN_ST_ELEMREV_V4SF);
16841 def_builtin ("__builtin_vsx_st_elemrev_v4si", void_ftype_v4si_long_pvoid,
16842 VSX_BUILTIN_ST_ELEMREV_V4SI);
16843 def_builtin ("__builtin_vsx_st_elemrev_v8hi", void_ftype_v8hi_long_pvoid,
16844 VSX_BUILTIN_ST_ELEMREV_V8HI);
16845 def_builtin ("__builtin_vsx_st_elemrev_v16qi", void_ftype_v16qi_long_pvoid,
16846 VSX_BUILTIN_ST_ELEMREV_V16QI);
16848 def_builtin ("__builtin_vec_vsx_ld", opaque_ftype_long_pcvoid,
16849 VSX_BUILTIN_VEC_LD);
16850 def_builtin ("__builtin_vec_vsx_st", void_ftype_opaque_long_pvoid,
16851 VSX_BUILTIN_VEC_ST);
16852 def_builtin ("__builtin_vec_xl", opaque_ftype_long_pcvoid,
16853 VSX_BUILTIN_VEC_XL);
16854 def_builtin ("__builtin_vec_xl_be", opaque_ftype_long_pcvoid,
16855 VSX_BUILTIN_VEC_XL_BE);
16856 def_builtin ("__builtin_vec_xst", void_ftype_opaque_long_pvoid,
16857 VSX_BUILTIN_VEC_XST);
16858 def_builtin ("__builtin_vec_xst_be", void_ftype_opaque_long_pvoid,
16859 VSX_BUILTIN_VEC_XST_BE);
16861 def_builtin ("__builtin_vec_step", int_ftype_opaque, ALTIVEC_BUILTIN_VEC_STEP);
16862 def_builtin ("__builtin_vec_splats", opaque_ftype_opaque, ALTIVEC_BUILTIN_VEC_SPLATS);
16863 def_builtin ("__builtin_vec_promote", opaque_ftype_opaque, ALTIVEC_BUILTIN_VEC_PROMOTE);
16865 def_builtin ("__builtin_vec_sld", opaque_ftype_opaque_opaque_int, ALTIVEC_BUILTIN_VEC_SLD);
16866 def_builtin ("__builtin_vec_splat", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_SPLAT);
16867 def_builtin ("__builtin_vec_extract", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_EXTRACT);
16868 def_builtin ("__builtin_vec_insert", opaque_ftype_opaque_opaque_int, ALTIVEC_BUILTIN_VEC_INSERT);
16869 def_builtin ("__builtin_vec_vspltw", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_VSPLTW);
16870 def_builtin ("__builtin_vec_vsplth", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_VSPLTH);
16871 def_builtin ("__builtin_vec_vspltb", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_VSPLTB);
16872 def_builtin ("__builtin_vec_ctf", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_CTF);
16873 def_builtin ("__builtin_vec_vcfsx", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_VCFSX);
16874 def_builtin ("__builtin_vec_vcfux", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_VCFUX);
16875 def_builtin ("__builtin_vec_cts", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_CTS);
16876 def_builtin ("__builtin_vec_ctu", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_CTU);
16878 def_builtin ("__builtin_vec_adde", opaque_ftype_opaque_opaque_opaque,
16879 ALTIVEC_BUILTIN_VEC_ADDE);
16880 def_builtin ("__builtin_vec_addec", opaque_ftype_opaque_opaque_opaque,
16881 ALTIVEC_BUILTIN_VEC_ADDEC);
16882 def_builtin ("__builtin_vec_cmpne", opaque_ftype_opaque_opaque,
16883 ALTIVEC_BUILTIN_VEC_CMPNE);
16884 def_builtin ("__builtin_vec_mul", opaque_ftype_opaque_opaque,
16885 ALTIVEC_BUILTIN_VEC_MUL);
16886 def_builtin ("__builtin_vec_sube", opaque_ftype_opaque_opaque_opaque,
16887 ALTIVEC_BUILTIN_VEC_SUBE);
16888 def_builtin ("__builtin_vec_subec", opaque_ftype_opaque_opaque_opaque,
16889 ALTIVEC_BUILTIN_VEC_SUBEC);
16891 /* Cell builtins. */
16892 def_builtin ("__builtin_altivec_lvlx", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVLX);
16893 def_builtin ("__builtin_altivec_lvlxl", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVLXL);
16894 def_builtin ("__builtin_altivec_lvrx", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVRX);
16895 def_builtin ("__builtin_altivec_lvrxl", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVRXL);
16897 def_builtin ("__builtin_vec_lvlx", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LVLX);
16898 def_builtin ("__builtin_vec_lvlxl", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LVLXL);
16899 def_builtin ("__builtin_vec_lvrx", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LVRX);
16900 def_builtin ("__builtin_vec_lvrxl", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LVRXL);
16902 def_builtin ("__builtin_altivec_stvlx", void_ftype_v16qi_long_pvoid, ALTIVEC_BUILTIN_STVLX);
16903 def_builtin ("__builtin_altivec_stvlxl", void_ftype_v16qi_long_pvoid, ALTIVEC_BUILTIN_STVLXL);
16904 def_builtin ("__builtin_altivec_stvrx", void_ftype_v16qi_long_pvoid, ALTIVEC_BUILTIN_STVRX);
16905 def_builtin ("__builtin_altivec_stvrxl", void_ftype_v16qi_long_pvoid, ALTIVEC_BUILTIN_STVRXL);
16907 def_builtin ("__builtin_vec_stvlx", void_ftype_v16qi_long_pvoid, ALTIVEC_BUILTIN_VEC_STVLX);
16908 def_builtin ("__builtin_vec_stvlxl", void_ftype_v16qi_long_pvoid, ALTIVEC_BUILTIN_VEC_STVLXL);
16909 def_builtin ("__builtin_vec_stvrx", void_ftype_v16qi_long_pvoid, ALTIVEC_BUILTIN_VEC_STVRX);
16910 def_builtin ("__builtin_vec_stvrxl", void_ftype_v16qi_long_pvoid, ALTIVEC_BUILTIN_VEC_STVRXL);
16912 if (TARGET_P9_VECTOR)
16914 def_builtin ("__builtin_altivec_stxvl", void_ftype_v16qi_pvoid_long,
16915 P9V_BUILTIN_STXVL);
16916 def_builtin ("__builtin_xst_len_r", void_ftype_v16qi_pvoid_long,
16917 P9V_BUILTIN_XST_LEN_R);
16920 /* Add the DST variants. */
16921 d = bdesc_dst;
16922 for (i = 0; i < ARRAY_SIZE (bdesc_dst); i++, d++)
16924 HOST_WIDE_INT mask = d->mask;
16926 /* It is expected that these dst built-in functions may have
16927 d->icode equal to CODE_FOR_nothing. */
16928 if ((mask & builtin_mask) != mask)
16930 if (TARGET_DEBUG_BUILTIN)
16931 fprintf (stderr, "altivec_init_builtins, skip dst %s\n",
16932 d->name);
16933 continue;
16935 def_builtin (d->name, void_ftype_pcvoid_int_int, d->code);
16938 /* Initialize the predicates. */
16939 d = bdesc_altivec_preds;
16940 for (i = 0; i < ARRAY_SIZE (bdesc_altivec_preds); i++, d++)
16942 machine_mode mode1;
16943 tree type;
16944 HOST_WIDE_INT mask = d->mask;
16946 if ((mask & builtin_mask) != mask)
16948 if (TARGET_DEBUG_BUILTIN)
16949 fprintf (stderr, "altivec_init_builtins, skip predicate %s\n",
16950 d->name);
16951 continue;
16954 if (rs6000_overloaded_builtin_p (d->code))
16955 mode1 = VOIDmode;
16956 else
16958 /* Cannot define builtin if the instruction is disabled. */
16959 gcc_assert (d->icode != CODE_FOR_nothing);
16960 mode1 = insn_data[d->icode].operand[1].mode;
16963 switch (mode1)
16965 case E_VOIDmode:
16966 type = int_ftype_int_opaque_opaque;
16967 break;
16968 case E_V2DImode:
16969 type = int_ftype_int_v2di_v2di;
16970 break;
16971 case E_V4SImode:
16972 type = int_ftype_int_v4si_v4si;
16973 break;
16974 case E_V8HImode:
16975 type = int_ftype_int_v8hi_v8hi;
16976 break;
16977 case E_V16QImode:
16978 type = int_ftype_int_v16qi_v16qi;
16979 break;
16980 case E_V4SFmode:
16981 type = int_ftype_int_v4sf_v4sf;
16982 break;
16983 case E_V2DFmode:
16984 type = int_ftype_int_v2df_v2df;
16985 break;
16986 default:
16987 gcc_unreachable ();
16990 def_builtin (d->name, type, d->code);
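  /* Illustrative sketch, not part of the original source: every AltiVec
     predicate carries a leading integer selecting which CR6 condition
     to test, which is how altivec.h implements the vec_all_* and
     vec_any_* interfaces, e.g.

       __builtin_altivec_vcmpequw_p (cr6_sel, a, b)

     where cr6_sel stands in for one of the __CR6_* selectors; hence
     the int_ftype_int_* signatures chosen above.  */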
16993 /* Initialize the abs* operators. */
16994 d = bdesc_abs;
16995 for (i = 0; i < ARRAY_SIZE (bdesc_abs); i++, d++)
16997 machine_mode mode0;
16998 tree type;
16999 HOST_WIDE_INT mask = d->mask;
17001 if ((mask & builtin_mask) != mask)
17003 if (TARGET_DEBUG_BUILTIN)
17004 fprintf (stderr, "altivec_init_builtins, skip abs %s\n",
17005 d->name);
17006 continue;
17009 /* Cannot define builtin if the instruction is disabled. */
17010 gcc_assert (d->icode != CODE_FOR_nothing);
17011 mode0 = insn_data[d->icode].operand[0].mode;
17013 switch (mode0)
17015 case E_V2DImode:
17016 type = v2di_ftype_v2di;
17017 break;
17018 case E_V4SImode:
17019 type = v4si_ftype_v4si;
17020 break;
17021 case E_V8HImode:
17022 type = v8hi_ftype_v8hi;
17023 break;
17024 case E_V16QImode:
17025 type = v16qi_ftype_v16qi;
17026 break;
17027 case E_V4SFmode:
17028 type = v4sf_ftype_v4sf;
17029 break;
17030 case E_V2DFmode:
17031 type = v2df_ftype_v2df;
17032 break;
17033 default:
17034 gcc_unreachable ();
17037 def_builtin (d->name, type, d->code);
17040 /* Initialize target builtin that implements
17041 targetm.vectorize.builtin_mask_for_load. */
17043 decl = add_builtin_function ("__builtin_altivec_mask_for_load",
17044 v16qi_ftype_long_pcvoid,
17045 ALTIVEC_BUILTIN_MASK_FOR_LOAD,
17046 BUILT_IN_MD, NULL, NULL_TREE);
17047 TREE_READONLY (decl) = 1;
17048 /* Record the decl. Will be used by rs6000_builtin_mask_for_load. */
17049 altivec_builtin_mask_for_load = decl;
17051 /* Access to the vec_init patterns. */
17052 ftype = build_function_type_list (V4SI_type_node, integer_type_node,
17053 integer_type_node, integer_type_node,
17054 integer_type_node, NULL_TREE);
17055 def_builtin ("__builtin_vec_init_v4si", ftype, ALTIVEC_BUILTIN_VEC_INIT_V4SI);
17057 ftype = build_function_type_list (V8HI_type_node, short_integer_type_node,
17058 short_integer_type_node,
17059 short_integer_type_node,
17060 short_integer_type_node,
17061 short_integer_type_node,
17062 short_integer_type_node,
17063 short_integer_type_node,
17064 short_integer_type_node, NULL_TREE);
17065 def_builtin ("__builtin_vec_init_v8hi", ftype, ALTIVEC_BUILTIN_VEC_INIT_V8HI);
17067 ftype = build_function_type_list (V16QI_type_node, char_type_node,
17068 char_type_node, char_type_node,
17069 char_type_node, char_type_node,
17070 char_type_node, char_type_node,
17071 char_type_node, char_type_node,
17072 char_type_node, char_type_node,
17073 char_type_node, char_type_node,
17074 char_type_node, char_type_node,
17075 char_type_node, NULL_TREE);
17076 def_builtin ("__builtin_vec_init_v16qi", ftype,
17077 ALTIVEC_BUILTIN_VEC_INIT_V16QI);
17079 ftype = build_function_type_list (V4SF_type_node, float_type_node,
17080 float_type_node, float_type_node,
17081 float_type_node, NULL_TREE);
17082 def_builtin ("__builtin_vec_init_v4sf", ftype, ALTIVEC_BUILTIN_VEC_INIT_V4SF);
17084 /* VSX builtins. */
17085 ftype = build_function_type_list (V2DF_type_node, double_type_node,
17086 double_type_node, NULL_TREE);
17087 def_builtin ("__builtin_vec_init_v2df", ftype, VSX_BUILTIN_VEC_INIT_V2DF);
17089 ftype = build_function_type_list (V2DI_type_node, intDI_type_node,
17090 intDI_type_node, NULL_TREE);
17091 def_builtin ("__builtin_vec_init_v2di", ftype, VSX_BUILTIN_VEC_INIT_V2DI);
17093 /* Access to the vec_set patterns. */
17094 ftype = build_function_type_list (V4SI_type_node, V4SI_type_node,
17095 intSI_type_node,
17096 integer_type_node, NULL_TREE);
17097 def_builtin ("__builtin_vec_set_v4si", ftype, ALTIVEC_BUILTIN_VEC_SET_V4SI);
17099 ftype = build_function_type_list (V8HI_type_node, V8HI_type_node,
17100 intHI_type_node,
17101 integer_type_node, NULL_TREE);
17102 def_builtin ("__builtin_vec_set_v8hi", ftype, ALTIVEC_BUILTIN_VEC_SET_V8HI);
17104 ftype = build_function_type_list (V16QI_type_node, V16QI_type_node,
17105 intQI_type_node,
17106 integer_type_node, NULL_TREE);
17107 def_builtin ("__builtin_vec_set_v16qi", ftype, ALTIVEC_BUILTIN_VEC_SET_V16QI);
17109 ftype = build_function_type_list (V4SF_type_node, V4SF_type_node,
17110 float_type_node,
17111 integer_type_node, NULL_TREE);
17112 def_builtin ("__builtin_vec_set_v4sf", ftype, ALTIVEC_BUILTIN_VEC_SET_V4SF);
17114 ftype = build_function_type_list (V2DF_type_node, V2DF_type_node,
17115 double_type_node,
17116 integer_type_node, NULL_TREE);
17117 def_builtin ("__builtin_vec_set_v2df", ftype, VSX_BUILTIN_VEC_SET_V2DF);
17119 ftype = build_function_type_list (V2DI_type_node, V2DI_type_node,
17120 intDI_type_node,
17121 integer_type_node, NULL_TREE);
17122 def_builtin ("__builtin_vec_set_v2di", ftype, VSX_BUILTIN_VEC_SET_V2DI);
17124 /* Access to the vec_extract patterns. */
17125 ftype = build_function_type_list (intSI_type_node, V4SI_type_node,
17126 integer_type_node, NULL_TREE);
17127 def_builtin ("__builtin_vec_ext_v4si", ftype, ALTIVEC_BUILTIN_VEC_EXT_V4SI);
17129 ftype = build_function_type_list (intHI_type_node, V8HI_type_node,
17130 integer_type_node, NULL_TREE);
17131 def_builtin ("__builtin_vec_ext_v8hi", ftype, ALTIVEC_BUILTIN_VEC_EXT_V8HI);
17133 ftype = build_function_type_list (intQI_type_node, V16QI_type_node,
17134 integer_type_node, NULL_TREE);
17135 def_builtin ("__builtin_vec_ext_v16qi", ftype, ALTIVEC_BUILTIN_VEC_EXT_V16QI);
17137 ftype = build_function_type_list (float_type_node, V4SF_type_node,
17138 integer_type_node, NULL_TREE);
17139 def_builtin ("__builtin_vec_ext_v4sf", ftype, ALTIVEC_BUILTIN_VEC_EXT_V4SF);
17141 ftype = build_function_type_list (double_type_node, V2DF_type_node,
17142 integer_type_node, NULL_TREE);
17143 def_builtin ("__builtin_vec_ext_v2df", ftype, VSX_BUILTIN_VEC_EXT_V2DF);
17145 ftype = build_function_type_list (intDI_type_node, V2DI_type_node,
17146 integer_type_node, NULL_TREE);
17147 def_builtin ("__builtin_vec_ext_v2di", ftype, VSX_BUILTIN_VEC_EXT_V2DI);
17150 if (V1TI_type_node)
17152 tree v1ti_ftype_long_pcvoid
17153 = build_function_type_list (V1TI_type_node,
17154 long_integer_type_node, pcvoid_type_node,
17155 NULL_TREE);
17156 tree void_ftype_v1ti_long_pvoid
17157 = build_function_type_list (void_type_node,
17158 V1TI_type_node, long_integer_type_node,
17159 pvoid_type_node, NULL_TREE);
17160 def_builtin ("__builtin_vsx_ld_elemrev_v1ti", v1ti_ftype_long_pcvoid,
17161 VSX_BUILTIN_LD_ELEMREV_V1TI);
17162 def_builtin ("__builtin_vsx_lxvd2x_v1ti", v1ti_ftype_long_pcvoid,
17163 VSX_BUILTIN_LXVD2X_V1TI);
17164 def_builtin ("__builtin_vsx_stxvd2x_v1ti", void_ftype_v1ti_long_pvoid,
17165 VSX_BUILTIN_STXVD2X_V1TI);
17166 ftype = build_function_type_list (V1TI_type_node, intTI_type_node,
17167 NULL_TREE, NULL_TREE);
17168 def_builtin ("__builtin_vec_init_v1ti", ftype, VSX_BUILTIN_VEC_INIT_V1TI);
17169 ftype = build_function_type_list (V1TI_type_node, V1TI_type_node,
17170 intTI_type_node,
17171 integer_type_node, NULL_TREE);
17172 def_builtin ("__builtin_vec_set_v1ti", ftype, VSX_BUILTIN_VEC_SET_V1TI);
17173 ftype = build_function_type_list (intTI_type_node, V1TI_type_node,
17174 integer_type_node, NULL_TREE);
17175 def_builtin ("__builtin_vec_ext_v1ti", ftype, VSX_BUILTIN_VEC_EXT_V1TI);
17180 static void
17181 htm_init_builtins (void)
17183 HOST_WIDE_INT builtin_mask = rs6000_builtin_mask;
17184 const struct builtin_description *d;
17185 size_t i;
17187 d = bdesc_htm;
17188 for (i = 0; i < ARRAY_SIZE (bdesc_htm); i++, d++)
17190 tree op[MAX_HTM_OPERANDS], type;
17191 HOST_WIDE_INT mask = d->mask;
17192 unsigned attr = rs6000_builtin_info[d->code].attr;
17193 bool void_func = (attr & RS6000_BTC_VOID);
17194 int attr_args = (attr & RS6000_BTC_TYPE_MASK);
17195 int nopnds = 0;
17196 tree gpr_type_node;
17197 tree rettype;
17198 tree argtype;
17200 /* It is expected that these htm built-in functions may have
17201 d->icode equal to CODE_FOR_nothing. */
17203 if (TARGET_32BIT && TARGET_POWERPC64)
17204 gpr_type_node = long_long_unsigned_type_node;
17205 else
17206 gpr_type_node = long_unsigned_type_node;
17208 if (attr & RS6000_BTC_SPR)
17210 rettype = gpr_type_node;
17211 argtype = gpr_type_node;
17213 else if (d->code == HTM_BUILTIN_TABORTDC
17214 || d->code == HTM_BUILTIN_TABORTDCI)
17216 rettype = unsigned_type_node;
17217 argtype = gpr_type_node;
17219 else
17221 rettype = unsigned_type_node;
17222 argtype = unsigned_type_node;
17225 if ((mask & builtin_mask) != mask)
17227 if (TARGET_DEBUG_BUILTIN)
17228 fprintf (stderr, "htm_builtin, skip binary %s\n", d->name);
17229 continue;
17232 if (d->name == 0)
17234 if (TARGET_DEBUG_BUILTIN)
17235 fprintf (stderr, "htm_builtin, bdesc_htm[%ld] no name\n",
17236 (long unsigned) i);
17237 continue;
17240 op[nopnds++] = (void_func) ? void_type_node : rettype;
17242 if (attr_args == RS6000_BTC_UNARY)
17243 op[nopnds++] = argtype;
17244 else if (attr_args == RS6000_BTC_BINARY)
17246 op[nopnds++] = argtype;
17247 op[nopnds++] = argtype;
17249 else if (attr_args == RS6000_BTC_TERNARY)
17251 op[nopnds++] = argtype;
17252 op[nopnds++] = argtype;
17253 op[nopnds++] = argtype;
17256 switch (nopnds)
17258 case 1:
17259 type = build_function_type_list (op[0], NULL_TREE);
17260 break;
17261 case 2:
17262 type = build_function_type_list (op[0], op[1], NULL_TREE);
17263 break;
17264 case 3:
17265 type = build_function_type_list (op[0], op[1], op[2], NULL_TREE);
17266 break;
17267 case 4:
17268 type = build_function_type_list (op[0], op[1], op[2], op[3],
17269 NULL_TREE);
17270 break;
17271 default:
17272 gcc_unreachable ();
17275 def_builtin (d->name, type, d->code);
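  /* Sketch of the signature selection above (illustrative, using real
     builtin names but simplified types): an SPR accessor such as
     __builtin_get_texasr gets the GPR-sized type, i.e. unsigned long
     long on a 32-bit ABI with -mpowerpc64 and unsigned long otherwise,
     while a plain HTM builtin such as __builtin_tbegin uses unsigned
     int throughout.  */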
17279 /* Hash function for builtin functions with up to 3 arguments and a return
17280 type. */
17281 hashval_t
17282 builtin_hasher::hash (builtin_hash_struct *bh)
17284 unsigned ret = 0;
17285 int i;
17287 for (i = 0; i < 4; i++)
17289 ret = (ret * (unsigned)MAX_MACHINE_MODE) + ((unsigned)bh->mode[i]);
17290 ret = (ret * 2) + bh->uns_p[i];
17293 return ret;
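/* Worked example (illustrative): the loop accumulates a mixed-radix
   number, alternating base MAX_MACHINE_MODE (call it M) for the modes
   and base 2 for the unsignedness flags:

     ret = ((((mode[0] * 2 + uns_p[0]) * M + mode[1]) * 2 + uns_p[1])
            * M + ...)

   so two signatures differing in any one mode or flag hash differently,
   modulo unsigned overflow wrap-around.  */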
17296 /* Compare builtin hash entries H1 and H2 for equivalence. */
17297 bool
17298 builtin_hasher::equal (builtin_hash_struct *p1, builtin_hash_struct *p2)
17300 return ((p1->mode[0] == p2->mode[0])
17301 && (p1->mode[1] == p2->mode[1])
17302 && (p1->mode[2] == p2->mode[2])
17303 && (p1->mode[3] == p2->mode[3])
17304 && (p1->uns_p[0] == p2->uns_p[0])
17305 && (p1->uns_p[1] == p2->uns_p[1])
17306 && (p1->uns_p[2] == p2->uns_p[2])
17307 && (p1->uns_p[3] == p2->uns_p[3]));
17310 /* Map types for builtin functions with an explicit return type and up to 3
17311 arguments. Functions with fewer than 3 arguments use VOIDmode as the mode
17312 of the unused arguments. */
17313 static tree
17314 builtin_function_type (machine_mode mode_ret, machine_mode mode_arg0,
17315 machine_mode mode_arg1, machine_mode mode_arg2,
17316 enum rs6000_builtins builtin, const char *name)
17318 struct builtin_hash_struct h;
17319 struct builtin_hash_struct *h2;
17320 int num_args = 3;
17321 int i;
17322 tree ret_type = NULL_TREE;
17323 tree arg_type[3] = { NULL_TREE, NULL_TREE, NULL_TREE };
17325 /* Create builtin_hash_table. */
17326 if (builtin_hash_table == NULL)
17327 builtin_hash_table = hash_table<builtin_hasher>::create_ggc (1500);
17329 h.type = NULL_TREE;
17330 h.mode[0] = mode_ret;
17331 h.mode[1] = mode_arg0;
17332 h.mode[2] = mode_arg1;
17333 h.mode[3] = mode_arg2;
17334 h.uns_p[0] = 0;
17335 h.uns_p[1] = 0;
17336 h.uns_p[2] = 0;
17337 h.uns_p[3] = 0;
17339 /* If the builtin produces unsigned results or takes unsigned arguments,
17340 and it is returned as a decl for the vectorizer (such as widening
17341 multiplies or permute), make sure the arguments and return value are
17342 correctly typed. */
17343 switch (builtin)
17345 /* unsigned 1 argument functions. */
17346 case CRYPTO_BUILTIN_VSBOX:
17347 case P8V_BUILTIN_VGBBD:
17348 case MISC_BUILTIN_CDTBCD:
17349 case MISC_BUILTIN_CBCDTD:
17350 h.uns_p[0] = 1;
17351 h.uns_p[1] = 1;
17352 break;
17354 /* unsigned 2 argument functions. */
17355 case ALTIVEC_BUILTIN_VMULEUB:
17356 case ALTIVEC_BUILTIN_VMULEUH:
17357 case P8V_BUILTIN_VMULEUW:
17358 case ALTIVEC_BUILTIN_VMULOUB:
17359 case ALTIVEC_BUILTIN_VMULOUH:
17360 case P8V_BUILTIN_VMULOUW:
17361 case CRYPTO_BUILTIN_VCIPHER:
17362 case CRYPTO_BUILTIN_VCIPHERLAST:
17363 case CRYPTO_BUILTIN_VNCIPHER:
17364 case CRYPTO_BUILTIN_VNCIPHERLAST:
17365 case CRYPTO_BUILTIN_VPMSUMB:
17366 case CRYPTO_BUILTIN_VPMSUMH:
17367 case CRYPTO_BUILTIN_VPMSUMW:
17368 case CRYPTO_BUILTIN_VPMSUMD:
17369 case CRYPTO_BUILTIN_VPMSUM:
17370 case MISC_BUILTIN_ADDG6S:
17371 case MISC_BUILTIN_DIVWEU:
17372 case MISC_BUILTIN_DIVDEU:
17373 case VSX_BUILTIN_UDIV_V2DI:
17374 case ALTIVEC_BUILTIN_VMAXUB:
17375 case ALTIVEC_BUILTIN_VMINUB:
17376 case ALTIVEC_BUILTIN_VMAXUH:
17377 case ALTIVEC_BUILTIN_VMINUH:
17378 case ALTIVEC_BUILTIN_VMAXUW:
17379 case ALTIVEC_BUILTIN_VMINUW:
17380 case P8V_BUILTIN_VMAXUD:
17381 case P8V_BUILTIN_VMINUD:
17382 h.uns_p[0] = 1;
17383 h.uns_p[1] = 1;
17384 h.uns_p[2] = 1;
17385 break;
17387 /* unsigned 3 argument functions. */
17388 case ALTIVEC_BUILTIN_VPERM_16QI_UNS:
17389 case ALTIVEC_BUILTIN_VPERM_8HI_UNS:
17390 case ALTIVEC_BUILTIN_VPERM_4SI_UNS:
17391 case ALTIVEC_BUILTIN_VPERM_2DI_UNS:
17392 case ALTIVEC_BUILTIN_VSEL_16QI_UNS:
17393 case ALTIVEC_BUILTIN_VSEL_8HI_UNS:
17394 case ALTIVEC_BUILTIN_VSEL_4SI_UNS:
17395 case ALTIVEC_BUILTIN_VSEL_2DI_UNS:
17396 case VSX_BUILTIN_VPERM_16QI_UNS:
17397 case VSX_BUILTIN_VPERM_8HI_UNS:
17398 case VSX_BUILTIN_VPERM_4SI_UNS:
17399 case VSX_BUILTIN_VPERM_2DI_UNS:
17400 case VSX_BUILTIN_XXSEL_16QI_UNS:
17401 case VSX_BUILTIN_XXSEL_8HI_UNS:
17402 case VSX_BUILTIN_XXSEL_4SI_UNS:
17403 case VSX_BUILTIN_XXSEL_2DI_UNS:
17404 case CRYPTO_BUILTIN_VPERMXOR:
17405 case CRYPTO_BUILTIN_VPERMXOR_V2DI:
17406 case CRYPTO_BUILTIN_VPERMXOR_V4SI:
17407 case CRYPTO_BUILTIN_VPERMXOR_V8HI:
17408 case CRYPTO_BUILTIN_VPERMXOR_V16QI:
17409 case CRYPTO_BUILTIN_VSHASIGMAW:
17410 case CRYPTO_BUILTIN_VSHASIGMAD:
17411 case CRYPTO_BUILTIN_VSHASIGMA:
17412 h.uns_p[0] = 1;
17413 h.uns_p[1] = 1;
17414 h.uns_p[2] = 1;
17415 h.uns_p[3] = 1;
17416 break;
17418 /* signed permute functions with unsigned char mask. */
17419 case ALTIVEC_BUILTIN_VPERM_16QI:
17420 case ALTIVEC_BUILTIN_VPERM_8HI:
17421 case ALTIVEC_BUILTIN_VPERM_4SI:
17422 case ALTIVEC_BUILTIN_VPERM_4SF:
17423 case ALTIVEC_BUILTIN_VPERM_2DI:
17424 case ALTIVEC_BUILTIN_VPERM_2DF:
17425 case VSX_BUILTIN_VPERM_16QI:
17426 case VSX_BUILTIN_VPERM_8HI:
17427 case VSX_BUILTIN_VPERM_4SI:
17428 case VSX_BUILTIN_VPERM_4SF:
17429 case VSX_BUILTIN_VPERM_2DI:
17430 case VSX_BUILTIN_VPERM_2DF:
17431 h.uns_p[3] = 1;
17432 break;
17434 /* unsigned args, signed return. */
17435 case VSX_BUILTIN_XVCVUXDSP:
17436 case VSX_BUILTIN_XVCVUXDDP_UNS:
17437 case ALTIVEC_BUILTIN_UNSFLOAT_V4SI_V4SF:
17438 h.uns_p[1] = 1;
17439 break;
17441 /* signed args, unsigned return. */
17442 case VSX_BUILTIN_XVCVDPUXDS_UNS:
17443 case ALTIVEC_BUILTIN_FIXUNS_V4SF_V4SI:
17444 case MISC_BUILTIN_UNPACK_TD:
17445 case MISC_BUILTIN_UNPACK_V1TI:
17446 h.uns_p[0] = 1;
17447 break;
17449 /* unsigned arguments, bool return (compares). */
17450 case ALTIVEC_BUILTIN_VCMPEQUB:
17451 case ALTIVEC_BUILTIN_VCMPEQUH:
17452 case ALTIVEC_BUILTIN_VCMPEQUW:
17453 case P8V_BUILTIN_VCMPEQUD:
17454 case VSX_BUILTIN_CMPGE_U16QI:
17455 case VSX_BUILTIN_CMPGE_U8HI:
17456 case VSX_BUILTIN_CMPGE_U4SI:
17457 case VSX_BUILTIN_CMPGE_U2DI:
17458 case ALTIVEC_BUILTIN_VCMPGTUB:
17459 case ALTIVEC_BUILTIN_VCMPGTUH:
17460 case ALTIVEC_BUILTIN_VCMPGTUW:
17461 case P8V_BUILTIN_VCMPGTUD:
17462 h.uns_p[1] = 1;
17463 h.uns_p[2] = 1;
17464 break;
17466 /* unsigned arguments for 128-bit pack instructions. */
17467 case MISC_BUILTIN_PACK_TD:
17468 case MISC_BUILTIN_PACK_V1TI:
17469 h.uns_p[1] = 1;
17470 h.uns_p[2] = 1;
17471 break;
17473 /* unsigned second arguments (vector shift right). */
17474 case ALTIVEC_BUILTIN_VSRB:
17475 case ALTIVEC_BUILTIN_VSRH:
17476 case ALTIVEC_BUILTIN_VSRW:
17477 case P8V_BUILTIN_VSRD:
17478 h.uns_p[2] = 1;
17479 break;
17481 default:
17482 break;
17485 /* Figure out how many args are present. */
17486 while (num_args > 0 && h.mode[num_args] == VOIDmode)
17487 num_args--;
17489 ret_type = builtin_mode_to_type[h.mode[0]][h.uns_p[0]];
17490 if (!ret_type && h.uns_p[0])
17491 ret_type = builtin_mode_to_type[h.mode[0]][0];
17493 if (!ret_type)
17494 fatal_error (input_location,
17495 "internal error: builtin function %qs had an unexpected "
17496 "return type %qs", name, GET_MODE_NAME (h.mode[0]));
17498 for (i = 0; i < (int) ARRAY_SIZE (arg_type); i++)
17499 arg_type[i] = NULL_TREE;
17501 for (i = 0; i < num_args; i++)
17503 int m = (int) h.mode[i+1];
17504 int uns_p = h.uns_p[i+1];
17506 arg_type[i] = builtin_mode_to_type[m][uns_p];
17507 if (!arg_type[i] && uns_p)
17508 arg_type[i] = builtin_mode_to_type[m][0];
17510 if (!arg_type[i])
17511 fatal_error (input_location,
17512 "internal error: builtin function %qs, argument %d "
17513 "had unexpected argument type %qs", name, i,
17514 GET_MODE_NAME (m));
17517 builtin_hash_struct **found = builtin_hash_table->find_slot (&h, INSERT);
17518 if (*found == NULL)
17520 h2 = ggc_alloc<builtin_hash_struct> ();
17521 *h2 = h;
17522 *found = h2;
17524 h2->type = build_function_type_list (ret_type, arg_type[0], arg_type[1],
17525 arg_type[2], NULL_TREE);
17528 return (*found)->type;
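/* Illustrative example, not part of the original source: for
   ALTIVEC_BUILTIN_VMULEUB the switch above sets uns_p[0..2], so the
   V8HImode result and both V16QImode arguments resolve to their
   unsigned variants, giving roughly

     vector unsigned short (vector unsigned char, vector unsigned char)

   and the hash table ensures every builtin with the same
   mode/signedness signature shares one FUNCTION_TYPE node.  */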
17531 static void
17532 rs6000_common_init_builtins (void)
17534 const struct builtin_description *d;
17535 size_t i;
17537 tree opaque_ftype_opaque = NULL_TREE;
17538 tree opaque_ftype_opaque_opaque = NULL_TREE;
17539 tree opaque_ftype_opaque_opaque_opaque = NULL_TREE;
17540 HOST_WIDE_INT builtin_mask = rs6000_builtin_mask;
17542 /* Create Altivec and VSX builtins on machines with at least the
17543 general purpose extensions (970 and newer) to allow the use of
17544 the target attribute. */
17546 if (TARGET_EXTRA_BUILTINS)
17547 builtin_mask |= RS6000_BTM_COMMON;
17549 /* Add the ternary operators. */
17550 d = bdesc_3arg;
17551 for (i = 0; i < ARRAY_SIZE (bdesc_3arg); i++, d++)
17553 tree type;
17554 HOST_WIDE_INT mask = d->mask;
17556 if ((mask & builtin_mask) != mask)
17558 if (TARGET_DEBUG_BUILTIN)
17559 fprintf (stderr, "rs6000_builtin, skip ternary %s\n", d->name);
17560 continue;
17563 if (rs6000_overloaded_builtin_p (d->code))
17565 if (! (type = opaque_ftype_opaque_opaque_opaque))
17566 type = opaque_ftype_opaque_opaque_opaque
17567 = build_function_type_list (opaque_V4SI_type_node,
17568 opaque_V4SI_type_node,
17569 opaque_V4SI_type_node,
17570 opaque_V4SI_type_node,
17571 NULL_TREE);
17573 else
17575 enum insn_code icode = d->icode;
17576 if (d->name == 0)
17578 if (TARGET_DEBUG_BUILTIN)
17579 fprintf (stderr, "rs6000_builtin, bdesc_3arg[%ld] no name\n",
17580 (long unsigned)i);
17582 continue;
17585 if (icode == CODE_FOR_nothing)
17587 if (TARGET_DEBUG_BUILTIN)
17588 fprintf (stderr, "rs6000_builtin, skip ternary %s (no code)\n",
17589 d->name);
17591 continue;
17594 type = builtin_function_type (insn_data[icode].operand[0].mode,
17595 insn_data[icode].operand[1].mode,
17596 insn_data[icode].operand[2].mode,
17597 insn_data[icode].operand[3].mode,
17598 d->code, d->name);
17601 def_builtin (d->name, type, d->code);
17604 /* Add the binary operators. */
17605 d = bdesc_2arg;
17606 for (i = 0; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
17608 machine_mode mode0, mode1, mode2;
17609 tree type;
17610 HOST_WIDE_INT mask = d->mask;
17612 if ((mask & builtin_mask) != mask)
17614 if (TARGET_DEBUG_BUILTIN)
17615 fprintf (stderr, "rs6000_builtin, skip binary %s\n", d->name);
17616 continue;
17619 if (rs6000_overloaded_builtin_p (d->code))
17621 if (! (type = opaque_ftype_opaque_opaque))
17622 type = opaque_ftype_opaque_opaque
17623 = build_function_type_list (opaque_V4SI_type_node,
17624 opaque_V4SI_type_node,
17625 opaque_V4SI_type_node,
17626 NULL_TREE);
17628 else
17630 enum insn_code icode = d->icode;
17631 if (d->name == 0)
17633 if (TARGET_DEBUG_BUILTIN)
17634 fprintf (stderr, "rs6000_builtin, bdesc_2arg[%ld] no name\n",
17635 (long unsigned)i);
17637 continue;
17640 if (icode == CODE_FOR_nothing)
17642 if (TARGET_DEBUG_BUILTIN)
17643 fprintf (stderr, "rs6000_builtin, skip binary %s (no code)\n",
17644 d->name);
17646 continue;
17649 mode0 = insn_data[icode].operand[0].mode;
17650 mode1 = insn_data[icode].operand[1].mode;
17651 mode2 = insn_data[icode].operand[2].mode;
17653 type = builtin_function_type (mode0, mode1, mode2, VOIDmode,
17654 d->code, d->name);
17657 def_builtin (d->name, type, d->code);
17660 /* Add the simple unary operators. */
17661 d = bdesc_1arg;
17662 for (i = 0; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
17664 machine_mode mode0, mode1;
17665 tree type;
17666 HOST_WIDE_INT mask = d->mask;
17668 if ((mask & builtin_mask) != mask)
17670 if (TARGET_DEBUG_BUILTIN)
17671 fprintf (stderr, "rs6000_builtin, skip unary %s\n", d->name);
17672 continue;
17675 if (rs6000_overloaded_builtin_p (d->code))
17677 if (! (type = opaque_ftype_opaque))
17678 type = opaque_ftype_opaque
17679 = build_function_type_list (opaque_V4SI_type_node,
17680 opaque_V4SI_type_node,
17681 NULL_TREE);
17683 else
17685 enum insn_code icode = d->icode;
17686 if (d->name == 0)
17688 if (TARGET_DEBUG_BUILTIN)
17689 fprintf (stderr, "rs6000_builtin, bdesc_1arg[%ld] no name\n",
17690 (long unsigned)i);
17692 continue;
17695 if (icode == CODE_FOR_nothing)
17697 if (TARGET_DEBUG_BUILTIN)
17698 fprintf (stderr, "rs6000_builtin, skip unary %s (no code)\n",
17699 d->name);
17701 continue;
17704 mode0 = insn_data[icode].operand[0].mode;
17705 mode1 = insn_data[icode].operand[1].mode;
17707 type = builtin_function_type (mode0, mode1, VOIDmode, VOIDmode,
17708 d->code, d->name);
17711 def_builtin (d->name, type, d->code);
17714 /* Add the simple no-argument operators. */
17715 d = bdesc_0arg;
17716 for (i = 0; i < ARRAY_SIZE (bdesc_0arg); i++, d++)
17718 machine_mode mode0;
17719 tree type;
17720 HOST_WIDE_INT mask = d->mask;
17722 if ((mask & builtin_mask) != mask)
17724 if (TARGET_DEBUG_BUILTIN)
17725 fprintf (stderr, "rs6000_builtin, skip no-argument %s\n", d->name);
17726 continue;
17728 if (rs6000_overloaded_builtin_p (d->code))
17730 if (!opaque_ftype_opaque)
17731 opaque_ftype_opaque
17732 = build_function_type_list (opaque_V4SI_type_node, NULL_TREE);
17733 type = opaque_ftype_opaque;
17735 else
17737 enum insn_code icode = d->icode;
17738 if (d->name == 0)
17740 if (TARGET_DEBUG_BUILTIN)
17741 fprintf (stderr, "rs6000_builtin, bdesc_0arg[%lu] no name\n",
17742 (long unsigned) i);
17743 continue;
17745 if (icode == CODE_FOR_nothing)
17747 if (TARGET_DEBUG_BUILTIN)
17748 fprintf (stderr,
17749 "rs6000_builtin, skip no-argument %s (no code)\n",
17750 d->name);
17751 continue;
17753 mode0 = insn_data[icode].operand[0].mode;
17754 type = builtin_function_type (mode0, VOIDmode, VOIDmode, VOIDmode,
17755 d->code, d->name);
17757 def_builtin (d->name, type, d->code);
17761 /* Set up AIX/Darwin/64-bit Linux quad floating point routines. */
17762 static void
17763 init_float128_ibm (machine_mode mode)
17765 if (!TARGET_XL_COMPAT)
17767 set_optab_libfunc (add_optab, mode, "__gcc_qadd");
17768 set_optab_libfunc (sub_optab, mode, "__gcc_qsub");
17769 set_optab_libfunc (smul_optab, mode, "__gcc_qmul");
17770 set_optab_libfunc (sdiv_optab, mode, "__gcc_qdiv");
17772 if (!TARGET_HARD_FLOAT)
17774 set_optab_libfunc (neg_optab, mode, "__gcc_qneg");
17775 set_optab_libfunc (eq_optab, mode, "__gcc_qeq");
17776 set_optab_libfunc (ne_optab, mode, "__gcc_qne");
17777 set_optab_libfunc (gt_optab, mode, "__gcc_qgt");
17778 set_optab_libfunc (ge_optab, mode, "__gcc_qge");
17779 set_optab_libfunc (lt_optab, mode, "__gcc_qlt");
17780 set_optab_libfunc (le_optab, mode, "__gcc_qle");
17781 set_optab_libfunc (unord_optab, mode, "__gcc_qunord");
17783 set_conv_libfunc (sext_optab, mode, SFmode, "__gcc_stoq");
17784 set_conv_libfunc (sext_optab, mode, DFmode, "__gcc_dtoq");
17785 set_conv_libfunc (trunc_optab, SFmode, mode, "__gcc_qtos");
17786 set_conv_libfunc (trunc_optab, DFmode, mode, "__gcc_qtod");
17787 set_conv_libfunc (sfix_optab, SImode, mode, "__gcc_qtoi");
17788 set_conv_libfunc (ufix_optab, SImode, mode, "__gcc_qtou");
17789 set_conv_libfunc (sfloat_optab, mode, SImode, "__gcc_itoq");
17790 set_conv_libfunc (ufloat_optab, mode, SImode, "__gcc_utoq");
17793 else
17795 set_optab_libfunc (add_optab, mode, "_xlqadd");
17796 set_optab_libfunc (sub_optab, mode, "_xlqsub");
17797 set_optab_libfunc (smul_optab, mode, "_xlqmul");
17798 set_optab_libfunc (sdiv_optab, mode, "_xlqdiv");
17801 /* Add various conversions for IFmode to use the traditional TFmode
17802 names. */
17803 if (mode == IFmode)
17805 set_conv_libfunc (sext_optab, mode, SDmode, "__dpd_extendsdtf2");
17806 set_conv_libfunc (sext_optab, mode, DDmode, "__dpd_extendddtf2");
17807 set_conv_libfunc (trunc_optab, mode, TDmode, "__dpd_trunctdtf2");
17808 set_conv_libfunc (trunc_optab, SDmode, mode, "__dpd_trunctfsd2");
17809 set_conv_libfunc (trunc_optab, DDmode, mode, "__dpd_trunctfdd2");
17810 set_conv_libfunc (sext_optab, TDmode, mode, "__dpd_extendtftd2");
17812 if (TARGET_POWERPC64)
17814 set_conv_libfunc (sfix_optab, TImode, mode, "__fixtfti");
17815 set_conv_libfunc (ufix_optab, TImode, mode, "__fixunstfti");
17816 set_conv_libfunc (sfloat_optab, mode, TImode, "__floattitf");
17817 set_conv_libfunc (ufloat_optab, mode, TImode, "__floatuntitf");
17822 /* Create a decl for either complex long double multiply or complex long double
17823 divide when long double is IEEE 128-bit floating point. We can't use
17824 __multc3 and __divtc3 because those names were taken by the original
17825 long double format using IBM extended double. The complex multiply/divide
17826 functions are encoded as builtins with a complex result and 4 scalar inputs. */
17828 static void
17829 create_complex_muldiv (const char *name, built_in_function fncode, tree fntype)
17831 tree fndecl = add_builtin_function (name, fntype, fncode, BUILT_IN_NORMAL,
17832 name, NULL_TREE);
17834 set_builtin_decl (fncode, fndecl, true);
17836 if (TARGET_DEBUG_BUILTIN)
17837 fprintf (stderr, "create complex %s, fncode: %d\n", name, (int) fncode);
17839 return;
17842 /* Set up IEEE 128-bit floating point routines. Use different names if the
17843 arguments can be passed in a vector register. The historical PowerPC
17844 implementation of IEEE 128-bit floating point used _q_<op> for the names, so
17845 continue to use that if we aren't using vector registers to pass IEEE
17846 128-bit floating point. */
17848 static void
17849 init_float128_ieee (machine_mode mode)
17851 if (FLOAT128_VECTOR_P (mode))
17853 /* Set up to call __mulkc3 and __divkc3 under -mabi=ieeelongdouble. */
17854 if (mode == TFmode && TARGET_IEEEQUAD)
17856 built_in_function fncode_mul =
17857 (built_in_function) (BUILT_IN_COMPLEX_MUL_MIN + TCmode
17858 - MIN_MODE_COMPLEX_FLOAT);
17859 built_in_function fncode_div =
17860 (built_in_function) (BUILT_IN_COMPLEX_DIV_MIN + TCmode
17861 - MIN_MODE_COMPLEX_FLOAT);
17863 tree fntype = build_function_type_list (complex_long_double_type_node,
17864 long_double_type_node,
17865 long_double_type_node,
17866 long_double_type_node,
17867 long_double_type_node,
17868 NULL_TREE);
17870 create_complex_muldiv ("__mulkc3", fncode_mul, fntype);
17871 create_complex_muldiv ("__divkc3", fncode_div, fntype);
17874 set_optab_libfunc (add_optab, mode, "__addkf3");
17875 set_optab_libfunc (sub_optab, mode, "__subkf3");
17876 set_optab_libfunc (neg_optab, mode, "__negkf2");
17877 set_optab_libfunc (smul_optab, mode, "__mulkf3");
17878 set_optab_libfunc (sdiv_optab, mode, "__divkf3");
17879 set_optab_libfunc (sqrt_optab, mode, "__sqrtkf2");
17880 set_optab_libfunc (abs_optab, mode, "__abskf2");
17881 set_optab_libfunc (powi_optab, mode, "__powikf2");
17883 set_optab_libfunc (eq_optab, mode, "__eqkf2");
17884 set_optab_libfunc (ne_optab, mode, "__nekf2");
17885 set_optab_libfunc (gt_optab, mode, "__gtkf2");
17886 set_optab_libfunc (ge_optab, mode, "__gekf2");
17887 set_optab_libfunc (lt_optab, mode, "__ltkf2");
17888 set_optab_libfunc (le_optab, mode, "__lekf2");
17889 set_optab_libfunc (unord_optab, mode, "__unordkf2");
17891 set_conv_libfunc (sext_optab, mode, SFmode, "__extendsfkf2");
17892 set_conv_libfunc (sext_optab, mode, DFmode, "__extenddfkf2");
17893 set_conv_libfunc (trunc_optab, SFmode, mode, "__trunckfsf2");
17894 set_conv_libfunc (trunc_optab, DFmode, mode, "__trunckfdf2");
17896 set_conv_libfunc (sext_optab, mode, IFmode, "__trunctfkf2");
17897 if (mode != TFmode && FLOAT128_IBM_P (TFmode))
17898 set_conv_libfunc (sext_optab, mode, TFmode, "__trunctfkf2");
17900 set_conv_libfunc (trunc_optab, IFmode, mode, "__extendkftf2");
17901 if (mode != TFmode && FLOAT128_IBM_P (TFmode))
17902 set_conv_libfunc (trunc_optab, TFmode, mode, "__extendkftf2");
17904 set_conv_libfunc (sext_optab, mode, SDmode, "__dpd_extendsdkf2");
17905 set_conv_libfunc (sext_optab, mode, DDmode, "__dpd_extendddkf2");
17906 set_conv_libfunc (trunc_optab, mode, TDmode, "__dpd_trunctdkf2");
17907 set_conv_libfunc (trunc_optab, SDmode, mode, "__dpd_trunckfsd2");
17908 set_conv_libfunc (trunc_optab, DDmode, mode, "__dpd_trunckfdd2");
17909 set_conv_libfunc (sext_optab, TDmode, mode, "__dpd_extendkftd2");
17911 set_conv_libfunc (sfix_optab, SImode, mode, "__fixkfsi");
17912 set_conv_libfunc (ufix_optab, SImode, mode, "__fixunskfsi");
17913 set_conv_libfunc (sfix_optab, DImode, mode, "__fixkfdi");
17914 set_conv_libfunc (ufix_optab, DImode, mode, "__fixunskfdi");
17916 set_conv_libfunc (sfloat_optab, mode, SImode, "__floatsikf");
17917 set_conv_libfunc (ufloat_optab, mode, SImode, "__floatunsikf");
17918 set_conv_libfunc (sfloat_optab, mode, DImode, "__floatdikf");
17919 set_conv_libfunc (ufloat_optab, mode, DImode, "__floatundikf");
17921 if (TARGET_POWERPC64)
17923 set_conv_libfunc (sfix_optab, TImode, mode, "__fixkfti");
17924 set_conv_libfunc (ufix_optab, TImode, mode, "__fixunskfti");
17925 set_conv_libfunc (sfloat_optab, mode, TImode, "__floattikf");
17926 set_conv_libfunc (ufloat_optab, mode, TImode, "__floatuntikf");
17930 else
17932 set_optab_libfunc (add_optab, mode, "_q_add");
17933 set_optab_libfunc (sub_optab, mode, "_q_sub");
17934 set_optab_libfunc (neg_optab, mode, "_q_neg");
17935 set_optab_libfunc (smul_optab, mode, "_q_mul");
17936 set_optab_libfunc (sdiv_optab, mode, "_q_div");
17937 if (TARGET_PPC_GPOPT)
17938 set_optab_libfunc (sqrt_optab, mode, "_q_sqrt");
17940 set_optab_libfunc (eq_optab, mode, "_q_feq");
17941 set_optab_libfunc (ne_optab, mode, "_q_fne");
17942 set_optab_libfunc (gt_optab, mode, "_q_fgt");
17943 set_optab_libfunc (ge_optab, mode, "_q_fge");
17944 set_optab_libfunc (lt_optab, mode, "_q_flt");
17945 set_optab_libfunc (le_optab, mode, "_q_fle");
17947 set_conv_libfunc (sext_optab, mode, SFmode, "_q_stoq");
17948 set_conv_libfunc (sext_optab, mode, DFmode, "_q_dtoq");
17949 set_conv_libfunc (trunc_optab, SFmode, mode, "_q_qtos");
17950 set_conv_libfunc (trunc_optab, DFmode, mode, "_q_qtod");
17951 set_conv_libfunc (sfix_optab, SImode, mode, "_q_qtoi");
17952 set_conv_libfunc (ufix_optab, SImode, mode, "_q_qtou");
17953 set_conv_libfunc (sfloat_optab, mode, SImode, "_q_itoq");
17954 set_conv_libfunc (ufloat_optab, mode, SImode, "_q_utoq");
17958 static void
17959 rs6000_init_libfuncs (void)
17961 /* __float128 support. */
17962 if (TARGET_FLOAT128_TYPE)
17964 init_float128_ibm (IFmode);
17965 init_float128_ieee (KFmode);
17968 /* AIX/Darwin/64-bit Linux quad floating point routines. */
17969 if (TARGET_LONG_DOUBLE_128)
17971 if (!TARGET_IEEEQUAD)
17972 init_float128_ibm (TFmode);
17974 /* IEEE 128-bit including 32-bit SVR4 quad floating point routines. */
17975 else
17976 init_float128_ieee (TFmode);
17980 /* Emit a potentially record-form instruction, setting DST from SRC.
17981 If DOT is 0, that is all; otherwise, set CCREG to the result of the
17982 signed comparison of DST with zero. If DOT is 1, the generated RTL
17983 doesn't care about the DST result; if DOT is 2, it does. If CCREG
17984 is CR0 do a single dot insn (as a PARALLEL); otherwise, do a SET and
17985 a separate COMPARE. */
17987 void
17988 rs6000_emit_dot_insn (rtx dst, rtx src, int dot, rtx ccreg)
17990 if (dot == 0)
17992 emit_move_insn (dst, src);
17993 return;
17996 if (cc_reg_not_cr0_operand (ccreg, CCmode))
17998 emit_move_insn (dst, src);
17999 emit_move_insn (ccreg, gen_rtx_COMPARE (CCmode, dst, const0_rtx));
18000 return;
18003 rtx ccset = gen_rtx_SET (ccreg, gen_rtx_COMPARE (CCmode, src, const0_rtx));
18004 if (dot == 1)
18006 rtx clobber = gen_rtx_CLOBBER (VOIDmode, dst);
18007 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, ccset, clobber)));
18009 else
18011 rtx set = gen_rtx_SET (dst, src);
18012 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, ccset, set)));
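/* Sketch of the RTL built above (illustrative, not from the original
   source). With CCREG being CR0, dot == 1 emits
     (parallel [(set ccreg (compare:CC src (const_int 0)))
                (clobber dst)])
   while dot == 2 also keeps the GPR result:
     (parallel [(set ccreg (compare:CC src (const_int 0)))
                (set dst src)])
   Any other CC register instead gets a plain SET of DST followed by a
   separate COMPARE against zero. */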
18017 /* A validation routine: say whether CODE, a condition code, and MODE
18018 match. The other alternatives either don't make sense or should
18019 never be generated. */
18021 void
18022 validate_condition_mode (enum rtx_code code, machine_mode mode)
18024 gcc_assert ((GET_RTX_CLASS (code) == RTX_COMPARE
18025 || GET_RTX_CLASS (code) == RTX_COMM_COMPARE)
18026 && GET_MODE_CLASS (mode) == MODE_CC);
18028 /* These don't make sense. */
18029 gcc_assert ((code != GT && code != LT && code != GE && code != LE)
18030 || mode != CCUNSmode);
18032 gcc_assert ((code != GTU && code != LTU && code != GEU && code != LEU)
18033 || mode == CCUNSmode);
18035 gcc_assert (mode == CCFPmode
18036 || (code != ORDERED && code != UNORDERED
18037 && code != UNEQ && code != LTGT
18038 && code != UNGT && code != UNLT
18039 && code != UNGE && code != UNLE));
18041 /* These should never be generated except for
18042 flag_finite_math_only. */
18043 gcc_assert (mode != CCFPmode
18044 || flag_finite_math_only
18045 || (code != LE && code != GE
18046 && code != UNEQ && code != LTGT
18047 && code != UNGT && code != UNLT));
18049 /* These are invalid; the information is not there. */
18050 gcc_assert (mode != CCEQmode || code == EQ || code == NE);
18054 /* Return whether MASK (a CONST_INT) is a valid mask for any rlwinm,
18055 rldicl, rldicr, or rldic instruction in mode MODE. If so, if E is
18056 not zero, store there the bit offset (counted from the right) where
18057 the single stretch of 1 bits begins; and similarly for B, the bit
18058 offset where it ends. */
18060 bool
18061 rs6000_is_valid_mask (rtx mask, int *b, int *e, machine_mode mode)
18063 unsigned HOST_WIDE_INT val = INTVAL (mask);
18064 unsigned HOST_WIDE_INT bit;
18065 int nb, ne;
18066 int n = GET_MODE_PRECISION (mode);
18068 if (mode != DImode && mode != SImode)
18069 return false;
18071 if (INTVAL (mask) >= 0)
18073 bit = val & -val;
18074 ne = exact_log2 (bit);
18075 nb = exact_log2 (val + bit);
18077 else if (val + 1 == 0)
18079 nb = n;
18080 ne = 0;
18082 else if (val & 1)
18084 val = ~val;
18085 bit = val & -val;
18086 nb = exact_log2 (bit);
18087 ne = exact_log2 (val + bit);
18089 else
18091 bit = val & -val;
18092 ne = exact_log2 (bit);
18093 if (val + bit == 0)
18094 nb = n;
18095 else
18096 nb = 0;
18099 nb--;
18101 if (nb < 0 || ne < 0 || nb >= n || ne >= n)
18102 return false;
18104 if (b)
18105 *b = nb;
18106 if (e)
18107 *e = ne;
18109 return true;
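/* Worked example (illustrative, not part of the original code): for the
   SImode mask 0x0ff0, val & -val is 0x10, so ne = 4; val + bit is 0x1000,
   so nb = exact_log2 (0x1000) = 12, decremented to 11. Bits 4..11 form the
   single stretch of ones, *e = 4 and *b = 11, and the mask is valid. For a
   mask such as 0x0f0f, val + bit is 0x0f10, which is not a power of two,
   so exact_log2 returns -1 and the final range check rejects it. */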
18112 /* Return whether MASK (a CONST_INT) is a valid mask for any rlwinm, rldicl,
18113 or rldicr instruction, to implement an AND with it in mode MODE. */
18115 bool
18116 rs6000_is_valid_and_mask (rtx mask, machine_mode mode)
18118 int nb, ne;
18120 if (!rs6000_is_valid_mask (mask, &nb, &ne, mode))
18121 return false;
18123 /* For DImode, we need a rldicl, rldicr, or a rlwinm with mask that
18124 does not wrap. */
18125 if (mode == DImode)
18126 return (ne == 0 || nb == 63 || (nb < 32 && ne <= nb));
18128 /* For SImode, rlwinm can do everything. */
18129 if (mode == SImode)
18130 return (nb < 32 && ne < 32);
18132 return false;
18135 /* Return the instruction template for an AND with mask in mode MODE, with
18136 operands OPERANDS. If DOT is true, make it a record-form instruction. */
18138 const char *
18139 rs6000_insn_for_and_mask (machine_mode mode, rtx *operands, bool dot)
18141 int nb, ne;
18143 if (!rs6000_is_valid_mask (operands[2], &nb, &ne, mode))
18144 gcc_unreachable ();
18146 if (mode == DImode && ne == 0)
18148 operands[3] = GEN_INT (63 - nb);
18149 if (dot)
18150 return "rldicl. %0,%1,0,%3";
18151 return "rldicl %0,%1,0,%3";
18154 if (mode == DImode && nb == 63)
18156 operands[3] = GEN_INT (63 - ne);
18157 if (dot)
18158 return "rldicr. %0,%1,0,%3";
18159 return "rldicr %0,%1,0,%3";
18162 if (nb < 32 && ne < 32)
18164 operands[3] = GEN_INT (31 - nb);
18165 operands[4] = GEN_INT (31 - ne);
18166 if (dot)
18167 return "rlwinm. %0,%1,0,%3,%4";
18168 return "rlwinm %0,%1,0,%3,%4";
18171 gcc_unreachable ();
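/* Example outputs (illustrative, not in the original source): the DImode
   mask 0xffffffff (nb = 31, ne = 0) yields "rldicl %0,%1,0,32"; the DImode
   mask with only the low 8 bits clear (nb = 63, ne = 8) yields
   "rldicr %0,%1,0,55"; the SImode mask 0x0ff0 (nb = 11, ne = 4) yields
   "rlwinm %0,%1,0,20,27". */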
18174 /* Return whether MASK (a CONST_INT) is a valid mask for any rlw[i]nm,
18175 rld[i]cl, rld[i]cr, or rld[i]c instruction, to implement an AND with
18176 shift SHIFT (a ROTATE, ASHIFT, or LSHIFTRT) in mode MODE. */
18178 bool
18179 rs6000_is_valid_shift_mask (rtx mask, rtx shift, machine_mode mode)
18181 int nb, ne;
18183 if (!rs6000_is_valid_mask (mask, &nb, &ne, mode))
18184 return false;
18186 int n = GET_MODE_PRECISION (mode);
18187 int sh = -1;
18189 if (CONST_INT_P (XEXP (shift, 1)))
18191 sh = INTVAL (XEXP (shift, 1));
18192 if (sh < 0 || sh >= n)
18193 return false;
18196 rtx_code code = GET_CODE (shift);
18198 /* Convert any shift by 0 to a rotate, to simplify below code. */
18199 if (sh == 0)
18200 code = ROTATE;
18202 /* Convert rotate to simple shift if we can, to make analysis simpler. */
18203 if (code == ROTATE && sh >= 0 && nb >= ne && ne >= sh)
18204 code = ASHIFT;
18205 if (code == ROTATE && sh >= 0 && nb >= ne && nb < sh)
18207 code = LSHIFTRT;
18208 sh = n - sh;
18211 /* DImode rotates need rld*. */
18212 if (mode == DImode && code == ROTATE)
18213 return (nb == 63 || ne == 0 || ne == sh);
18215 /* SImode rotates need rlw*. */
18216 if (mode == SImode && code == ROTATE)
18217 return (nb < 32 && ne < 32 && sh < 32);
18219 /* Wrap-around masks are only okay for rotates. */
18220 if (ne > nb)
18221 return false;
18223 /* Variable shifts are only okay for rotates. */
18224 if (sh < 0)
18225 return false;
18227 /* Don't allow ASHIFT if the mask is wrong for that. */
18228 if (code == ASHIFT && ne < sh)
18229 return false;
18231 /* If we can do it with an rlw*, we can do it. Don't allow LSHIFTRT
18232 if the mask is wrong for that. */
18233 if (nb < 32 && ne < 32 && sh < 32
18234 && !(code == LSHIFTRT && nb >= 32 - sh))
18235 return true;
18237 /* If we can do it with an rld*, we can do it. Don't allow LSHIFTRT
18238 if the mask is wrong for that. */
18239 if (code == LSHIFTRT)
18240 sh = 64 - sh;
18241 if (nb == 63 || ne == 0 || ne == sh)
18242 return !(code == LSHIFTRT && nb >= sh);
18244 return false;
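/* Example (illustrative, not in the original source): (ashift:SI x 4)
   under the mask 0x0ff0 has sh = 4, nb = 11, ne = 4; ne >= sh holds and
   nb, ne, and sh are all below 32, so a single rlwinm suffices and this
   returns true. With sh = 8 instead, the "ne < sh" test for ASHIFT fails
   the mask, because a rotate would bring nonzero bits into positions 4..7
   that the shift leaves zero. */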
18247 /* Return the instruction template for a shift with mask in mode MODE, with
18248 operands OPERANDS. If DOT is true, make it a record-form instruction. */
18250 const char *
18251 rs6000_insn_for_shift_mask (machine_mode mode, rtx *operands, bool dot)
18253 int nb, ne;
18255 if (!rs6000_is_valid_mask (operands[3], &nb, &ne, mode))
18256 gcc_unreachable ();
18258 if (mode == DImode && ne == 0)
18260 if (GET_CODE (operands[4]) == LSHIFTRT && INTVAL (operands[2]))
18261 operands[2] = GEN_INT (64 - INTVAL (operands[2]));
18262 operands[3] = GEN_INT (63 - nb);
18263 if (dot)
18264 return "rld%I2cl. %0,%1,%2,%3";
18265 return "rld%I2cl %0,%1,%2,%3";
18268 if (mode == DImode && nb == 63)
18270 operands[3] = GEN_INT (63 - ne);
18271 if (dot)
18272 return "rld%I2cr. %0,%1,%2,%3";
18273 return "rld%I2cr %0,%1,%2,%3";
18276 if (mode == DImode
18277 && GET_CODE (operands[4]) != LSHIFTRT
18278 && CONST_INT_P (operands[2])
18279 && ne == INTVAL (operands[2]))
18281 operands[3] = GEN_INT (63 - nb);
18282 if (dot)
18283 return "rld%I2c. %0,%1,%2,%3";
18284 return "rld%I2c %0,%1,%2,%3";
18287 if (nb < 32 && ne < 32)
18289 if (GET_CODE (operands[4]) == LSHIFTRT && INTVAL (operands[2]))
18290 operands[2] = GEN_INT (32 - INTVAL (operands[2]));
18291 operands[3] = GEN_INT (31 - nb);
18292 operands[4] = GEN_INT (31 - ne);
18293 /* This insn can also be a 64-bit rotate with mask that really makes
18294 it just a shift right (with mask); the %h below are to adjust for
18295 that situation (shift count is >= 32 in that case). */
18296 if (dot)
18297 return "rlw%I2nm. %0,%1,%h2,%3,%4";
18298 return "rlw%I2nm %0,%1,%h2,%3,%4";
18301 gcc_unreachable ();
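/* Example (illustrative, not in the original source): for (ashift:SI x 4)
   with mask 0x0ff0 (nb = 11, ne = 4), the rlwinm arm rewrites operands[3]
   and operands[4] to 20 and 27 and emits "rlwinm %0,%1,4,20,27" after the
   %I2/%h2 substitutions: rotating left by 4 moves bits 0..7 into bits
   4..11, and the mask clears whatever the rotate wrapped around. */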
18304 /* Return whether MASK (a CONST_INT) is a valid mask for any rlwimi or
18305 rldimi instruction, to implement an insert with shift SHIFT (a ROTATE,
18306 ASHIFT, or LSHIFTRT) in mode MODE. */
18308 bool
18309 rs6000_is_valid_insert_mask (rtx mask, rtx shift, machine_mode mode)
18311 int nb, ne;
18313 if (!rs6000_is_valid_mask (mask, &nb, &ne, mode))
18314 return false;
18316 int n = GET_MODE_PRECISION (mode);
18318 int sh = INTVAL (XEXP (shift, 1));
18319 if (sh < 0 || sh >= n)
18320 return false;
18322 rtx_code code = GET_CODE (shift);
18324 /* Convert any shift by 0 to a rotate, to simplify below code. */
18325 if (sh == 0)
18326 code = ROTATE;
18328 /* Convert rotate to simple shift if we can, to make analysis simpler. */
18329 if (code == ROTATE && sh >= 0 && nb >= ne && ne >= sh)
18330 code = ASHIFT;
18331 if (code == ROTATE && sh >= 0 && nb >= ne && nb < sh)
18333 code = LSHIFTRT;
18334 sh = n - sh;
18337 /* DImode rotates need rldimi. */
18338 if (mode == DImode && code == ROTATE)
18339 return (ne == sh);
18341 /* SImode rotates need rlwimi. */
18342 if (mode == SImode && code == ROTATE)
18343 return (nb < 32 && ne < 32 && sh < 32);
18345 /* Wrap-around masks are only okay for rotates. */
18346 if (ne > nb)
18347 return false;
18349 /* Don't allow ASHIFT if the mask is wrong for that. */
18350 if (code == ASHIFT && ne < sh)
18351 return false;
18353 /* If we can do it with an rlwimi, we can do it. Don't allow LSHIFTRT
18354 if the mask is wrong for that. */
18355 if (nb < 32 && ne < 32 && sh < 32
18356 && !(code == LSHIFTRT && nb >= 32 - sh))
18357 return true;
18359 /* If we can do it with an rldimi, we can do it. Don't allow LSHIFTRT
18360 if the mask is wrong for that. */
18361 if (code == LSHIFTRT)
18362 sh = 64 - sh;
18363 if (ne == sh)
18364 return !(code == LSHIFTRT && nb >= sh);
18366 return false;
18369 /* Return the instruction template for an insert with mask in mode MODE, with
18370 operands OPERANDS. If DOT is true, make it a record-form instruction. */
18372 const char *
18373 rs6000_insn_for_insert_mask (machine_mode mode, rtx *operands, bool dot)
18375 int nb, ne;
18377 if (!rs6000_is_valid_mask (operands[3], &nb, &ne, mode))
18378 gcc_unreachable ();
18380 /* Prefer rldimi because rlwimi is cracked. */
18381 if (TARGET_POWERPC64
18382 && (!dot || mode == DImode)
18383 && GET_CODE (operands[4]) != LSHIFTRT
18384 && ne == INTVAL (operands[2]))
18386 operands[3] = GEN_INT (63 - nb);
18387 if (dot)
18388 return "rldimi. %0,%1,%2,%3";
18389 return "rldimi %0,%1,%2,%3";
18392 if (nb < 32 && ne < 32)
18394 if (GET_CODE (operands[4]) == LSHIFTRT && INTVAL (operands[2]))
18395 operands[2] = GEN_INT (32 - INTVAL (operands[2]));
18396 operands[3] = GEN_INT (31 - nb);
18397 operands[4] = GEN_INT (31 - ne);
18398 if (dot)
18399 return "rlwimi. %0,%1,%2,%3,%4";
18400 return "rlwimi %0,%1,%2,%3,%4";
18403 gcc_unreachable ();
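/* Example (illustrative, not in the original source): inserting Y, rotated
   left by 8, into X under the SImode mask 0xff00 gives nb = 15 and ne = 8
   with shift count 8, so the rlwimi arm rewrites operands[3] and
   operands[4] to 16 and 23 and returns "rlwimi %0,%1,8,16,23". */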
18406 /* Return whether an AND with C (a CONST_INT) in mode MODE can be done
18407 using two machine instructions. */
18409 bool
18410 rs6000_is_valid_2insn_and (rtx c, machine_mode mode)
18412 /* There are two kinds of AND we can handle with two insns:
18413 1) those we can do with two rl* insns;
18414 2) ori[s];xori[s].
18416 We do not handle that last case yet. */
18418 /* If there is just one stretch of ones, we can do it. */
18419 if (rs6000_is_valid_mask (c, NULL, NULL, mode))
18420 return true;
18422 /* Otherwise, fill in the lowest "hole"; if we can do the result with
18423 one insn, we can do the whole thing with two. */
18424 unsigned HOST_WIDE_INT val = INTVAL (c);
18425 unsigned HOST_WIDE_INT bit1 = val & -val;
18426 unsigned HOST_WIDE_INT bit2 = (val + bit1) & ~val;
18427 unsigned HOST_WIDE_INT val1 = (val + bit1) & val;
18428 unsigned HOST_WIDE_INT bit3 = val1 & -val1;
18429 return rs6000_is_valid_and_mask (GEN_INT (val + bit3 - bit2), mode);
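/* Worked example (illustrative, not part of the original code): for
   val = 0x00ff0ff0 there are two stretches of ones. bit1 = 0x10 (lowest
   set bit), bit2 = 0x1000 (bottom of the lowest hole), val1 = 0x00ff0000,
   bit3 = 0x00010000, and val + bit3 - bit2 = 0x00fffff0 fills the hole.
   That is a single stretch of ones, hence a valid rl* mask, so the AND can
   be done in two insns. */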
18432 /* Emit the two insns to do an AND in mode MODE, with operands OPERANDS.
18433 If EXPAND is true, split rotate-and-mask instructions we generate to
18434 their constituent parts as well (this is used during expand); if DOT
18435 is 1, make the last insn a record-form instruction clobbering the
18436 destination GPR and setting the CC reg (from operands[3]); if 2, set
18437 that GPR as well as the CC reg. */
18439 void
18440 rs6000_emit_2insn_and (machine_mode mode, rtx *operands, bool expand, int dot)
18442 gcc_assert (!(expand && dot));
18444 unsigned HOST_WIDE_INT val = INTVAL (operands[2]);
18446 /* If it is one stretch of ones, it is DImode; shift left, mask, then
18447 shift right. This generates better code than doing the masks without
18448 shifts, or shifting first right and then left. */
18449 int nb, ne;
18450 if (rs6000_is_valid_mask (operands[2], &nb, &ne, mode) && nb >= ne)
18452 gcc_assert (mode == DImode);
18454 int shift = 63 - nb;
18455 if (expand)
18457 rtx tmp1 = gen_reg_rtx (DImode);
18458 rtx tmp2 = gen_reg_rtx (DImode);
18459 emit_insn (gen_ashldi3 (tmp1, operands[1], GEN_INT (shift)));
18460 emit_insn (gen_anddi3 (tmp2, tmp1, GEN_INT (val << shift)));
18461 emit_insn (gen_lshrdi3 (operands[0], tmp2, GEN_INT (shift)));
18463 else
18465 rtx tmp = gen_rtx_ASHIFT (mode, operands[1], GEN_INT (shift));
18466 tmp = gen_rtx_AND (mode, tmp, GEN_INT (val << shift));
18467 emit_move_insn (operands[0], tmp);
18468 tmp = gen_rtx_LSHIFTRT (mode, operands[0], GEN_INT (shift));
18469 rs6000_emit_dot_insn (operands[0], tmp, dot, dot ? operands[3] : 0);
18471 return;
18474 /* Otherwise, make a mask2 that cuts out the lowest "hole", and a mask1
18475 that does the rest. */
18476 unsigned HOST_WIDE_INT bit1 = val & -val;
18477 unsigned HOST_WIDE_INT bit2 = (val + bit1) & ~val;
18478 unsigned HOST_WIDE_INT val1 = (val + bit1) & val;
18479 unsigned HOST_WIDE_INT bit3 = val1 & -val1;
18481 unsigned HOST_WIDE_INT mask1 = -bit3 + bit2 - 1;
18482 unsigned HOST_WIDE_INT mask2 = val + bit3 - bit2;
18484 gcc_assert (rs6000_is_valid_and_mask (GEN_INT (mask2), mode));
18486 /* Two "no-rotate"-and-mask instructions, for SImode. */
18487 if (rs6000_is_valid_and_mask (GEN_INT (mask1), mode))
18489 gcc_assert (mode == SImode);
18491 rtx reg = expand ? gen_reg_rtx (mode) : operands[0];
18492 rtx tmp = gen_rtx_AND (mode, operands[1], GEN_INT (mask1));
18493 emit_move_insn (reg, tmp);
18494 tmp = gen_rtx_AND (mode, reg, GEN_INT (mask2));
18495 rs6000_emit_dot_insn (operands[0], tmp, dot, dot ? operands[3] : 0);
18496 return;
18499 gcc_assert (mode == DImode);
18501 /* Two "no-rotate"-and-mask instructions, for DImode: both are rlwinm
18502 insns; we have to do the first in SImode, because it wraps. */
18503 if (mask2 <= 0xffffffff
18504 && rs6000_is_valid_and_mask (GEN_INT (mask1), SImode))
18506 rtx reg = expand ? gen_reg_rtx (mode) : operands[0];
18507 rtx tmp = gen_rtx_AND (SImode, gen_lowpart (SImode, operands[1]),
18508 GEN_INT (mask1));
18509 rtx reg_low = gen_lowpart (SImode, reg);
18510 emit_move_insn (reg_low, tmp);
18511 tmp = gen_rtx_AND (mode, reg, GEN_INT (mask2));
18512 rs6000_emit_dot_insn (operands[0], tmp, dot, dot ? operands[3] : 0);
18513 return;
18516 /* Two rld* insns: rotate, clear the hole in the middle (which now is
18517 at the top end), rotate back and clear the other hole. */
18518 int right = exact_log2 (bit3);
18519 int left = 64 - right;
18521 /* Rotate the mask too. */
18522 mask1 = (mask1 >> right) | ((bit2 - 1) << left);
18524 if (expand)
18526 rtx tmp1 = gen_reg_rtx (DImode);
18527 rtx tmp2 = gen_reg_rtx (DImode);
18528 rtx tmp3 = gen_reg_rtx (DImode);
18529 emit_insn (gen_rotldi3 (tmp1, operands[1], GEN_INT (left)));
18530 emit_insn (gen_anddi3 (tmp2, tmp1, GEN_INT (mask1)));
18531 emit_insn (gen_rotldi3 (tmp3, tmp2, GEN_INT (right)));
18532 emit_insn (gen_anddi3 (operands[0], tmp3, GEN_INT (mask2)));
18534 else
18536 rtx tmp = gen_rtx_ROTATE (mode, operands[1], GEN_INT (left));
18537 tmp = gen_rtx_AND (mode, tmp, GEN_INT (mask1));
18538 emit_move_insn (operands[0], tmp);
18539 tmp = gen_rtx_ROTATE (mode, operands[0], GEN_INT (right));
18540 tmp = gen_rtx_AND (mode, tmp, GEN_INT (mask2));
18541 rs6000_emit_dot_insn (operands[0], tmp, dot, dot ? operands[3] : 0);
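/* Continuing the example above (illustrative): for val = 0x00ff0ff0 in
   SImode, mask2 = 0x00fffff0 and mask1 = 0xffff0fff. mask1 is a
   wrap-around mask that a single rlwinm can implement, so the SImode path
   emits two plain ANDs (the second one record-form when DOT is set), and
   mask1 & mask2 reproduces the original value. */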
18545 /* Return 1 if REGNO (reg1) == REGNO (reg2) - 1, making them candidates
18546 for lfq and stfq insns iff the registers are hard registers. */
18548 int
18549 registers_ok_for_quad_peep (rtx reg1, rtx reg2)
18551 /* We might have been passed a SUBREG. */
18552 if (GET_CODE (reg1) != REG || GET_CODE (reg2) != REG)
18553 return 0;
18555 /* We might have been passed non floating point registers. */
18556 if (!FP_REGNO_P (REGNO (reg1))
18557 || !FP_REGNO_P (REGNO (reg2)))
18558 return 0;
18560 return (REGNO (reg1) == REGNO (reg2) - 1);
18563 /* Return 1 if addr1 and addr2 are suitable for lfq or stfq insn.
18564 addr1 and addr2 must be in consecutive memory locations
18565 (addr2 == addr1 + 8). */
18567 int
18568 mems_ok_for_quad_peep (rtx mem1, rtx mem2)
18570 rtx addr1, addr2;
18571 unsigned int reg1, reg2;
18572 int offset1, offset2;
18574 /* The mems cannot be volatile. */
18575 if (MEM_VOLATILE_P (mem1) || MEM_VOLATILE_P (mem2))
18576 return 0;
18578 addr1 = XEXP (mem1, 0);
18579 addr2 = XEXP (mem2, 0);
18581 /* Extract an offset (if used) from the first addr. */
18582 if (GET_CODE (addr1) == PLUS)
18584 /* If not a REG, return zero. */
18585 if (GET_CODE (XEXP (addr1, 0)) != REG)
18586 return 0;
18587 else
18589 reg1 = REGNO (XEXP (addr1, 0));
18590 /* The offset must be constant! */
18591 if (GET_CODE (XEXP (addr1, 1)) != CONST_INT)
18592 return 0;
18593 offset1 = INTVAL (XEXP (addr1, 1));
18596 else if (GET_CODE (addr1) != REG)
18597 return 0;
18598 else
18600 reg1 = REGNO (addr1);
18601 /* This was a simple (mem (reg)) expression. Offset is 0. */
18602 offset1 = 0;
18605 /* And now for the second addr. */
18606 if (GET_CODE (addr2) == PLUS)
18608 /* If not a REG, return zero. */
18609 if (GET_CODE (XEXP (addr2, 0)) != REG)
18610 return 0;
18611 else
18613 reg2 = REGNO (XEXP (addr2, 0));
18614 /* The offset must be constant. */
18615 if (GET_CODE (XEXP (addr2, 1)) != CONST_INT)
18616 return 0;
18617 offset2 = INTVAL (XEXP (addr2, 1));
18620 else if (GET_CODE (addr2) != REG)
18621 return 0;
18622 else
18624 reg2 = REGNO (addr2);
18625 /* This was a simple (mem (reg)) expression. Offset is 0. */
18626 offset2 = 0;
18629 /* Both of these must have the same base register. */
18630 if (reg1 != reg2)
18631 return 0;
18633 /* The offset for the second addr must be 8 more than the first addr. */
18634 if (offset2 != offset1 + 8)
18635 return 0;
18637 /* All the tests passed. addr1 and addr2 are valid for lfq or stfq
18638 instructions. */
18639 return 1;
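/* Example (illustrative, not in the original source):
   mem1 = (mem:DF (plus (reg 9) (const_int 16))) and
   mem2 = (mem:DF (plus (reg 9) (const_int 24))) share base register 9 and
   their offsets differ by exactly 8, so the pair is a candidate for
   lfq/stfq. */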
18642 /* Implement TARGET_SECONDARY_MEMORY_NEEDED_MODE. For SDmode values we
18643 need to use DDmode; in all other cases we can use the same mode. */
18644 static machine_mode
18645 rs6000_secondary_memory_needed_mode (machine_mode mode)
18647 if (lra_in_progress && mode == SDmode)
18648 return DDmode;
18649 return mode;
18652 /* Classify a register type. Because the FMRGOW/FMRGEW instructions only work
18653 on traditional floating point registers, and the VMRGOW/VMRGEW instructions
18654 only work on the traditional altivec registers, note if an altivec register
18655 was chosen. */
18657 static enum rs6000_reg_type
18658 register_to_reg_type (rtx reg, bool *is_altivec)
18660 HOST_WIDE_INT regno;
18661 enum reg_class rclass;
18663 if (GET_CODE (reg) == SUBREG)
18664 reg = SUBREG_REG (reg);
18666 if (!REG_P (reg))
18667 return NO_REG_TYPE;
18669 regno = REGNO (reg);
18670 if (regno >= FIRST_PSEUDO_REGISTER)
18672 if (!lra_in_progress && !reload_completed)
18673 return PSEUDO_REG_TYPE;
18675 regno = true_regnum (reg);
18676 if (regno < 0 || regno >= FIRST_PSEUDO_REGISTER)
18677 return PSEUDO_REG_TYPE;
18680 gcc_assert (regno >= 0);
18682 if (is_altivec && ALTIVEC_REGNO_P (regno))
18683 *is_altivec = true;
18685 rclass = rs6000_regno_regclass[regno];
18686 return reg_class_to_reg_type[(int)rclass];
18689 /* Helper function to return the cost of adding a TOC entry address. */
18691 static inline int
18692 rs6000_secondary_reload_toc_costs (addr_mask_type addr_mask)
18694 int ret;
18696 if (TARGET_CMODEL != CMODEL_SMALL)
18697 ret = ((addr_mask & RELOAD_REG_OFFSET) == 0) ? 1 : 2;
18699 else
18700 ret = (TARGET_MINIMAL_TOC) ? 6 : 3;
18702 return ret;
18705 /* Helper function for rs6000_secondary_reload to determine whether the memory
18706 address (ADDR) with a given register class (RCLASS) and machine mode (MODE)
18707 needs reloading. Return negative if the memory is not handled by the memory
18708 helper functions and a different reload method should be tried, 0 if no
18709 additional instructions are needed, and positive to give the extra cost
18710 for the memory. */
18712 static int
18713 rs6000_secondary_reload_memory (rtx addr,
18714 enum reg_class rclass,
18715 machine_mode mode)
18717 int extra_cost = 0;
18718 rtx reg, and_arg, plus_arg0, plus_arg1;
18719 addr_mask_type addr_mask;
18720 const char *type = NULL;
18721 const char *fail_msg = NULL;
18723 if (GPR_REG_CLASS_P (rclass))
18724 addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_GPR];
18726 else if (rclass == FLOAT_REGS)
18727 addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_FPR];
18729 else if (rclass == ALTIVEC_REGS)
18730 addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_VMX];
18732 /* For the combined VSX_REGS, turn off Altivec AND -16. */
18733 else if (rclass == VSX_REGS)
18734 addr_mask = (reg_addr[mode].addr_mask[RELOAD_REG_VMX]
18735 & ~RELOAD_REG_AND_M16);
18737 /* If the register allocator hasn't made up its mind yet on the register
18738 class to use, settle on defaults to use. */
18739 else if (rclass == NO_REGS)
18741 addr_mask = (reg_addr[mode].addr_mask[RELOAD_REG_ANY]
18742 & ~RELOAD_REG_AND_M16);
18744 if ((addr_mask & RELOAD_REG_MULTIPLE) != 0)
18745 addr_mask &= ~(RELOAD_REG_INDEXED
18746 | RELOAD_REG_PRE_INCDEC
18747 | RELOAD_REG_PRE_MODIFY);
18750 else
18751 addr_mask = 0;
18753 /* If the register isn't valid in this register class, just return now. */
18754 if ((addr_mask & RELOAD_REG_VALID) == 0)
18756 if (TARGET_DEBUG_ADDR)
18758 fprintf (stderr,
18759 "rs6000_secondary_reload_memory: mode = %s, class = %s, "
18760 "not valid in class\n",
18761 GET_MODE_NAME (mode), reg_class_names[rclass]);
18762 debug_rtx (addr);
18765 return -1;
18768 switch (GET_CODE (addr))
18770 /* Does the register class support auto update forms for this mode? We
18771 don't need a scratch register, since the powerpc only supports
18772 PRE_INC, PRE_DEC, and PRE_MODIFY. */
18773 case PRE_INC:
18774 case PRE_DEC:
18775 reg = XEXP (addr, 0);
18776 if (!base_reg_operand (reg, GET_MODE (reg)))
18778 fail_msg = "no base register #1";
18779 extra_cost = -1;
18782 else if ((addr_mask & RELOAD_REG_PRE_INCDEC) == 0)
18784 extra_cost = 1;
18785 type = "update";
18787 break;
18789 case PRE_MODIFY:
18790 reg = XEXP (addr, 0);
18791 plus_arg1 = XEXP (addr, 1);
18792 if (!base_reg_operand (reg, GET_MODE (reg))
18793 || GET_CODE (plus_arg1) != PLUS
18794 || !rtx_equal_p (reg, XEXP (plus_arg1, 0)))
18796 fail_msg = "bad PRE_MODIFY";
18797 extra_cost = -1;
18800 else if ((addr_mask & RELOAD_REG_PRE_MODIFY) == 0)
18802 extra_cost = 1;
18803 type = "update";
18805 break;
18807 /* Do we need to simulate AND -16 to clear the bottom address bits used
18808 in VMX load/stores? Only allow the AND for vector sizes. */
18809 case AND:
18810 and_arg = XEXP (addr, 0);
18811 if (GET_MODE_SIZE (mode) != 16
18812 || GET_CODE (XEXP (addr, 1)) != CONST_INT
18813 || INTVAL (XEXP (addr, 1)) != -16)
18815 fail_msg = "bad Altivec AND #1";
18816 extra_cost = -1;
18819 if (rclass != ALTIVEC_REGS)
18821 if (legitimate_indirect_address_p (and_arg, false))
18822 extra_cost = 1;
18824 else if (legitimate_indexed_address_p (and_arg, false))
18825 extra_cost = 2;
18827 else
18829 fail_msg = "bad Altivec AND #2";
18830 extra_cost = -1;
18833 type = "and";
18835 break;
18837 /* If this is an indirect address, make sure it is a base register. */
18838 case REG:
18839 case SUBREG:
18840 if (!legitimate_indirect_address_p (addr, false))
18842 extra_cost = 1;
18843 type = "move";
18845 break;
18847 /* If this is an indexed address, make sure the register class can handle
18848 indexed addresses for this mode. */
18849 case PLUS:
18850 plus_arg0 = XEXP (addr, 0);
18851 plus_arg1 = XEXP (addr, 1);
18853 /* (plus (plus (reg) (constant)) (constant)) is generated during
18854 push_reload processing, so handle it now. */
18855 if (GET_CODE (plus_arg0) == PLUS && CONST_INT_P (plus_arg1))
18857 if ((addr_mask & RELOAD_REG_OFFSET) == 0)
18859 extra_cost = 1;
18860 type = "offset";
18864 /* (plus (plus (reg) (constant)) (reg)) is also generated during
18865 push_reload processing, so handle it now. */
18866 else if (GET_CODE (plus_arg0) == PLUS && REG_P (plus_arg1))
18868 if ((addr_mask & RELOAD_REG_INDEXED) == 0)
18870 extra_cost = 1;
18871 type = "indexed #2";
18875 else if (!base_reg_operand (plus_arg0, GET_MODE (plus_arg0)))
18877 fail_msg = "no base register #2";
18878 extra_cost = -1;
18881 else if (int_reg_operand (plus_arg1, GET_MODE (plus_arg1)))
18883 if ((addr_mask & RELOAD_REG_INDEXED) == 0
18884 || !legitimate_indexed_address_p (addr, false))
18886 extra_cost = 1;
18887 type = "indexed";
18891 else if ((addr_mask & RELOAD_REG_QUAD_OFFSET) != 0
18892 && CONST_INT_P (plus_arg1))
18894 if (!quad_address_offset_p (INTVAL (plus_arg1)))
18896 extra_cost = 1;
18897 type = "vector d-form offset";
18901 /* Make sure the register class can handle offset addresses. */
18902 else if (rs6000_legitimate_offset_address_p (mode, addr, false, true))
18904 if ((addr_mask & RELOAD_REG_OFFSET) == 0)
18906 extra_cost = 1;
18907 type = "offset #2";
18911 else
18913 fail_msg = "bad PLUS";
18914 extra_cost = -1;
18917 break;
18919 case LO_SUM:
18920 /* Quad offsets are restricted and can't handle normal addresses. */
18921 if ((addr_mask & RELOAD_REG_QUAD_OFFSET) != 0)
18923 extra_cost = -1;
18924 type = "vector d-form lo_sum";
18927 else if (!legitimate_lo_sum_address_p (mode, addr, false))
18929 fail_msg = "bad LO_SUM";
18930 extra_cost = -1;
18933 if ((addr_mask & RELOAD_REG_OFFSET) == 0)
18935 extra_cost = 1;
18936 type = "lo_sum";
18938 break;
18940 /* Static addresses need to create a TOC entry. */
18941 case CONST:
18942 case SYMBOL_REF:
18943 case LABEL_REF:
18944 if ((addr_mask & RELOAD_REG_QUAD_OFFSET) != 0)
18946 extra_cost = -1;
18947 type = "vector d-form lo_sum #2";
18950 else
18952 type = "address";
18953 extra_cost = rs6000_secondary_reload_toc_costs (addr_mask);
18955 break;
18957 /* TOC references look like offsetable memory. */
18958 case UNSPEC:
18959 if (TARGET_CMODEL == CMODEL_SMALL || XINT (addr, 1) != UNSPEC_TOCREL)
18961 fail_msg = "bad UNSPEC";
18962 extra_cost = -1;
18965 else if ((addr_mask & RELOAD_REG_QUAD_OFFSET) != 0)
18967 extra_cost = -1;
18968 type = "vector d-form lo_sum #3";
18971 else if ((addr_mask & RELOAD_REG_OFFSET) == 0)
18973 extra_cost = 1;
18974 type = "toc reference";
18976 break;
18978 default:
18980 fail_msg = "bad address";
18981 extra_cost = -1;
18985 if (TARGET_DEBUG_ADDR /* && extra_cost != 0 */)
18987 if (extra_cost < 0)
18988 fprintf (stderr,
18989 "rs6000_secondary_reload_memory error: mode = %s, "
18990 "class = %s, addr_mask = '%s', %s\n",
18991 GET_MODE_NAME (mode),
18992 reg_class_names[rclass],
18993 rs6000_debug_addr_mask (addr_mask, false),
18994 (fail_msg != NULL) ? fail_msg : "<bad address>");
18996 else
18997 fprintf (stderr,
18998 "rs6000_secondary_reload_memory: mode = %s, class = %s, "
18999 "addr_mask = '%s', extra cost = %d, %s\n",
19000 GET_MODE_NAME (mode),
19001 reg_class_names[rclass],
19002 rs6000_debug_addr_mask (addr_mask, false),
19003 extra_cost,
19004 (type) ? type : "<none>");
19006 debug_rtx (addr);
19009 return extra_cost;
19012 /* Helper function for rs6000_secondary_reload to return true if a move to a
19013 different register class is really a simple move. */
19015 static bool
19016 rs6000_secondary_reload_simple_move (enum rs6000_reg_type to_type,
19017 enum rs6000_reg_type from_type,
19018 machine_mode mode)
19020 int size = GET_MODE_SIZE (mode);
19022 /* Add support for various direct moves available. In this function, we only
19023 look at cases where we don't need any extra registers, and one or more
19024 simple move insns are issued. Originally small integers are not allowed
19025 in FPR/VSX registers. Single precision binary floating is not a simple
19026 move because we need to convert to the single precision memory layout.
19027 The 4-byte SDmode can be moved. TDmode values are disallowed since they
19028 need special direct move handling, which we do not support yet. */
19029 if (TARGET_DIRECT_MOVE
19030 && ((to_type == GPR_REG_TYPE && from_type == VSX_REG_TYPE)
19031 || (to_type == VSX_REG_TYPE && from_type == GPR_REG_TYPE)))
19033 if (TARGET_POWERPC64)
19035 /* ISA 2.07: MTVSRD or MVFVSRD. */
19036 if (size == 8)
19037 return true;
19039 /* ISA 3.0: MTVSRDD or MFVSRD + MFVSRLD. */
19040 if (size == 16 && TARGET_P9_VECTOR && mode != TDmode)
19041 return true;
19044 /* ISA 2.07: MTVSRWZ or MFVSRWZ. */
19045 if (TARGET_P8_VECTOR)
19047 if (mode == SImode)
19048 return true;
19050 if (TARGET_P9_VECTOR && (mode == HImode || mode == QImode))
19051 return true;
19054 /* ISA 2.07: MTVSRWZ or MFVSRWZ. */
19055 if (mode == SDmode)
19056 return true;
19059 /* Power6+: MFTGPR or MFFGPR. */
19060 else if (TARGET_MFPGPR && TARGET_POWERPC64 && size == 8
19061 && ((to_type == GPR_REG_TYPE && from_type == FPR_REG_TYPE)
19062 || (to_type == FPR_REG_TYPE && from_type == GPR_REG_TYPE)))
19063 return true;
19065 /* Move to/from SPR. */
19066 else if ((size == 4 || (TARGET_POWERPC64 && size == 8))
19067 && ((to_type == GPR_REG_TYPE && from_type == SPR_REG_TYPE)
19068 || (to_type == SPR_REG_TYPE && from_type == GPR_REG_TYPE)))
19069 return true;
19071 return false;
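/* Example (illustrative, not in the original source): on a 64-bit ISA 2.07
   target, a DImode move between a GPR and a VSX register is a simple move
   (a single mtvsrd or mfvsrd), so this returns true; a 16-byte TDmode
   value is excluded above and has to go through the multi-insn direct
   move handling that follows. */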
19074 /* Direct move helper function for rs6000_secondary_reload, handle all of the
19075 special direct moves that involve allocating an extra register, return the
19076 insn code of the helper function if there is such a function or
19077 CODE_FOR_nothing if not. */
19079 static bool
19080 rs6000_secondary_reload_direct_move (enum rs6000_reg_type to_type,
19081 enum rs6000_reg_type from_type,
19082 machine_mode mode,
19083 secondary_reload_info *sri,
19084 bool altivec_p)
19086 bool ret = false;
19087 enum insn_code icode = CODE_FOR_nothing;
19088 int cost = 0;
19089 int size = GET_MODE_SIZE (mode);
19091 if (TARGET_POWERPC64 && size == 16)
19093 /* Handle moving 128-bit values from GPRs to VSX registers on
19094 ISA 2.07 (power8, power9) when running in 64-bit mode using
19095 XXPERMDI to glue the two 64-bit values back together. */
19096 if (to_type == VSX_REG_TYPE && from_type == GPR_REG_TYPE)
19098 cost = 3; /* 2 mtvsrd's, 1 xxpermdi. */
19099 icode = reg_addr[mode].reload_vsx_gpr;
19102 /* Handle moving 128-bit values from VSX registers to GPRs on
19103 ISA 2.07 when running in 64-bit mode using XXPERMDI to get access to the
19104 bottom 64-bit value. */
19105 else if (to_type == GPR_REG_TYPE && from_type == VSX_REG_TYPE)
19107 cost = 3; /* 2 mfvsrd's, 1 xxpermdi. */
19108 icode = reg_addr[mode].reload_gpr_vsx;
19112 else if (TARGET_POWERPC64 && mode == SFmode)
19114 if (to_type == GPR_REG_TYPE && from_type == VSX_REG_TYPE)
19116 cost = 3; /* xscvdpspn, mfvsrd, and. */
19117 icode = reg_addr[mode].reload_gpr_vsx;
19120 else if (to_type == VSX_REG_TYPE && from_type == GPR_REG_TYPE)
19122 cost = 2; /* mtvsrz, xscvspdpn. */
19123 icode = reg_addr[mode].reload_vsx_gpr;
19127 else if (!TARGET_POWERPC64 && size == 8)
19129 /* Handle moving 64-bit values from GPRs to floating point registers on
19130 ISA 2.07 when running in 32-bit mode using FMRGOW to glue the two
19131 32-bit values back together. Altivec register classes must be handled
19132 specially since a different instruction is used, and the secondary
19133 reload support requires a single instruction class in the scratch
19134 register constraint. However, right now TFmode is not allowed in
19135 Altivec registers, so the pattern will never match. */
19136 if (to_type == VSX_REG_TYPE && from_type == GPR_REG_TYPE && !altivec_p)
19138 cost = 3; /* 2 mtvsrwz's, 1 fmrgow. */
19139 icode = reg_addr[mode].reload_fpr_gpr;
19143 if (icode != CODE_FOR_nothing)
19145 ret = true;
19146 if (sri)
19148 sri->icode = icode;
19149 sri->extra_cost = cost;
19153 return ret;
19156 /* Return whether a move between two register classes can be done either
19157 directly (simple move) or via a pattern that uses a single extra temporary
19158 (using ISA 2.07's direct move in this case). */
19160 static bool
19161 rs6000_secondary_reload_move (enum rs6000_reg_type to_type,
19162 enum rs6000_reg_type from_type,
19163 machine_mode mode,
19164 secondary_reload_info *sri,
19165 bool altivec_p)
19167 /* Fall back to load/store reloads if either type is not a register. */
19168 if (to_type == NO_REG_TYPE || from_type == NO_REG_TYPE)
19169 return false;
19171 /* If we haven't allocated registers yet, assume the move can be done for the
19172 standard register types. */
19173 if ((to_type == PSEUDO_REG_TYPE && from_type == PSEUDO_REG_TYPE)
19174 || (to_type == PSEUDO_REG_TYPE && IS_STD_REG_TYPE (from_type))
19175 || (from_type == PSEUDO_REG_TYPE && IS_STD_REG_TYPE (to_type)))
19176 return true;
19178 /* A move within the same set of registers is a simple move for
19179 non-specialized registers. */
19180 if (to_type == from_type && IS_STD_REG_TYPE (to_type))
19181 return true;
19183 /* Check whether a simple move can be done directly. */
19184 if (rs6000_secondary_reload_simple_move (to_type, from_type, mode))
19186 if (sri)
19188 sri->icode = CODE_FOR_nothing;
19189 sri->extra_cost = 0;
19191 return true;
19194 /* Now check if we can do it in a few steps. */
19195 return rs6000_secondary_reload_direct_move (to_type, from_type, mode, sri,
19196 altivec_p);
19199 /* Inform reload about cases where moving X with a mode MODE to a register in
19200 RCLASS requires an extra scratch or immediate register. Return the class
19201 needed for the immediate register.
19203 For VSX and Altivec, we may need a register to convert sp+offset into
19204 reg+sp.
19206 For misaligned 64-bit gpr loads and stores we need a register to
19207 convert an offset address to indirect. */
19209 static reg_class_t
19210 rs6000_secondary_reload (bool in_p,
19211 rtx x,
19212 reg_class_t rclass_i,
19213 machine_mode mode,
19214 secondary_reload_info *sri)
19216 enum reg_class rclass = (enum reg_class) rclass_i;
19217 reg_class_t ret = ALL_REGS;
19218 enum insn_code icode;
19219 bool default_p = false;
19220 bool done_p = false;
19222 /* Allow subreg of memory before/during reload. */
19223 bool memory_p = (MEM_P (x)
19224 || (!reload_completed && GET_CODE (x) == SUBREG
19225 && MEM_P (SUBREG_REG (x))));
19227 sri->icode = CODE_FOR_nothing;
19228 sri->t_icode = CODE_FOR_nothing;
19229 sri->extra_cost = 0;
19230 icode = ((in_p)
19231 ? reg_addr[mode].reload_load
19232 : reg_addr[mode].reload_store);
19234 if (REG_P (x) || register_operand (x, mode))
19236 enum rs6000_reg_type to_type = reg_class_to_reg_type[(int)rclass];
19237 bool altivec_p = (rclass == ALTIVEC_REGS);
19238 enum rs6000_reg_type from_type = register_to_reg_type (x, &altivec_p);
19240 if (!in_p)
19241 std::swap (to_type, from_type);
19243 /* Can we do a direct move of some sort? */
19244 if (rs6000_secondary_reload_move (to_type, from_type, mode, sri,
19245 altivec_p))
19247 icode = (enum insn_code)sri->icode;
19248 default_p = false;
19249 done_p = true;
19250 ret = NO_REGS;
19254 /* Make sure 0.0 is not reloaded or forced into memory. */
19255 if (x == CONST0_RTX (mode) && VSX_REG_CLASS_P (rclass))
19257 ret = NO_REGS;
19258 default_p = false;
19259 done_p = true;
19262 /* If this is a scalar floating point value and we want to load it into the
19263 traditional Altivec registers, do it via a move via a traditional floating
19264 point register, unless we have D-form addressing. Also make sure that
19265 non-zero constants use a FPR. */
19266 if (!done_p && reg_addr[mode].scalar_in_vmx_p
19267 && !mode_supports_vmx_dform (mode)
19268 && (rclass == VSX_REGS || rclass == ALTIVEC_REGS)
19269 && (memory_p || (GET_CODE (x) == CONST_DOUBLE)))
19271 ret = FLOAT_REGS;
19272 default_p = false;
19273 done_p = true;
19276 /* Handle reload of load/stores if we have reload helper functions. */
19277 if (!done_p && icode != CODE_FOR_nothing && memory_p)
19279 int extra_cost = rs6000_secondary_reload_memory (XEXP (x, 0), rclass,
19280 mode);
19282 if (extra_cost >= 0)
19284 done_p = true;
19285 ret = NO_REGS;
19286 if (extra_cost > 0)
19288 sri->extra_cost = extra_cost;
19289 sri->icode = icode;
19294 /* Handle unaligned loads and stores of integer registers. */
19295 if (!done_p && TARGET_POWERPC64
19296 && reg_class_to_reg_type[(int)rclass] == GPR_REG_TYPE
19297 && memory_p
19298 && GET_MODE_SIZE (GET_MODE (x)) >= UNITS_PER_WORD)
19300 rtx addr = XEXP (x, 0);
19301 rtx off = address_offset (addr);
19303 if (off != NULL_RTX)
19305 unsigned int extra = GET_MODE_SIZE (GET_MODE (x)) - UNITS_PER_WORD;
19306 unsigned HOST_WIDE_INT offset = INTVAL (off);
19308 /* We need a secondary reload when our legitimate_address_p
19309 says the address is good (as otherwise the entire address
19310 will be reloaded), and the offset is not a multiple of
19311 four or we have an address wrap. Address wrap will only
19312 occur for LO_SUMs since legitimate_offset_address_p
19313 rejects addresses for 16-byte mems that will wrap. */
19314 if (GET_CODE (addr) == LO_SUM
19315 ? (1 /* legitimate_address_p allows any offset for lo_sum */
19316 && ((offset & 3) != 0
19317 || ((offset & 0xffff) ^ 0x8000) >= 0x10000 - extra))
19318 : (offset + 0x8000 < 0x10000 - extra /* legitimate_address_p */
19319 && (offset & 3) != 0))
19321 /* -m32 -mpowerpc64 needs to use a 32-bit scratch register. */
19322 if (in_p)
19323 sri->icode = ((TARGET_32BIT) ? CODE_FOR_reload_si_load
19324 : CODE_FOR_reload_di_load);
19325 else
19326 sri->icode = ((TARGET_32BIT) ? CODE_FOR_reload_si_store
19327 : CODE_FOR_reload_di_store);
19328 sri->extra_cost = 2;
19329 ret = NO_REGS;
19330 done_p = true;
19332 else
19333 default_p = true;
19335 else
19336 default_p = true;
19339 if (!done_p && !TARGET_POWERPC64
19340 && reg_class_to_reg_type[(int)rclass] == GPR_REG_TYPE
19341 && memory_p
19342 && GET_MODE_SIZE (GET_MODE (x)) > UNITS_PER_WORD)
19344 rtx addr = XEXP (x, 0);
19345 rtx off = address_offset (addr);
19347 if (off != NULL_RTX)
19349 unsigned int extra = GET_MODE_SIZE (GET_MODE (x)) - UNITS_PER_WORD;
19350 unsigned HOST_WIDE_INT offset = INTVAL (off);
19352 /* We need a secondary reload when our legitimate_address_p
19353 says the address is good (as otherwise the entire address
19354 will be reloaded), and we have a wrap.
19356 legitimate_lo_sum_address_p allows LO_SUM addresses to
19357 have any offset so test for wrap in the low 16 bits.
19359 legitimate_offset_address_p checks for the range
19360 [-0x8000,0x7fff] for mode size of 8 and [-0x8000,0x7ff7]
19361 for mode size of 16. We wrap at [0x7ffc,0x7fff] and
19362 [0x7ff4,0x7fff] respectively, so test for the
19363 intersection of these ranges, [0x7ffc,0x7fff] and
19364 [0x7ff4,0x7ff7] respectively.
19366 Note that the address we see here may have been
19367 manipulated by legitimize_reload_address. */
19368 if (GET_CODE (addr) == LO_SUM
19369 ? ((offset & 0xffff) ^ 0x8000) >= 0x10000 - extra
19370 : offset - (0x8000 - extra) < UNITS_PER_WORD)
19372 if (in_p)
19373 sri->icode = CODE_FOR_reload_si_load;
19374 else
19375 sri->icode = CODE_FOR_reload_si_store;
19376 sri->extra_cost = 2;
19377 ret = NO_REGS;
19378 done_p = true;
19380 else
19381 default_p = true;
19383 else
19384 default_p = true;
19387 if (!done_p)
19388 default_p = true;
19390 if (default_p)
19391 ret = default_secondary_reload (in_p, x, rclass, mode, sri);
19393 gcc_assert (ret != ALL_REGS);
19395 if (TARGET_DEBUG_ADDR)
19397 fprintf (stderr,
19398 "\nrs6000_secondary_reload, return %s, in_p = %s, rclass = %s, "
19399 "mode = %s",
19400 reg_class_names[ret],
19401 in_p ? "true" : "false",
19402 reg_class_names[rclass],
19403 GET_MODE_NAME (mode));
19405 if (reload_completed)
19406 fputs (", after reload", stderr);
19408 if (!done_p)
19409 fputs (", done_p not set", stderr);
19411 if (default_p)
19412 fputs (", default secondary reload", stderr);
19414 if (sri->icode != CODE_FOR_nothing)
19415 fprintf (stderr, ", reload func = %s, extra cost = %d",
19416 insn_data[sri->icode].name, sri->extra_cost);
19418 else if (sri->extra_cost > 0)
19419 fprintf (stderr, ", extra cost = %d", sri->extra_cost);
19421 fputs ("\n", stderr);
19422 debug_rtx (x);
19425 return ret;
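/* Example of the flow above (illustrative, not in the original source):
   reloading an Altivec vector from (mem (plus (reg sp) (const_int N)))
   typically lands in rs6000_secondary_reload_memory, which reports a
   positive extra cost; sri->icode is then set to the reload_* helper
   pattern and NO_REGS is returned, so reload allocates the scratch
   register the helper uses to rebuild the address as reg+reg. */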
19428 /* Better tracing for rs6000_secondary_reload_inner. */
19430 static void
19431 rs6000_secondary_reload_trace (int line, rtx reg, rtx mem, rtx scratch,
19432 bool store_p)
19434 rtx set, clobber;
19436 gcc_assert (reg != NULL_RTX && mem != NULL_RTX && scratch != NULL_RTX);
19438 fprintf (stderr, "rs6000_secondary_reload_inner:%d, type = %s\n", line,
19439 store_p ? "store" : "load");
19441 if (store_p)
19442 set = gen_rtx_SET (mem, reg);
19443 else
19444 set = gen_rtx_SET (reg, mem);
19446 clobber = gen_rtx_CLOBBER (VOIDmode, scratch);
19447 debug_rtx (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, set, clobber)));
19450 static void rs6000_secondary_reload_fail (int, rtx, rtx, rtx, bool)
19451 ATTRIBUTE_NORETURN;
19453 static void
19454 rs6000_secondary_reload_fail (int line, rtx reg, rtx mem, rtx scratch,
19455 bool store_p)
19457 rs6000_secondary_reload_trace (line, reg, mem, scratch, store_p);
19458 gcc_unreachable ();
19461 /* Fix up reload addresses for values in GPR, FPR, and VMX registers that have
19462 reload helper functions. These were identified in
19463 rs6000_secondary_reload_memory, and if reload decided to use the secondary
19464 reload, it calls the insns:
19465 reload_<RELOAD:mode>_<P:mptrsize>_store
19466 reload_<RELOAD:mode>_<P:mptrsize>_load
19468 which in turn calls this function, to do whatever is necessary to create
19469 valid addresses. */
19471 void
19472 rs6000_secondary_reload_inner (rtx reg, rtx mem, rtx scratch, bool store_p)
19474 int regno = true_regnum (reg);
19475 machine_mode mode = GET_MODE (reg);
19476 addr_mask_type addr_mask;
19477 rtx addr;
19478 rtx new_addr;
19479 rtx op_reg, op0, op1;
19480 rtx and_op;
19481 rtx cc_clobber;
19482 rtvec rv;
19484 if (regno < 0 || regno >= FIRST_PSEUDO_REGISTER || !MEM_P (mem)
19485 || !base_reg_operand (scratch, GET_MODE (scratch)))
19486 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
19488 if (IN_RANGE (regno, FIRST_GPR_REGNO, LAST_GPR_REGNO))
19489 addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_GPR];
19491 else if (IN_RANGE (regno, FIRST_FPR_REGNO, LAST_FPR_REGNO))
19492 addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_FPR];
19494 else if (IN_RANGE (regno, FIRST_ALTIVEC_REGNO, LAST_ALTIVEC_REGNO))
19495 addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_VMX];
19497 else
19498 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
19500 /* Make sure the mode is valid in this register class. */
19501 if ((addr_mask & RELOAD_REG_VALID) == 0)
19502 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
19504 if (TARGET_DEBUG_ADDR)
19505 rs6000_secondary_reload_trace (__LINE__, reg, mem, scratch, store_p);
19507 new_addr = addr = XEXP (mem, 0);
19508 switch (GET_CODE (addr))
19510 /* Does the register class support auto update forms for this mode? If
19511 not, do the update now. We don't need a scratch register, since the
19512 powerpc only supports PRE_INC, PRE_DEC, and PRE_MODIFY. */
19513 case PRE_INC:
19514 case PRE_DEC:
19515 op_reg = XEXP (addr, 0);
19516 if (!base_reg_operand (op_reg, Pmode))
19517 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
19519 if ((addr_mask & RELOAD_REG_PRE_INCDEC) == 0)
19521 emit_insn (gen_add2_insn (op_reg, GEN_INT (GET_MODE_SIZE (mode))));
19522 new_addr = op_reg;
19524 break;
19526 case PRE_MODIFY:
19527 op0 = XEXP (addr, 0);
19528 op1 = XEXP (addr, 1);
19529 if (!base_reg_operand (op0, Pmode)
19530 || GET_CODE (op1) != PLUS
19531 || !rtx_equal_p (op0, XEXP (op1, 0)))
19532 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
19534 if ((addr_mask & RELOAD_REG_PRE_MODIFY) == 0)
19536 emit_insn (gen_rtx_SET (op0, op1));
19537 new_addr = op0;
19539 break;
19541 /* Do we need to simulate AND -16 to clear the bottom address bits used
19542 in VMX load/stores? */
19543 case AND:
19544 op0 = XEXP (addr, 0);
19545 op1 = XEXP (addr, 1);
19546 if ((addr_mask & RELOAD_REG_AND_M16) == 0)
19548 if (REG_P (op0) || GET_CODE (op0) == SUBREG)
19549 op_reg = op0;
19551 else if (GET_CODE (op1) == PLUS)
19553 emit_insn (gen_rtx_SET (scratch, op1));
19554 op_reg = scratch;
19557 else
19558 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
19560 and_op = gen_rtx_AND (GET_MODE (scratch), op_reg, op1);
19561 cc_clobber = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (CCmode));
19562 rv = gen_rtvec (2, gen_rtx_SET (scratch, and_op), cc_clobber);
19563 emit_insn (gen_rtx_PARALLEL (VOIDmode, rv));
19564 new_addr = scratch;
19566 break;
19568 /* If this is an indirect address, make sure it is a base register. */
19569 case REG:
19570 case SUBREG:
19571 if (!base_reg_operand (addr, GET_MODE (addr)))
19573 emit_insn (gen_rtx_SET (scratch, addr));
19574 new_addr = scratch;
19576 break;
19578 /* If this is an indexed address, make sure the register class can handle
19579 indexed addresses for this mode. */
19580 case PLUS:
19581 op0 = XEXP (addr, 0);
19582 op1 = XEXP (addr, 1);
19583 if (!base_reg_operand (op0, Pmode))
19584 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
19586 else if (int_reg_operand (op1, Pmode))
19588 if ((addr_mask & RELOAD_REG_INDEXED) == 0)
19590 emit_insn (gen_rtx_SET (scratch, addr));
19591 new_addr = scratch;
19595 else if (mode_supports_dq_form (mode) && CONST_INT_P (op1))
19597 if (((addr_mask & RELOAD_REG_QUAD_OFFSET) == 0)
19598 || !quad_address_p (addr, mode, false))
19600 emit_insn (gen_rtx_SET (scratch, addr));
19601 new_addr = scratch;
19605 /* Make sure the register class can handle offset addresses. */
19606 else if (rs6000_legitimate_offset_address_p (mode, addr, false, true))
19608 if ((addr_mask & RELOAD_REG_OFFSET) == 0)
19610 emit_insn (gen_rtx_SET (scratch, addr));
19611 new_addr = scratch;
19615 else
19616 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
19618 break;
19620 case LO_SUM:
19621 op0 = XEXP (addr, 0);
19622 op1 = XEXP (addr, 1);
19623 if (!base_reg_operand (op0, Pmode))
19624 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
19626 else if (int_reg_operand (op1, Pmode))
19628 if ((addr_mask & RELOAD_REG_INDEXED) == 0)
19630 emit_insn (gen_rtx_SET (scratch, addr));
19631 new_addr = scratch;
19635 /* Quad offsets are restricted and can't handle normal addresses. */
19636 else if (mode_supports_dq_form (mode))
19638 emit_insn (gen_rtx_SET (scratch, addr));
19639 new_addr = scratch;
19642 /* Make sure the register class can handle offset addresses. */
19643 else if (legitimate_lo_sum_address_p (mode, addr, false))
19645 if ((addr_mask & RELOAD_REG_OFFSET) == 0)
19647 emit_insn (gen_rtx_SET (scratch, addr));
19648 new_addr = scratch;
19652 else
19653 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
19655 break;
19657 case SYMBOL_REF:
19658 case CONST:
19659 case LABEL_REF:
19660 rs6000_emit_move (scratch, addr, Pmode);
19661 new_addr = scratch;
19662 break;
19664 default:
19665 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
19668 /* Adjust the address if it changed. */
19669 if (addr != new_addr)
19671 mem = replace_equiv_address_nv (mem, new_addr);
19672 if (TARGET_DEBUG_ADDR)
19673 fprintf (stderr, "\nrs6000_secondary_reload_inner, mem adjusted.\n");
19676 /* Now create the move. */
19677 if (store_p)
19678 emit_insn (gen_rtx_SET (mem, reg));
19679 else
19680 emit_insn (gen_rtx_SET (reg, mem));
19682 return;
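/* Worked example (registers hypothetical): reloading a V4SImode value
   through (mem:V4SI (pre_inc (reg r9))) when the class lacks update
   forms takes the PRE_INC case above and becomes
       r9 = r9 + 16                     <- gen_add2_insn
       vr0 = (mem:V4SI (reg r9))        <- the final move below
   i.e. the update is done explicitly and the access turns into a plain
   register-indirect one. */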
19685 /* Convert reloads involving 64-bit gprs and misaligned offset
19686 addressing, or multiple 32-bit gprs and offsets that are too large,
19687 to use indirect addressing. */
19689 void
19690 rs6000_secondary_reload_gpr (rtx reg, rtx mem, rtx scratch, bool store_p)
19692 int regno = true_regnum (reg);
19693 enum reg_class rclass;
19694 rtx addr;
19695 rtx scratch_or_premodify = scratch;
19697 if (TARGET_DEBUG_ADDR)
19699 fprintf (stderr, "\nrs6000_secondary_reload_gpr, type = %s\n",
19700 store_p ? "store" : "load");
19701 fprintf (stderr, "reg:\n");
19702 debug_rtx (reg);
19703 fprintf (stderr, "mem:\n");
19704 debug_rtx (mem);
19705 fprintf (stderr, "scratch:\n");
19706 debug_rtx (scratch);
19709 gcc_assert (regno >= 0 && regno < FIRST_PSEUDO_REGISTER);
19710 gcc_assert (GET_CODE (mem) == MEM);
19711 rclass = REGNO_REG_CLASS (regno);
19712 gcc_assert (rclass == GENERAL_REGS || rclass == BASE_REGS);
19713 addr = XEXP (mem, 0);
19715 if (GET_CODE (addr) == PRE_MODIFY)
19717 gcc_assert (REG_P (XEXP (addr, 0))
19718 && GET_CODE (XEXP (addr, 1)) == PLUS
19719 && XEXP (XEXP (addr, 1), 0) == XEXP (addr, 0));
19720 scratch_or_premodify = XEXP (addr, 0);
19721 if (!HARD_REGISTER_P (scratch_or_premodify))
19722 /* If we have a pseudo here then reload will have arranged
19723 to have it replaced, but only in the original insn.
19724 Use the replacement here too. */
19725 scratch_or_premodify = find_replacement (&XEXP (addr, 0));
19727 /* RTL emitted by rs6000_secondary_reload_gpr uses RTL
19728 expressions from the original insn, without unsharing them.
19729 Any RTL that points into the original insn will of course
19730 have register replacements applied. That is why we don't
19731 need to look for replacements under the PLUS. */
19732 addr = XEXP (addr, 1);
19734 gcc_assert (GET_CODE (addr) == PLUS || GET_CODE (addr) == LO_SUM);
19736 rs6000_emit_move (scratch_or_premodify, addr, Pmode);
19738 mem = replace_equiv_address_nv (mem, scratch_or_premodify);
19740 /* Now create the move. */
19741 if (store_p)
19742 emit_insn (gen_rtx_SET (mem, reg));
19743 else
19744 emit_insn (gen_rtx_SET (reg, mem));
19746 return;
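/* Worked example (values hypothetical): on a 32-bit target, loading a
   DImode GPR pair from (plus base 0x7ffe) would place the second word
   at base+0x8002, beyond the reach of a signed 16-bit displacement.
   The code above copies the full address into the scratch register and
   performs the access as (mem (scratch)) instead. */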
19749 /* Given an rtx X being reloaded into a reg required to be
19750 in class CLASS, return the class of reg to actually use.
19751 In general this is just CLASS; but on some machines
19752 in some cases it is preferable to use a more restrictive class.
19754 On the RS/6000, we have to return NO_REGS when we want to reload a
19755 floating-point CONST_DOUBLE to force it to be copied to memory.
19757 We also don't want to reload integer values into floating-point
19758 registers if we can at all help it. In fact, this can
19759 cause reload to die, if it tries to generate a reload of CTR
19760 into a FP register and discovers it doesn't have the memory location
19761 required.
19763 ??? Would it be a good idea to have reload do the converse, that is
19764 try to reload floating modes into FP registers if possible? */
19767 static enum reg_class
19768 rs6000_preferred_reload_class (rtx x, enum reg_class rclass)
19770 machine_mode mode = GET_MODE (x);
19771 bool is_constant = CONSTANT_P (x);
19773 /* If a mode can't go in FPR/ALTIVEC/VSX registers, don't return a preferred
19774 reload class for it. */
19775 if ((rclass == ALTIVEC_REGS || rclass == VSX_REGS)
19776 && (reg_addr[mode].addr_mask[RELOAD_REG_VMX] & RELOAD_REG_VALID) == 0)
19777 return NO_REGS;
19779 if ((rclass == FLOAT_REGS || rclass == VSX_REGS)
19780 && (reg_addr[mode].addr_mask[RELOAD_REG_FPR] & RELOAD_REG_VALID) == 0)
19781 return NO_REGS;
19783 /* For VSX, see if we should prefer FLOAT_REGS or ALTIVEC_REGS. Do not allow
19784 the reloading of address expressions using PLUS into floating point
19785 registers. */
19786 if (TARGET_VSX && VSX_REG_CLASS_P (rclass) && GET_CODE (x) != PLUS)
19788 if (is_constant)
19790 /* Zero is always allowed in all VSX registers. */
19791 if (x == CONST0_RTX (mode))
19792 return rclass;
19794 /* If this is a vector constant that can be formed with a few Altivec
19795 instructions, we want altivec registers. */
19796 if (GET_CODE (x) == CONST_VECTOR && easy_vector_constant (x, mode))
19797 return ALTIVEC_REGS;
19799 /* If this is an integer constant that can easily be loaded into
19800 vector registers, allow it. */
19801 if (CONST_INT_P (x))
19803 HOST_WIDE_INT value = INTVAL (x);
19805 /* ISA 2.07 can generate -1 in all registers with XXLORC. ISA
19806 2.06 can generate it in the Altivec registers with
19807 VSPLTI<x>. */
19808 if (value == -1)
19810 if (TARGET_P8_VECTOR)
19811 return rclass;
19812 else if (rclass == ALTIVEC_REGS || rclass == VSX_REGS)
19813 return ALTIVEC_REGS;
19814 else
19815 return NO_REGS;
19818 /* ISA 3.0 can load -128..127 using the XXSPLTIB instruction and
19819 a sign extend in the Altivec registers. */
19820 if (IN_RANGE (value, -128, 127) && TARGET_P9_VECTOR
19821 && (rclass == ALTIVEC_REGS || rclass == VSX_REGS))
19822 return ALTIVEC_REGS;
19825 /* Force constant to memory. */
19826 return NO_REGS;
19829 /* D-form addressing can easily reload the value. */
19830 if (mode_supports_vmx_dform (mode)
19831 || mode_supports_dq_form (mode))
19832 return rclass;
19834 /* If this is a scalar floating point value and we don't have D-form
19835 addressing, prefer the traditional floating point registers so that we
19836 can use D-form (register+offset) addressing. */
19837 if (rclass == VSX_REGS
19838 && (mode == SFmode || GET_MODE_SIZE (mode) == 8))
19839 return FLOAT_REGS;
19841 /* Prefer the Altivec registers if Altivec is handling the vector
19842 operations (i.e. V16QI, V8HI, and V4SI), or if we prefer Altivec
19843 loads. */
19844 if (VECTOR_UNIT_ALTIVEC_P (mode) || VECTOR_MEM_ALTIVEC_P (mode)
19845 || mode == V1TImode)
19846 return ALTIVEC_REGS;
19848 return rclass;
19851 if (is_constant || GET_CODE (x) == PLUS)
19853 if (reg_class_subset_p (GENERAL_REGS, rclass))
19854 return GENERAL_REGS;
19855 if (reg_class_subset_p (BASE_REGS, rclass))
19856 return BASE_REGS;
19857 return NO_REGS;
19860 if (GET_MODE_CLASS (mode) == MODE_INT && rclass == NON_SPECIAL_REGS)
19861 return GENERAL_REGS;
19863 return rclass;
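/* Worked examples of the constant handling above (values hypothetical,
   rclass == VSX_REGS):
     -1       with ISA 2.07   -> VSX_REGS     (XXLORC builds it anywhere)
     100      with ISA 3.0    -> ALTIVEC_REGS (XXSPLTIB + sign extend)
     0x12345  (no splat form) -> NO_REGS      (forced to memory). */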
19866 /* Debug version of rs6000_preferred_reload_class. */
19867 static enum reg_class
19868 rs6000_debug_preferred_reload_class (rtx x, enum reg_class rclass)
19870 enum reg_class ret = rs6000_preferred_reload_class (x, rclass);
19872 fprintf (stderr,
19873 "\nrs6000_preferred_reload_class, return %s, rclass = %s, "
19874 "mode = %s, x:\n",
19875 reg_class_names[ret], reg_class_names[rclass],
19876 GET_MODE_NAME (GET_MODE (x)));
19877 debug_rtx (x);
19879 return ret;
19882 /* If we are copying between FP or AltiVec registers and anything else, we need
19883 a memory location. The exception is when we are targeting ppc64 and the
19884 direct moves between fpr and gpr registers are available. Also, under VSX, you
19885 can copy vector registers from the FP register set to the Altivec register
19886 set and vice versa. */
19888 static bool
19889 rs6000_secondary_memory_needed (machine_mode mode,
19890 reg_class_t from_class,
19891 reg_class_t to_class)
19893 enum rs6000_reg_type from_type, to_type;
19894 bool altivec_p = ((from_class == ALTIVEC_REGS)
19895 || (to_class == ALTIVEC_REGS));
19897 /* If a simple/direct move is available, we don't need secondary memory */
19898 from_type = reg_class_to_reg_type[(int)from_class];
19899 to_type = reg_class_to_reg_type[(int)to_class];
19901 if (rs6000_secondary_reload_move (to_type, from_type, mode,
19902 (secondary_reload_info *)0, altivec_p))
19903 return false;
19905 /* If we have a floating point or vector register class, we need to use
19906 memory to transfer the data. */
19907 if (IS_FP_VECT_REG_TYPE (from_type) || IS_FP_VECT_REG_TYPE (to_type))
19908 return true;
19910 return false;
19913 /* Debug version of rs6000_secondary_memory_needed. */
19914 static bool
19915 rs6000_debug_secondary_memory_needed (machine_mode mode,
19916 reg_class_t from_class,
19917 reg_class_t to_class)
19919 bool ret = rs6000_secondary_memory_needed (mode, from_class, to_class);
19921 fprintf (stderr,
19922 "rs6000_secondary_memory_needed, return: %s, from_class = %s, "
19923 "to_class = %s, mode = %s\n",
19924 ret ? "true" : "false",
19925 reg_class_names[from_class],
19926 reg_class_names[to_class],
19927 GET_MODE_NAME (mode));
19929 return ret;
19932 /* Return the register class of a scratch register needed to copy IN into
19933 or out of a register in RCLASS in MODE. If it can be done directly,
19934 NO_REGS is returned. */
19936 static enum reg_class
19937 rs6000_secondary_reload_class (enum reg_class rclass, machine_mode mode,
19938 rtx in)
19940 int regno;
19942 if (TARGET_ELF || (DEFAULT_ABI == ABI_DARWIN
19943 #if TARGET_MACHO
19944 && MACHOPIC_INDIRECT
19945 #endif
19948 /* We cannot copy a symbolic operand directly into anything
19949 other than BASE_REGS for TARGET_ELF. So indicate that a
19950 register from BASE_REGS is needed as an intermediate
19951 register.
19953 On Darwin, pic addresses require a load from memory, which
19954 needs a base register. */
19955 if (rclass != BASE_REGS
19956 && (GET_CODE (in) == SYMBOL_REF
19957 || GET_CODE (in) == HIGH
19958 || GET_CODE (in) == LABEL_REF
19959 || GET_CODE (in) == CONST))
19960 return BASE_REGS;
19963 if (GET_CODE (in) == REG)
19965 regno = REGNO (in);
19966 if (regno >= FIRST_PSEUDO_REGISTER)
19968 regno = true_regnum (in);
19969 if (regno >= FIRST_PSEUDO_REGISTER)
19970 regno = -1;
19973 else if (GET_CODE (in) == SUBREG)
19975 regno = true_regnum (in);
19976 if (regno >= FIRST_PSEUDO_REGISTER)
19977 regno = -1;
19979 else
19980 regno = -1;
19982 /* If we have VSX register moves, prefer moving scalar values between
19983 Altivec registers and GPR by going via an FPR (and then via memory)
19984 instead of reloading the secondary memory address for Altivec moves. */
19985 if (TARGET_VSX
19986 && GET_MODE_SIZE (mode) < 16
19987 && !mode_supports_vmx_dform (mode)
19988 && (((rclass == GENERAL_REGS || rclass == BASE_REGS)
19989 && (regno >= 0 && ALTIVEC_REGNO_P (regno)))
19990 || ((rclass == VSX_REGS || rclass == ALTIVEC_REGS)
19991 && (regno >= 0 && INT_REGNO_P (regno)))))
19992 return FLOAT_REGS;
19994 /* We can place anything into GENERAL_REGS and can put GENERAL_REGS
19995 into anything. */
19996 if (rclass == GENERAL_REGS || rclass == BASE_REGS
19997 || (regno >= 0 && INT_REGNO_P (regno)))
19998 return NO_REGS;
20000 /* Constants, memory, and VSX registers can go into VSX registers (both the
20001 traditional floating point and the altivec registers). */
20002 if (rclass == VSX_REGS
20003 && (regno == -1 || VSX_REGNO_P (regno)))
20004 return NO_REGS;
20006 /* Constants, memory, and FP registers can go into FP registers. */
20007 if ((regno == -1 || FP_REGNO_P (regno))
20008 && (rclass == FLOAT_REGS || rclass == NON_SPECIAL_REGS))
20009 return (mode != SDmode || lra_in_progress) ? NO_REGS : GENERAL_REGS;
20011 /* Memory, and AltiVec registers can go into AltiVec registers. */
20012 if ((regno == -1 || ALTIVEC_REGNO_P (regno))
20013 && rclass == ALTIVEC_REGS)
20014 return NO_REGS;
20016 /* We can copy among the CR registers. */
20017 if ((rclass == CR_REGS || rclass == CR0_REGS)
20018 && regno >= 0 && CR_REGNO_P (regno))
20019 return NO_REGS;
20021 /* Otherwise, we need GENERAL_REGS. */
20022 return GENERAL_REGS;
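/* Worked example (hypothetical): under VSX without D-form vector
   addressing, an SFmode value sitting in an Altivec register that must
   end up in GENERAL_REGS gets FLOAT_REGS above, so the copy is staged
   through an FPR (and then memory) instead of reloading an Altivec
   memory address. */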
20025 /* Debug version of rs6000_secondary_reload_class. */
20026 static enum reg_class
20027 rs6000_debug_secondary_reload_class (enum reg_class rclass,
20028 machine_mode mode, rtx in)
20030 enum reg_class ret = rs6000_secondary_reload_class (rclass, mode, in);
20031 fprintf (stderr,
20032 "\nrs6000_secondary_reload_class, return %s, rclass = %s, "
20033 "mode = %s, input rtx:\n",
20034 reg_class_names[ret], reg_class_names[rclass],
20035 GET_MODE_NAME (mode));
20036 debug_rtx (in);
20038 return ret;
20041 /* Implement TARGET_CAN_CHANGE_MODE_CLASS. */
20043 static bool
20044 rs6000_can_change_mode_class (machine_mode from,
20045 machine_mode to,
20046 reg_class_t rclass)
20048 unsigned from_size = GET_MODE_SIZE (from);
20049 unsigned to_size = GET_MODE_SIZE (to);
20051 if (from_size != to_size)
20053 enum reg_class xclass = (TARGET_VSX) ? VSX_REGS : FLOAT_REGS;
20055 if (reg_classes_intersect_p (xclass, rclass))
20057 unsigned to_nregs = hard_regno_nregs (FIRST_FPR_REGNO, to);
20058 unsigned from_nregs = hard_regno_nregs (FIRST_FPR_REGNO, from);
20059 bool to_float128_vector_p = FLOAT128_VECTOR_P (to);
20060 bool from_float128_vector_p = FLOAT128_VECTOR_P (from);
20062 /* Don't allow 64-bit types to overlap with 128-bit types that take a
20063 single register under VSX because the scalar part of the register
20064 is in the upper 64-bits, and not the lower 64-bits. Types like
20065 TFmode/TDmode that take 2 scalar registers can overlap. 128-bit
20066 IEEE floating point can't overlap, and neither can small
20067 values. */
20069 if (to_float128_vector_p && from_float128_vector_p)
20070 return true;
20072 else if (to_float128_vector_p || from_float128_vector_p)
20073 return false;
20075 /* TDmode in floating-mode registers must always go into a register
20076 pair with the most significant word in the even-numbered register
20077 to match ISA requirements. In little-endian mode, this does not
20078 match subreg numbering, so we cannot allow subregs. */
20079 if (!BYTES_BIG_ENDIAN && (to == TDmode || from == TDmode))
20080 return false;
20082 if (from_size < 8 || to_size < 8)
20083 return false;
20085 if (from_size == 8 && (8 * to_nregs) != to_size)
20086 return false;
20088 if (to_size == 8 && (8 * from_nregs) != from_size)
20089 return false;
20091 return true;
20093 else
20094 return true;
20097 /* Since the VSX register set includes traditional floating point registers
20098 and altivec registers, just check for the size being different instead of
20099 trying to check whether the modes are vector modes. Otherwise it won't
20100 allow say DF and DI to change classes. For types like TFmode and TDmode
20101 that take 2 64-bit registers, rather than a single 128-bit register, don't
20102 allow subregs of those types to other 128-bit types. */
20103 if (TARGET_VSX && VSX_REG_CLASS_P (rclass))
20105 unsigned num_regs = (from_size + 15) / 16;
20106 if (hard_regno_nregs (FIRST_FPR_REGNO, to) > num_regs
20107 || hard_regno_nregs (FIRST_FPR_REGNO, from) > num_regs)
20108 return false;
20110 return (from_size == 8 || from_size == 16);
20113 if (TARGET_ALTIVEC && rclass == ALTIVEC_REGS
20114 && (ALTIVEC_VECTOR_MODE (from) + ALTIVEC_VECTOR_MODE (to)) == 1)
20115 return false;
20117 return true;
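/* Worked example (hypothetical): DFmode <-> DImode subregs of an FPR
   are allowed (same 8-byte size), but (subreg:DI (reg:V2DI ...)) is
   rejected above because the 64-bit scalar lives in the upper half of
   the 128-bit VSX register, so the mode change is not a simple
   reinterpretation of the same bits. */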
20120 /* Debug version of rs6000_can_change_mode_class. */
20121 static bool
20122 rs6000_debug_can_change_mode_class (machine_mode from,
20123 machine_mode to,
20124 reg_class_t rclass)
20126 bool ret = rs6000_can_change_mode_class (from, to, rclass);
20128 fprintf (stderr,
20129 "rs6000_can_change_mode_class, return %s, from = %s, "
20130 "to = %s, rclass = %s\n",
20131 ret ? "true" : "false",
20132 GET_MODE_NAME (from), GET_MODE_NAME (to),
20133 reg_class_names[rclass]);
20135 return ret;
20138 /* Return a string to do a move operation of 128 bits of data. */
20140 const char *
20141 rs6000_output_move_128bit (rtx operands[])
20143 rtx dest = operands[0];
20144 rtx src = operands[1];
20145 machine_mode mode = GET_MODE (dest);
20146 int dest_regno;
20147 int src_regno;
20148 bool dest_gpr_p, dest_fp_p, dest_vmx_p, dest_vsx_p;
20149 bool src_gpr_p, src_fp_p, src_vmx_p, src_vsx_p;
20151 if (REG_P (dest))
20153 dest_regno = REGNO (dest);
20154 dest_gpr_p = INT_REGNO_P (dest_regno);
20155 dest_fp_p = FP_REGNO_P (dest_regno);
20156 dest_vmx_p = ALTIVEC_REGNO_P (dest_regno);
20157 dest_vsx_p = dest_fp_p | dest_vmx_p;
20159 else
20161 dest_regno = -1;
20162 dest_gpr_p = dest_fp_p = dest_vmx_p = dest_vsx_p = false;
20165 if (REG_P (src))
20167 src_regno = REGNO (src);
20168 src_gpr_p = INT_REGNO_P (src_regno);
20169 src_fp_p = FP_REGNO_P (src_regno);
20170 src_vmx_p = ALTIVEC_REGNO_P (src_regno);
20171 src_vsx_p = src_fp_p | src_vmx_p;
20173 else
20175 src_regno = -1;
20176 src_gpr_p = src_fp_p = src_vmx_p = src_vsx_p = false;
20179 /* Register moves. */
20180 if (dest_regno >= 0 && src_regno >= 0)
20182 if (dest_gpr_p)
20184 if (src_gpr_p)
20185 return "#";
20187 if (TARGET_DIRECT_MOVE_128 && src_vsx_p)
20188 return (WORDS_BIG_ENDIAN
20189 ? "mfvsrd %0,%x1\n\tmfvsrld %L0,%x1"
20190 : "mfvsrd %L0,%x1\n\tmfvsrld %0,%x1");
20192 else if (TARGET_VSX && TARGET_DIRECT_MOVE && src_vsx_p)
20193 return "#";
20196 else if (TARGET_VSX && dest_vsx_p)
20198 if (src_vsx_p)
20199 return "xxlor %x0,%x1,%x1";
20201 else if (TARGET_DIRECT_MOVE_128 && src_gpr_p)
20202 return (WORDS_BIG_ENDIAN
20203 ? "mtvsrdd %x0,%1,%L1"
20204 : "mtvsrdd %x0,%L1,%1");
20206 else if (TARGET_DIRECT_MOVE && src_gpr_p)
20207 return "#";
20210 else if (TARGET_ALTIVEC && dest_vmx_p && src_vmx_p)
20211 return "vor %0,%1,%1";
20213 else if (dest_fp_p && src_fp_p)
20214 return "#";
20217 /* Loads. */
20218 else if (dest_regno >= 0 && MEM_P (src))
20220 if (dest_gpr_p)
20222 if (TARGET_QUAD_MEMORY && quad_load_store_p (dest, src))
20223 return "lq %0,%1";
20224 else
20225 return "#";
20228 else if (TARGET_ALTIVEC && dest_vmx_p
20229 && altivec_indexed_or_indirect_operand (src, mode))
20230 return "lvx %0,%y1";
20232 else if (TARGET_VSX && dest_vsx_p)
20234 if (mode_supports_dq_form (mode)
20235 && quad_address_p (XEXP (src, 0), mode, true))
20236 return "lxv %x0,%1";
20238 else if (TARGET_P9_VECTOR)
20239 return "lxvx %x0,%y1";
20241 else if (mode == V16QImode || mode == V8HImode || mode == V4SImode)
20242 return "lxvw4x %x0,%y1";
20244 else
20245 return "lxvd2x %x0,%y1";
20248 else if (TARGET_ALTIVEC && dest_vmx_p)
20249 return "lvx %0,%y1";
20251 else if (dest_fp_p)
20252 return "#";
20255 /* Stores. */
20256 else if (src_regno >= 0 && MEM_P (dest))
20258 if (src_gpr_p)
20260 if (TARGET_QUAD_MEMORY && quad_load_store_p (dest, src))
20261 return "stq %1,%0";
20262 else
20263 return "#";
20266 else if (TARGET_ALTIVEC && src_vmx_p
20267 && altivec_indexed_or_indirect_operand (dest, mode))
20268 return "stvx %1,%y0";
20270 else if (TARGET_VSX && src_vsx_p)
20272 if (mode_supports_dq_form (mode)
20273 && quad_address_p (XEXP (dest, 0), mode, true))
20274 return "stxv %x1,%0";
20276 else if (TARGET_P9_VECTOR)
20277 return "stxvx %x1,%y0";
20279 else if (mode == V16QImode || mode == V8HImode || mode == V4SImode)
20280 return "stxvw4x %x1,%y0";
20282 else
20283 return "stxvd2x %x1,%y0";
20286 else if (TARGET_ALTIVEC && src_vmx_p)
20287 return "stvx %1,%y0";
20289 else if (src_fp_p)
20290 return "#";
20293 /* Constants. */
20294 else if (dest_regno >= 0
20295 && (GET_CODE (src) == CONST_INT
20296 || GET_CODE (src) == CONST_WIDE_INT
20297 || GET_CODE (src) == CONST_DOUBLE
20298 || GET_CODE (src) == CONST_VECTOR))
20300 if (dest_gpr_p)
20301 return "#";
20303 else if ((dest_vmx_p && TARGET_ALTIVEC)
20304 || (dest_vsx_p && TARGET_VSX))
20305 return output_vec_const_move (operands);
20308 fatal_insn ("Bad 128-bit move", gen_rtx_SET (dest, src));
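/* For example (operands hypothetical): a V2DImode GPR <- VSX copy on a
   big-endian ISA 3.0 target takes the TARGET_DIRECT_MOVE_128 branch
   above and returns "mfvsrd %0,%x1\n\tmfvsrld %L0,%x1"; the same copy
   without 128-bit direct moves returns "#" and is split after reload. */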
20311 /* Validate a 128-bit move. */
20312 bool
20313 rs6000_move_128bit_ok_p (rtx operands[])
20315 machine_mode mode = GET_MODE (operands[0]);
20316 return (gpc_reg_operand (operands[0], mode)
20317 || gpc_reg_operand (operands[1], mode));
20320 /* Return true if a 128-bit move needs to be split. */
20321 bool
20322 rs6000_split_128bit_ok_p (rtx operands[])
20324 if (!reload_completed)
20325 return false;
20327 if (!gpr_or_gpr_p (operands[0], operands[1]))
20328 return false;
20330 if (quad_load_store_p (operands[0], operands[1]))
20331 return false;
20333 return true;
20337 /* Given a comparison operation, return the bit number in CCR to test. We
20338 know this is a valid comparison.
20340 SCC_P is 1 if this is for an scc. That means that %D will have been
20341 used instead of %C, so the bits will be in different places.
20343 Return -1 if OP isn't a valid comparison for some reason. */
20345 int
20346 ccr_bit (rtx op, int scc_p)
20348 enum rtx_code code = GET_CODE (op);
20349 machine_mode cc_mode;
20350 int cc_regnum;
20351 int base_bit;
20352 rtx reg;
20354 if (!COMPARISON_P (op))
20355 return -1;
20357 reg = XEXP (op, 0);
20359 gcc_assert (GET_CODE (reg) == REG && CR_REGNO_P (REGNO (reg)));
20361 cc_mode = GET_MODE (reg);
20362 cc_regnum = REGNO (reg);
20363 base_bit = 4 * (cc_regnum - CR0_REGNO);
20365 validate_condition_mode (code, cc_mode);
20367 /* When generating a sCOND operation, only positive conditions are
20368 allowed. */
20369 gcc_assert (!scc_p
20370 || code == EQ || code == GT || code == LT || code == UNORDERED
20371 || code == GTU || code == LTU);
20373 switch (code)
20375 case NE:
20376 return scc_p ? base_bit + 3 : base_bit + 2;
20377 case EQ:
20378 return base_bit + 2;
20379 case GT: case GTU: case UNLE:
20380 return base_bit + 1;
20381 case LT: case LTU: case UNGE:
20382 return base_bit;
20383 case ORDERED: case UNORDERED:
20384 return base_bit + 3;
20386 case GE: case GEU:
20387 /* If scc, we will have done a cror to put the bit in the
20388 unordered position. So test that bit. For integer, this is ! LT
20389 unless this is an scc insn. */
20390 return scc_p ? base_bit + 3 : base_bit;
20392 case LE: case LEU:
20393 return scc_p ? base_bit + 3 : base_bit + 1;
20395 default:
20396 gcc_unreachable ();
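/* Worked example: for (gt (reg:CC cr2) (const_int 0)), base_bit is
   4 * (cr2 - cr0) == 8, so ccr_bit returns 9, the GT bit of CR2. For
   an scc GE, it instead returns base_bit + 3 == 11, the unordered slot
   that a preceding cror has set up. */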
20400 /* Return the GOT register. */
20402 rtx
20403 rs6000_got_register (rtx value ATTRIBUTE_UNUSED)
20405 /* The second flow pass currently (June 1999) can't update
20406 regs_ever_live without disturbing other parts of the compiler, so
20407 update it here to make the prolog/epilogue code happy. */
20408 if (!can_create_pseudo_p ()
20409 && !df_regs_ever_live_p (RS6000_PIC_OFFSET_TABLE_REGNUM))
20410 df_set_regs_ever_live (RS6000_PIC_OFFSET_TABLE_REGNUM, true);
20412 crtl->uses_pic_offset_table = 1;
20414 return pic_offset_table_rtx;
20417 static rs6000_stack_t stack_info;
20419 /* Function to init struct machine_function.
20420 This will be called, via a pointer variable,
20421 from push_function_context. */
20423 static struct machine_function *
20424 rs6000_init_machine_status (void)
20426 stack_info.reload_completed = 0;
20427 return ggc_cleared_alloc<machine_function> ();
20430 #define INT_P(X) (GET_CODE (X) == CONST_INT && GET_MODE (X) == VOIDmode)
20432 /* Write out a function code label. */
20434 void
20435 rs6000_output_function_entry (FILE *file, const char *fname)
20437 if (fname[0] != '.')
20439 switch (DEFAULT_ABI)
20441 default:
20442 gcc_unreachable ();
20444 case ABI_AIX:
20445 if (DOT_SYMBOLS)
20446 putc ('.', file);
20447 else
20448 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "L.");
20449 break;
20451 case ABI_ELFv2:
20452 case ABI_V4:
20453 case ABI_DARWIN:
20454 break;
20458 RS6000_OUTPUT_BASENAME (file, fname);
20461 /* Print an operand. Recognize special options, documented below. */
20463 #if TARGET_ELF
20464 #define SMALL_DATA_RELOC ((rs6000_sdata == SDATA_EABI) ? "sda21" : "sdarel")
20465 #define SMALL_DATA_REG ((rs6000_sdata == SDATA_EABI) ? 0 : 13)
20466 #else
20467 #define SMALL_DATA_RELOC "sda21"
20468 #define SMALL_DATA_REG 0
20469 #endif
20471 void
20472 print_operand (FILE *file, rtx x, int code)
20474 int i;
20475 unsigned HOST_WIDE_INT uval;
20477 switch (code)
20479 /* %a is output_address. */
20481 /* %c is output_addr_const if a CONSTANT_ADDRESS_P, otherwise
20482 output_operand. */
20484 case 'D':
20485 /* Like 'J' but get to the GT bit only. */
20486 gcc_assert (REG_P (x));
20488 /* Bit 1 is GT bit. */
20489 i = 4 * (REGNO (x) - CR0_REGNO) + 1;
20491 /* Add one for shift count in rlinm for scc. */
20492 fprintf (file, "%d", i + 1);
20493 return;
20495 case 'e':
20496 /* If the low 16 bits are 0, but some other bit is set, write 's'. */
20497 if (! INT_P (x))
20499 output_operand_lossage ("invalid %%e value");
20500 return;
20503 uval = INTVAL (x);
20504 if ((uval & 0xffff) == 0 && uval != 0)
20505 putc ('s', file);
20506 return;
20508 case 'E':
20509 /* X is a CR register. Print the number of the EQ bit of the CR */
20510 if (GET_CODE (x) != REG || ! CR_REGNO_P (REGNO (x)))
20511 output_operand_lossage ("invalid %%E value");
20512 else
20513 fprintf (file, "%d", 4 * (REGNO (x) - CR0_REGNO) + 2);
20514 return;
20516 case 'f':
20517 /* X is a CR register. Print the shift count needed to move it
20518 to the high-order four bits. */
20519 if (GET_CODE (x) != REG || ! CR_REGNO_P (REGNO (x)))
20520 output_operand_lossage ("invalid %%f value");
20521 else
20522 fprintf (file, "%d", 4 * (REGNO (x) - CR0_REGNO));
20523 return;
20525 case 'F':
20526 /* Similar, but print the count for the rotate in the opposite
20527 direction. */
20528 if (GET_CODE (x) != REG || ! CR_REGNO_P (REGNO (x)))
20529 output_operand_lossage ("invalid %%F value");
20530 else
20531 fprintf (file, "%d", 32 - 4 * (REGNO (x) - CR0_REGNO));
20532 return;
20534 case 'G':
20535 /* X is a constant integer. If it is negative, print "m",
20536 otherwise print "z". This is to make an aze or ame insn. */
20537 if (GET_CODE (x) != CONST_INT)
20538 output_operand_lossage ("invalid %%G value");
20539 else if (INTVAL (x) >= 0)
20540 putc ('z', file);
20541 else
20542 putc ('m', file);
20543 return;
20545 case 'h':
20546 /* If constant, output low-order five bits. Otherwise, write
20547 normally. */
20548 if (INT_P (x))
20549 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x) & 31);
20550 else
20551 print_operand (file, x, 0);
20552 return;
20554 case 'H':
20555 /* If constant, output low-order six bits. Otherwise, write
20556 normally. */
20557 if (INT_P (x))
20558 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x) & 63);
20559 else
20560 print_operand (file, x, 0);
20561 return;
20563 case 'I':
20564 /* Print `i' if this is a constant, else nothing. */
20565 if (INT_P (x))
20566 putc ('i', file);
20567 return;
20569 case 'j':
20570 /* Write the bit number in CCR for jump. */
20571 i = ccr_bit (x, 0);
20572 if (i == -1)
20573 output_operand_lossage ("invalid %%j code");
20574 else
20575 fprintf (file, "%d", i);
20576 return;
20578 case 'J':
20579 /* Similar, but add one for shift count in rlinm for scc and pass
20580 scc flag to `ccr_bit'. */
20581 i = ccr_bit (x, 1);
20582 if (i == -1)
20583 output_operand_lossage ("invalid %%J code");
20584 else
20585 /* If we want bit 31, write a shift count of zero, not 32. */
20586 fprintf (file, "%d", i == 31 ? 0 : i + 1);
20587 return;
20589 case 'k':
20590 /* X must be a constant. Write the 1's complement of the
20591 constant. */
20592 if (! INT_P (x))
20593 output_operand_lossage ("invalid %%k value");
20594 else
20595 fprintf (file, HOST_WIDE_INT_PRINT_DEC, ~ INTVAL (x));
20596 return;
20598 case 'K':
20599 /* X must be a symbolic constant on ELF. Write an
20600 expression suitable for an 'addi' that adds in the low 16
20601 bits of the MEM. */
20602 if (GET_CODE (x) == CONST)
20604 if (GET_CODE (XEXP (x, 0)) != PLUS
20605 || (GET_CODE (XEXP (XEXP (x, 0), 0)) != SYMBOL_REF
20606 && GET_CODE (XEXP (XEXP (x, 0), 0)) != LABEL_REF)
20607 || GET_CODE (XEXP (XEXP (x, 0), 1)) != CONST_INT)
20608 output_operand_lossage ("invalid %%K value");
20610 print_operand_address (file, x);
20611 fputs ("@l", file);
20612 return;
20614 /* %l is output_asm_label. */
20616 case 'L':
20617 /* Write second word of DImode or DFmode reference. Works on register
20618 or non-indexed memory only. */
20619 if (REG_P (x))
20620 fputs (reg_names[REGNO (x) + 1], file);
20621 else if (MEM_P (x))
20623 machine_mode mode = GET_MODE (x);
20624 /* Handle possible auto-increment. Since it is pre-increment and
20625 we have already done it, we can just use an offset of word. */
20626 if (GET_CODE (XEXP (x, 0)) == PRE_INC
20627 || GET_CODE (XEXP (x, 0)) == PRE_DEC)
20628 output_address (mode, plus_constant (Pmode, XEXP (XEXP (x, 0), 0),
20629 UNITS_PER_WORD));
20630 else if (GET_CODE (XEXP (x, 0)) == PRE_MODIFY)
20631 output_address (mode, plus_constant (Pmode, XEXP (XEXP (x, 0), 0),
20632 UNITS_PER_WORD));
20633 else
20634 output_address (mode, XEXP (adjust_address_nv (x, SImode,
20635 UNITS_PER_WORD),
20636 0));
20638 if (small_data_operand (x, GET_MODE (x)))
20639 fprintf (file, "@%s(%s)", SMALL_DATA_RELOC,
20640 reg_names[SMALL_DATA_REG]);
20642 return;
20644 case 'N': /* Unused */
20645 /* Write the number of elements in the vector times 4. */
20646 if (GET_CODE (x) != PARALLEL)
20647 output_operand_lossage ("invalid %%N value");
20648 else
20649 fprintf (file, "%d", XVECLEN (x, 0) * 4);
20650 return;
20652 case 'O': /* Unused */
20653 /* Similar, but subtract 1 first. */
20654 if (GET_CODE (x) != PARALLEL)
20655 output_operand_lossage ("invalid %%O value");
20656 else
20657 fprintf (file, "%d", (XVECLEN (x, 0) - 1) * 4);
20658 return;
20660 case 'p':
20661 /* X is a CONST_INT that is a power of two. Output the logarithm. */
20662 if (! INT_P (x)
20663 || INTVAL (x) < 0
20664 || (i = exact_log2 (INTVAL (x))) < 0)
20665 output_operand_lossage ("invalid %%p value");
20666 else
20667 fprintf (file, "%d", i);
20668 return;
20670 case 'P':
20671 /* The operand must be an indirect memory reference. The result
20672 is the register name. */
20673 if (GET_CODE (x) != MEM || GET_CODE (XEXP (x, 0)) != REG
20674 || REGNO (XEXP (x, 0)) >= 32)
20675 output_operand_lossage ("invalid %%P value");
20676 else
20677 fputs (reg_names[REGNO (XEXP (x, 0))], file);
20678 return;
20680 case 'q':
20681 /* This outputs the logical code corresponding to a boolean
20682 expression. The expression may have one or both operands
20683 negated (if one, only the first one). For condition register
20684 logical operations, it will also treat the negated
20685 CR codes as NOTs, but not handle NOTs of them. */
20687 const char *const *t = 0;
20688 const char *s;
20689 enum rtx_code code = GET_CODE (x);
20690 static const char * const tbl[3][3] = {
20691 { "and", "andc", "nor" },
20692 { "or", "orc", "nand" },
20693 { "xor", "eqv", "xor" } };
20695 if (code == AND)
20696 t = tbl[0];
20697 else if (code == IOR)
20698 t = tbl[1];
20699 else if (code == XOR)
20700 t = tbl[2];
20701 else
20702 output_operand_lossage ("invalid %%q value");
20704 if (GET_CODE (XEXP (x, 0)) != NOT)
20705 s = t[0];
20706 else
20708 if (GET_CODE (XEXP (x, 1)) == NOT)
20709 s = t[2];
20710 else
20711 s = t[1];
20714 fputs (s, file);
20716 return;
20718 case 'Q':
20719 if (! TARGET_MFCRF)
20720 return;
20721 fputc (',', file);
20722 /* FALLTHRU */
20724 case 'R':
20725 /* X is a CR register. Print the mask for `mtcrf'. */
20726 if (GET_CODE (x) != REG || ! CR_REGNO_P (REGNO (x)))
20727 output_operand_lossage ("invalid %%R value");
20728 else
20729 fprintf (file, "%d", 128 >> (REGNO (x) - CR0_REGNO));
20730 return;
20732 case 's':
20733 /* Low 5 bits of 32 - value */
20734 if (! INT_P (x))
20735 output_operand_lossage ("invalid %%s value");
20736 else
20737 fprintf (file, HOST_WIDE_INT_PRINT_DEC, (32 - INTVAL (x)) & 31);
20738 return;
20740 case 't':
20741 /* Like 'J' but get to the OVERFLOW/UNORDERED bit. */
20742 gcc_assert (REG_P (x) && GET_MODE (x) == CCmode);
20744 /* Bit 3 is OV bit. */
20745 i = 4 * (REGNO (x) - CR0_REGNO) + 3;
20747 /* If we want bit 31, write a shift count of zero, not 32. */
20748 fprintf (file, "%d", i == 31 ? 0 : i + 1);
20749 return;
20751 case 'T':
20752 /* Print the symbolic name of a branch target register. */
20753 if (GET_CODE (x) != REG || (REGNO (x) != LR_REGNO
20754 && REGNO (x) != CTR_REGNO))
20755 output_operand_lossage ("invalid %%T value");
20756 else if (REGNO (x) == LR_REGNO)
20757 fputs ("lr", file);
20758 else
20759 fputs ("ctr", file);
20760 return;
20762 case 'u':
20763 /* High-order or low-order 16 bits of constant, whichever is non-zero,
20764 for use in unsigned operand. */
20765 if (! INT_P (x))
20767 output_operand_lossage ("invalid %%u value");
20768 return;
20771 uval = INTVAL (x);
20772 if ((uval & 0xffff) == 0)
20773 uval >>= 16;
20775 fprintf (file, HOST_WIDE_INT_PRINT_HEX, uval & 0xffff);
20776 return;
20778 case 'v':
20779 /* High-order 16 bits of constant for use in signed operand. */
20780 if (! INT_P (x))
20781 output_operand_lossage ("invalid %%v value");
20782 else
20783 fprintf (file, HOST_WIDE_INT_PRINT_HEX,
20784 (INTVAL (x) >> 16) & 0xffff);
20785 return;
20787 case 'U':
20788 /* Print `u' if this has an auto-increment or auto-decrement. */
20789 if (MEM_P (x)
20790 && (GET_CODE (XEXP (x, 0)) == PRE_INC
20791 || GET_CODE (XEXP (x, 0)) == PRE_DEC
20792 || GET_CODE (XEXP (x, 0)) == PRE_MODIFY))
20793 putc ('u', file);
20794 return;
20796 case 'V':
20797 /* Print the trap code for this operand. */
20798 switch (GET_CODE (x))
20800 case EQ:
20801 fputs ("eq", file); /* 4 */
20802 break;
20803 case NE:
20804 fputs ("ne", file); /* 24 */
20805 break;
20806 case LT:
20807 fputs ("lt", file); /* 16 */
20808 break;
20809 case LE:
20810 fputs ("le", file); /* 20 */
20811 break;
20812 case GT:
20813 fputs ("gt", file); /* 8 */
20814 break;
20815 case GE:
20816 fputs ("ge", file); /* 12 */
20817 break;
20818 case LTU:
20819 fputs ("llt", file); /* 2 */
20820 break;
20821 case LEU:
20822 fputs ("lle", file); /* 6 */
20823 break;
20824 case GTU:
20825 fputs ("lgt", file); /* 1 */
20826 break;
20827 case GEU:
20828 fputs ("lge", file); /* 5 */
20829 break;
20830 default:
20831 gcc_unreachable ();
20833 break;
20835 case 'w':
20836 /* If constant, low-order 16 bits of constant, signed. Otherwise, write
20837 normally. */
20838 if (INT_P (x))
20839 fprintf (file, HOST_WIDE_INT_PRINT_DEC,
20840 ((INTVAL (x) & 0xffff) ^ 0x8000) - 0x8000);
20841 else
20842 print_operand (file, x, 0);
20843 return;
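/* E.g. a hypothetical constant 0x1fffe prints as -2 here:
   (0x1fffe & 0xffff) == 0xfffe, 0xfffe ^ 0x8000 == 0x7ffe, and
   0x7ffe - 0x8000 == -2, i.e. the low 16 bits sign-extended. */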
20845 case 'x':
20846 /* X is a FPR or Altivec register used in a VSX context. */
20847 if (GET_CODE (x) != REG || !VSX_REGNO_P (REGNO (x)))
20848 output_operand_lossage ("invalid %%x value");
20849 else
20851 int reg = REGNO (x);
20852 int vsx_reg = (FP_REGNO_P (reg)
20853 ? reg - 32
20854 : reg - FIRST_ALTIVEC_REGNO + 32);
20856 #ifdef TARGET_REGNAMES
20857 if (TARGET_REGNAMES)
20858 fprintf (file, "%%vs%d", vsx_reg);
20859 else
20860 #endif
20861 fprintf (file, "%d", vsx_reg);
20863 return;
20865 case 'X':
20866 if (MEM_P (x)
20867 && (legitimate_indexed_address_p (XEXP (x, 0), 0)
20868 || (GET_CODE (XEXP (x, 0)) == PRE_MODIFY
20869 && legitimate_indexed_address_p (XEXP (XEXP (x, 0), 1), 0))))
20870 putc ('x', file);
20871 return;
20873 case 'Y':
20874 /* Like 'L', for third word of TImode/PTImode */
20875 if (REG_P (x))
20876 fputs (reg_names[REGNO (x) + 2], file);
20877 else if (MEM_P (x))
20879 machine_mode mode = GET_MODE (x);
20880 if (GET_CODE (XEXP (x, 0)) == PRE_INC
20881 || GET_CODE (XEXP (x, 0)) == PRE_DEC)
20882 output_address (mode, plus_constant (Pmode,
20883 XEXP (XEXP (x, 0), 0), 8));
20884 else if (GET_CODE (XEXP (x, 0)) == PRE_MODIFY)
20885 output_address (mode, plus_constant (Pmode,
20886 XEXP (XEXP (x, 0), 0), 8));
20887 else
20888 output_address (mode, XEXP (adjust_address_nv (x, SImode, 8), 0));
20889 if (small_data_operand (x, GET_MODE (x)))
20890 fprintf (file, "@%s(%s)", SMALL_DATA_RELOC,
20891 reg_names[SMALL_DATA_REG]);
20893 return;
20895 case 'z':
20896 /* X is a SYMBOL_REF. Write out the name preceded by a
20897 period and without any trailing data in brackets. Used for function
20898 names. If we are configured for System V (or the embedded ABI) on
20899 the PowerPC, do not emit the period, since those systems do not use
20900 TOCs and the like. */
20901 gcc_assert (GET_CODE (x) == SYMBOL_REF);
20903 /* For macho, check to see if we need a stub. */
20904 if (TARGET_MACHO)
20906 const char *name = XSTR (x, 0);
20907 #if TARGET_MACHO
20908 if (darwin_emit_branch_islands
20909 && MACHOPIC_INDIRECT
20910 && machopic_classify_symbol (x) == MACHOPIC_UNDEFINED_FUNCTION)
20911 name = machopic_indirection_name (x, /*stub_p=*/true);
20912 #endif
20913 assemble_name (file, name);
20915 else if (!DOT_SYMBOLS)
20916 assemble_name (file, XSTR (x, 0));
20917 else
20918 rs6000_output_function_entry (file, XSTR (x, 0));
20919 return;
20921 case 'Z':
20922 /* Like 'L', for last word of TImode/PTImode. */
20923 if (REG_P (x))
20924 fputs (reg_names[REGNO (x) + 3], file);
20925 else if (MEM_P (x))
20927 machine_mode mode = GET_MODE (x);
20928 if (GET_CODE (XEXP (x, 0)) == PRE_INC
20929 || GET_CODE (XEXP (x, 0)) == PRE_DEC)
20930 output_address (mode, plus_constant (Pmode,
20931 XEXP (XEXP (x, 0), 0), 12));
20932 else if (GET_CODE (XEXP (x, 0)) == PRE_MODIFY)
20933 output_address (mode, plus_constant (Pmode,
20934 XEXP (XEXP (x, 0), 0), 12));
20935 else
20936 output_address (mode, XEXP (adjust_address_nv (x, SImode, 12), 0));
20937 if (small_data_operand (x, GET_MODE (x)))
20938 fprintf (file, "@%s(%s)", SMALL_DATA_RELOC,
20939 reg_names[SMALL_DATA_REG]);
20941 return;
20943 /* Print AltiVec memory operand. */
20944 case 'y':
20946 rtx tmp;
20948 gcc_assert (MEM_P (x));
20950 tmp = XEXP (x, 0);
20952 if (VECTOR_MEM_ALTIVEC_OR_VSX_P (GET_MODE (x))
20953 && GET_CODE (tmp) == AND
20954 && GET_CODE (XEXP (tmp, 1)) == CONST_INT
20955 && INTVAL (XEXP (tmp, 1)) == -16)
20956 tmp = XEXP (tmp, 0);
20957 else if (VECTOR_MEM_VSX_P (GET_MODE (x))
20958 && GET_CODE (tmp) == PRE_MODIFY)
20959 tmp = XEXP (tmp, 1);
20960 if (REG_P (tmp))
20961 fprintf (file, "0,%s", reg_names[REGNO (tmp)]);
20962 else
20964 if (GET_CODE (tmp) != PLUS
20965 || !REG_P (XEXP (tmp, 0))
20966 || !REG_P (XEXP (tmp, 1)))
20968 output_operand_lossage ("invalid %%y value, try using the 'Z' constraint");
20969 break;
20972 if (REGNO (XEXP (tmp, 0)) == 0)
20973 fprintf (file, "%s,%s", reg_names[ REGNO (XEXP (tmp, 1)) ],
20974 reg_names[ REGNO (XEXP (tmp, 0)) ]);
20975 else
20976 fprintf (file, "%s,%s", reg_names[ REGNO (XEXP (tmp, 0)) ],
20977 reg_names[ REGNO (XEXP (tmp, 1)) ]);
20979 break;
20982 case 0:
20983 if (REG_P (x))
20984 fprintf (file, "%s", reg_names[REGNO (x)]);
20985 else if (MEM_P (x))
20987 /* We need to handle PRE_INC and PRE_DEC here, since we need to
20988 know the width from the mode. */
20989 if (GET_CODE (XEXP (x, 0)) == PRE_INC)
20990 fprintf (file, "%d(%s)", GET_MODE_SIZE (GET_MODE (x)),
20991 reg_names[REGNO (XEXP (XEXP (x, 0), 0))]);
20992 else if (GET_CODE (XEXP (x, 0)) == PRE_DEC)
20993 fprintf (file, "%d(%s)", - GET_MODE_SIZE (GET_MODE (x)),
20994 reg_names[REGNO (XEXP (XEXP (x, 0), 0))]);
20995 else if (GET_CODE (XEXP (x, 0)) == PRE_MODIFY)
20996 output_address (GET_MODE (x), XEXP (XEXP (x, 0), 1));
20997 else
20998 output_address (GET_MODE (x), XEXP (x, 0));
21000 else
21002 if (toc_relative_expr_p (x, false, &tocrel_base_oac, &tocrel_offset_oac))
21003 /* This hack along with a corresponding hack in
21004 rs6000_output_addr_const_extra arranges to output addends
21005 where the assembler expects to find them. eg.
21006 (plus (unspec [(symbol_ref ("x")) (reg 2)] tocrel) 4)
21007 without this hack would be output as "x@toc+4". We
21008 want "x+4@toc". */
21009 output_addr_const (file, CONST_CAST_RTX (tocrel_base_oac));
21010 else
21011 output_addr_const (file, x);
21013 return;
21015 case '&':
21016 if (const char *name = get_some_local_dynamic_name ())
21017 assemble_name (file, name);
21018 else
21019 output_operand_lossage ("'%%&' used without any "
21020 "local dynamic TLS references");
21021 return;
21023 default:
21024 output_operand_lossage ("invalid %%xn code");
21028 /* Print the address of an operand. */
21030 void
21031 print_operand_address (FILE *file, rtx x)
21033 if (REG_P (x))
21034 fprintf (file, "0(%s)", reg_names[ REGNO (x) ]);
21035 else if (GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == CONST
21036 || GET_CODE (x) == LABEL_REF)
21038 output_addr_const (file, x);
21039 if (small_data_operand (x, GET_MODE (x)))
21040 fprintf (file, "@%s(%s)", SMALL_DATA_RELOC,
21041 reg_names[SMALL_DATA_REG]);
21042 else
21043 gcc_assert (!TARGET_TOC);
21045 else if (GET_CODE (x) == PLUS && REG_P (XEXP (x, 0))
21046 && REG_P (XEXP (x, 1)))
21048 if (REGNO (XEXP (x, 0)) == 0)
21049 fprintf (file, "%s,%s", reg_names[ REGNO (XEXP (x, 1)) ],
21050 reg_names[ REGNO (XEXP (x, 0)) ]);
21051 else
21052 fprintf (file, "%s,%s", reg_names[ REGNO (XEXP (x, 0)) ],
21053 reg_names[ REGNO (XEXP (x, 1)) ]);
21055 else if (GET_CODE (x) == PLUS && REG_P (XEXP (x, 0))
21056 && GET_CODE (XEXP (x, 1)) == CONST_INT)
21057 fprintf (file, HOST_WIDE_INT_PRINT_DEC "(%s)",
21058 INTVAL (XEXP (x, 1)), reg_names[ REGNO (XEXP (x, 0)) ]);
21059 #if TARGET_MACHO
21060 else if (GET_CODE (x) == LO_SUM && REG_P (XEXP (x, 0))
21061 && CONSTANT_P (XEXP (x, 1)))
21063 fprintf (file, "lo16(");
21064 output_addr_const (file, XEXP (x, 1));
21065 fprintf (file, ")(%s)", reg_names[ REGNO (XEXP (x, 0)) ]);
21067 #endif
21068 #if TARGET_ELF
21069 else if (GET_CODE (x) == LO_SUM && REG_P (XEXP (x, 0))
21070 && CONSTANT_P (XEXP (x, 1)))
21072 output_addr_const (file, XEXP (x, 1));
21073 fprintf (file, "@l(%s)", reg_names[ REGNO (XEXP (x, 0)) ]);
21075 #endif
21076 else if (toc_relative_expr_p (x, false, &tocrel_base_oac, &tocrel_offset_oac))
21078 /* This hack along with a corresponding hack in
21079 rs6000_output_addr_const_extra arranges to output addends
21080 where the assembler expects to find them. eg.
21081 (lo_sum (reg 9)
21082 . (plus (unspec [(symbol_ref ("x")) (reg 2)] tocrel) 8))
21083 without this hack would be output as "x@toc+8@l(9)". We
21084 want "x+8@toc@l(9)". */
21085 output_addr_const (file, CONST_CAST_RTX (tocrel_base_oac));
21086 if (GET_CODE (x) == LO_SUM)
21087 fprintf (file, "@l(%s)", reg_names[REGNO (XEXP (x, 0))]);
21088 else
21089 fprintf (file, "(%s)", reg_names[REGNO (XVECEXP (tocrel_base_oac, 0, 1))]);
21091 else
21092 gcc_unreachable ();
21095 /* Implement TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA. */
21097 static bool
21098 rs6000_output_addr_const_extra (FILE *file, rtx x)
21100 if (GET_CODE (x) == UNSPEC)
21101 switch (XINT (x, 1))
21103 case UNSPEC_TOCREL:
21104 gcc_checking_assert (GET_CODE (XVECEXP (x, 0, 0)) == SYMBOL_REF
21105 && REG_P (XVECEXP (x, 0, 1))
21106 && REGNO (XVECEXP (x, 0, 1)) == TOC_REGISTER);
21107 output_addr_const (file, XVECEXP (x, 0, 0));
21108 if (x == tocrel_base_oac && tocrel_offset_oac != const0_rtx)
21110 if (INTVAL (tocrel_offset_oac) >= 0)
21111 fprintf (file, "+");
21112 output_addr_const (file, CONST_CAST_RTX (tocrel_offset_oac));
21114 if (!TARGET_AIX || (TARGET_ELF && TARGET_MINIMAL_TOC))
21116 putc ('-', file);
21117 assemble_name (file, toc_label_name);
21118 need_toc_init = 1;
21120 else if (TARGET_ELF)
21121 fputs ("@toc", file);
21122 return true;
21124 #if TARGET_MACHO
21125 case UNSPEC_MACHOPIC_OFFSET:
21126 output_addr_const (file, XVECEXP (x, 0, 0));
21127 putc ('-', file);
21128 machopic_output_function_base_name (file);
21129 return true;
21130 #endif
21132 return false;
21135 /* Target hook for assembling integer objects. The PowerPC version has
21136 to handle fixup entries for relocatable code if RELOCATABLE_NEEDS_FIXUP
21137 is defined. It also needs to handle DI-mode objects on 64-bit
21138 targets. */
21140 static bool
21141 rs6000_assemble_integer (rtx x, unsigned int size, int aligned_p)
21143 #ifdef RELOCATABLE_NEEDS_FIXUP
21144 /* Special handling for SI values. */
21145 if (RELOCATABLE_NEEDS_FIXUP && size == 4 && aligned_p)
21147 static int recurse = 0;
21149 /* For -mrelocatable, we mark all addresses that need to be fixed up in
21150 the .fixup section. Since the TOC section is already relocated, we
21151 don't need to mark it here. We used to skip the text section, but it
21152 should never be valid for relocated addresses to be placed in the text
21153 section. */
21154 if (DEFAULT_ABI == ABI_V4
21155 && (TARGET_RELOCATABLE || flag_pic > 1)
21156 && in_section != toc_section
21157 && !recurse
21158 && !CONST_SCALAR_INT_P (x)
21159 && CONSTANT_P (x))
21161 char buf[256];
21163 recurse = 1;
21164 ASM_GENERATE_INTERNAL_LABEL (buf, "LCP", fixuplabelno);
21165 fixuplabelno++;
21166 ASM_OUTPUT_LABEL (asm_out_file, buf);
21167 fprintf (asm_out_file, "\t.long\t(");
21168 output_addr_const (asm_out_file, x);
21169 fprintf (asm_out_file, ")@fixup\n");
21170 fprintf (asm_out_file, "\t.section\t\".fixup\",\"aw\"\n");
21171 ASM_OUTPUT_ALIGN (asm_out_file, 2);
21172 fprintf (asm_out_file, "\t.long\t");
21173 assemble_name (asm_out_file, buf);
21174 fprintf (asm_out_file, "\n\t.previous\n");
21175 recurse = 0;
21176 return true;
21178 /* Remove initial .'s to turn a -mcall-aixdesc function
21179 address into the address of the descriptor, not the function
21180 itself. */
21181 else if (GET_CODE (x) == SYMBOL_REF
21182 && XSTR (x, 0)[0] == '.'
21183 && DEFAULT_ABI == ABI_AIX)
21185 const char *name = XSTR (x, 0);
21186 while (*name == '.')
21187 name++;
21189 fprintf (asm_out_file, "\t.long\t%s\n", name);
21190 return true;
21193 #endif /* RELOCATABLE_NEEDS_FIXUP */
21194 return default_assemble_integer (x, size, aligned_p);
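/* Roughly, for -mrelocatable the fixup path above emits (label name
   illustrative):
       .LCP0:
               .long (sym)@fixup
               .section ".fixup","aw"
               .align 2
               .long .LCP0
               .previous
   recording the address of each word that must be relocated at load
   time. */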
21197 #if defined (HAVE_GAS_HIDDEN) && !TARGET_MACHO
21198 /* Emit an assembler directive to set symbol visibility for DECL to
21199 VISIBILITY_TYPE. */
21201 static void
21202 rs6000_assemble_visibility (tree decl, int vis)
21204 if (TARGET_XCOFF)
21205 return;
21207 /* Functions need to have their entry point symbol visibility set as
21208 well as their descriptor symbol visibility. */
21209 if (DEFAULT_ABI == ABI_AIX
21210 && DOT_SYMBOLS
21211 && TREE_CODE (decl) == FUNCTION_DECL)
21213 static const char * const visibility_types[] = {
21214 NULL, "protected", "hidden", "internal"
21217 const char *name, *type;
21219 name = ((* targetm.strip_name_encoding)
21220 (IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl))));
21221 type = visibility_types[vis];
21223 fprintf (asm_out_file, "\t.%s\t%s\n", type, name);
21224 fprintf (asm_out_file, "\t.%s\t.%s\n", type, name);
21226 else
21227 default_assemble_visibility (decl, vis);
21229 #endif
21231 enum rtx_code
21232 rs6000_reverse_condition (machine_mode mode, enum rtx_code code)
21234 /* Reversal of FP compares takes care -- an ordered compare
21235 becomes an unordered compare and vice versa. */
21236 if (mode == CCFPmode
21237 && (!flag_finite_math_only
21238 || code == UNLT || code == UNLE || code == UNGT || code == UNGE
21239 || code == UNEQ || code == LTGT))
21240 return reverse_condition_maybe_unordered (code);
21241 else
21242 return reverse_condition (code);
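/* E.g. reversing GE in CCFPmode without flag_finite_math_only yields
   UNLT rather than LT, so a NaN operand still takes the reversed
   branch; with -ffinite-math-only the plain LT reversal is used. */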
21245 /* Generate a compare for CODE. Return a brand-new rtx that
21246 represents the result of the compare. */
21248 static rtx
21249 rs6000_generate_compare (rtx cmp, machine_mode mode)
21251 machine_mode comp_mode;
21252 rtx compare_result;
21253 enum rtx_code code = GET_CODE (cmp);
21254 rtx op0 = XEXP (cmp, 0);
21255 rtx op1 = XEXP (cmp, 1);
21257 if (!TARGET_FLOAT128_HW && FLOAT128_VECTOR_P (mode))
21258 comp_mode = CCmode;
21259 else if (FLOAT_MODE_P (mode))
21260 comp_mode = CCFPmode;
21261 else if (code == GTU || code == LTU
21262 || code == GEU || code == LEU)
21263 comp_mode = CCUNSmode;
21264 else if ((code == EQ || code == NE)
21265 && unsigned_reg_p (op0)
21266 && (unsigned_reg_p (op1)
21267 || (CONST_INT_P (op1) && INTVAL (op1) != 0)))
21268 /* These are unsigned values, perhaps there will be a later
21269 ordering compare that can be shared with this one. */
21270 comp_mode = CCUNSmode;
21271 else
21272 comp_mode = CCmode;
21274 /* If we have an unsigned compare, make sure we don't have a signed value as
21275 an immediate. */
21276 if (comp_mode == CCUNSmode && GET_CODE (op1) == CONST_INT
21277 && INTVAL (op1) < 0)
21279 op0 = copy_rtx_if_shared (op0);
21280 op1 = force_reg (GET_MODE (op0), op1);
21281 cmp = gen_rtx_fmt_ee (code, GET_MODE (cmp), op0, op1);
21284 /* First, the compare. */
21285 compare_result = gen_reg_rtx (comp_mode);
21287 /* IEEE 128-bit support in VSX registers when we do not have hardware
21288 support. */
21289 if (!TARGET_FLOAT128_HW && FLOAT128_VECTOR_P (mode))
21291 rtx libfunc = NULL_RTX;
21292 bool check_nan = false;
21293 rtx dest;
21295 switch (code)
21297 case EQ:
21298 case NE:
21299 libfunc = optab_libfunc (eq_optab, mode);
21300 break;
21302 case GT:
21303 case GE:
21304 libfunc = optab_libfunc (ge_optab, mode);
21305 break;
21307 case LT:
21308 case LE:
21309 libfunc = optab_libfunc (le_optab, mode);
21310 break;
21312 case UNORDERED:
21313 case ORDERED:
21314 libfunc = optab_libfunc (unord_optab, mode);
21315 code = (code == UNORDERED) ? NE : EQ;
21316 break;
21318 case UNGE:
21319 case UNGT:
21320 check_nan = true;
21321 libfunc = optab_libfunc (ge_optab, mode);
21322 code = (code == UNGE) ? GE : GT;
21323 break;
21325 case UNLE:
21326 case UNLT:
21327 check_nan = true;
21328 libfunc = optab_libfunc (le_optab, mode);
21329 code = (code == UNLE) ? LE : LT;
21330 break;
21332 case UNEQ:
21333 case LTGT:
21334 check_nan = true;
21335 libfunc = optab_libfunc (eq_optab, mode);
21336 code = (code == UNEQ) ? EQ : NE;
21337 break;
21339 default:
21340 gcc_unreachable ();
21343 gcc_assert (libfunc);
21345 if (!check_nan)
21346 dest = emit_library_call_value (libfunc, NULL_RTX, LCT_CONST,
21347 SImode, op0, mode, op1, mode);
21349 /* The library signals an exception for signalling NaNs, so we need to
21350 handle isgreater, etc. by first checking isordered. */
21351 else
21353 rtx ne_rtx, normal_dest, unord_dest;
21354 rtx unord_func = optab_libfunc (unord_optab, mode);
21355 rtx join_label = gen_label_rtx ();
21356 rtx join_ref = gen_rtx_LABEL_REF (VOIDmode, join_label);
21357 rtx unord_cmp = gen_reg_rtx (comp_mode);
21360 /* Test for either value being a NaN. */
21361 gcc_assert (unord_func);
21362 unord_dest = emit_library_call_value (unord_func, NULL_RTX, LCT_CONST,
21363 SImode, op0, mode, op1, mode);
21365 /* Set value (1) if either value is a NaN, and jump to the join
21366 label. */
21367 dest = gen_reg_rtx (SImode);
21368 emit_move_insn (dest, const1_rtx);
21369 emit_insn (gen_rtx_SET (unord_cmp,
21370 gen_rtx_COMPARE (comp_mode, unord_dest,
21371 const0_rtx)));
21373 ne_rtx = gen_rtx_NE (comp_mode, unord_cmp, const0_rtx);
21374 emit_jump_insn (gen_rtx_SET (pc_rtx,
21375 gen_rtx_IF_THEN_ELSE (VOIDmode, ne_rtx,
21376 join_ref,
21377 pc_rtx)));
21379 /* Do the normal comparison, knowing that the values are not
21380 NaNs. */
21381 normal_dest = emit_library_call_value (libfunc, NULL_RTX, LCT_CONST,
21382 SImode, op0, mode, op1, mode);
21384 emit_insn (gen_cstoresi4 (dest,
21385 gen_rtx_fmt_ee (code, SImode, normal_dest,
21386 const0_rtx),
21387 normal_dest, const0_rtx));
21389 /* Join NaN and non-NaN paths. Compare dest against 0. */
21390 emit_label (join_label);
21391 code = NE;
21394 emit_insn (gen_rtx_SET (compare_result,
21395 gen_rtx_COMPARE (comp_mode, dest, const0_rtx)));
21398 else
21400 /* Generate XLC-compatible TFmode compare as PARALLEL with extra
21401 CLOBBERs to match cmptf_internal2 pattern. */
21402 if (comp_mode == CCFPmode && TARGET_XL_COMPAT
21403 && FLOAT128_IBM_P (GET_MODE (op0))
21404 && TARGET_HARD_FLOAT)
21405 emit_insn (gen_rtx_PARALLEL (VOIDmode,
21406 gen_rtvec (10,
21407 gen_rtx_SET (compare_result,
21408 gen_rtx_COMPARE (comp_mode, op0, op1)),
21409 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
21410 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
21411 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
21412 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
21413 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
21414 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
21415 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
21416 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
21417 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (Pmode)))));
21418 else if (GET_CODE (op1) == UNSPEC
21419 && XINT (op1, 1) == UNSPEC_SP_TEST)
21421 rtx op1b = XVECEXP (op1, 0, 0);
21422 comp_mode = CCEQmode;
21423 compare_result = gen_reg_rtx (CCEQmode);
21424 if (TARGET_64BIT)
21425 emit_insn (gen_stack_protect_testdi (compare_result, op0, op1b));
21426 else
21427 emit_insn (gen_stack_protect_testsi (compare_result, op0, op1b));
21429 else
21430 emit_insn (gen_rtx_SET (compare_result,
21431 gen_rtx_COMPARE (comp_mode, op0, op1)));
21434 /* Some kinds of FP comparisons need an OR operation;
21435 under flag_finite_math_only we don't bother. */
21436 if (FLOAT_MODE_P (mode)
21437 && (!FLOAT128_IEEE_P (mode) || TARGET_FLOAT128_HW)
21438 && !flag_finite_math_only
21439 && (code == LE || code == GE
21440 || code == UNEQ || code == LTGT
21441 || code == UNGT || code == UNLT))
21443 enum rtx_code or1, or2;
21444 rtx or1_rtx, or2_rtx, compare2_rtx;
21445 rtx or_result = gen_reg_rtx (CCEQmode);
21447 switch (code)
21449 case LE: or1 = LT; or2 = EQ; break;
21450 case GE: or1 = GT; or2 = EQ; break;
21451 case UNEQ: or1 = UNORDERED; or2 = EQ; break;
21452 case LTGT: or1 = LT; or2 = GT; break;
21453 case UNGT: or1 = UNORDERED; or2 = GT; break;
21454 case UNLT: or1 = UNORDERED; or2 = LT; break;
21455 default: gcc_unreachable ();
21457 validate_condition_mode (or1, comp_mode);
21458 validate_condition_mode (or2, comp_mode);
21459 or1_rtx = gen_rtx_fmt_ee (or1, SImode, compare_result, const0_rtx);
21460 or2_rtx = gen_rtx_fmt_ee (or2, SImode, compare_result, const0_rtx);
21461 compare2_rtx = gen_rtx_COMPARE (CCEQmode,
21462 gen_rtx_IOR (SImode, or1_rtx, or2_rtx),
21463 const_true_rtx);
21464 emit_insn (gen_rtx_SET (or_result, compare2_rtx));
21466 compare_result = or_result;
21467 code = EQ;
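/* For example (illustrative), a floating-point GE is decomposed above
   into GT and EQ on the same CR field; the CCEQ ior-compare pattern
   (a cror of the two CR bits) combines them, and the caller then tests
   EQ on the CCEQ result.  */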
21470 validate_condition_mode (code, GET_MODE (compare_result));
21472 return gen_rtx_fmt_ee (code, VOIDmode, compare_result, const0_rtx);
21476 /* Return the diagnostic message string if the binary operation OP is
21477 not permitted on TYPE1 and TYPE2, NULL otherwise. */
21479 static const char*
21480 rs6000_invalid_binary_op (int op ATTRIBUTE_UNUSED,
21481 const_tree type1,
21482 const_tree type2)
21484 machine_mode mode1 = TYPE_MODE (type1);
21485 machine_mode mode2 = TYPE_MODE (type2);
21487 /* For complex modes, use the inner type. */
21488 if (COMPLEX_MODE_P (mode1))
21489 mode1 = GET_MODE_INNER (mode1);
21491 if (COMPLEX_MODE_P (mode2))
21492 mode2 = GET_MODE_INNER (mode2);
21494 /* Don't allow IEEE 754R 128-bit binary floating point and IBM extended
21495 double to intermix unless -mfloat128-convert. */
21496 if (mode1 == mode2)
21497 return NULL;
21499 if (!TARGET_FLOAT128_CVT)
21501 if ((mode1 == KFmode && mode2 == IFmode)
21502 || (mode1 == IFmode && mode2 == KFmode))
21503 return N_("__float128 and __ibm128 cannot be used in the same "
21504 "expression");
21506 if (TARGET_IEEEQUAD
21507 && ((mode1 == IFmode && mode2 == TFmode)
21508 || (mode1 == TFmode && mode2 == IFmode)))
21509 return N_("__ibm128 and long double cannot be used in the same "
21510 "expression");
21512 if (!TARGET_IEEEQUAD
21513 && ((mode1 == KFmode && mode2 == TFmode)
21514 || (mode1 == TFmode && mode2 == KFmode)))
21515 return N_("__float128 and long double cannot be used in the same "
21516 "expression");
21519 return NULL;
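/* For example (illustrative), without -mfloat128-convert:

       __float128 a;
       __ibm128 b;
       ... a + b ...

   is rejected with the first message above, since KFmode and IFmode use
   different encodings and no implicit conversion is defined.  */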
21523 /* Expand floating point conversion to/from __float128 and __ibm128. */
21525 void
21526 rs6000_expand_float128_convert (rtx dest, rtx src, bool unsigned_p)
21528 machine_mode dest_mode = GET_MODE (dest);
21529 machine_mode src_mode = GET_MODE (src);
21530 convert_optab cvt = unknown_optab;
21531 bool do_move = false;
21532 rtx libfunc = NULL_RTX;
21533 rtx dest2;
21534 typedef rtx (*rtx_2func_t) (rtx, rtx);
21535 rtx_2func_t hw_convert = (rtx_2func_t)0;
21536 size_t kf_or_tf;
21538 struct hw_conv_t {
21539 rtx_2func_t from_df;
21540 rtx_2func_t from_sf;
21541 rtx_2func_t from_si_sign;
21542 rtx_2func_t from_si_uns;
21543 rtx_2func_t from_di_sign;
21544 rtx_2func_t from_di_uns;
21545 rtx_2func_t to_df;
21546 rtx_2func_t to_sf;
21547 rtx_2func_t to_si_sign;
21548 rtx_2func_t to_si_uns;
21549 rtx_2func_t to_di_sign;
21550 rtx_2func_t to_di_uns;
21551 } hw_conversions[2] = {
21552 /* conversions to/from KFmode */
21554 gen_extenddfkf2_hw, /* KFmode <- DFmode. */
21555 gen_extendsfkf2_hw, /* KFmode <- SFmode. */
21556 gen_float_kfsi2_hw, /* KFmode <- SImode (signed). */
21557 gen_floatuns_kfsi2_hw, /* KFmode <- SImode (unsigned). */
21558 gen_float_kfdi2_hw, /* KFmode <- DImode (signed). */
21559 gen_floatuns_kfdi2_hw, /* KFmode <- DImode (unsigned). */
21560 gen_trunckfdf2_hw, /* DFmode <- KFmode. */
21561 gen_trunckfsf2_hw, /* SFmode <- KFmode. */
21562 gen_fix_kfsi2_hw, /* SImode <- KFmode (signed). */
21563 gen_fixuns_kfsi2_hw, /* SImode <- KFmode (unsigned). */
21564 gen_fix_kfdi2_hw, /* DImode <- KFmode (signed). */
21565 gen_fixuns_kfdi2_hw, /* DImode <- KFmode (unsigned). */
21568 /* conversions to/from TFmode */
21570 gen_extenddftf2_hw, /* TFmode <- DFmode. */
21571 gen_extendsftf2_hw, /* TFmode <- SFmode. */
21572 gen_float_tfsi2_hw, /* TFmode <- SImode (signed). */
21573 gen_floatuns_tfsi2_hw, /* TFmode <- SImode (unsigned). */
21574 gen_float_tfdi2_hw, /* TFmode <- DImode (signed). */
21575 gen_floatuns_tfdi2_hw, /* TFmode <- DImode (unsigned). */
21576 gen_trunctfdf2_hw, /* DFmode <- TFmode. */
21577 gen_trunctfsf2_hw, /* SFmode <- TFmode. */
21578 gen_fix_tfsi2_hw, /* SImode <- TFmode (signed). */
21579 gen_fixuns_tfsi2_hw, /* SImode <- TFmode (unsigned). */
21580 gen_fix_tfdi2_hw, /* DImode <- TFmode (signed). */
21581 gen_fixuns_tfdi2_hw, /* DImode <- TFmode (unsigned). */
21585 if (dest_mode == src_mode)
21586 gcc_unreachable ();
21588 /* Eliminate memory operations. */
21589 if (MEM_P (src))
21590 src = force_reg (src_mode, src);
21592 if (MEM_P (dest))
21594 rtx tmp = gen_reg_rtx (dest_mode);
21595 rs6000_expand_float128_convert (tmp, src, unsigned_p);
21596 rs6000_emit_move (dest, tmp, dest_mode);
21597 return;
21600 /* Convert to IEEE 128-bit floating point. */
21601 if (FLOAT128_IEEE_P (dest_mode))
21603 if (dest_mode == KFmode)
21604 kf_or_tf = 0;
21605 else if (dest_mode == TFmode)
21606 kf_or_tf = 1;
21607 else
21608 gcc_unreachable ();
21610 switch (src_mode)
21612 case E_DFmode:
21613 cvt = sext_optab;
21614 hw_convert = hw_conversions[kf_or_tf].from_df;
21615 break;
21617 case E_SFmode:
21618 cvt = sext_optab;
21619 hw_convert = hw_conversions[kf_or_tf].from_sf;
21620 break;
21622 case E_KFmode:
21623 case E_IFmode:
21624 case E_TFmode:
21625 if (FLOAT128_IBM_P (src_mode))
21626 cvt = sext_optab;
21627 else
21628 do_move = true;
21629 break;
21631 case E_SImode:
21632 if (unsigned_p)
21634 cvt = ufloat_optab;
21635 hw_convert = hw_conversions[kf_or_tf].from_si_uns;
21637 else
21639 cvt = sfloat_optab;
21640 hw_convert = hw_conversions[kf_or_tf].from_si_sign;
21642 break;
21644 case E_DImode:
21645 if (unsigned_p)
21647 cvt = ufloat_optab;
21648 hw_convert = hw_conversions[kf_or_tf].from_di_uns;
21650 else
21652 cvt = sfloat_optab;
21653 hw_convert = hw_conversions[kf_or_tf].from_di_sign;
21655 break;
21657 default:
21658 gcc_unreachable ();
21662 /* Convert from IEEE 128-bit floating point. */
21663 else if (FLOAT128_IEEE_P (src_mode))
21665 if (src_mode == KFmode)
21666 kf_or_tf = 0;
21667 else if (src_mode == TFmode)
21668 kf_or_tf = 1;
21669 else
21670 gcc_unreachable ();
21672 switch (dest_mode)
21674 case E_DFmode:
21675 cvt = trunc_optab;
21676 hw_convert = hw_conversions[kf_or_tf].to_df;
21677 break;
21679 case E_SFmode:
21680 cvt = trunc_optab;
21681 hw_convert = hw_conversions[kf_or_tf].to_sf;
21682 break;
21684 case E_KFmode:
21685 case E_IFmode:
21686 case E_TFmode:
21687 if (FLOAT128_IBM_P (dest_mode))
21688 cvt = trunc_optab;
21689 else
21690 do_move = true;
21691 break;
21693 case E_SImode:
21694 if (unsigned_p)
21696 cvt = ufix_optab;
21697 hw_convert = hw_conversions[kf_or_tf].to_si_uns;
21699 else
21701 cvt = sfix_optab;
21702 hw_convert = hw_conversions[kf_or_tf].to_si_sign;
21704 break;
21706 case E_DImode:
21707 if (unsigned_p)
21709 cvt = ufix_optab;
21710 hw_convert = hw_conversions[kf_or_tf].to_di_uns;
21712 else
21714 cvt = sfix_optab;
21715 hw_convert = hw_conversions[kf_or_tf].to_di_sign;
21717 break;
21719 default:
21720 gcc_unreachable ();
21724 /* Both IBM format. */
21725 else if (FLOAT128_IBM_P (dest_mode) && FLOAT128_IBM_P (src_mode))
21726 do_move = true;
21728 else
21729 gcc_unreachable ();
21731 /* Handle conversion between TFmode/KFmode/IFmode. */
21732 if (do_move)
21733 emit_insn (gen_rtx_SET (dest, gen_rtx_FLOAT_EXTEND (dest_mode, src)));
21735 /* Handle conversion if we have hardware support. */
21736 else if (TARGET_FLOAT128_HW && hw_convert)
21737 emit_insn ((hw_convert) (dest, src));
21739 /* Call an external function to do the conversion. */
21740 else if (cvt != unknown_optab)
21742 libfunc = convert_optab_libfunc (cvt, dest_mode, src_mode);
21743 gcc_assert (libfunc != NULL_RTX);
21745 dest2 = emit_library_call_value (libfunc, dest, LCT_CONST, dest_mode,
21746 src, src_mode);
21748 gcc_assert (dest2 != NULL_RTX);
21749 if (!rtx_equal_p (dest, dest2))
21750 emit_move_insn (dest, dest2);
21753 else
21754 gcc_unreachable ();
21756 return;
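/* Dispatch example (illustrative): converting unsigned DImode to KFmode
   selects ufloat_optab and hw_conversions[0].from_di_uns; with
   -mfloat128-hardware this emits gen_floatuns_kfdi2_hw (an xscvudqp
   sequence), otherwise it calls the libgcc routine registered for
   ufloat_optab (typically __floatundikf).  */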
21760 /* Emit RTL that sets a register to zero if OP1 and OP2 are equal. SCRATCH
21761 can be used as that dest register. Return the dest register. */
21763 rtx
21764 rs6000_emit_eqne (machine_mode mode, rtx op1, rtx op2, rtx scratch)
21766 if (op2 == const0_rtx)
21767 return op1;
21769 if (GET_CODE (scratch) == SCRATCH)
21770 scratch = gen_reg_rtx (mode);
21772 if (logical_operand (op2, mode))
21773 emit_insn (gen_rtx_SET (scratch, gen_rtx_XOR (mode, op1, op2)));
21774 else
21775 emit_insn (gen_rtx_SET (scratch,
21776 gen_rtx_PLUS (mode, op1, negate_rtx (mode, op2))));
21778 return scratch;
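/* Illustrative expansions: for op2 = 0x20 (a logical_operand) this emits
   roughly "xori scratch,op1,0x20"; for op2 = -5 it emits
   "addi scratch,op1,5".  Either way SCRATCH is zero iff OP1 == OP2.  */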
21781 void
21782 rs6000_emit_sCOND (machine_mode mode, rtx operands[])
21784 rtx condition_rtx;
21785 machine_mode op_mode;
21786 enum rtx_code cond_code;
21787 rtx result = operands[0];
21789 condition_rtx = rs6000_generate_compare (operands[1], mode);
21790 cond_code = GET_CODE (condition_rtx);
21792 if (cond_code == NE
21793 || cond_code == GE || cond_code == LE
21794 || cond_code == GEU || cond_code == LEU
21795 || cond_code == ORDERED || cond_code == UNGE || cond_code == UNLE)
21797 rtx not_result = gen_reg_rtx (CCEQmode);
21798 rtx not_op, rev_cond_rtx;
21799 machine_mode cc_mode;
21801 cc_mode = GET_MODE (XEXP (condition_rtx, 0));
21803 rev_cond_rtx = gen_rtx_fmt_ee (rs6000_reverse_condition (cc_mode, cond_code),
21804 SImode, XEXP (condition_rtx, 0), const0_rtx);
21805 not_op = gen_rtx_COMPARE (CCEQmode, rev_cond_rtx, const0_rtx);
21806 emit_insn (gen_rtx_SET (not_result, not_op));
21807 condition_rtx = gen_rtx_EQ (VOIDmode, not_result, const0_rtx);
21810 op_mode = GET_MODE (XEXP (operands[1], 0));
21811 if (op_mode == VOIDmode)
21812 op_mode = GET_MODE (XEXP (operands[1], 1));
21814 if (TARGET_POWERPC64 && (op_mode == DImode || FLOAT_MODE_P (mode)))
21816 PUT_MODE (condition_rtx, DImode);
21817 convert_move (result, condition_rtx, 0);
21819 else
21821 PUT_MODE (condition_rtx, SImode);
21822 emit_insn (gen_rtx_SET (result, condition_rtx));
21826 /* Emit a conditional branch to the label in OPERANDS[3], testing the comparison in OPERANDS[0]. */
21828 void
21829 rs6000_emit_cbranch (machine_mode mode, rtx operands[])
21831 rtx condition_rtx, loc_ref;
21833 condition_rtx = rs6000_generate_compare (operands[0], mode);
21834 loc_ref = gen_rtx_LABEL_REF (VOIDmode, operands[3]);
21835 emit_jump_insn (gen_rtx_SET (pc_rtx,
21836 gen_rtx_IF_THEN_ELSE (VOIDmode, condition_rtx,
21837 loc_ref, pc_rtx)));
21840 /* Return the string to output a conditional branch to LABEL. LABEL is
21841 the operand template of the label, or NULL if the branch is really a
21842 conditional return.
21844 OP is the conditional expression. XEXP (OP, 0) is assumed to be a
21845 condition code register and its mode specifies what kind of
21846 comparison we made.
21848 REVERSED is nonzero if we should reverse the sense of the comparison.
21850 INSN is the insn. */
21852 char *
21853 output_cbranch (rtx op, const char *label, int reversed, rtx_insn *insn)
21855 static char string[64];
21856 enum rtx_code code = GET_CODE (op);
21857 rtx cc_reg = XEXP (op, 0);
21858 machine_mode mode = GET_MODE (cc_reg);
21859 int cc_regno = REGNO (cc_reg) - CR0_REGNO;
21860 int need_longbranch = label != NULL && get_attr_length (insn) == 8;
21861 int really_reversed = reversed ^ need_longbranch;
21862 char *s = string;
21863 const char *ccode;
21864 const char *pred;
21865 rtx note;
21867 validate_condition_mode (code, mode);
21869 /* Work out which way this really branches. We could use
21870 reverse_condition_maybe_unordered here always but this
21871 makes the resulting assembler clearer. */
21872 if (really_reversed)
21874 /* Reversal of FP compares takes care -- an ordered compare
21875 becomes an unordered compare and vice versa. */
21876 if (mode == CCFPmode)
21877 code = reverse_condition_maybe_unordered (code);
21878 else
21879 code = reverse_condition (code);
21882 switch (code)
21884 /* Not all of these are actually distinct opcodes, but
21885 we distinguish them for clarity of the resulting assembler. */
21886 case NE: case LTGT:
21887 ccode = "ne"; break;
21888 case EQ: case UNEQ:
21889 ccode = "eq"; break;
21890 case GE: case GEU:
21891 ccode = "ge"; break;
21892 case GT: case GTU: case UNGT:
21893 ccode = "gt"; break;
21894 case LE: case LEU:
21895 ccode = "le"; break;
21896 case LT: case LTU: case UNLT:
21897 ccode = "lt"; break;
21898 case UNORDERED: ccode = "un"; break;
21899 case ORDERED: ccode = "nu"; break;
21900 case UNGE: ccode = "nl"; break;
21901 case UNLE: ccode = "ng"; break;
21902 default:
21903 gcc_unreachable ();
21906 /* Maybe we have a guess as to how likely the branch is. */
21907 pred = "";
21908 note = find_reg_note (insn, REG_BR_PROB, NULL_RTX);
21909 if (note != NULL_RTX)
21911 /* PROB is the difference from 50%. */
21912 int prob = profile_probability::from_reg_br_prob_note (XINT (note, 0))
21913 .to_reg_br_prob_base () - REG_BR_PROB_BASE / 2;
21915 /* Only hint for highly probable/improbable branches on newer cpus when
21916 we have real profile data, as static prediction overrides processor
21917 dynamic prediction. For older cpus we may as well always hint, but
21918 assume not taken for branches that are very close to 50% as a
21919 mispredicted taken branch is more expensive than a
21920 mispredicted not-taken branch. */
21921 if (rs6000_always_hint
21922 || (abs (prob) > REG_BR_PROB_BASE / 100 * 48
21923 && (profile_status_for_fn (cfun) != PROFILE_GUESSED)
21924 && br_prob_note_reliable_p (note)))
21926 if (abs (prob) > REG_BR_PROB_BASE / 20
21927 && ((prob > 0) ^ need_longbranch))
21928 pred = "+";
21929 else
21930 pred = "-";
21934 if (label == NULL)
21935 s += sprintf (s, "b%slr%s ", ccode, pred);
21936 else
21937 s += sprintf (s, "b%s%s ", ccode, pred);
21939 /* We need to escape any '%' characters in the reg_names string.
21940 Assume they'd only be the first character.... */
21941 if (reg_names[cc_regno + CR0_REGNO][0] == '%')
21942 *s++ = '%';
21943 s += sprintf (s, "%s", reg_names[cc_regno + CR0_REGNO]);
21945 if (label != NULL)
21947 /* If the branch distance was too far, we may have to use an
21948 unconditional branch to go the distance. */
21949 if (need_longbranch)
21950 s += sprintf (s, ",$+8\n\tb %s", label);
21951 else
21952 s += sprintf (s, ",%s", label);
21955 return string;
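/* Sample results (illustrative): a short equality branch on cr7 prints as
   something like "beq+ 7,.L42" with the default register names; when the
   target is out of range (need_longbranch), the sense is inverted and an
   unconditional branch covers the distance: "bne 7,$+8" followed by
   "b .L42".  */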
21958 /* Return insn for VSX or Altivec comparisons. */
21960 static rtx
21961 rs6000_emit_vector_compare_inner (enum rtx_code code, rtx op0, rtx op1)
21963 rtx mask;
21964 machine_mode mode = GET_MODE (op0);
21966 switch (code)
21968 default:
21969 break;
21971 case GE:
21972 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
21973 return NULL_RTX;
21974 /* FALLTHRU */
21976 case EQ:
21977 case GT:
21978 case GTU:
21979 case ORDERED:
21980 case UNORDERED:
21981 case UNEQ:
21982 case LTGT:
21983 mask = gen_reg_rtx (mode);
21984 emit_insn (gen_rtx_SET (mask, gen_rtx_fmt_ee (code, mode, op0, op1)));
21985 return mask;
21988 return NULL_RTX;
21991 /* Emit vector compare for operands OP0 and OP1 using code RCODE.
21992 DMODE is the expected destination mode. This is a recursive function. */
21994 static rtx
21995 rs6000_emit_vector_compare (enum rtx_code rcode,
21996 rtx op0, rtx op1,
21997 machine_mode dmode)
21999 rtx mask;
22000 bool swap_operands = false;
22001 bool try_again = false;
22003 gcc_assert (VECTOR_UNIT_ALTIVEC_OR_VSX_P (dmode));
22004 gcc_assert (GET_MODE (op0) == GET_MODE (op1));
22006 /* See if the comparison works as is. */
22007 mask = rs6000_emit_vector_compare_inner (rcode, op0, op1);
22008 if (mask)
22009 return mask;
22011 switch (rcode)
22013 case LT:
22014 rcode = GT;
22015 swap_operands = true;
22016 try_again = true;
22017 break;
22018 case LTU:
22019 rcode = GTU;
22020 swap_operands = true;
22021 try_again = true;
22022 break;
22023 case NE:
22024 case UNLE:
22025 case UNLT:
22026 case UNGE:
22027 case UNGT:
22028 /* Invert condition and try again.
22029 e.g., A != B becomes ~(A==B). */
22031 enum rtx_code rev_code;
22032 enum insn_code nor_code;
22033 rtx mask2;
22035 rev_code = reverse_condition_maybe_unordered (rcode);
22036 if (rev_code == UNKNOWN)
22037 return NULL_RTX;
22039 nor_code = optab_handler (one_cmpl_optab, dmode);
22040 if (nor_code == CODE_FOR_nothing)
22041 return NULL_RTX;
22043 mask2 = rs6000_emit_vector_compare (rev_code, op0, op1, dmode);
22044 if (!mask2)
22045 return NULL_RTX;
22047 mask = gen_reg_rtx (dmode);
22048 emit_insn (GEN_FCN (nor_code) (mask, mask2));
22049 return mask;
22051 break;
22052 case GE:
22053 case GEU:
22054 case LE:
22055 case LEU:
22056 /* Try GT/GTU/LT/LTU OR EQ */
22058 rtx c_rtx, eq_rtx;
22059 enum insn_code ior_code;
22060 enum rtx_code new_code;
22062 switch (rcode)
22064 case GE:
22065 new_code = GT;
22066 break;
22068 case GEU:
22069 new_code = GTU;
22070 break;
22072 case LE:
22073 new_code = LT;
22074 break;
22076 case LEU:
22077 new_code = LTU;
22078 break;
22080 default:
22081 gcc_unreachable ();
22084 ior_code = optab_handler (ior_optab, dmode);
22085 if (ior_code == CODE_FOR_nothing)
22086 return NULL_RTX;
22088 c_rtx = rs6000_emit_vector_compare (new_code, op0, op1, dmode);
22089 if (!c_rtx)
22090 return NULL_RTX;
22092 eq_rtx = rs6000_emit_vector_compare (EQ, op0, op1, dmode);
22093 if (!eq_rtx)
22094 return NULL_RTX;
22096 mask = gen_reg_rtx (dmode);
22097 emit_insn (GEN_FCN (ior_code) (mask, c_rtx, eq_rtx));
22098 return mask;
22100 break;
22101 default:
22102 return NULL_RTX;
22105 if (try_again)
22107 if (swap_operands)
22108 std::swap (op0, op1);
22110 mask = rs6000_emit_vector_compare_inner (rcode, op0, op1);
22111 if (mask)
22112 return mask;
22115 /* You only get two chances. */
22116 return NULL_RTX;
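/* Worked example (illustrative): V4SI "a <= b" has no single instruction,
   so the LE case above computes a < b (vcmpgtsw with swapped operands)
   and a == b (vcmpequw), then ORs the two masks together.  */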
22119 /* Emit vector conditional expression. DEST is destination. OP_TRUE and
22120 OP_FALSE are two VEC_COND_EXPR operands. CC_OP0 and CC_OP1 are the two
22121 operands for the relation operation COND. */
22123 int
22124 rs6000_emit_vector_cond_expr (rtx dest, rtx op_true, rtx op_false,
22125 rtx cond, rtx cc_op0, rtx cc_op1)
22127 machine_mode dest_mode = GET_MODE (dest);
22128 machine_mode mask_mode = GET_MODE (cc_op0);
22129 enum rtx_code rcode = GET_CODE (cond);
22130 machine_mode cc_mode = CCmode;
22131 rtx mask;
22132 rtx cond2;
22133 bool invert_move = false;
22135 if (VECTOR_UNIT_NONE_P (dest_mode))
22136 return 0;
22138 gcc_assert (GET_MODE_SIZE (dest_mode) == GET_MODE_SIZE (mask_mode)
22139 && GET_MODE_NUNITS (dest_mode) == GET_MODE_NUNITS (mask_mode));
22141 switch (rcode)
22143 /* Swap operands if we can, and fall back to doing the operation as
22144 specified, and doing a NOR to invert the test. */
22145 case NE:
22146 case UNLE:
22147 case UNLT:
22148 case UNGE:
22149 case UNGT:
22150 /* Invert condition and try again.
22151 e.g., A = (B != C) ? D : E becomes A = (B == C) ? E : D. */
22152 invert_move = true;
22153 rcode = reverse_condition_maybe_unordered (rcode);
22154 if (rcode == UNKNOWN)
22155 return 0;
22156 break;
22158 case GE:
22159 case LE:
22160 if (GET_MODE_CLASS (mask_mode) == MODE_VECTOR_INT)
22162 /* Invert condition to avoid compound test. */
22163 invert_move = true;
22164 rcode = reverse_condition (rcode);
22166 break;
22168 case GTU:
22169 case GEU:
22170 case LTU:
22171 case LEU:
22172 /* Mark unsigned tests with CCUNSmode. */
22173 cc_mode = CCUNSmode;
22175 /* Invert condition to avoid compound test if necessary. */
22176 if (rcode == GEU || rcode == LEU)
22178 invert_move = true;
22179 rcode = reverse_condition (rcode);
22181 break;
22183 default:
22184 break;
22187 /* Get the vector mask for the given relational operations. */
22188 mask = rs6000_emit_vector_compare (rcode, cc_op0, cc_op1, mask_mode);
22190 if (!mask)
22191 return 0;
22193 if (invert_move)
22194 std::swap (op_true, op_false);
22196 /* The comparison mask is -1/0 in each element, so a select between constant 0 and -1 vectors collapses to the mask itself or its complement. */
22197 if (GET_MODE_CLASS (dest_mode) == MODE_VECTOR_INT
22198 && (GET_CODE (op_true) == CONST_VECTOR
22199 || GET_CODE (op_false) == CONST_VECTOR))
22201 rtx constant_0 = CONST0_RTX (dest_mode);
22202 rtx constant_m1 = CONSTM1_RTX (dest_mode);
22204 if (op_true == constant_m1 && op_false == constant_0)
22206 emit_move_insn (dest, mask);
22207 return 1;
22210 else if (op_true == constant_0 && op_false == constant_m1)
22212 emit_insn (gen_rtx_SET (dest, gen_rtx_NOT (dest_mode, mask)));
22213 return 1;
22216 /* If we can't use the vector comparison directly, perhaps we can use
22217 the mask for the true or false fields, instead of loading up a
22218 constant. */
22219 if (op_true == constant_m1)
22220 op_true = mask;
22222 if (op_false == constant_0)
22223 op_false = mask;
22226 if (!REG_P (op_true) && !SUBREG_P (op_true))
22227 op_true = force_reg (dest_mode, op_true);
22229 if (!REG_P (op_false) && !SUBREG_P (op_false))
22230 op_false = force_reg (dest_mode, op_false);
22232 cond2 = gen_rtx_fmt_ee (NE, cc_mode, gen_lowpart (dest_mode, mask),
22233 CONST0_RTX (dest_mode));
22234 emit_insn (gen_rtx_SET (dest,
22235 gen_rtx_IF_THEN_ELSE (dest_mode,
22236 cond2,
22237 op_true,
22238 op_false)));
22239 return 1;
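/* The if_then_else above matches the vector select patterns (vsel/xxsel);
   since every mask element is all-ones or all-zeros, the effect is
   dest = (mask & op_true) | (~mask & op_false).  */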
22242 /* ISA 3.0 (power9) minmax subcase to emit a XSMAXCDP or XSMINCDP instruction
22243 for SF/DF scalars. Move TRUE_COND to DEST if OP applied to the operands of the
22244 last comparison is nonzero/true, FALSE_COND if it is zero/false. Return 0 if the
22245 hardware has no such operation. */
22247 static int
22248 rs6000_emit_p9_fp_minmax (rtx dest, rtx op, rtx true_cond, rtx false_cond)
22250 enum rtx_code code = GET_CODE (op);
22251 rtx op0 = XEXP (op, 0);
22252 rtx op1 = XEXP (op, 1);
22253 machine_mode compare_mode = GET_MODE (op0);
22254 machine_mode result_mode = GET_MODE (dest);
22255 bool max_p = false;
22257 if (result_mode != compare_mode)
22258 return 0;
22260 if (code == GE || code == GT)
22261 max_p = true;
22262 else if (code == LE || code == LT)
22263 max_p = false;
22264 else
22265 return 0;
22267 if (rtx_equal_p (op0, true_cond) && rtx_equal_p (op1, false_cond))
22268 ;
22270 else if (rtx_equal_p (op1, true_cond) && rtx_equal_p (op0, false_cond))
22271 max_p = !max_p;
22273 else
22274 return 0;
22276 rs6000_emit_minmax (dest, max_p ? SMAX : SMIN, op0, op1);
22277 return 1;
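/* Example (illustrative): for DFmode, "x >= y ? x : y" emits a single
   xsmaxcdp, while "x >= y ? y : x" flips max_p and emits xsmincdp.  */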
22280 /* ISA 3.0 (power9) conditional move subcase to emit XSCMP{EQ,GE,GT,NE}DP and
22281 XXSEL instructions for SF/DF scalars. Move TRUE_COND to DEST if OP applied to
22282 the operands of the last comparison is nonzero/true, FALSE_COND if it is
22283 zero/false. Return 0 if the hardware has no such operation. */
22285 static int
22286 rs6000_emit_p9_fp_cmove (rtx dest, rtx op, rtx true_cond, rtx false_cond)
22288 enum rtx_code code = GET_CODE (op);
22289 rtx op0 = XEXP (op, 0);
22290 rtx op1 = XEXP (op, 1);
22291 machine_mode result_mode = GET_MODE (dest);
22292 rtx compare_rtx;
22293 rtx cmove_rtx;
22294 rtx clobber_rtx;
22296 if (!can_create_pseudo_p ())
22297 return 0;
22299 switch (code)
22301 case EQ:
22302 case GE:
22303 case GT:
22304 break;
22306 case NE:
22307 case LT:
22308 case LE:
22309 code = swap_condition (code);
22310 std::swap (op0, op1);
22311 break;
22313 default:
22314 return 0;
22317 /* Generate: [(parallel [(set (dest)
22318 (if_then_else (op (cmp1) (cmp2))
22319 (true)
22320 (false)))
22321 (clobber (scratch))])]. */
22323 compare_rtx = gen_rtx_fmt_ee (code, CCFPmode, op0, op1);
22324 cmove_rtx = gen_rtx_SET (dest,
22325 gen_rtx_IF_THEN_ELSE (result_mode,
22326 compare_rtx,
22327 true_cond,
22328 false_cond));
22330 clobber_rtx = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (V2DImode));
22331 emit_insn (gen_rtx_PARALLEL (VOIDmode,
22332 gen_rtvec (2, cmove_rtx, clobber_rtx)));
22334 return 1;
22337 /* Emit a conditional move: move TRUE_COND to DEST if OP applied to the
22338 operands of the last comparison is nonzero/true, FALSE_COND if it
22339 is zero/false. Return 0 if the hardware has no such operation. */
22341 int
22342 rs6000_emit_cmove (rtx dest, rtx op, rtx true_cond, rtx false_cond)
22344 enum rtx_code code = GET_CODE (op);
22345 rtx op0 = XEXP (op, 0);
22346 rtx op1 = XEXP (op, 1);
22347 machine_mode compare_mode = GET_MODE (op0);
22348 machine_mode result_mode = GET_MODE (dest);
22349 rtx temp;
22350 bool is_against_zero;
22352 /* These modes should always match. */
22353 if (GET_MODE (op1) != compare_mode
22354 /* In the isel case however, we can use a compare immediate, so
22355 op1 may be a small constant. */
22356 && (!TARGET_ISEL || !short_cint_operand (op1, VOIDmode)))
22357 return 0;
22358 if (GET_MODE (true_cond) != result_mode)
22359 return 0;
22360 if (GET_MODE (false_cond) != result_mode)
22361 return 0;
22363 /* See if we can use the ISA 3.0 (power9) min/max/compare functions. */
22364 if (TARGET_P9_MINMAX
22365 && (compare_mode == SFmode || compare_mode == DFmode)
22366 && (result_mode == SFmode || result_mode == DFmode))
22368 if (rs6000_emit_p9_fp_minmax (dest, op, true_cond, false_cond))
22369 return 1;
22371 if (rs6000_emit_p9_fp_cmove (dest, op, true_cond, false_cond))
22372 return 1;
22375 /* Don't allow using floating point comparisons for integer results for
22376 now. */
22377 if (FLOAT_MODE_P (compare_mode) && !FLOAT_MODE_P (result_mode))
22378 return 0;
22380 /* First, work out if the hardware can do this at all, or
22381 if it's too slow.... */
22382 if (!FLOAT_MODE_P (compare_mode))
22384 if (TARGET_ISEL)
22385 return rs6000_emit_int_cmove (dest, op, true_cond, false_cond);
22386 return 0;
22389 is_against_zero = op1 == CONST0_RTX (compare_mode);
22391 /* A floating-point subtract might overflow, underflow, or produce
22392 an inexact result, thus changing the floating-point flags, so it
22393 can't be generated if we care about that. It's safe if one side
22394 of the construct is zero, since then no subtract will be
22395 generated. */
22396 if (SCALAR_FLOAT_MODE_P (compare_mode)
22397 && flag_trapping_math && ! is_against_zero)
22398 return 0;
22400 /* Eliminate half of the comparisons by switching operands; this
22401 makes the remaining code simpler. */
22402 if (code == UNLT || code == UNGT || code == UNORDERED || code == NE
22403 || code == LTGT || code == LT || code == UNLE)
22405 code = reverse_condition_maybe_unordered (code);
22406 temp = true_cond;
22407 true_cond = false_cond;
22408 false_cond = temp;
22411 /* UNEQ and LTGT take four instructions for a comparison with zero,
22412 so it'll probably be faster to use a branch here too. */
22413 if (code == UNEQ && HONOR_NANS (compare_mode))
22414 return 0;
22416 /* We're going to try to implement comparisons by performing
22417 a subtract, then comparing against zero. Unfortunately,
22418 Inf - Inf is NaN, which is not zero, so if we don't
22419 know that the operand is finite and the comparison
22420 would treat EQ differently from UNORDERED, we can't do it. */
22421 if (HONOR_INFINITIES (compare_mode)
22422 && code != GT && code != UNGE
22423 && (GET_CODE (op1) != CONST_DOUBLE
22424 || real_isinf (CONST_DOUBLE_REAL_VALUE (op1)))
22425 /* Constructs of the form (a OP b ? a : b) are safe. */
22426 && ((! rtx_equal_p (op0, false_cond) && ! rtx_equal_p (op1, false_cond))
22427 || (! rtx_equal_p (op0, true_cond)
22428 && ! rtx_equal_p (op1, true_cond))))
22429 return 0;
22431 /* At this point we know we can use fsel. */
22433 /* Reduce the comparison to a comparison against zero. */
22434 if (! is_against_zero)
22436 temp = gen_reg_rtx (compare_mode);
22437 emit_insn (gen_rtx_SET (temp, gen_rtx_MINUS (compare_mode, op0, op1)));
22438 op0 = temp;
22439 op1 = CONST0_RTX (compare_mode);
22442 /* If we don't care about NaNs we can reduce some of the comparisons
22443 down to faster ones. */
22444 if (! HONOR_NANS (compare_mode))
22445 switch (code)
22447 case GT:
22448 code = LE;
22449 temp = true_cond;
22450 true_cond = false_cond;
22451 false_cond = temp;
22452 break;
22453 case UNGE:
22454 code = GE;
22455 break;
22456 case UNEQ:
22457 code = EQ;
22458 break;
22459 default:
22460 break;
22463 /* Now, reduce everything down to a GE. */
22464 switch (code)
22466 case GE:
22467 break;
22469 case LE:
22470 temp = gen_reg_rtx (compare_mode);
22471 emit_insn (gen_rtx_SET (temp, gen_rtx_NEG (compare_mode, op0)));
22472 op0 = temp;
22473 break;
22475 case ORDERED:
22476 temp = gen_reg_rtx (compare_mode);
22477 emit_insn (gen_rtx_SET (temp, gen_rtx_ABS (compare_mode, op0)));
22478 op0 = temp;
22479 break;
22481 case EQ:
22482 temp = gen_reg_rtx (compare_mode);
22483 emit_insn (gen_rtx_SET (temp,
22484 gen_rtx_NEG (compare_mode,
22485 gen_rtx_ABS (compare_mode, op0))));
22486 op0 = temp;
22487 break;
22489 case UNGE:
22490 /* a UNGE 0 <-> (a GE 0 || -a UNLT 0) */
22491 temp = gen_reg_rtx (result_mode);
22492 emit_insn (gen_rtx_SET (temp,
22493 gen_rtx_IF_THEN_ELSE (result_mode,
22494 gen_rtx_GE (VOIDmode,
22495 op0, op1),
22496 true_cond, false_cond)));
22497 false_cond = true_cond;
22498 true_cond = temp;
22500 temp = gen_reg_rtx (compare_mode);
22501 emit_insn (gen_rtx_SET (temp, gen_rtx_NEG (compare_mode, op0)));
22502 op0 = temp;
22503 break;
22505 case GT:
22506 /* a GT 0 <-> (a GE 0 && -a UNLT 0) */
22507 temp = gen_reg_rtx (result_mode);
22508 emit_insn (gen_rtx_SET (temp,
22509 gen_rtx_IF_THEN_ELSE (result_mode,
22510 gen_rtx_GE (VOIDmode,
22511 op0, op1),
22512 true_cond, false_cond)));
22513 true_cond = false_cond;
22514 false_cond = temp;
22516 temp = gen_reg_rtx (compare_mode);
22517 emit_insn (gen_rtx_SET (temp, gen_rtx_NEG (compare_mode, op0)));
22518 op0 = temp;
22519 break;
22521 default:
22522 gcc_unreachable ();
22525 emit_insn (gen_rtx_SET (dest,
22526 gen_rtx_IF_THEN_ELSE (result_mode,
22527 gen_rtx_GE (VOIDmode,
22528 op0, op1),
22529 true_cond, false_cond)));
22530 return 1;
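/* fsel example (illustrative, when the fsel patterns are available):
   "a <= 0.0 ? x : y" reaches the LE case above, which negates a and
   emits the equivalent of

       fneg fT,fA
       fsel fD,fT,fX,fY    -- fD = (-a >= 0.0) ? x : y  */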
22533 /* Same as above, but for ints (isel). */
22535 int
22536 rs6000_emit_int_cmove (rtx dest, rtx op, rtx true_cond, rtx false_cond)
22538 rtx condition_rtx, cr;
22539 machine_mode mode = GET_MODE (dest);
22540 enum rtx_code cond_code;
22541 rtx (*isel_func) (rtx, rtx, rtx, rtx, rtx);
22542 bool signedp;
22544 if (mode != SImode && (!TARGET_POWERPC64 || mode != DImode))
22545 return 0;
22547 /* We still have to do the compare, because isel doesn't do a
22548 compare; it just looks at the CRx bits set by a previous compare
22549 instruction. */
22550 condition_rtx = rs6000_generate_compare (op, mode);
22551 cond_code = GET_CODE (condition_rtx);
22552 cr = XEXP (condition_rtx, 0);
22553 signedp = GET_MODE (cr) == CCmode;
22555 isel_func = (mode == SImode
22556 ? (signedp ? gen_isel_signed_si : gen_isel_unsigned_si)
22557 : (signedp ? gen_isel_signed_di : gen_isel_unsigned_di));
22559 switch (cond_code)
22561 case LT: case GT: case LTU: case GTU: case EQ:
22562 /* isel handles these directly. */
22563 break;
22565 default:
22566 /* We need to swap the sense of the comparison. */
22568 std::swap (false_cond, true_cond);
22569 PUT_CODE (condition_rtx, reverse_condition (cond_code));
22571 break;
22574 false_cond = force_reg (mode, false_cond);
22575 if (true_cond != const0_rtx)
22576 true_cond = force_reg (mode, true_cond);
22578 emit_insn (isel_func (dest, condition_rtx, true_cond, false_cond, cr));
22580 return 1;
22583 void
22584 rs6000_emit_minmax (rtx dest, enum rtx_code code, rtx op0, rtx op1)
22586 machine_mode mode = GET_MODE (op0);
22587 enum rtx_code c;
22588 rtx target;
22590 /* VSX/altivec have direct min/max insns. */
22591 if ((code == SMAX || code == SMIN)
22592 && (VECTOR_UNIT_ALTIVEC_OR_VSX_P (mode)
22593 || (mode == SFmode && VECTOR_UNIT_VSX_P (DFmode))))
22595 emit_insn (gen_rtx_SET (dest, gen_rtx_fmt_ee (code, mode, op0, op1)));
22596 return;
22599 if (code == SMAX || code == SMIN)
22600 c = GE;
22601 else
22602 c = GEU;
22604 if (code == SMAX || code == UMAX)
22605 target = emit_conditional_move (dest, c, op0, op1, mode,
22606 op0, op1, mode, 0);
22607 else
22608 target = emit_conditional_move (dest, c, op0, op1, mode,
22609 op1, op0, mode, 0);
22610 gcc_assert (target);
22611 if (target != dest)
22612 emit_move_insn (dest, target);
22615 /* A subroutine of the atomic operation splitters. Jump to LABEL if
22616 COND is true. Mark the jump as unlikely to be taken. */
22618 static void
22619 emit_unlikely_jump (rtx cond, rtx label)
22621 rtx x = gen_rtx_IF_THEN_ELSE (VOIDmode, cond, label, pc_rtx);
22622 rtx_insn *insn = emit_jump_insn (gen_rtx_SET (pc_rtx, x));
22623 add_reg_br_prob_note (insn, profile_probability::very_unlikely ());
22626 /* A subroutine of the atomic operation splitters. Emit a load-locked
22627 instruction in MODE. For QI/HImode, possibly use a pattern that includes
22628 the zero_extend operation. */
22630 static void
22631 emit_load_locked (machine_mode mode, rtx reg, rtx mem)
22633 rtx (*fn) (rtx, rtx) = NULL;
22635 switch (mode)
22637 case E_QImode:
22638 fn = gen_load_lockedqi;
22639 break;
22640 case E_HImode:
22641 fn = gen_load_lockedhi;
22642 break;
22643 case E_SImode:
22644 if (GET_MODE (mem) == QImode)
22645 fn = gen_load_lockedqi_si;
22646 else if (GET_MODE (mem) == HImode)
22647 fn = gen_load_lockedhi_si;
22648 else
22649 fn = gen_load_lockedsi;
22650 break;
22651 case E_DImode:
22652 fn = gen_load_lockeddi;
22653 break;
22654 case E_TImode:
22655 fn = gen_load_lockedti;
22656 break;
22657 default:
22658 gcc_unreachable ();
22660 emit_insn (fn (reg, mem));
22663 /* A subroutine of the atomic operation splitters. Emit a store-conditional
22664 instruction in MODE. */
22666 static void
22667 emit_store_conditional (machine_mode mode, rtx res, rtx mem, rtx val)
22669 rtx (*fn) (rtx, rtx, rtx) = NULL;
22671 switch (mode)
22673 case E_QImode:
22674 fn = gen_store_conditionalqi;
22675 break;
22676 case E_HImode:
22677 fn = gen_store_conditionalhi;
22678 break;
22679 case E_SImode:
22680 fn = gen_store_conditionalsi;
22681 break;
22682 case E_DImode:
22683 fn = gen_store_conditionaldi;
22684 break;
22685 case E_TImode:
22686 fn = gen_store_conditionalti;
22687 break;
22688 default:
22689 gcc_unreachable ();
22692 /* Emit sync before stwcx. to address PPC405 Erratum. */
22693 if (PPC405_ERRATUM77)
22694 emit_insn (gen_hwsync ());
22696 emit_insn (fn (res, mem, val));
22699 /* Expand barriers before and after a load_locked/store_cond sequence. */
22701 static rtx
22702 rs6000_pre_atomic_barrier (rtx mem, enum memmodel model)
22704 rtx addr = XEXP (mem, 0);
22706 if (!legitimate_indirect_address_p (addr, reload_completed)
22707 && !legitimate_indexed_address_p (addr, reload_completed))
22709 addr = force_reg (Pmode, addr);
22710 mem = replace_equiv_address_nv (mem, addr);
22713 switch (model)
22715 case MEMMODEL_RELAXED:
22716 case MEMMODEL_CONSUME:
22717 case MEMMODEL_ACQUIRE:
22718 break;
22719 case MEMMODEL_RELEASE:
22720 case MEMMODEL_ACQ_REL:
22721 emit_insn (gen_lwsync ());
22722 break;
22723 case MEMMODEL_SEQ_CST:
22724 emit_insn (gen_hwsync ());
22725 break;
22726 default:
22727 gcc_unreachable ();
22729 return mem;
22732 static void
22733 rs6000_post_atomic_barrier (enum memmodel model)
22735 switch (model)
22737 case MEMMODEL_RELAXED:
22738 case MEMMODEL_CONSUME:
22739 case MEMMODEL_RELEASE:
22740 break;
22741 case MEMMODEL_ACQUIRE:
22742 case MEMMODEL_ACQ_REL:
22743 case MEMMODEL_SEQ_CST:
22744 emit_insn (gen_isync ());
22745 break;
22746 default:
22747 gcc_unreachable ();
22751 /* A subroutine of the various atomic expanders. For sub-word operations,
22752 we must adjust things to operate on SImode. Given the original MEM,
22753 return a new aligned memory. Also build and return the quantities by
22754 which to shift and mask. */
22756 static rtx
22757 rs6000_adjust_atomic_subword (rtx orig_mem, rtx *pshift, rtx *pmask)
22759 rtx addr, align, shift, mask, mem;
22760 HOST_WIDE_INT shift_mask;
22761 machine_mode mode = GET_MODE (orig_mem);
22763 /* For smaller modes, we have to implement this via SImode. */
22764 shift_mask = (mode == QImode ? 0x18 : 0x10);
22766 addr = XEXP (orig_mem, 0);
22767 addr = force_reg (GET_MODE (addr), addr);
22769 /* Aligned memory containing subword. Generate a new memory. We
22770 do not want any of the existing MEM_ATTR data, as we're now
22771 accessing memory outside the original object. */
22772 align = expand_simple_binop (Pmode, AND, addr, GEN_INT (-4),
22773 NULL_RTX, 1, OPTAB_LIB_WIDEN);
22774 mem = gen_rtx_MEM (SImode, align);
22775 MEM_VOLATILE_P (mem) = MEM_VOLATILE_P (orig_mem);
22776 if (MEM_ALIAS_SET (orig_mem) == ALIAS_SET_MEMORY_BARRIER)
22777 set_mem_alias_set (mem, ALIAS_SET_MEMORY_BARRIER);
22779 /* Shift amount for subword relative to aligned word. */
22780 shift = gen_reg_rtx (SImode);
22781 addr = gen_lowpart (SImode, addr);
22782 rtx tmp = gen_reg_rtx (SImode);
22783 emit_insn (gen_ashlsi3 (tmp, addr, GEN_INT (3)));
22784 emit_insn (gen_andsi3 (shift, tmp, GEN_INT (shift_mask)));
22785 if (BYTES_BIG_ENDIAN)
22786 shift = expand_simple_binop (SImode, XOR, shift, GEN_INT (shift_mask),
22787 shift, 1, OPTAB_LIB_WIDEN);
22788 *pshift = shift;
22790 /* Mask for insertion. */
22791 mask = expand_simple_binop (SImode, ASHIFT, GEN_INT (GET_MODE_MASK (mode)),
22792 shift, NULL_RTX, 1, OPTAB_LIB_WIDEN);
22793 *pmask = mask;
22795 return mem;
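/* Worked example (illustrative): a little-endian HImode access at address
   0x1006.  align = 0x1004; (0x1006 << 3) & 0x10 gives shift = 16, so
   mask = 0xffff << 16 = 0xffff0000.  On big-endian the final XOR flips
   shift to 0, selecting the low-order halfword of the SImode word.  */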
22798 /* A subroutine of the various atomic expanders. For sub-word operands,
22799 combine OLDVAL and NEWVAL via MASK. Returns a new pseudo. */
22801 static rtx
22802 rs6000_mask_atomic_subword (rtx oldval, rtx newval, rtx mask)
22804 rtx x;
22806 x = gen_reg_rtx (SImode);
22807 emit_insn (gen_rtx_SET (x, gen_rtx_AND (SImode,
22808 gen_rtx_NOT (SImode, mask),
22809 oldval)));
22811 x = expand_simple_binop (SImode, IOR, newval, x, x, 1, OPTAB_LIB_WIDEN);
22813 return x;
22816 /* A subroutine of the various atomic expanders. For sub-word operands,
22817 extract WIDE to NARROW via SHIFT. */
22819 static void
22820 rs6000_finish_atomic_subword (rtx narrow, rtx wide, rtx shift)
22822 wide = expand_simple_binop (SImode, LSHIFTRT, wide, shift,
22823 wide, 1, OPTAB_LIB_WIDEN);
22824 emit_move_insn (narrow, gen_lowpart (GET_MODE (narrow), wide));
22827 /* Expand an atomic compare and swap operation. */
22829 void
22830 rs6000_expand_atomic_compare_and_swap (rtx operands[])
22832 rtx boolval, retval, mem, oldval, newval, cond;
22833 rtx label1, label2, x, mask, shift;
22834 machine_mode mode, orig_mode;
22835 enum memmodel mod_s, mod_f;
22836 bool is_weak;
22838 boolval = operands[0];
22839 retval = operands[1];
22840 mem = operands[2];
22841 oldval = operands[3];
22842 newval = operands[4];
22843 is_weak = (INTVAL (operands[5]) != 0);
22844 mod_s = memmodel_base (INTVAL (operands[6]));
22845 mod_f = memmodel_base (INTVAL (operands[7]));
22846 orig_mode = mode = GET_MODE (mem);
22848 mask = shift = NULL_RTX;
22849 if (mode == QImode || mode == HImode)
22851 /* Before power8, we didn't have access to lbarx/lharx, so generate a
22852 lwarx and use shift/mask operations. With power8, we need to do the
22853 comparison in SImode, but the store is still done in QI/HImode. */
22854 oldval = convert_modes (SImode, mode, oldval, 1);
22856 if (!TARGET_SYNC_HI_QI)
22858 mem = rs6000_adjust_atomic_subword (mem, &shift, &mask);
22860 /* Shift and mask OLDVAL into position within the word. */
22861 oldval = expand_simple_binop (SImode, ASHIFT, oldval, shift,
22862 NULL_RTX, 1, OPTAB_LIB_WIDEN);
22864 /* Shift and mask NEWVAL into position within the word. */
22865 newval = convert_modes (SImode, mode, newval, 1);
22866 newval = expand_simple_binop (SImode, ASHIFT, newval, shift,
22867 NULL_RTX, 1, OPTAB_LIB_WIDEN);
22870 /* Prepare to adjust the return value. */
22871 retval = gen_reg_rtx (SImode);
22872 mode = SImode;
22874 else if (reg_overlap_mentioned_p (retval, oldval))
22875 oldval = copy_to_reg (oldval);
22877 if (mode != TImode && !reg_or_short_operand (oldval, mode))
22878 oldval = copy_to_mode_reg (mode, oldval);
22880 if (reg_overlap_mentioned_p (retval, newval))
22881 newval = copy_to_reg (newval);
22883 mem = rs6000_pre_atomic_barrier (mem, mod_s);
22885 label1 = NULL_RTX;
22886 if (!is_weak)
22888 label1 = gen_rtx_LABEL_REF (VOIDmode, gen_label_rtx ());
22889 emit_label (XEXP (label1, 0));
22891 label2 = gen_rtx_LABEL_REF (VOIDmode, gen_label_rtx ());
22893 emit_load_locked (mode, retval, mem);
22895 x = retval;
22896 if (mask)
22897 x = expand_simple_binop (SImode, AND, retval, mask,
22898 NULL_RTX, 1, OPTAB_LIB_WIDEN);
22900 cond = gen_reg_rtx (CCmode);
22901 /* If we have TImode, synthesize a comparison. */
22902 if (mode != TImode)
22903 x = gen_rtx_COMPARE (CCmode, x, oldval);
22904 else
22906 rtx xor1_result = gen_reg_rtx (DImode);
22907 rtx xor2_result = gen_reg_rtx (DImode);
22908 rtx or_result = gen_reg_rtx (DImode);
22909 rtx new_word0 = simplify_gen_subreg (DImode, x, TImode, 0);
22910 rtx new_word1 = simplify_gen_subreg (DImode, x, TImode, 8);
22911 rtx old_word0 = simplify_gen_subreg (DImode, oldval, TImode, 0);
22912 rtx old_word1 = simplify_gen_subreg (DImode, oldval, TImode, 8);
22914 emit_insn (gen_xordi3 (xor1_result, new_word0, old_word0));
22915 emit_insn (gen_xordi3 (xor2_result, new_word1, old_word1));
22916 emit_insn (gen_iordi3 (or_result, xor1_result, xor2_result));
22917 x = gen_rtx_COMPARE (CCmode, or_result, const0_rtx);
22920 emit_insn (gen_rtx_SET (cond, x));
22922 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
22923 emit_unlikely_jump (x, label2);
22925 x = newval;
22926 if (mask)
22927 x = rs6000_mask_atomic_subword (retval, newval, mask);
22929 emit_store_conditional (orig_mode, cond, mem, x);
22931 if (!is_weak)
22933 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
22934 emit_unlikely_jump (x, label1);
22937 if (!is_mm_relaxed (mod_f))
22938 emit_label (XEXP (label2, 0));
22940 rs6000_post_atomic_barrier (mod_s);
22942 if (is_mm_relaxed (mod_f))
22943 emit_label (XEXP (label2, 0));
22945 if (shift)
22946 rs6000_finish_atomic_subword (operands[1], retval, shift);
22947 else if (mode != GET_MODE (operands[1]))
22948 convert_move (operands[1], retval, 1);
22950 /* In all cases, CR0 contains EQ on success, and NE on failure. */
22951 x = gen_rtx_EQ (SImode, cond, const0_rtx);
22952 emit_insn (gen_rtx_SET (boolval, x));
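/* Shape of the emitted strong SImode sequence (illustrative):

     1: lwarx   ret,0,mem
        cmpw    0,ret,oldval
        bne-    0,2f
        stwcx.  newval,0,mem
        bne-    0,1b
     2:                          -- CR0.EQ set on success

   bracketed by the barriers implied by the memory model arguments.  */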
22955 /* Expand an atomic exchange operation. */
22957 void
22958 rs6000_expand_atomic_exchange (rtx operands[])
22960 rtx retval, mem, val, cond;
22961 machine_mode mode;
22962 enum memmodel model;
22963 rtx label, x, mask, shift;
22965 retval = operands[0];
22966 mem = operands[1];
22967 val = operands[2];
22968 model = memmodel_base (INTVAL (operands[3]));
22969 mode = GET_MODE (mem);
22971 mask = shift = NULL_RTX;
22972 if (!TARGET_SYNC_HI_QI && (mode == QImode || mode == HImode))
22974 mem = rs6000_adjust_atomic_subword (mem, &shift, &mask);
22976 /* Shift and mask VAL into position within the word. */
22977 val = convert_modes (SImode, mode, val, 1);
22978 val = expand_simple_binop (SImode, ASHIFT, val, shift,
22979 NULL_RTX, 1, OPTAB_LIB_WIDEN);
22981 /* Prepare to adjust the return value. */
22982 retval = gen_reg_rtx (SImode);
22983 mode = SImode;
22986 mem = rs6000_pre_atomic_barrier (mem, model);
22988 label = gen_rtx_LABEL_REF (VOIDmode, gen_label_rtx ());
22989 emit_label (XEXP (label, 0));
22991 emit_load_locked (mode, retval, mem);
22993 x = val;
22994 if (mask)
22995 x = rs6000_mask_atomic_subword (retval, val, mask);
22997 cond = gen_reg_rtx (CCmode);
22998 emit_store_conditional (mode, cond, mem, x);
23000 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
23001 emit_unlikely_jump (x, label);
23003 rs6000_post_atomic_barrier (model);
23005 if (shift)
23006 rs6000_finish_atomic_subword (operands[0], retval, shift);
23009 /* Expand an atomic fetch-and-operate pattern. CODE is the binary operation
23010 to perform. MEM is the memory on which to operate. VAL is the second
23011 operand of the binary operator. BEFORE and AFTER are optional locations to
23012 return the value of MEM either before or after the operation. MODEL_RTX
23013 is a CONST_INT containing the memory model to use. */
23015 void
23016 rs6000_expand_atomic_op (enum rtx_code code, rtx mem, rtx val,
23017 rtx orig_before, rtx orig_after, rtx model_rtx)
23019 enum memmodel model = memmodel_base (INTVAL (model_rtx));
23020 machine_mode mode = GET_MODE (mem);
23021 machine_mode store_mode = mode;
23022 rtx label, x, cond, mask, shift;
23023 rtx before = orig_before, after = orig_after;
23025 mask = shift = NULL_RTX;
23026 /* On power8, we want to use SImode for the operation. On previous systems,
23027 do the operation on the containing SImode word and use shift/mask to get the
23028 proper byte or halfword. */
23029 if (mode == QImode || mode == HImode)
23031 if (TARGET_SYNC_HI_QI)
23033 val = convert_modes (SImode, mode, val, 1);
23035 /* Prepare to adjust the return value. */
23036 before = gen_reg_rtx (SImode);
23037 if (after)
23038 after = gen_reg_rtx (SImode);
23039 mode = SImode;
23041 else
23043 mem = rs6000_adjust_atomic_subword (mem, &shift, &mask);
23045 /* Shift and mask VAL into position within the word. */
23046 val = convert_modes (SImode, mode, val, 1);
23047 val = expand_simple_binop (SImode, ASHIFT, val, shift,
23048 NULL_RTX, 1, OPTAB_LIB_WIDEN);
23050 switch (code)
23052 case IOR:
23053 case XOR:
23054 /* We've already zero-extended VAL. That is sufficient to
23055 make certain that it does not affect other bits. */
23056 mask = NULL;
23057 break;
23059 case AND:
23060 /* If we make certain that all of the other bits in VAL are
23061 set, that will be sufficient to not affect other bits. */
23062 x = gen_rtx_NOT (SImode, mask);
23063 x = gen_rtx_IOR (SImode, x, val);
23064 emit_insn (gen_rtx_SET (val, x));
23065 mask = NULL;
23066 break;
23068 case NOT:
23069 case PLUS:
23070 case MINUS:
23071 /* These will all affect bits outside the field and need
23072 adjustment via MASK within the loop. */
23073 break;
23075 default:
23076 gcc_unreachable ();
23079 /* Prepare to adjust the return value. */
23080 before = gen_reg_rtx (SImode);
23081 if (after)
23082 after = gen_reg_rtx (SImode);
23083 store_mode = mode = SImode;
23087 mem = rs6000_pre_atomic_barrier (mem, model);
23089 label = gen_label_rtx ();
23090 emit_label (label);
23091 label = gen_rtx_LABEL_REF (VOIDmode, label);
23093 if (before == NULL_RTX)
23094 before = gen_reg_rtx (mode);
23096 emit_load_locked (mode, before, mem);
23098 if (code == NOT)
23100 x = expand_simple_binop (mode, AND, before, val,
23101 NULL_RTX, 1, OPTAB_LIB_WIDEN);
23102 after = expand_simple_unop (mode, NOT, x, after, 1);
23104 else
23106 after = expand_simple_binop (mode, code, before, val,
23107 after, 1, OPTAB_LIB_WIDEN);
23110 x = after;
23111 if (mask)
23113 x = expand_simple_binop (SImode, AND, after, mask,
23114 NULL_RTX, 1, OPTAB_LIB_WIDEN);
23115 x = rs6000_mask_atomic_subword (before, x, mask);
23117 else if (store_mode != mode)
23118 x = convert_modes (store_mode, mode, x, 1);
23120 cond = gen_reg_rtx (CCmode);
23121 emit_store_conditional (store_mode, cond, mem, x);
23123 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
23124 emit_unlikely_jump (x, label);
23126 rs6000_post_atomic_barrier (model);
23128 if (shift)
23130 /* QImode/HImode on machines without lbarx/lharx where we do a lwarx and
23131 then do the calculations in a SImode register. */
23132 if (orig_before)
23133 rs6000_finish_atomic_subword (orig_before, before, shift);
23134 if (orig_after)
23135 rs6000_finish_atomic_subword (orig_after, after, shift);
23137 else if (store_mode != mode)
23139 /* QImode/HImode on machines with lbarx/lharx where we do the native
23140 operation and then do the calculations in a SImode register. */
23141 if (orig_before)
23142 convert_move (orig_before, before, 1);
23143 if (orig_after)
23144 convert_move (orig_after, after, 1);
23146 else if (orig_after && after != orig_after)
23147 emit_move_insn (orig_after, after);
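/* E.g. an SImode fetch_add comes out as (illustrative):

     1: lwarx   before,0,mem
        add     after,before,val
        stwcx.  after,0,mem
        bne-    0,1b

   with the subword shift/mask or store_mode conversions from above
   wrapped around it when needed.  */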
23150 /* Emit instructions to move SRC to DST. Called by splitters for
23151 multi-register moves. It will emit at most one instruction for
23152 each register that is accessed; that is, it won't emit li/lis pairs
23153 (or equivalent for 64-bit code). One of SRC or DST must be a hard
23154 register. */
23156 void
23157 rs6000_split_multireg_move (rtx dst, rtx src)
23159 /* The register number of the first register being moved. */
23160 int reg;
23161 /* The mode that is to be moved. */
23162 machine_mode mode;
23163 /* The mode that the move is being done in, and its size. */
23164 machine_mode reg_mode;
23165 int reg_mode_size;
23166 /* The number of registers that will be moved. */
23167 int nregs;
23169 reg = REG_P (dst) ? REGNO (dst) : REGNO (src);
23170 mode = GET_MODE (dst);
23171 nregs = hard_regno_nregs (reg, mode);
23172 if (FP_REGNO_P (reg))
23173 reg_mode = DECIMAL_FLOAT_MODE_P (mode) ? DDmode :
23174 (TARGET_HARD_FLOAT ? DFmode : SFmode);
23175 else if (ALTIVEC_REGNO_P (reg))
23176 reg_mode = V16QImode;
23177 else
23178 reg_mode = word_mode;
23179 reg_mode_size = GET_MODE_SIZE (reg_mode);
23181 gcc_assert (reg_mode_size * nregs == GET_MODE_SIZE (mode));
23183 /* TDmode residing in FP registers is special, since the ISA requires that
23184 the lower-numbered word of a register pair is always the most significant
23185 word, even in little-endian mode. This does not match the usual subreg
23186 semantics, so we cannot use simplify_gen_subreg in those cases. Access
23187 the appropriate constituent registers "by hand" in little-endian mode.
23189 Note we do not need to check for destructive overlap here since TDmode
23190 can only reside in even/odd register pairs. */
23191 if (FP_REGNO_P (reg) && DECIMAL_FLOAT_MODE_P (mode) && !BYTES_BIG_ENDIAN)
23193 rtx p_src, p_dst;
23194 int i;
23196 for (i = 0; i < nregs; i++)
23198 if (REG_P (src) && FP_REGNO_P (REGNO (src)))
23199 p_src = gen_rtx_REG (reg_mode, REGNO (src) + nregs - 1 - i);
23200 else
23201 p_src = simplify_gen_subreg (reg_mode, src, mode,
23202 i * reg_mode_size);
23204 if (REG_P (dst) && FP_REGNO_P (REGNO (dst)))
23205 p_dst = gen_rtx_REG (reg_mode, REGNO (dst) + nregs - 1 - i);
23206 else
23207 p_dst = simplify_gen_subreg (reg_mode, dst, mode,
23208 i * reg_mode_size);
23210 emit_insn (gen_rtx_SET (p_dst, p_src));
23213 return;
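/* Example (illustrative): a TDmode value lives in f10:f11 with the most
   significant word in f10 even on little-endian, while byte offset 0 of
   a memory image is the least significant word there; so the loop above
   pairs f11 with offset 0 and f10 with offset 8 "by hand".  */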
23216 if (REG_P (src) && REG_P (dst) && (REGNO (src) < REGNO (dst)))
23218 /* Move register range backwards, if we might have destructive
23219 overlap. */
23220 int i;
23221 for (i = nregs - 1; i >= 0; i--)
23222 emit_insn (gen_rtx_SET (simplify_gen_subreg (reg_mode, dst, mode,
23223 i * reg_mode_size),
23224 simplify_gen_subreg (reg_mode, src, mode,
23225 i * reg_mode_size)));
23227 else
23229 int i;
23230 int j = -1;
23231 bool used_update = false;
23232 rtx restore_basereg = NULL_RTX;
23234 if (MEM_P (src) && INT_REGNO_P (reg))
23236 rtx breg;
23238 if (GET_CODE (XEXP (src, 0)) == PRE_INC
23239 || GET_CODE (XEXP (src, 0)) == PRE_DEC)
23241 rtx delta_rtx;
23242 breg = XEXP (XEXP (src, 0), 0);
23243 delta_rtx = (GET_CODE (XEXP (src, 0)) == PRE_INC
23244 ? GEN_INT (GET_MODE_SIZE (GET_MODE (src)))
23245 : GEN_INT (-GET_MODE_SIZE (GET_MODE (src))));
23246 emit_insn (gen_add3_insn (breg, breg, delta_rtx));
23247 src = replace_equiv_address (src, breg);
23249 else if (! rs6000_offsettable_memref_p (src, reg_mode, true))
23251 if (GET_CODE (XEXP (src, 0)) == PRE_MODIFY)
23253 rtx basereg = XEXP (XEXP (src, 0), 0);
23254 if (TARGET_UPDATE)
23256 rtx ndst = simplify_gen_subreg (reg_mode, dst, mode, 0);
23257 emit_insn (gen_rtx_SET (ndst,
23258 gen_rtx_MEM (reg_mode,
23259 XEXP (src, 0))));
23260 used_update = true;
23262 else
23263 emit_insn (gen_rtx_SET (basereg,
23264 XEXP (XEXP (src, 0), 1)));
23265 src = replace_equiv_address (src, basereg);
23267 else
23269 rtx basereg = gen_rtx_REG (Pmode, reg);
23270 emit_insn (gen_rtx_SET (basereg, XEXP (src, 0)));
23271 src = replace_equiv_address (src, basereg);
23275 breg = XEXP (src, 0);
23276 if (GET_CODE (breg) == PLUS || GET_CODE (breg) == LO_SUM)
23277 breg = XEXP (breg, 0);
23279 /* If the base register we are using to address memory is
23280 also a destination reg, then change that register last. */
23281 if (REG_P (breg)
23282 && REGNO (breg) >= REGNO (dst)
23283 && REGNO (breg) < REGNO (dst) + nregs)
23284 j = REGNO (breg) - REGNO (dst);
23286 else if (MEM_P (dst) && INT_REGNO_P (reg))
23288 rtx breg;
23290 if (GET_CODE (XEXP (dst, 0)) == PRE_INC
23291 || GET_CODE (XEXP (dst, 0)) == PRE_DEC)
23293 rtx delta_rtx;
23294 breg = XEXP (XEXP (dst, 0), 0);
23295 delta_rtx = (GET_CODE (XEXP (dst, 0)) == PRE_INC
23296 ? GEN_INT (GET_MODE_SIZE (GET_MODE (dst)))
23297 : GEN_INT (-GET_MODE_SIZE (GET_MODE (dst))));
23299 /* We have to update the breg before doing the store.
23300 Use store with update, if available. */
23302 if (TARGET_UPDATE)
23304 rtx nsrc = simplify_gen_subreg (reg_mode, src, mode, 0);
23305 emit_insn (TARGET_32BIT
23306 ? (TARGET_POWERPC64
23307 ? gen_movdi_si_update (breg, breg, delta_rtx, nsrc)
23308 : gen_movsi_update (breg, breg, delta_rtx, nsrc))
23309 : gen_movdi_di_update (breg, breg, delta_rtx, nsrc));
23310 used_update = true;
23312 else
23313 emit_insn (gen_add3_insn (breg, breg, delta_rtx));
23314 dst = replace_equiv_address (dst, breg);
23316 else if (!rs6000_offsettable_memref_p (dst, reg_mode, true)
23317 && GET_CODE (XEXP (dst, 0)) != LO_SUM)
23319 if (GET_CODE (XEXP (dst, 0)) == PRE_MODIFY)
23321 rtx basereg = XEXP (XEXP (dst, 0), 0);
23322 if (TARGET_UPDATE)
23324 rtx nsrc = simplify_gen_subreg (reg_mode, src, mode, 0);
23325 emit_insn (gen_rtx_SET (gen_rtx_MEM (reg_mode,
23326 XEXP (dst, 0)),
23327 nsrc));
23328 used_update = true;
23330 else
23331 emit_insn (gen_rtx_SET (basereg,
23332 XEXP (XEXP (dst, 0), 1)));
23333 dst = replace_equiv_address (dst, basereg);
23335 else
23337 rtx basereg = XEXP (XEXP (dst, 0), 0);
23338 rtx offsetreg = XEXP (XEXP (dst, 0), 1);
23339 gcc_assert (GET_CODE (XEXP (dst, 0)) == PLUS
23340 && REG_P (basereg)
23341 && REG_P (offsetreg)
23342 && REGNO (basereg) != REGNO (offsetreg));
23343 if (REGNO (basereg) == 0)
23345 rtx tmp = offsetreg;
23346 offsetreg = basereg;
23347 basereg = tmp;
23349 emit_insn (gen_add3_insn (basereg, basereg, offsetreg));
23350 restore_basereg = gen_sub3_insn (basereg, basereg, offsetreg);
23351 dst = replace_equiv_address (dst, basereg);
23354 else if (GET_CODE (XEXP (dst, 0)) != LO_SUM)
23355 gcc_assert (rs6000_offsettable_memref_p (dst, reg_mode, true));
23358 for (i = 0; i < nregs; i++)
23360 /* Calculate index to next subword. */
23361 ++j;
23362 if (j == nregs)
23363 j = 0;
23365 /* If the compiler already emitted the move of the first word by
23366 store with update, there is no need to do anything. */
23367 if (j == 0 && used_update)
23368 continue;
23370 emit_insn (gen_rtx_SET (simplify_gen_subreg (reg_mode, dst, mode,
23371 j * reg_mode_size),
23372 simplify_gen_subreg (reg_mode, src, mode,
23373 j * reg_mode_size)));
23375 if (restore_basereg != NULL_RTX)
23376 emit_insn (restore_basereg);
23381 /* This page contains routines that are used to determine what the
23382 function prologue and epilogue code will do and write them out. */
23384 /* Determine whether the REG is really used. */
23386 static bool
23387 save_reg_p (int reg)
23389 /* We need to mark the PIC offset register live under the same conditions
23390 as it is set up, or otherwise it won't be saved before we clobber it. */
23392 if (reg == RS6000_PIC_OFFSET_TABLE_REGNUM && !TARGET_SINGLE_PIC_BASE)
23394 /* When calling eh_return, we must return true for all the cases
23395 where conditional_register_usage marks the PIC offset reg
23396 call used. */
23397 if (TARGET_TOC && TARGET_MINIMAL_TOC
23398 && (crtl->calls_eh_return
23399 || df_regs_ever_live_p (reg)
23400 || !constant_pool_empty_p ()))
23401 return true;
23403 if ((DEFAULT_ABI == ABI_V4 || DEFAULT_ABI == ABI_DARWIN)
23404 && flag_pic)
23405 return true;
23408 return !call_used_regs[reg] && df_regs_ever_live_p (reg);
23411 /* Return the first fixed-point register that is required to be
23412 saved. 32 if none. */
23414 static int
23415 first_reg_to_save (void)
23417 int first_reg;
23419 /* Find lowest numbered live register. */
23420 for (first_reg = 13; first_reg <= 31; first_reg++)
23421 if (save_reg_p (first_reg))
23422 break;
23424 #if TARGET_MACHO
23425 if (flag_pic
23426 && crtl->uses_pic_offset_table
23427 && first_reg > RS6000_PIC_OFFSET_TABLE_REGNUM)
23428 return RS6000_PIC_OFFSET_TABLE_REGNUM;
23429 #endif
23431 return first_reg;
23434 /* Similar, for FP regs. */
23436 static int
23437 first_fp_reg_to_save (void)
23439 int first_reg;
23441 /* Find lowest numbered live register. */
23442 for (first_reg = 14 + 32; first_reg <= 63; first_reg++)
23443 if (save_reg_p (first_reg))
23444 break;
23446 return first_reg;
23449 /* Similar, for AltiVec regs. */
23451 static int
23452 first_altivec_reg_to_save (void)
23454 int i;
23456 /* Stack frame remains as is unless we are in AltiVec ABI. */
23457 if (! TARGET_ALTIVEC_ABI)
23458 return LAST_ALTIVEC_REGNO + 1;
23460 /* On Darwin, the unwind routines are compiled without
23461 TARGET_ALTIVEC, and use save_world to save/restore the
23462 altivec registers when necessary. */
23463 if (DEFAULT_ABI == ABI_DARWIN && crtl->calls_eh_return
23464 && ! TARGET_ALTIVEC)
23465 return FIRST_ALTIVEC_REGNO + 20;
23467 /* Find lowest numbered live register. */
23468 for (i = FIRST_ALTIVEC_REGNO + 20; i <= LAST_ALTIVEC_REGNO; ++i)
23469 if (save_reg_p (i))
23470 break;
23472 return i;
23475 /* Return a 32-bit mask of the AltiVec registers we need to set in
23476 VRSAVE. Bit n of the return value is 1 if Vn is live. The MSB in
23477 the 32-bit word is 0. */
23479 static unsigned int
23480 compute_vrsave_mask (void)
23482 unsigned int i, mask = 0;
23484 /* On Darwin, the unwind routines are compiled without
23485 TARGET_ALTIVEC, and use save_world to save/restore the
23486 call-saved altivec registers when necessary. */
23487 if (DEFAULT_ABI == ABI_DARWIN && crtl->calls_eh_return
23488 && ! TARGET_ALTIVEC)
23489 mask |= 0xFFF;
23491 /* First, find out if we use _any_ altivec registers. */
23492 for (i = FIRST_ALTIVEC_REGNO; i <= LAST_ALTIVEC_REGNO; ++i)
23493 if (df_regs_ever_live_p (i))
23494 mask |= ALTIVEC_REG_BIT (i);
23496 if (mask == 0)
23497 return mask;
23499 /* Next, remove the argument registers from the set. These must
23500 be in the VRSAVE mask set by the caller, so we don't need to add
23501 them in again. More importantly, the mask we compute here is
23502 used to generate CLOBBERs in the set_vrsave insn, and we do not
23503 wish the argument registers to die. */
23504 for (i = ALTIVEC_ARG_MIN_REG; i < (unsigned) crtl->args.info.vregno; i++)
23505 mask &= ~ALTIVEC_REG_BIT (i);
23507 /* Similarly, remove the return value from the set. */
23509 bool yes = false;
23510 diddle_return_value (is_altivec_return_reg, &yes);
23511 if (yes)
23512 mask &= ~ALTIVEC_REG_BIT (ALTIVEC_ARG_RETURN);
23515 return mask;
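/* Editor's note: ALTIVEC_REG_BIT follows the VRSAVE convention stated
   above, where the MSB of the 32-bit word denotes V0.  A hypothetical
   stand-alone equivalent (illustrative only, not the GCC macro):

       unsigned int
       altivec_reg_bit_sketch (int vn)
       {
         return 0x80000000u >> vn;
       }

   A function touching only the call-saved vectors V20..V31 thus gets
   a mask of 0x00000FFF -- exactly the 0xFFF wired in for the Darwin
   save_world case above.  */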
23518 /* For a very restricted set of circumstances, we can cut down the
23519 size of prologues/epilogues by calling our own save/restore-the-world
23520 routines. */
23522 static void
23523 compute_save_world_info (rs6000_stack_t *info)
23525 info->world_save_p = 1;
23526 info->world_save_p
23527 = (WORLD_SAVE_P (info)
23528 && DEFAULT_ABI == ABI_DARWIN
23529 && !cfun->has_nonlocal_label
23530 && info->first_fp_reg_save == FIRST_SAVED_FP_REGNO
23531 && info->first_gp_reg_save == FIRST_SAVED_GP_REGNO
23532 && info->first_altivec_reg_save == FIRST_SAVED_ALTIVEC_REGNO
23533 && info->cr_save_p);
23535 /* This will not work in conjunction with sibcalls. Make sure there
23536 are none. (This check is expensive, but seldom executed.) */
23537 if (WORLD_SAVE_P (info))
23539 rtx_insn *insn;
23540 for (insn = get_last_insn_anywhere (); insn; insn = PREV_INSN (insn))
23541 if (CALL_P (insn) && SIBLING_CALL_P (insn))
23543 info->world_save_p = 0;
23544 break;
23548 if (WORLD_SAVE_P (info))
23550 /* Even if we're not touching VRsave, make sure there's room on the
23551 stack for it, if it looks like we're calling SAVE_WORLD, which
23552 will attempt to save it. */
23553 info->vrsave_size = 4;
23555 /* If we are going to save the world, we need to save the link register too. */
23556 info->lr_save_p = 1;
23558 /* "Save" the VRsave register too if we're saving the world. */
23559 if (info->vrsave_mask == 0)
23560 info->vrsave_mask = compute_vrsave_mask ();
23562 /* Because the Darwin register save/restore routines only handle
23563 F14 .. F31 and V20 .. V31 as per the ABI, perform a consistency
23564 check. */
23565 gcc_assert (info->first_fp_reg_save >= FIRST_SAVED_FP_REGNO
23566 && (info->first_altivec_reg_save
23567 >= FIRST_SAVED_ALTIVEC_REGNO));
23570 return;
23574 static void
23575 is_altivec_return_reg (rtx reg, void *xyes)
23577 bool *yes = (bool *) xyes;
23578 if (REGNO (reg) == ALTIVEC_ARG_RETURN)
23579 *yes = true;
23583 /* Return whether REG is a global user reg or has been specified by
23584 -ffixed-REG. We should not restore these, and so cannot use
23585 lmw or out-of-line restore functions if there are any. We also
23586 can't save them (well, emit frame notes for them), because frame
23587 unwinding during exception handling will restore saved registers. */
23589 static bool
23590 fixed_reg_p (int reg)
23592 /* Ignore fixed_regs[RS6000_PIC_OFFSET_TABLE_REGNUM] when the
23593 backend sets it, overriding anything the user might have given. */
23594 if (reg == RS6000_PIC_OFFSET_TABLE_REGNUM
23595 && ((DEFAULT_ABI == ABI_V4 && flag_pic)
23596 || (DEFAULT_ABI == ABI_DARWIN && flag_pic)
23597 || (TARGET_TOC && TARGET_MINIMAL_TOC)))
23598 return false;
23600 return fixed_regs[reg];
23603 /* Determine the strategy for saving/restoring registers. */
23605 enum {
23606 SAVE_MULTIPLE = 0x1,
23607 SAVE_INLINE_GPRS = 0x2,
23608 SAVE_INLINE_FPRS = 0x4,
23609 SAVE_NOINLINE_GPRS_SAVES_LR = 0x8,
23610 SAVE_NOINLINE_FPRS_SAVES_LR = 0x10,
23611 SAVE_INLINE_VRS = 0x20,
23612 REST_MULTIPLE = 0x100,
23613 REST_INLINE_GPRS = 0x200,
23614 REST_INLINE_FPRS = 0x400,
23615 REST_NOINLINE_FPRS_DOESNT_RESTORE_LR = 0x800,
23616 REST_INLINE_VRS = 0x1000
23617 };
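/* Editor's note: these are independent bit flags, so one strategy
   word can mix decisions.  A hypothetical combination (illustrative
   only):

       int strategy = SAVE_INLINE_GPRS | SAVE_MULTIPLE
                      | REST_INLINE_FPRS | REST_INLINE_VRS;
       bool fprs_saved_out_of_line = (strategy & SAVE_INLINE_FPRS) == 0;

   Here GPRs are saved in line via a store-multiple insn, FPR and VR
   restores are in line, and fprs_saved_out_of_line is true, i.e. FPR
   saves would go through the out-of-line routines.  */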
23619 static int
23620 rs6000_savres_strategy (rs6000_stack_t *info,
23621 bool using_static_chain_p)
23623 int strategy = 0;
23625 /* Select between in-line and out-of-line save and restore of regs.
23626 First, all the obvious cases where we don't use out-of-line. */
23627 if (crtl->calls_eh_return
23628 || cfun->machine->ra_need_lr)
23629 strategy |= (SAVE_INLINE_FPRS | REST_INLINE_FPRS
23630 | SAVE_INLINE_GPRS | REST_INLINE_GPRS
23631 | SAVE_INLINE_VRS | REST_INLINE_VRS);
23633 if (info->first_gp_reg_save == 32)
23634 strategy |= SAVE_INLINE_GPRS | REST_INLINE_GPRS;
23636 if (info->first_fp_reg_save == 64)
23637 strategy |= SAVE_INLINE_FPRS | REST_INLINE_FPRS;
23639 if (info->first_altivec_reg_save == LAST_ALTIVEC_REGNO + 1)
23640 strategy |= SAVE_INLINE_VRS | REST_INLINE_VRS;
23642 /* Define cutoff for using out-of-line functions to save registers. */
23643 if (DEFAULT_ABI == ABI_V4 || TARGET_ELF)
23645 if (!optimize_size)
23647 strategy |= SAVE_INLINE_FPRS | REST_INLINE_FPRS;
23648 strategy |= SAVE_INLINE_GPRS | REST_INLINE_GPRS;
23649 strategy |= SAVE_INLINE_VRS | REST_INLINE_VRS;
23651 else
23653 /* Prefer out-of-line restore if it will exit. */
23654 if (info->first_fp_reg_save > 61)
23655 strategy |= SAVE_INLINE_FPRS;
23656 if (info->first_gp_reg_save > 29)
23658 if (info->first_fp_reg_save == 64)
23659 strategy |= SAVE_INLINE_GPRS;
23660 else
23661 strategy |= SAVE_INLINE_GPRS | REST_INLINE_GPRS;
23663 if (info->first_altivec_reg_save == LAST_ALTIVEC_REGNO)
23664 strategy |= SAVE_INLINE_VRS | REST_INLINE_VRS;
23667 else if (DEFAULT_ABI == ABI_DARWIN)
23669 if (info->first_fp_reg_save > 60)
23670 strategy |= SAVE_INLINE_FPRS | REST_INLINE_FPRS;
23671 if (info->first_gp_reg_save > 29)
23672 strategy |= SAVE_INLINE_GPRS | REST_INLINE_GPRS;
23673 strategy |= SAVE_INLINE_VRS | REST_INLINE_VRS;
23675 else
23677 gcc_checking_assert (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2);
23678 if ((flag_shrink_wrap_separate && optimize_function_for_speed_p (cfun))
23679 || info->first_fp_reg_save > 61)
23680 strategy |= SAVE_INLINE_FPRS | REST_INLINE_FPRS;
23681 strategy |= SAVE_INLINE_GPRS | REST_INLINE_GPRS;
23682 strategy |= SAVE_INLINE_VRS | REST_INLINE_VRS;
23685 /* Don't bother to try to save things out-of-line if r11 is occupied
23686 by the static chain. It would require too much fiddling and the
23687 static chain is rarely used anyway. FPRs are saved w.r.t. the stack
23688 pointer on Darwin, and AIX uses r1 or r12. */
23689 if (using_static_chain_p
23690 && (DEFAULT_ABI == ABI_V4 || DEFAULT_ABI == ABI_DARWIN))
23691 strategy |= ((DEFAULT_ABI == ABI_DARWIN ? 0 : SAVE_INLINE_FPRS)
23692 | SAVE_INLINE_GPRS
23693 | SAVE_INLINE_VRS);
23695 /* Don't ever restore fixed regs. That means we can't use the
23696 out-of-line register restore functions if a fixed reg is in the
23697 range of regs restored. */
23698 if (!(strategy & REST_INLINE_FPRS))
23699 for (int i = info->first_fp_reg_save; i < 64; i++)
23700 if (fixed_regs[i])
23702 strategy |= REST_INLINE_FPRS;
23703 break;
23706 /* We can only use the out-of-line routines to restore fprs if we've
23707 saved all the registers from first_fp_reg_save in the prologue.
23708 Otherwise, we risk loading garbage. Of course, if we have saved
23709 out-of-line then we know we haven't skipped any fprs. */
23710 if ((strategy & SAVE_INLINE_FPRS)
23711 && !(strategy & REST_INLINE_FPRS))
23712 for (int i = info->first_fp_reg_save; i < 64; i++)
23713 if (!save_reg_p (i))
23715 strategy |= REST_INLINE_FPRS;
23716 break;
23719 /* Similarly, for altivec regs. */
23720 if (!(strategy & REST_INLINE_VRS))
23721 for (int i = info->first_altivec_reg_save; i < LAST_ALTIVEC_REGNO + 1; i++)
23722 if (fixed_regs[i])
23724 strategy |= REST_INLINE_VRS;
23725 break;
23728 if ((strategy & SAVE_INLINE_VRS)
23729 && !(strategy & REST_INLINE_VRS))
23730 for (int i = info->first_altivec_reg_save; i < LAST_ALTIVEC_REGNO + 1; i++)
23731 if (!save_reg_p (i))
23733 strategy |= REST_INLINE_VRS;
23734 break;
23737 /* info->lr_save_p isn't yet set if the only reason lr needs to be
23738 saved is an out-of-line save or restore. Set up the value for
23739 the next test (excluding out-of-line gprs). */
23740 bool lr_save_p = (info->lr_save_p
23741 || !(strategy & SAVE_INLINE_FPRS)
23742 || !(strategy & SAVE_INLINE_VRS)
23743 || !(strategy & REST_INLINE_FPRS)
23744 || !(strategy & REST_INLINE_VRS));
23746 if (TARGET_MULTIPLE
23747 && !TARGET_POWERPC64
23748 && info->first_gp_reg_save < 31
23749 && !(flag_shrink_wrap
23750 && flag_shrink_wrap_separate
23751 && optimize_function_for_speed_p (cfun)))
23753 int count = 0;
23754 for (int i = info->first_gp_reg_save; i < 32; i++)
23755 if (save_reg_p (i))
23756 count++;
23758 if (count <= 1)
23759 /* Don't use store multiple if only one reg needs to be
23760 saved. This can occur for example when the ABI_V4 pic reg
23761 (r30) needs to be saved to make calls, but r31 is not
23762 used. */
23763 strategy |= SAVE_INLINE_GPRS | REST_INLINE_GPRS;
23764 else
23766 /* Prefer store multiple for saves over out-of-line
23767 routines, since the store-multiple instruction will
23768 always be smaller. */
23769 strategy |= SAVE_INLINE_GPRS | SAVE_MULTIPLE;
23771 /* The situation is more complicated with load multiple.
23772 We'd prefer to use the out-of-line routines for restores,
23773 since the "exit" out-of-line routines can handle the
23774 restore of LR and the frame teardown. However, it doesn't
23775 make sense to use the out-of-line routine if that is the
23776 only reason we'd need to save LR, and we can't use the
23777 "exit" out-of-line gpr restore if we have saved some
23778 fprs; in those cases it is advantageous to use load
23779 multiple when available. */
23780 if (info->first_fp_reg_save != 64 || !lr_save_p)
23781 strategy |= REST_INLINE_GPRS | REST_MULTIPLE;
23785 /* Using the "exit" out-of-line routine does not improve code size
23786 if using it would require lr to be saved and if only saving one
23787 or two gprs. */
23788 else if (!lr_save_p && info->first_gp_reg_save > 29)
23789 strategy |= SAVE_INLINE_GPRS | REST_INLINE_GPRS;
23791 /* Don't ever restore fixed regs. */
23792 if ((strategy & (REST_INLINE_GPRS | REST_MULTIPLE)) != REST_INLINE_GPRS)
23793 for (int i = info->first_gp_reg_save; i < 32; i++)
23794 if (fixed_reg_p (i))
23796 strategy |= REST_INLINE_GPRS;
23797 strategy &= ~REST_MULTIPLE;
23798 break;
23801 /* We can only use load multiple or the out-of-line routines to
23802 restore gprs if we've saved all the registers from
23803 first_gp_reg_save. Otherwise, we risk loading garbage.
23804 Of course, if we have saved out-of-line or used stmw then we know
23805 we haven't skipped any gprs. */
23806 if ((strategy & (SAVE_INLINE_GPRS | SAVE_MULTIPLE)) == SAVE_INLINE_GPRS
23807 && (strategy & (REST_INLINE_GPRS | REST_MULTIPLE)) != REST_INLINE_GPRS)
23808 for (int i = info->first_gp_reg_save; i < 32; i++)
23809 if (!save_reg_p (i))
23811 strategy |= REST_INLINE_GPRS;
23812 strategy &= ~REST_MULTIPLE;
23813 break;
23816 if (TARGET_ELF && TARGET_64BIT)
23818 if (!(strategy & SAVE_INLINE_FPRS))
23819 strategy |= SAVE_NOINLINE_FPRS_SAVES_LR;
23820 else if (!(strategy & SAVE_INLINE_GPRS)
23821 && info->first_fp_reg_save == 64)
23822 strategy |= SAVE_NOINLINE_GPRS_SAVES_LR;
23824 else if (TARGET_AIX && !(strategy & REST_INLINE_FPRS))
23825 strategy |= REST_NOINLINE_FPRS_DOESNT_RESTORE_LR;
23827 if (TARGET_MACHO && !(strategy & SAVE_INLINE_FPRS))
23828 strategy |= SAVE_NOINLINE_FPRS_SAVES_LR;
23830 return strategy;
23833 /* Calculate the stack information for the current function. This is
23834 complicated by having two separate calling sequences, the AIX calling
23835 sequence and the V.4 calling sequence.
23837 AIX (and Darwin/Mac OS X) stack frames look like:
23838 32-bit 64-bit
23839 SP----> +---------------------------------------+
23840 | back chain to caller | 0 0
23841 +---------------------------------------+
23842 | saved CR | 4 8 (8-11)
23843 +---------------------------------------+
23844 | saved LR | 8 16
23845 +---------------------------------------+
23846 | reserved for compilers | 12 24
23847 +---------------------------------------+
23848 | reserved for binders | 16 32
23849 +---------------------------------------+
23850 | saved TOC pointer | 20 40
23851 +---------------------------------------+
23852 | Parameter save area (+padding*) (P) | 24 48
23853 +---------------------------------------+
23854 | Alloca space (A) | 24+P etc.
23855 +---------------------------------------+
23856 | Local variable space (L) | 24+P+A
23857 +---------------------------------------+
23858 | Float/int conversion temporary (X) | 24+P+A+L
23859 +---------------------------------------+
23860 | Save area for AltiVec registers (W) | 24+P+A+L+X
23861 +---------------------------------------+
23862 | AltiVec alignment padding (Y) | 24+P+A+L+X+W
23863 +---------------------------------------+
23864 | Save area for VRSAVE register (Z) | 24+P+A+L+X+W+Y
23865 +---------------------------------------+
23866 | Save area for GP registers (G) | 24+P+A+L+X+W+Y+Z
23867 +---------------------------------------+
23868 | Save area for FP registers (F) | 24+P+A+L+X+W+Y+Z+G
23869 +---------------------------------------+
23870 old SP->| back chain to caller's caller |
23871 +---------------------------------------+
23873 * If the alloca area is present, the parameter save area is
23874 padded so that the former starts 16-byte aligned.
23876 The required alignment for AIX configurations is two words (i.e., 8
23877 or 16 bytes).
23879 The ELFv2 ABI is a variant of the AIX ABI. Stack frames look like:
23881 SP----> +---------------------------------------+
23882 | Back chain to caller | 0
23883 +---------------------------------------+
23884 | Save area for CR | 8
23885 +---------------------------------------+
23886 | Saved LR | 16
23887 +---------------------------------------+
23888 | Saved TOC pointer | 24
23889 +---------------------------------------+
23890 | Parameter save area (+padding*) (P) | 32
23891 +---------------------------------------+
23892 | Alloca space (A) | 32+P
23893 +---------------------------------------+
23894 | Local variable space (L) | 32+P+A
23895 +---------------------------------------+
23896 | Save area for AltiVec registers (W) | 32+P+A+L
23897 +---------------------------------------+
23898 | AltiVec alignment padding (Y) | 32+P+A+L+W
23899 +---------------------------------------+
23900 | Save area for GP registers (G) | 32+P+A+L+W+Y
23901 +---------------------------------------+
23902 | Save area for FP registers (F) | 32+P+A+L+W+Y+G
23903 +---------------------------------------+
23904 old SP->| back chain to caller's caller | 32+P+A+L+W+Y+G+F
23905 +---------------------------------------+
23907 * If the alloca area is present, the parameter save area is
23908 padded so that the former starts 16-byte aligned.
23910 V.4 stack frames look like:
23912 SP----> +---------------------------------------+
23913 | back chain to caller | 0
23914 +---------------------------------------+
23915 | caller's saved LR | 4
23916 +---------------------------------------+
23917 | Parameter save area (+padding*) (P) | 8
23918 +---------------------------------------+
23919 | Alloca space (A) | 8+P
23920 +---------------------------------------+
23921 | Varargs save area (V) | 8+P+A
23922 +---------------------------------------+
23923 | Local variable space (L) | 8+P+A+V
23924 +---------------------------------------+
23925 | Float/int conversion temporary (X) | 8+P+A+V+L
23926 +---------------------------------------+
23927 | Save area for AltiVec registers (W) | 8+P+A+V+L+X
23928 +---------------------------------------+
23929 | AltiVec alignment padding (Y) | 8+P+A+V+L+X+W
23930 +---------------------------------------+
23931 | Save area for VRSAVE register (Z) | 8+P+A+V+L+X+W+Y
23932 +---------------------------------------+
23933 | saved CR (C) | 8+P+A+V+L+X+W+Y+Z
23934 +---------------------------------------+
23935 | Save area for GP registers (G) | 8+P+A+V+L+X+W+Y+Z+C
23936 +---------------------------------------+
23937 | Save area for FP registers (F) | 8+P+A+V+L+X+W+Y+Z+C+G
23938 +---------------------------------------+
23939 old SP->| back chain to caller's caller |
23940 +---------------------------------------+
23942 * If the alloca area is present and the required alignment is
23943 16 bytes, the parameter save area is padded so that the
23944 alloca area starts 16-byte aligned.
23946 The required alignment for V.4 is 16 bytes, or 8 bytes if -meabi is
23947 given. (But note below and in sysv4.h that we require only 8 and
23948 may round up the size of our stack frame anyway. The historical
23949 reason is early versions of powerpc-linux which didn't properly
23950 align the stack at program startup. A happy side-effect is that
23951 -mno-eabi libraries can be used with -meabi programs.)
23953 The EABI configuration defaults to the V.4 layout. However,
23954 the stack alignment requirements may differ. If -mno-eabi is not
23955 given, the required stack alignment is 8 bytes; if -mno-eabi is
23956 given, the required alignment is 16 bytes. (But see V.4 comment
23957 above.) */
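/* Editor's worked example for the 64-bit AIX/ELFv1 layout above
   (illustrative numbers only): with a 64-byte parameter save area
   (P = 64), no alloca (A = 0) and 48 bytes of locals (L = 48), the
   areas start at these offsets from SP:

       parameter save area        48
       alloca space               48 + P          = 112
       local variable space       48 + P + A      = 112
       float/int conversion temp  48 + P + A + L  = 160

   and so on down the list, each area starting where the previous one
   ends.  The ELFv2 layout works the same way but starts from its
   32-byte header (no compiler/binder doublewords).  */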
23959 #ifndef ABI_STACK_BOUNDARY
23960 #define ABI_STACK_BOUNDARY STACK_BOUNDARY
23961 #endif
23963 static rs6000_stack_t *
23964 rs6000_stack_info (void)
23966 /* We should never be called for thunks; we are not set up for that. */
23967 gcc_assert (!cfun->is_thunk);
23969 rs6000_stack_t *info = &stack_info;
23970 int reg_size = TARGET_32BIT ? 4 : 8;
23971 int ehrd_size;
23972 int ehcr_size;
23973 int save_align;
23974 int first_gp;
23975 HOST_WIDE_INT non_fixed_size;
23976 bool using_static_chain_p;
23978 if (reload_completed && info->reload_completed)
23979 return info;
23981 memset (info, 0, sizeof (*info));
23982 info->reload_completed = reload_completed;
23984 /* Select which calling sequence. */
23985 info->abi = DEFAULT_ABI;
23987 /* Calculate which registers need to be saved & save area size. */
23988 info->first_gp_reg_save = first_reg_to_save ();
23989 /* Assume that we will have to save RS6000_PIC_OFFSET_TABLE_REGNUM,
23990 even if it currently looks like we won't. Reload may need it to
23991 get at a constant; if so, it will have already created a constant
23992 pool entry for it. */
23993 if (((TARGET_TOC && TARGET_MINIMAL_TOC)
23994 || (flag_pic == 1 && DEFAULT_ABI == ABI_V4)
23995 || (flag_pic && DEFAULT_ABI == ABI_DARWIN))
23996 && crtl->uses_const_pool
23997 && info->first_gp_reg_save > RS6000_PIC_OFFSET_TABLE_REGNUM)
23998 first_gp = RS6000_PIC_OFFSET_TABLE_REGNUM;
23999 else
24000 first_gp = info->first_gp_reg_save;
24002 info->gp_size = reg_size * (32 - first_gp);
24004 info->first_fp_reg_save = first_fp_reg_to_save ();
24005 info->fp_size = 8 * (64 - info->first_fp_reg_save);
24007 info->first_altivec_reg_save = first_altivec_reg_to_save ();
24008 info->altivec_size = 16 * (LAST_ALTIVEC_REGNO + 1
24009 - info->first_altivec_reg_save);
24011 /* Does this function call anything? */
24012 info->calls_p = (!crtl->is_leaf || cfun->machine->ra_needs_full_frame);
24014 /* Determine if we need to save the condition code registers. */
24015 if (save_reg_p (CR2_REGNO)
24016 || save_reg_p (CR3_REGNO)
24017 || save_reg_p (CR4_REGNO))
24019 info->cr_save_p = 1;
24020 if (DEFAULT_ABI == ABI_V4)
24021 info->cr_size = reg_size;
24024 /* If the current function calls __builtin_eh_return, then we need
24025 to allocate stack space for registers that will hold data for
24026 the exception handler. */
24027 if (crtl->calls_eh_return)
24029 unsigned int i;
24030 for (i = 0; EH_RETURN_DATA_REGNO (i) != INVALID_REGNUM; ++i)
24031 continue;
24033 ehrd_size = i * UNITS_PER_WORD;
24035 else
24036 ehrd_size = 0;
24038 /* In the ELFv2 ABI, we also need to allocate space for separate
24039 CR field save areas if the function calls __builtin_eh_return. */
24040 if (DEFAULT_ABI == ABI_ELFv2 && crtl->calls_eh_return)
24042 /* This hard-codes that we have three call-saved CR fields. */
24043 ehcr_size = 3 * reg_size;
24044 /* We do *not* use the regular CR save mechanism. */
24045 info->cr_save_p = 0;
24047 else
24048 ehcr_size = 0;
24050 /* Determine various sizes. */
24051 info->reg_size = reg_size;
24052 info->fixed_size = RS6000_SAVE_AREA;
24053 info->vars_size = RS6000_ALIGN (get_frame_size (), 8);
24054 if (cfun->calls_alloca)
24055 info->parm_size =
24056 RS6000_ALIGN (crtl->outgoing_args_size + info->fixed_size,
24057 STACK_BOUNDARY / BITS_PER_UNIT) - info->fixed_size;
24058 else
24059 info->parm_size = RS6000_ALIGN (crtl->outgoing_args_size,
24060 TARGET_ALTIVEC ? 16 : 8);
24061 if (FRAME_GROWS_DOWNWARD)
24062 info->vars_size
24063 += RS6000_ALIGN (info->fixed_size + info->vars_size + info->parm_size,
24064 ABI_STACK_BOUNDARY / BITS_PER_UNIT)
24065 - (info->fixed_size + info->vars_size + info->parm_size);
24067 if (TARGET_ALTIVEC_ABI)
24068 info->vrsave_mask = compute_vrsave_mask ();
24070 if (TARGET_ALTIVEC_VRSAVE && info->vrsave_mask)
24071 info->vrsave_size = 4;
24073 compute_save_world_info (info);
24075 /* Calculate the offsets. */
24076 switch (DEFAULT_ABI)
24078 case ABI_NONE:
24079 default:
24080 gcc_unreachable ();
24082 case ABI_AIX:
24083 case ABI_ELFv2:
24084 case ABI_DARWIN:
24085 info->fp_save_offset = -info->fp_size;
24086 info->gp_save_offset = info->fp_save_offset - info->gp_size;
24088 if (TARGET_ALTIVEC_ABI)
24090 info->vrsave_save_offset = info->gp_save_offset - info->vrsave_size;
24092 /* Align stack so vector save area is on a quadword boundary.
24093 The padding goes above the vectors. */
24094 if (info->altivec_size != 0)
24095 info->altivec_padding_size = info->vrsave_save_offset & 0xF;
24097 info->altivec_save_offset = info->vrsave_save_offset
24098 - info->altivec_padding_size
24099 - info->altivec_size;
24100 gcc_assert (info->altivec_size == 0
24101 || info->altivec_save_offset % 16 == 0);
24103 /* Adjust for AltiVec case. */
24104 info->ehrd_offset = info->altivec_save_offset - ehrd_size;
24106 else
24107 info->ehrd_offset = info->gp_save_offset - ehrd_size;
24109 info->ehcr_offset = info->ehrd_offset - ehcr_size;
24110 info->cr_save_offset = reg_size; /* first word when 64-bit. */
24111 info->lr_save_offset = 2*reg_size;
24112 break;
24114 case ABI_V4:
24115 info->fp_save_offset = -info->fp_size;
24116 info->gp_save_offset = info->fp_save_offset - info->gp_size;
24117 info->cr_save_offset = info->gp_save_offset - info->cr_size;
24119 if (TARGET_ALTIVEC_ABI)
24121 info->vrsave_save_offset = info->cr_save_offset - info->vrsave_size;
24123 /* Align stack so vector save area is on a quadword boundary. */
24124 if (info->altivec_size != 0)
24125 info->altivec_padding_size = 16 - (-info->vrsave_save_offset % 16);
24127 info->altivec_save_offset = info->vrsave_save_offset
24128 - info->altivec_padding_size
24129 - info->altivec_size;
24131 /* Adjust for AltiVec case. */
24132 info->ehrd_offset = info->altivec_save_offset;
24134 else
24135 info->ehrd_offset = info->cr_save_offset;
24137 info->ehrd_offset -= ehrd_size;
24138 info->lr_save_offset = reg_size;
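/* Editor's worked example (illustrative): with
   info->vrsave_save_offset == -196, the AIX form above computes
   (-196) & 0xF == 12 (the low four bits in two's complement), and the
   V4 form computes 16 - (196 % 16) == 16 - 4 == 12; either way 12
   bytes of padding put the AltiVec save area at
   -196 - 12 - altivec_size == -208 - altivec_size, which is 16-byte
   aligned whenever altivec_size is a multiple of 16 (and -208 is).  */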
24141 save_align = (TARGET_ALTIVEC_ABI || DEFAULT_ABI == ABI_DARWIN) ? 16 : 8;
24142 info->save_size = RS6000_ALIGN (info->fp_size
24143 + info->gp_size
24144 + info->altivec_size
24145 + info->altivec_padding_size
24146 + ehrd_size
24147 + ehcr_size
24148 + info->cr_size
24149 + info->vrsave_size,
24150 save_align);
24152 non_fixed_size = info->vars_size + info->parm_size + info->save_size;
24154 info->total_size = RS6000_ALIGN (non_fixed_size + info->fixed_size,
24155 ABI_STACK_BOUNDARY / BITS_PER_UNIT);
24157 /* Determine if we need to save the link register. */
24158 if (info->calls_p
24159 || ((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
24160 && crtl->profile
24161 && !TARGET_PROFILE_KERNEL)
24162 || (DEFAULT_ABI == ABI_V4 && cfun->calls_alloca)
24163 #ifdef TARGET_RELOCATABLE
24164 || (DEFAULT_ABI == ABI_V4
24165 && (TARGET_RELOCATABLE || flag_pic > 1)
24166 && !constant_pool_empty_p ())
24167 #endif
24168 || rs6000_ra_ever_killed ())
24169 info->lr_save_p = 1;
24171 using_static_chain_p = (cfun->static_chain_decl != NULL_TREE
24172 && df_regs_ever_live_p (STATIC_CHAIN_REGNUM)
24173 && call_used_regs[STATIC_CHAIN_REGNUM]);
24174 info->savres_strategy = rs6000_savres_strategy (info, using_static_chain_p);
24176 if (!(info->savres_strategy & SAVE_INLINE_GPRS)
24177 || !(info->savres_strategy & SAVE_INLINE_FPRS)
24178 || !(info->savres_strategy & SAVE_INLINE_VRS)
24179 || !(info->savres_strategy & REST_INLINE_GPRS)
24180 || !(info->savres_strategy & REST_INLINE_FPRS)
24181 || !(info->savres_strategy & REST_INLINE_VRS))
24182 info->lr_save_p = 1;
24184 if (info->lr_save_p)
24185 df_set_regs_ever_live (LR_REGNO, true);
24187 /* Determine if we need to allocate any stack frame:
24189 For AIX we need to push the stack if a frame pointer is needed
24190 (because the stack might be dynamically adjusted), if we are
24191 debugging, if we make calls, or if the sum of fp_save, gp_save,
24192 and local variables is more than the space needed to save all
24193 non-volatile registers: 32-bit: 18*8 + 19*4 = 220 or 64-bit: 18*8
24194 + 18*8 = 288 (GPR13 reserved).
24196 For V.4 we don't have the stack cushion that AIX uses, but assume
24197 that the debugger can handle stackless frames. */
24199 if (info->calls_p)
24200 info->push_p = 1;
24202 else if (DEFAULT_ABI == ABI_V4)
24203 info->push_p = non_fixed_size != 0;
24205 else if (frame_pointer_needed)
24206 info->push_p = 1;
24208 else if (TARGET_XCOFF && write_symbols != NO_DEBUG)
24209 info->push_p = 1;
24211 else
24212 info->push_p = non_fixed_size > (TARGET_32BIT ? 220 : 288);
24214 return info;
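/* Editor's note, spelling out the 220/288 cushion tested just above:
   on 32-bit AIX the non-volatile registers are f14-f31
   (18 x 8 = 144 bytes) plus r13-r31 (19 x 4 = 76 bytes), and
   144 + 76 = 220; on 64-bit they are f14-f31 plus r14-r31 (GPR13 is
   reserved), and 144 + 144 = 288.  A frame no larger than that can
   live in the register save area below the caller's SP without
   pushing a new frame.  */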
24217 static void
24218 debug_stack_info (rs6000_stack_t *info)
24220 const char *abi_string;
24222 if (! info)
24223 info = rs6000_stack_info ();
24225 fprintf (stderr, "\nStack information for function %s:\n",
24226 ((current_function_decl && DECL_NAME (current_function_decl))
24227 ? IDENTIFIER_POINTER (DECL_NAME (current_function_decl))
24228 : "<unknown>"));
24230 switch (info->abi)
24232 default: abi_string = "Unknown"; break;
24233 case ABI_NONE: abi_string = "NONE"; break;
24234 case ABI_AIX: abi_string = "AIX"; break;
24235 case ABI_ELFv2: abi_string = "ELFv2"; break;
24236 case ABI_DARWIN: abi_string = "Darwin"; break;
24237 case ABI_V4: abi_string = "V.4"; break;
24240 fprintf (stderr, "\tABI = %5s\n", abi_string);
24242 if (TARGET_ALTIVEC_ABI)
24243 fprintf (stderr, "\tALTIVEC ABI extensions enabled.\n");
24245 if (info->first_gp_reg_save != 32)
24246 fprintf (stderr, "\tfirst_gp_reg_save = %5d\n", info->first_gp_reg_save);
24248 if (info->first_fp_reg_save != 64)
24249 fprintf (stderr, "\tfirst_fp_reg_save = %5d\n", info->first_fp_reg_save);
24251 if (info->first_altivec_reg_save <= LAST_ALTIVEC_REGNO)
24252 fprintf (stderr, "\tfirst_altivec_reg_save = %5d\n",
24253 info->first_altivec_reg_save);
24255 if (info->lr_save_p)
24256 fprintf (stderr, "\tlr_save_p = %5d\n", info->lr_save_p);
24258 if (info->cr_save_p)
24259 fprintf (stderr, "\tcr_save_p = %5d\n", info->cr_save_p);
24261 if (info->vrsave_mask)
24262 fprintf (stderr, "\tvrsave_mask = 0x%x\n", info->vrsave_mask);
24264 if (info->push_p)
24265 fprintf (stderr, "\tpush_p = %5d\n", info->push_p);
24267 if (info->calls_p)
24268 fprintf (stderr, "\tcalls_p = %5d\n", info->calls_p);
24270 if (info->gp_size)
24271 fprintf (stderr, "\tgp_save_offset = %5d\n", info->gp_save_offset);
24273 if (info->fp_size)
24274 fprintf (stderr, "\tfp_save_offset = %5d\n", info->fp_save_offset);
24276 if (info->altivec_size)
24277 fprintf (stderr, "\taltivec_save_offset = %5d\n",
24278 info->altivec_save_offset);
24280 if (info->vrsave_size)
24281 fprintf (stderr, "\tvrsave_save_offset = %5d\n",
24282 info->vrsave_save_offset);
24284 if (info->lr_save_p)
24285 fprintf (stderr, "\tlr_save_offset = %5d\n", info->lr_save_offset);
24287 if (info->cr_save_p)
24288 fprintf (stderr, "\tcr_save_offset = %5d\n", info->cr_save_offset);
24290 if (info->varargs_save_offset)
24291 fprintf (stderr, "\tvarargs_save_offset = %5d\n", info->varargs_save_offset);
24293 if (info->total_size)
24294 fprintf (stderr, "\ttotal_size = " HOST_WIDE_INT_PRINT_DEC"\n",
24295 info->total_size);
24297 if (info->vars_size)
24298 fprintf (stderr, "\tvars_size = " HOST_WIDE_INT_PRINT_DEC"\n",
24299 info->vars_size);
24301 if (info->parm_size)
24302 fprintf (stderr, "\tparm_size = %5d\n", info->parm_size);
24304 if (info->fixed_size)
24305 fprintf (stderr, "\tfixed_size = %5d\n", info->fixed_size);
24307 if (info->gp_size)
24308 fprintf (stderr, "\tgp_size = %5d\n", info->gp_size);
24310 if (info->fp_size)
24311 fprintf (stderr, "\tfp_size = %5d\n", info->fp_size);
24313 if (info->altivec_size)
24314 fprintf (stderr, "\taltivec_size = %5d\n", info->altivec_size);
24316 if (info->vrsave_size)
24317 fprintf (stderr, "\tvrsave_size = %5d\n", info->vrsave_size);
24319 if (info->altivec_padding_size)
24320 fprintf (stderr, "\taltivec_padding_size= %5d\n",
24321 info->altivec_padding_size);
24323 if (info->cr_size)
24324 fprintf (stderr, "\tcr_size = %5d\n", info->cr_size);
24326 if (info->save_size)
24327 fprintf (stderr, "\tsave_size = %5d\n", info->save_size);
24329 if (info->reg_size != 4)
24330 fprintf (stderr, "\treg_size = %5d\n", info->reg_size);
24332 fprintf (stderr, "\tsave-strategy = %04x\n", info->savres_strategy);
24334 fprintf (stderr, "\n");
24337 rtx
24338 rs6000_return_addr (int count, rtx frame)
24340 /* We can't use get_hard_reg_initial_val for LR when count == 0 if LR
24341 is trashed by the prologue, as it is for PIC on ABI_V4 and Darwin. */
24342 if (count != 0
24343 || ((DEFAULT_ABI == ABI_V4 || DEFAULT_ABI == ABI_DARWIN) && flag_pic))
24345 cfun->machine->ra_needs_full_frame = 1;
24347 if (count == 0)
24348 /* FRAME is set to frame_pointer_rtx by the generic code, but that
24349 is good for loading 0(r1) only when !FRAME_GROWS_DOWNWARD. */
24350 frame = stack_pointer_rtx;
24351 rtx prev_frame_addr = memory_address (Pmode, frame);
24352 rtx prev_frame = copy_to_reg (gen_rtx_MEM (Pmode, prev_frame_addr));
24353 rtx lr_save_off = plus_constant (Pmode,
24354 prev_frame, RETURN_ADDRESS_OFFSET);
24355 rtx lr_save_addr = memory_address (Pmode, lr_save_off);
24356 return gen_rtx_MEM (Pmode, lr_save_addr);
24359 cfun->machine->ra_need_lr = 1;
24360 return get_hard_reg_initial_val (Pmode, LR_REGNO);
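/* Editor's sketch of the address chain used above (hypothetical
   source-level view; the back chain at 0(sp) and the LR save slot at
   RETURN_ADDRESS_OFFSET are the ABI-defined locations):

       void *frame = __builtin_frame_address (0);
       void *back_chain = *(void **) frame;
       void *ra = *(void **) ((char *) back_chain
                              + RETURN_ADDRESS_OFFSET);

   i.e. follow the back chain once, then read the LR save slot in that
   frame header -- which is where the prologue stored the return
   address when it could not stay in LR.  */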
24363 /* Say whether a function is a candidate for sibcall handling or not. */
24365 static bool
24366 rs6000_function_ok_for_sibcall (tree decl, tree exp)
24368 tree fntype;
24370 if (decl)
24371 fntype = TREE_TYPE (decl);
24372 else
24373 fntype = TREE_TYPE (TREE_TYPE (CALL_EXPR_FN (exp)));
24375 /* We can't do it if the called function has more vector parameters
24376 than the current function; there's nowhere to put the VRsave code. */
24377 if (TARGET_ALTIVEC_ABI
24378 && TARGET_ALTIVEC_VRSAVE
24379 && !(decl && decl == current_function_decl))
24381 function_args_iterator args_iter;
24382 tree type;
24383 int nvreg = 0;
24385 /* Functions with vector parameters are required to have a
24386 prototype, so the argument type info must be available
24387 here. */
24388 FOREACH_FUNCTION_ARGS(fntype, type, args_iter)
24389 if (TREE_CODE (type) == VECTOR_TYPE
24390 && ALTIVEC_OR_VSX_VECTOR_MODE (TYPE_MODE (type)))
24391 nvreg++;
24393 FOREACH_FUNCTION_ARGS(TREE_TYPE (current_function_decl), type, args_iter)
24394 if (TREE_CODE (type) == VECTOR_TYPE
24395 && ALTIVEC_OR_VSX_VECTOR_MODE (TYPE_MODE (type)))
24396 nvreg--;
24398 if (nvreg > 0)
24399 return false;
24402 /* Under the AIX or ELFv2 ABIs we can't allow calls to non-local
24403 functions, because the callee may have a different TOC pointer to
24404 the caller and there's no way to ensure we restore the TOC when
24405 we return. With the secure-plt SYSV ABI we can't make non-local
24406 calls when -fpic/PIC because the plt call stubs use r30. */
24407 if (DEFAULT_ABI == ABI_DARWIN
24408 || ((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
24409 && decl
24410 && !DECL_EXTERNAL (decl)
24411 && !DECL_WEAK (decl)
24412 && (*targetm.binds_local_p) (decl))
24413 || (DEFAULT_ABI == ABI_V4
24414 && (!TARGET_SECURE_PLT
24415 || !flag_pic
24416 || (decl
24417 && (*targetm.binds_local_p) (decl)))))
24419 tree attr_list = TYPE_ATTRIBUTES (fntype);
24421 if (!lookup_attribute ("longcall", attr_list)
24422 || lookup_attribute ("shortcall", attr_list))
24423 return true;
24426 return false;
24429 static int
24430 rs6000_ra_ever_killed (void)
24432 rtx_insn *top;
24433 rtx reg;
24434 rtx_insn *insn;
24436 if (cfun->is_thunk)
24437 return 0;
24439 if (cfun->machine->lr_save_state)
24440 return cfun->machine->lr_save_state - 1;
24442 /* regs_ever_live has LR marked as used if any sibcalls are present,
24443 but this should not force saving and restoring in the
24444 pro/epilogue. Likewise, reg_set_between_p thinks a sibcall
24445 clobbers LR, so that is inappropriate. */
24447 /* Also, the prologue can generate a store into LR that
24448 doesn't really count, like this:
24450 move LR->R0
24451 bcl to set PIC register
24452 move LR->R31
24453 move R0->LR
24455 When we're called from the epilogue, we need to avoid counting
24456 this as a store. */
24458 push_topmost_sequence ();
24459 top = get_insns ();
24460 pop_topmost_sequence ();
24461 reg = gen_rtx_REG (Pmode, LR_REGNO);
24463 for (insn = NEXT_INSN (top); insn != NULL_RTX; insn = NEXT_INSN (insn))
24465 if (INSN_P (insn))
24467 if (CALL_P (insn))
24469 if (!SIBLING_CALL_P (insn))
24470 return 1;
24472 else if (find_regno_note (insn, REG_INC, LR_REGNO))
24473 return 1;
24474 else if (set_of (reg, insn) != NULL_RTX
24475 && !prologue_epilogue_contains (insn))
24476 return 1;
24479 return 0;
24482 /* Emit instructions needed to load the TOC register.
24483 This is only needed when TARGET_TOC, TARGET_MINIMAL_TOC, and there is
24484 a constant pool; or for SVR4 -fpic. */
24486 void
24487 rs6000_emit_load_toc_table (int fromprolog)
24489 rtx dest;
24490 dest = gen_rtx_REG (Pmode, RS6000_PIC_OFFSET_TABLE_REGNUM);
24492 if (TARGET_ELF && TARGET_SECURE_PLT && DEFAULT_ABI == ABI_V4 && flag_pic)
24494 char buf[30];
24495 rtx lab, tmp1, tmp2, got;
24497 lab = gen_label_rtx ();
24498 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (lab));
24499 lab = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (buf));
24500 if (flag_pic == 2)
24502 got = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (toc_label_name));
24503 need_toc_init = 1;
24505 else
24506 got = rs6000_got_sym ();
24507 tmp1 = tmp2 = dest;
24508 if (!fromprolog)
24510 tmp1 = gen_reg_rtx (Pmode);
24511 tmp2 = gen_reg_rtx (Pmode);
24513 emit_insn (gen_load_toc_v4_PIC_1 (lab));
24514 emit_move_insn (tmp1, gen_rtx_REG (Pmode, LR_REGNO));
24515 emit_insn (gen_load_toc_v4_PIC_3b (tmp2, tmp1, got, lab));
24516 emit_insn (gen_load_toc_v4_PIC_3c (dest, tmp2, got, lab));
24518 else if (TARGET_ELF && DEFAULT_ABI == ABI_V4 && flag_pic == 1)
24520 emit_insn (gen_load_toc_v4_pic_si ());
24521 emit_move_insn (dest, gen_rtx_REG (Pmode, LR_REGNO));
24523 else if (TARGET_ELF && DEFAULT_ABI == ABI_V4 && flag_pic == 2)
24525 char buf[30];
24526 rtx temp0 = (fromprolog
24527 ? gen_rtx_REG (Pmode, 0)
24528 : gen_reg_rtx (Pmode));
24530 if (fromprolog)
24532 rtx symF, symL;
24534 ASM_GENERATE_INTERNAL_LABEL (buf, "LCF", rs6000_pic_labelno);
24535 symF = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (buf));
24537 ASM_GENERATE_INTERNAL_LABEL (buf, "LCL", rs6000_pic_labelno);
24538 symL = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (buf));
24540 emit_insn (gen_load_toc_v4_PIC_1 (symF));
24541 emit_move_insn (dest, gen_rtx_REG (Pmode, LR_REGNO));
24542 emit_insn (gen_load_toc_v4_PIC_2 (temp0, dest, symL, symF));
24544 else
24546 rtx tocsym, lab;
24548 tocsym = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (toc_label_name));
24549 need_toc_init = 1;
24550 lab = gen_label_rtx ();
24551 emit_insn (gen_load_toc_v4_PIC_1b (tocsym, lab));
24552 emit_move_insn (dest, gen_rtx_REG (Pmode, LR_REGNO));
24553 if (TARGET_LINK_STACK)
24554 emit_insn (gen_addsi3 (dest, dest, GEN_INT (4)));
24555 emit_move_insn (temp0, gen_rtx_MEM (Pmode, dest));
24557 emit_insn (gen_addsi3 (dest, temp0, dest));
24559 else if (TARGET_ELF && !TARGET_AIX && flag_pic == 0 && TARGET_MINIMAL_TOC)
24561 /* This is for AIX code running in non-PIC ELF32. */
24562 rtx realsym = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (toc_label_name));
24564 need_toc_init = 1;
24565 emit_insn (gen_elf_high (dest, realsym));
24566 emit_insn (gen_elf_low (dest, dest, realsym));
24568 else
24570 gcc_assert (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2);
24572 if (TARGET_32BIT)
24573 emit_insn (gen_load_toc_aix_si (dest));
24574 else
24575 emit_insn (gen_load_toc_aix_di (dest));
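/* Editor's note: the 32-bit SVR4 PIC sequences above are all built on
   the bcl/mflr idiom for obtaining the current PC (a hedged sketch of
   the shape of the code, not the exact pattern output):

       bcl 20,31,.LCF0
   .LCF0:
       mflr 30

   The bcl 20,31 form is special-cased by most cores so that it does
   not disturb the return-address predictor; r30 then holds the
   address of .LCF0, to which a link-time constant (the .LCL word
   mentioned above) is added to form the GOT/TOC base.  */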
24579 /* Emit instructions to restore the link register after determining where
24580 its value has been stored. */
24582 void
24583 rs6000_emit_eh_reg_restore (rtx source, rtx scratch)
24585 rs6000_stack_t *info = rs6000_stack_info ();
24586 rtx operands[2];
24588 operands[0] = source;
24589 operands[1] = scratch;
24591 if (info->lr_save_p)
24593 rtx frame_rtx = stack_pointer_rtx;
24594 HOST_WIDE_INT sp_offset = 0;
24595 rtx tmp;
24597 if (frame_pointer_needed
24598 || cfun->calls_alloca
24599 || info->total_size > 32767)
24601 tmp = gen_frame_mem (Pmode, frame_rtx);
24602 emit_move_insn (operands[1], tmp);
24603 frame_rtx = operands[1];
24605 else if (info->push_p)
24606 sp_offset = info->total_size;
24608 tmp = plus_constant (Pmode, frame_rtx,
24609 info->lr_save_offset + sp_offset);
24610 tmp = gen_frame_mem (Pmode, tmp);
24611 emit_move_insn (tmp, operands[0]);
24613 else
24614 emit_move_insn (gen_rtx_REG (Pmode, LR_REGNO), operands[0]);
24616 /* Freeze lr_save_p. We've just emitted rtl that depends on the
24617 state of lr_save_p so any change from here on would be a bug. In
24618 particular, stop rs6000_ra_ever_killed from considering the SET
24619 of lr we may have added just above. */
24620 cfun->machine->lr_save_state = info->lr_save_p + 1;
24623 static GTY(()) alias_set_type set = -1;
24625 alias_set_type
24626 get_TOC_alias_set (void)
24628 if (set == -1)
24629 set = new_alias_set ();
24630 return set;
24633 /* This returns nonzero if the current function uses the TOC. This is
24634 determined by the presence of (use (unspec ... UNSPEC_TOC)), which
24635 is generated by the ABI_V4 load_toc_* patterns.
24636 Return 2 instead of 1 if the load_toc_* pattern is in the function
24637 partition that doesn't start the function. */
24638 #if TARGET_ELF
24639 static int
24640 uses_TOC (void)
24642 rtx_insn *insn;
24643 int ret = 1;
24645 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
24647 if (INSN_P (insn))
24649 rtx pat = PATTERN (insn);
24650 int i;
24652 if (GET_CODE (pat) == PARALLEL)
24653 for (i = 0; i < XVECLEN (pat, 0); i++)
24655 rtx sub = XVECEXP (pat, 0, i);
24656 if (GET_CODE (sub) == USE)
24658 sub = XEXP (sub, 0);
24659 if (GET_CODE (sub) == UNSPEC
24660 && XINT (sub, 1) == UNSPEC_TOC)
24661 return ret;
24665 else if (crtl->has_bb_partition
24666 && NOTE_P (insn)
24667 && NOTE_KIND (insn) == NOTE_INSN_SWITCH_TEXT_SECTIONS)
24668 ret = 2;
24670 return 0;
24672 #endif
24674 rtx
24675 create_TOC_reference (rtx symbol, rtx largetoc_reg)
24677 rtx tocrel, tocreg, hi;
24679 if (TARGET_DEBUG_ADDR)
24681 if (GET_CODE (symbol) == SYMBOL_REF)
24682 fprintf (stderr, "\ncreate_TOC_reference, (symbol_ref %s)\n",
24683 XSTR (symbol, 0));
24684 else
24686 fprintf (stderr, "\ncreate_TOC_reference, code %s:\n",
24687 GET_RTX_NAME (GET_CODE (symbol)));
24688 debug_rtx (symbol);
24692 if (!can_create_pseudo_p ())
24693 df_set_regs_ever_live (TOC_REGISTER, true);
24695 tocreg = gen_rtx_REG (Pmode, TOC_REGISTER);
24696 tocrel = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, symbol, tocreg), UNSPEC_TOCREL);
24697 if (TARGET_CMODEL == CMODEL_SMALL || can_create_pseudo_p ())
24698 return tocrel;
24700 hi = gen_rtx_HIGH (Pmode, copy_rtx (tocrel));
24701 if (largetoc_reg != NULL)
24703 emit_move_insn (largetoc_reg, hi);
24704 hi = largetoc_reg;
24706 return gen_rtx_LO_SUM (Pmode, hi, tocrel);
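/* Editor's note: for the small code model the bare UNSPEC_TOCREL
   above becomes a single TOC-relative access, while the HIGH/LO_SUM
   pair used otherwise splits it in two (illustrative asm, typical
   PowerPC64 ELF spellings):

       ld 9,var@toc(2)           # small code model

       addis 9,2,var@toc@ha      # medium/large code model
       ld 9,var@toc@l(9)

   with r2 as the TOC pointer, and the addis result landing in
   largetoc_reg when one is supplied.  */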
24709 /* Issue assembly directives that create a reference to the given DWARF
24710 FRAME_TABLE_LABEL from the current function section. */
24711 void
24712 rs6000_aix_asm_output_dwarf_table_ref (char * frame_table_label)
24714 fprintf (asm_out_file, "\t.ref %s\n",
24715 (* targetm.strip_name_encoding) (frame_table_label));
24718 /* This ties together stack memory (MEM with an alias set of frame_alias_set)
24719 and the change to the stack pointer. */
24721 static void
24722 rs6000_emit_stack_tie (rtx fp, bool hard_frame_needed)
24724 rtvec p;
24725 int i;
24726 rtx regs[3];
24728 i = 0;
24729 regs[i++] = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
24730 if (hard_frame_needed)
24731 regs[i++] = gen_rtx_REG (Pmode, HARD_FRAME_POINTER_REGNUM);
24732 if (!(REGNO (fp) == STACK_POINTER_REGNUM
24733 || (hard_frame_needed
24734 && REGNO (fp) == HARD_FRAME_POINTER_REGNUM)))
24735 regs[i++] = fp;
24737 p = rtvec_alloc (i);
24738 while (--i >= 0)
24740 rtx mem = gen_frame_mem (BLKmode, regs[i]);
24741 RTVEC_ELT (p, i) = gen_rtx_SET (mem, const0_rtx);
24744 emit_insn (gen_stack_tie (gen_rtx_PARALLEL (VOIDmode, p)));
24747 /* Allocate SIZE_INT bytes on the stack using a store with update style insn
24748 and set the appropriate attributes for the generated insn. Return the
24749 first insn which adjusts the stack pointer or the last insn before
24750 the stack adjustment loop.
24752 SIZE_INT is used to create the CFI note for the allocation.
24754 SIZE_RTX is an rtx containing the size of the adjustment. Note that
24755 since stacks grow to lower addresses, its runtime value is -SIZE_INT.
24757 ORIG_SP contains the backchain value that must be stored at *sp. */
24759 static rtx_insn *
24760 rs6000_emit_allocate_stack_1 (HOST_WIDE_INT size_int, rtx orig_sp)
24762 rtx_insn *insn;
24764 rtx size_rtx = GEN_INT (-size_int);
24765 if (size_int > 32767)
24767 rtx tmp_reg = gen_rtx_REG (Pmode, 0);
24768 /* Need a note here so that try_split doesn't get confused. */
24769 if (get_last_insn () == NULL_RTX)
24770 emit_note (NOTE_INSN_DELETED);
24771 insn = emit_move_insn (tmp_reg, size_rtx);
24772 try_split (PATTERN (insn), insn, 0);
24773 size_rtx = tmp_reg;
24776 if (Pmode == SImode)
24777 insn = emit_insn (gen_movsi_update_stack (stack_pointer_rtx,
24778 stack_pointer_rtx,
24779 size_rtx,
24780 orig_sp));
24781 else
24782 insn = emit_insn (gen_movdi_di_update_stack (stack_pointer_rtx,
24783 stack_pointer_rtx,
24784 size_rtx,
24785 orig_sp));
24786 rtx par = PATTERN (insn);
24787 gcc_assert (GET_CODE (par) == PARALLEL);
24788 rtx set = XVECEXP (par, 0, 0);
24789 gcc_assert (GET_CODE (set) == SET);
24790 rtx mem = SET_DEST (set);
24791 gcc_assert (MEM_P (mem));
24792 MEM_NOTRAP_P (mem) = 1;
24793 set_mem_alias_set (mem, get_frame_alias_set ());
24795 RTX_FRAME_RELATED_P (insn) = 1;
24796 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
24797 gen_rtx_SET (stack_pointer_rtx,
24798 gen_rtx_PLUS (Pmode,
24799 stack_pointer_rtx,
24800 GEN_INT (-size_int))));
24802 /* Emit a blockage to ensure the allocation/probing insns are
24803 not optimized, combined, removed, etc. Add REG_STACK_CHECK
24804 note for similar reasons. */
24805 if (flag_stack_clash_protection)
24807 add_reg_note (insn, REG_STACK_CHECK, const0_rtx);
24808 emit_insn (gen_blockage ());
24811 return insn;
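/* Editor's note: the store-with-update patterns above come out as the
   classic allocate-and-save-backchain sequence (illustrative 64-bit
   asm):

       stdu 1,-432(1)            # frames up to 32767 bytes

       lis 0,-1                  # larger frames: r0 = -34000,
       ori 0,0,31536             # i.e. 0xffff7b30
       stdux 1,1,0               # indexed store-with-update

   Each form stores the old SP at the new SP (the back chain) and
   decrements SP in a single insn, so the new frame is never live
   without its back chain.  */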
24814 static HOST_WIDE_INT
24815 get_stack_clash_protection_probe_interval (void)
24817 return (HOST_WIDE_INT_1U
24818 << PARAM_VALUE (PARAM_STACK_CLASH_PROTECTION_PROBE_INTERVAL));
24821 static HOST_WIDE_INT
24822 get_stack_clash_protection_guard_size (void)
24824 return (HOST_WIDE_INT_1U
24825 << PARAM_VALUE (PARAM_STACK_CLASH_PROTECTION_GUARD_SIZE));
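/* Editor's worked example: both getters scale a --param value that is
   a power-of-2 exponent, e.g. a probe-interval parameter of 12 gives
   HOST_WIDE_INT_1U << 12 == 4096, one probe per 4 KiB page (a common
   default; the actual defaults are target and param dependent).  */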
24828 /* Allocate ORIG_SIZE bytes on the stack and probe the newly
24829 allocated space every STACK_CLASH_PROTECTION_PROBE_INTERVAL bytes.
24831 COPY_REG, if non-null, should contain a copy of the original
24832 stack pointer at exit from this function.
24834 This is subtly different from the Ada probing in that it tries hard to
24835 prevent attacks that jump the stack guard. Thus it is never allowed to
24836 allocate more than STACK_CLASH_PROTECTION_PROBE_INTERVAL bytes of stack
24837 space without a suitable probe. */
24838 static rtx_insn *
24839 rs6000_emit_probe_stack_range_stack_clash (HOST_WIDE_INT orig_size,
24840 rtx copy_reg)
24842 rtx orig_sp = copy_reg;
24844 HOST_WIDE_INT probe_interval = get_stack_clash_protection_probe_interval ();
24846 /* Round the size down to a multiple of PROBE_INTERVAL. */
24847 HOST_WIDE_INT rounded_size = ROUND_DOWN (orig_size, probe_interval);
24849 /* If explicitly requested,
24850 or the rounded size is not the same as the original size,
24851 or the rounded size is greater than a page,
24852 then we will need a copy of the original stack pointer. */
24853 if (rounded_size != orig_size
24854 || rounded_size > probe_interval
24855 || copy_reg)
24857 /* If the caller did not request a copy of the incoming stack
24858 pointer, then we use r0 to hold the copy. */
24859 if (!copy_reg)
24860 orig_sp = gen_rtx_REG (Pmode, 0);
24861 emit_move_insn (orig_sp, stack_pointer_rtx);
24864 /* There are three cases here.
24866 One is a single probe which is the most common and most efficiently
24867 implemented as it does not have to have a copy of the original
24868 stack pointer if there are no residuals.
24870 Second is unrolled allocation/probes, which we use if there are just
24871 a few of them. It needs to save the original stack pointer into a
24872 temporary for use as a source register in the allocation/probe.
24874 Last is a loop. This is the most uncommon case and least efficient. */
24875 rtx_insn *retval = NULL;
24876 if (rounded_size == probe_interval)
24878 retval = rs6000_emit_allocate_stack_1 (probe_interval, stack_pointer_rtx);
24880 dump_stack_clash_frame_info (PROBE_INLINE, rounded_size != orig_size);
24882 else if (rounded_size <= 8 * probe_interval)
24884 /* The ABI requires using the store with update insns to allocate
24885 space and store the backchain into the stack.
24887 So we save the current stack pointer into a temporary, then
24888 emit the store-with-update insns to store the saved stack pointer
24889 into the right location in each new page. */
24890 for (int i = 0; i < rounded_size; i += probe_interval)
24892 rtx_insn *insn
24893 = rs6000_emit_allocate_stack_1 (probe_interval, orig_sp);
24895 /* Save the first stack adjustment in RETVAL. */
24896 if (i == 0)
24897 retval = insn;
24900 dump_stack_clash_frame_info (PROBE_INLINE, rounded_size != orig_size);
24902 else
24904 /* Compute the ending address. */
24905 rtx end_addr
24906 = copy_reg ? gen_rtx_REG (Pmode, 0) : gen_rtx_REG (Pmode, 12);
24907 rtx rs = GEN_INT (-rounded_size);
24908 rtx_insn *insn;
24909 if (add_operand (rs, Pmode))
24910 insn = emit_insn (gen_add3_insn (end_addr, stack_pointer_rtx, rs));
24911 else
24913 emit_move_insn (end_addr, GEN_INT (-rounded_size));
24914 insn = emit_insn (gen_add3_insn (end_addr, end_addr,
24915 stack_pointer_rtx));
24916 /* Describe the effect of INSN to the CFI engine. */
24917 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
24918 gen_rtx_SET (end_addr,
24919 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
24920 rs)));
24922 RTX_FRAME_RELATED_P (insn) = 1;
24924 /* Emit the loop. */
24925 if (TARGET_64BIT)
24926 retval = emit_insn (gen_probe_stack_rangedi (stack_pointer_rtx,
24927 stack_pointer_rtx, orig_sp,
24928 end_addr));
24929 else
24930 retval = emit_insn (gen_probe_stack_rangesi (stack_pointer_rtx,
24931 stack_pointer_rtx, orig_sp,
24932 end_addr));
24933 RTX_FRAME_RELATED_P (retval) = 1;
24934 /* Describe the effect of INSN to the CFI engine. */
24935 add_reg_note (retval, REG_FRAME_RELATED_EXPR,
24936 gen_rtx_SET (stack_pointer_rtx, end_addr));
24938 /* Emit a blockage to ensure the allocation/probing insns are
24939 not optimized, combined, removed, etc. Other cases handle this
24940 within their call to rs6000_emit_allocate_stack_1. */
24941 emit_insn (gen_blockage ());
24943 dump_stack_clash_frame_info (PROBE_LOOP, rounded_size != orig_size);
24946 if (orig_size != rounded_size)
24948 /* Allocate (and implicitly probe) any residual space. */
24949 HOST_WIDE_INT residual = orig_size - rounded_size;
24951 rtx_insn *insn = rs6000_emit_allocate_stack_1 (residual, orig_sp);
24953 /* If the residual was the only allocation, then we can return the
24954 allocating insn. */
24955 if (!retval)
24956 retval = insn;
24959 return retval;
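/* Editor's sketch of the case selection above (hypothetical helper
   mirroring the control flow only):

       HOST_WIDE_INT rounded = orig_size - orig_size % probe_interval;
       HOST_WIDE_INT residual = orig_size - rounded;
       int which = (rounded == probe_interval ? 1
                    : rounded <= 8 * probe_interval ? 2 : 3);

   Case 1 is a single allocate-and-probe, case 2 an unrolled run of
   stores-with-update (one per interval), case 3 the loop; a nonzero
   residual is then allocated last and is implicitly probed, because
   the back chain store touches the new page.  */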
24962 /* Emit the correct code for allocating stack space, as insns.
24963 If COPY_REG, make sure a copy of the old frame is left there.
24964 The generated code may use hard register 0 as a temporary. */
24966 static rtx_insn *
24967 rs6000_emit_allocate_stack (HOST_WIDE_INT size, rtx copy_reg, int copy_off)
24969 rtx_insn *insn;
24970 rtx stack_reg = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
24971 rtx tmp_reg = gen_rtx_REG (Pmode, 0);
24972 rtx todec = gen_int_mode (-size, Pmode);
24974 if (INTVAL (todec) != -size)
24976 warning (0, "stack frame too large");
24977 emit_insn (gen_trap ());
24978 return 0;
24981 if (crtl->limit_stack)
24983 if (REG_P (stack_limit_rtx)
24984 && REGNO (stack_limit_rtx) > 1
24985 && REGNO (stack_limit_rtx) <= 31)
24987 rtx_insn *insn
24988 = gen_add3_insn (tmp_reg, stack_limit_rtx, GEN_INT (size));
24989 gcc_assert (insn);
24990 emit_insn (insn);
24991 emit_insn (gen_cond_trap (LTU, stack_reg, tmp_reg, const0_rtx));
24993 else if (GET_CODE (stack_limit_rtx) == SYMBOL_REF
24994 && TARGET_32BIT
24995 && DEFAULT_ABI == ABI_V4
24996 && !flag_pic)
24998 rtx toload = gen_rtx_CONST (VOIDmode,
24999 gen_rtx_PLUS (Pmode,
25000 stack_limit_rtx,
25001 GEN_INT (size)));
25003 emit_insn (gen_elf_high (tmp_reg, toload));
25004 emit_insn (gen_elf_low (tmp_reg, tmp_reg, toload));
25005 emit_insn (gen_cond_trap (LTU, stack_reg, tmp_reg,
25006 const0_rtx));
25008 else
25009 warning (0, "stack limit expression is not supported");
25012 if (flag_stack_clash_protection)
25014 if (size < get_stack_clash_protection_guard_size ())
25015 dump_stack_clash_frame_info (NO_PROBE_SMALL_FRAME, true);
25016 else
25018 rtx_insn *insn = rs6000_emit_probe_stack_range_stack_clash (size,
25019 copy_reg);
25021 /* If we asked for a copy with an offset, then we still need to add in
25022 the offset. */
25023 if (copy_reg && copy_off)
25024 emit_insn (gen_add3_insn (copy_reg, copy_reg, GEN_INT (copy_off)));
25025 return insn;
25029 if (copy_reg)
25031 if (copy_off != 0)
25032 emit_insn (gen_add3_insn (copy_reg, stack_reg, GEN_INT (copy_off)));
25033 else
25034 emit_move_insn (copy_reg, stack_reg);
25037 /* Since we didn't use gen_frame_mem to generate the MEM, grab
25038 it now and set the alias set/attributes. The above gen_*_update
25039 calls will generate a PARALLEL with the MEM set being the first
25040 operation. */
25041 insn = rs6000_emit_allocate_stack_1 (size, stack_reg);
25042 return insn;
25045 #define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP)
25047 #if PROBE_INTERVAL > 32768
25048 #error Cannot use indexed addressing mode for stack probing
25049 #endif
25051 /* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE,
25052 inclusive. These are offsets from the current stack pointer. */
25054 static void
25055 rs6000_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size)
25057 /* See if we have a constant small number of probes to generate. If so,
25058 that's the easy case. */
25059 if (first + size <= 32768)
25061 HOST_WIDE_INT i;
25063 /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 1 until
25064 it exceeds SIZE. If only one probe is needed, this will not
25065 generate any code. Then probe at FIRST + SIZE. */
25066 for (i = PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
25067 emit_stack_probe (plus_constant (Pmode, stack_pointer_rtx,
25068 -(first + i)));
25070 emit_stack_probe (plus_constant (Pmode, stack_pointer_rtx,
25071 -(first + size)));
25074 /* Otherwise, do the same as above, but in a loop. Note that we must be
25075 extra careful with variables wrapping around because we might be at
25076 the very top (or the very bottom) of the address space and we have
25077 to be able to handle this case properly; in particular, we use an
25078 equality test for the loop condition. */
25079 else
25081 HOST_WIDE_INT rounded_size;
25082 rtx r12 = gen_rtx_REG (Pmode, 12);
25083 rtx r0 = gen_rtx_REG (Pmode, 0);
25085 /* Sanity check for the addressing mode we're going to use. */
25086 gcc_assert (first <= 32768);
25088 /* Step 1: round SIZE to the previous multiple of the interval. */
25090 rounded_size = ROUND_DOWN (size, PROBE_INTERVAL);
25093 /* Step 2: compute initial and final value of the loop counter. */
25095 /* TEST_ADDR = SP + FIRST. */
25096 emit_insn (gen_rtx_SET (r12, plus_constant (Pmode, stack_pointer_rtx,
25097 -first)));
25099 /* LAST_ADDR = SP + FIRST + ROUNDED_SIZE. */
25100 if (rounded_size > 32768)
25102 emit_move_insn (r0, GEN_INT (-rounded_size));
25103 emit_insn (gen_rtx_SET (r0, gen_rtx_PLUS (Pmode, r12, r0)));
25105 else
25106 emit_insn (gen_rtx_SET (r0, plus_constant (Pmode, r12,
25107 -rounded_size)));
25110 /* Step 3: the loop
25112 do
25114 TEST_ADDR = TEST_ADDR + PROBE_INTERVAL
25115 probe at TEST_ADDR
25117 while (TEST_ADDR != LAST_ADDR)
25119 probes at FIRST + N * PROBE_INTERVAL for values of N from 1
25120 until it is equal to ROUNDED_SIZE. */
25122 if (TARGET_64BIT)
25123 emit_insn (gen_probe_stack_rangedi (r12, r12, stack_pointer_rtx, r0));
25124 else
25125 emit_insn (gen_probe_stack_rangesi (r12, r12, stack_pointer_rtx, r0));
25128 /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
25129 that SIZE is equal to ROUNDED_SIZE. */
25131 if (size != rounded_size)
25132 emit_stack_probe (plus_constant (Pmode, r12, rounded_size - size));
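/* Editor's worked example for the small case above (illustrative,
   assuming PROBE_INTERVAL == 4096): with first == 0 and size == 10000
   the loop emits probes at sp-4096 and sp-8192, and the trailing
   statement probes at sp-10000; for size <= 4096 only the single
   final probe at sp-size is emitted.  */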
25136 /* Probe a range of stack addresses from REG1 to REG2 inclusive. These are
25137 addresses, not offsets. */
25139 static const char *
25140 output_probe_stack_range_1 (rtx reg1, rtx reg2)
25142 static int labelno = 0;
25143 char loop_lab[32];
25144 rtx xops[2];
25146 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno++);
25148 /* Loop. */
25149 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
25151 /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL. */
25152 xops[0] = reg1;
25153 xops[1] = GEN_INT (-PROBE_INTERVAL);
25154 output_asm_insn ("addi %0,%0,%1", xops);
25156 /* Probe at TEST_ADDR. */
25157 xops[1] = gen_rtx_REG (Pmode, 0);
25158 output_asm_insn ("stw %1,0(%0)", xops);
25160 /* Test if TEST_ADDR == LAST_ADDR. */
25161 xops[1] = reg2;
25162 if (TARGET_64BIT)
25163 output_asm_insn ("cmpd 0,%0,%1", xops);
25164 else
25165 output_asm_insn ("cmpw 0,%0,%1", xops);
25167 /* Branch. */
25168 fputs ("\tbne 0,", asm_out_file);
25169 assemble_name_raw (asm_out_file, loop_lab);
25170 fputc ('\n', asm_out_file);
25172 return "";
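/* The loop emitted above looks roughly like this on a 64-bit target
   (illustrative only, with PROBE_INTERVAL == 4096, r12 = TEST_ADDR
   and r0 = LAST_ADDR as set up by rs6000_emit_probe_stack_range):

	.LPSRL0:
		addi r12,r12,-4096
		stw r0,0(r12)
		cmpd 0,r12,r0
		bne 0,.LPSRL0
  */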
25175 /* This function is called when rs6000_frame_related is processing
25176 SETs within a PARALLEL, and returns whether the REGNO save ought to
25177 be marked RTX_FRAME_RELATED_P. The PARALLELs involved are those
25178 for out-of-line register save functions, store multiple, and the
25179 Darwin world_save. They may contain registers that don't really
25180 need saving. */
25182 static bool
25183 interesting_frame_related_regno (unsigned int regno)
25185 /* Apparent saves of r0 are actually saves of LR.  It doesn't make
25186 sense to substitute the regno here to test save_reg_p (LR_REGNO).
25187 We *know* LR needs saving, and dwarf2cfi.c is able to deduce that
25188 (set (mem) (r0)) is saving LR from a prior (set (r0) (lr)) marked
25189 as frame related. */
25190 if (regno == 0)
25191 return true;
25192 /* If we see CR2 then we are here on a Darwin world save. Saves of
25193 CR2 signify the whole CR is being saved. This is a long-standing
25194 ABI wart fixed by ELFv2. As for r0/lr there is no need to check
25195 that CR needs to be saved. */
25196 if (regno == CR2_REGNO)
25197 return true;
25198 /* Omit frame info for any user-defined global regs. If frame info
25199 is supplied for them, frame unwinding will restore a user reg.
25200 Also omit frame info for any reg we don't need to save, as that
25201 bloats frame info and can cause problems with shrink wrapping.
25202 Since global regs won't be seen as needing to be saved, both of
25203 these conditions are covered by save_reg_p. */
25204 return save_reg_p (regno);
25207 /* Probe a range of stack addresses from REG1 to REG3 inclusive. These are
25208 addresses, not offsets.
25210 REG2 contains the backchain that must be stored into *sp at each allocation.
25212 This is subtly different from the Ada probing above in that it tries hard
25213 to prevent attacks that jump the stack guard. Thus, it is never allowed
25214 to allocate more than PROBE_INTERVAL bytes of stack space without a
25215 suitable probe. */
25217 static const char *
25218 output_probe_stack_range_stack_clash (rtx reg1, rtx reg2, rtx reg3)
25220 static int labelno = 0;
25221 char loop_lab[32];
25222 rtx xops[3];
25224 HOST_WIDE_INT probe_interval = get_stack_clash_protection_probe_interval ();
25226 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno++);
25228 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
25230 /* This allocates and probes. */
25231 xops[0] = reg1;
25232 xops[1] = reg2;
25233 xops[2] = GEN_INT (-probe_interval);
25234 if (TARGET_64BIT)
25235 output_asm_insn ("stdu %1,%2(%0)", xops);
25236 else
25237 output_asm_insn ("stwu %1,%2(%0)", xops);
25239 /* Jump to LOOP_LAB if TEST_ADDR != LAST_ADDR. */
25240 xops[0] = reg1;
25241 xops[1] = reg3;
25242 if (TARGET_64BIT)
25243 output_asm_insn ("cmpd 0,%0,%1", xops);
25244 else
25245 output_asm_insn ("cmpw 0,%0,%1", xops);
25247 fputs ("\tbne 0,", asm_out_file);
25248 assemble_name_raw (asm_out_file, loop_lab);
25249 fputc ('\n', asm_out_file);
25251 return "";
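/* The store-with-update instruction is what defeats stack-guard
   jumping: "stdu rS,-4096(r1)" decrements the stack pointer and
   writes the backchain in a single instruction, so there is never a
   window in which sp points below an unprobed page.  Illustrative
   64-bit loop (register choice and interval are examples only):

	.LPSRL1:
		stdu r0,-4096(r1)
		cmpd 0,r1,r12
		bne 0,.LPSRL1
  */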
25254 /* Wrapper around the output_probe_stack_range routines. */
25255 const char *
25256 output_probe_stack_range (rtx reg1, rtx reg2, rtx reg3)
25258 if (flag_stack_clash_protection)
25259 return output_probe_stack_range_stack_clash (reg1, reg2, reg3);
25260 else
25261 return output_probe_stack_range_1 (reg1, reg3);
25264 /* Add to 'insn' a note which is PATTERN (INSN) but with REG replaced
25265 with (plus:P (reg 1) VAL), and with REG2 replaced with REPL2 if REG2
25266 is not NULL. It would be nice if dwarf2out_frame_debug_expr could
25267 deduce these equivalences by itself so it wasn't necessary to hold
25268 its hand so much. Don't be tempted to always supply d2_f_d_e with
25269 the actual cfa register, i.e. r31 when we are using a hard frame
25270 pointer. That fails when saving regs off r1, and sched moves the
25271 r31 setup past the reg saves. */
25273 static rtx_insn *
25274 rs6000_frame_related (rtx_insn *insn, rtx reg, HOST_WIDE_INT val,
25275 rtx reg2, rtx repl2)
25277 rtx repl;
25279 if (REGNO (reg) == STACK_POINTER_REGNUM)
25281 gcc_checking_assert (val == 0);
25282 repl = NULL_RTX;
25284 else
25285 repl = gen_rtx_PLUS (Pmode, gen_rtx_REG (Pmode, STACK_POINTER_REGNUM),
25286 GEN_INT (val));
25288 rtx pat = PATTERN (insn);
25289 if (!repl && !reg2)
25291 /* No need for any replacement. Just set RTX_FRAME_RELATED_P. */
25292 if (GET_CODE (pat) == PARALLEL)
25293 for (int i = 0; i < XVECLEN (pat, 0); i++)
25294 if (GET_CODE (XVECEXP (pat, 0, i)) == SET)
25296 rtx set = XVECEXP (pat, 0, i);
25298 if (!REG_P (SET_SRC (set))
25299 || interesting_frame_related_regno (REGNO (SET_SRC (set))))
25300 RTX_FRAME_RELATED_P (set) = 1;
25302 RTX_FRAME_RELATED_P (insn) = 1;
25303 return insn;
25306 /* We expect that 'pat' is either a SET or a PARALLEL containing
25307 SETs (and possibly other stuff). In a PARALLEL, all the SETs
25308 are important so they all have to be marked RTX_FRAME_RELATED_P.
25309 Call simplify_replace_rtx on the SETs rather than the whole insn
25310 so as to leave the other stuff alone (for example USE of r12). */
25312 set_used_flags (pat);
25313 if (GET_CODE (pat) == SET)
25315 if (repl)
25316 pat = simplify_replace_rtx (pat, reg, repl);
25317 if (reg2)
25318 pat = simplify_replace_rtx (pat, reg2, repl2);
25320 else if (GET_CODE (pat) == PARALLEL)
25322 pat = shallow_copy_rtx (pat);
25323 XVEC (pat, 0) = shallow_copy_rtvec (XVEC (pat, 0));
25325 for (int i = 0; i < XVECLEN (pat, 0); i++)
25326 if (GET_CODE (XVECEXP (pat, 0, i)) == SET)
25328 rtx set = XVECEXP (pat, 0, i);
25330 if (repl)
25331 set = simplify_replace_rtx (set, reg, repl);
25332 if (reg2)
25333 set = simplify_replace_rtx (set, reg2, repl2);
25334 XVECEXP (pat, 0, i) = set;
25336 if (!REG_P (SET_SRC (set))
25337 || interesting_frame_related_regno (REGNO (SET_SRC (set))))
25338 RTX_FRAME_RELATED_P (set) = 1;
25341 else
25342 gcc_unreachable ();
25344 RTX_FRAME_RELATED_P (insn) = 1;
25345 add_reg_note (insn, REG_FRAME_RELATED_EXPR, copy_rtx_if_shared (pat));
25347 return insn;
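/* For a simple save of r31 through a temporary frame register the
   resulting note looks like (illustrative only):

	(expr_list:REG_FRAME_RELATED_EXPR
	   (set (mem:DI (plus:DI (reg:DI 1) (const_int -8)))
		(reg:DI 31)))

   i.e. the original store rewritten so its address is expressed in
   terms of the stack pointer rather than the temporary register.  */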
25350 /* Returns an insn that has a vrsave set operation with the
25351 appropriate CLOBBERs. */
25353 static rtx
25354 generate_set_vrsave (rtx reg, rs6000_stack_t *info, int epiloguep)
25356 int nclobs, i;
25357 rtx insn, clobs[TOTAL_ALTIVEC_REGS + 1];
25358 rtx vrsave = gen_rtx_REG (SImode, VRSAVE_REGNO);
25360 clobs[0]
25361 = gen_rtx_SET (vrsave,
25362 gen_rtx_UNSPEC_VOLATILE (SImode,
25363 gen_rtvec (2, reg, vrsave),
25364 UNSPECV_SET_VRSAVE));
25366 nclobs = 1;
25368 /* We need to clobber the registers in the mask so the scheduler
25369 does not move sets to VRSAVE before sets of AltiVec registers.
25371 However, if the function receives nonlocal gotos, reload will set
25372 all call saved registers live. We will end up with:
25374 (set (reg 999) (mem))
25375 (parallel [ (set (reg vrsave) (unspec blah))
25376 (clobber (reg 999))])
25378 The clobber will cause the store into reg 999 to be dead, and
25379 flow will attempt to delete an epilogue insn. In this case, we
25380 need an unspec use/set of the register. */
25382 for (i = FIRST_ALTIVEC_REGNO; i <= LAST_ALTIVEC_REGNO; ++i)
25383 if (info->vrsave_mask & ALTIVEC_REG_BIT (i))
25385 if (!epiloguep || call_used_regs [i])
25386 clobs[nclobs++] = gen_rtx_CLOBBER (VOIDmode,
25387 gen_rtx_REG (V4SImode, i));
25388 else
25390 rtx reg = gen_rtx_REG (V4SImode, i);
25392 clobs[nclobs++]
25393 = gen_rtx_SET (reg,
25394 gen_rtx_UNSPEC (V4SImode,
25395 gen_rtvec (1, reg), 27));
25399 insn = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (nclobs));
25401 for (i = 0; i < nclobs; ++i)
25402 XVECEXP (insn, 0, i) = clobs[i];
25404 return insn;
25407 static rtx
25408 gen_frame_set (rtx reg, rtx frame_reg, int offset, bool store)
25410 rtx addr, mem;
25412 addr = gen_rtx_PLUS (Pmode, frame_reg, GEN_INT (offset));
25413 mem = gen_frame_mem (GET_MODE (reg), addr);
25414 return gen_rtx_SET (store ? mem : reg, store ? reg : mem);
25417 static rtx
25418 gen_frame_load (rtx reg, rtx frame_reg, int offset)
25420 return gen_frame_set (reg, frame_reg, offset, false);
25423 static rtx
25424 gen_frame_store (rtx reg, rtx frame_reg, int offset)
25426 return gen_frame_set (reg, frame_reg, offset, true);
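/* For example, gen_frame_store (gen_rtx_REG (DImode, 0), sp, 16)
   produces (set (mem:DI (plus:DI (reg:DI 1) (const_int 16)))
   (reg:DI 0)) with the mem marked as a frame access; gen_frame_load
   is the same SET with the operands swapped.  */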
25429 /* Save a register into the frame, and emit RTX_FRAME_RELATED_P notes.
25430 Save REGNO into [FRAME_REG + OFFSET] in mode MODE. */
25432 static rtx_insn *
25433 emit_frame_save (rtx frame_reg, machine_mode mode,
25434 unsigned int regno, int offset, HOST_WIDE_INT frame_reg_to_sp)
25436 rtx reg;
25438 /* Some cases that need register indexed addressing. */
25439 gcc_checking_assert (!(TARGET_ALTIVEC_ABI && ALTIVEC_VECTOR_MODE (mode))
25440 || (TARGET_VSX && ALTIVEC_OR_VSX_VECTOR_MODE (mode)));
25442 reg = gen_rtx_REG (mode, regno);
25443 rtx_insn *insn = emit_insn (gen_frame_store (reg, frame_reg, offset));
25444 return rs6000_frame_related (insn, frame_reg, frame_reg_to_sp,
25445 NULL_RTX, NULL_RTX);
25448 /* Emit an offset memory reference suitable for a frame store, while
25449 converting to a valid addressing mode. */
25451 static rtx
25452 gen_frame_mem_offset (machine_mode mode, rtx reg, int offset)
25454 return gen_frame_mem (mode, gen_rtx_PLUS (Pmode, reg, GEN_INT (offset)));
25457 #ifndef TARGET_FIX_AND_CONTINUE
25458 #define TARGET_FIX_AND_CONTINUE 0
25459 #endif
25461 /* The lowest first-saved register is really GPR 13 or 14 (target-dependent), FPR 14, or VR 20; FIRST_SAVRES_REGISTER must be the smallest of these. */
25462 #define FIRST_SAVRES_REGISTER FIRST_SAVED_GP_REGNO
25463 #define LAST_SAVRES_REGISTER 31
25464 #define N_SAVRES_REGISTERS (LAST_SAVRES_REGISTER - FIRST_SAVRES_REGISTER + 1)
25466 enum {
25467 SAVRES_LR = 0x1,
25468 SAVRES_SAVE = 0x2,
25469 SAVRES_REG = 0x0c,
25470 SAVRES_GPR = 0,
25471 SAVRES_FPR = 4,
25472 SAVRES_VR = 8
25475 static GTY(()) rtx savres_routine_syms[N_SAVRES_REGISTERS][12];
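/* A SEL value is a bitwise OR of the flags above; for example
   SAVRES_SAVE | SAVRES_FPR | SAVRES_LR (== 0x7) selects the FPR save
   routine that also saves LR.  The combined values range over
   0 .. 11, which the second dimension (12) of savres_routine_syms
   covers.  */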
25477 /* Temporary holding space for an out-of-line register save/restore
25478 routine name. */
25479 static char savres_routine_name[30];
25481 /* Return the name for an out-of-line register save/restore routine.
25482 SEL selects the register class and whether we save or restore. */
25484 static char *
25485 rs6000_savres_routine_name (int regno, int sel)
25487 const char *prefix = "";
25488 const char *suffix = "";
25490 /* Different targets are supposed to define
25491 {SAVE,RESTORE}_FP_{PREFIX,SUFFIX} with the idea that the needed
25492 routine name could be defined with:
25494 sprintf (name, "%s%d%s", SAVE_FP_PREFIX, regno, SAVE_FP_SUFFIX)
25496 This is a nice idea in theory, but in reality, things are
25497 complicated in several ways:
25499 - ELF targets have save/restore routines for GPRs.
25501 - PPC64 ELF targets have routines for save/restore of GPRs that
25502 differ in what they do with the link register, so having a set
25503 prefix doesn't work. (We only use one of the save routines at
25504 the moment, though.)
25506 - PPC32 elf targets have "exit" versions of the restore routines
25507 that restore the link register and can save some extra space.
25508 These require an extra suffix. (There are also "tail" versions
25509 of the restore routines and "GOT" versions of the save routines,
25510 but we don't generate those at present. Same problems apply,
25511 though.)
25513 We deal with all this by synthesizing our own prefix/suffix and
25514 using that for the simple sprintf call shown above. */
25515 if (DEFAULT_ABI == ABI_V4)
25517 if (TARGET_64BIT)
25518 goto aix_names;
25520 if ((sel & SAVRES_REG) == SAVRES_GPR)
25521 prefix = (sel & SAVRES_SAVE) ? "_savegpr_" : "_restgpr_";
25522 else if ((sel & SAVRES_REG) == SAVRES_FPR)
25523 prefix = (sel & SAVRES_SAVE) ? "_savefpr_" : "_restfpr_";
25524 else if ((sel & SAVRES_REG) == SAVRES_VR)
25525 prefix = (sel & SAVRES_SAVE) ? "_savevr_" : "_restvr_";
25526 else
25527 abort ();
25529 if ((sel & SAVRES_LR))
25530 suffix = "_x";
25532 else if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
25534 #if !defined (POWERPC_LINUX) && !defined (POWERPC_FREEBSD)
25535 /* No out-of-line save/restore routines for GPRs on AIX. */
25536 gcc_assert (!TARGET_AIX || (sel & SAVRES_REG) != SAVRES_GPR);
25537 #endif
25539 aix_names:
25540 if ((sel & SAVRES_REG) == SAVRES_GPR)
25541 prefix = ((sel & SAVRES_SAVE)
25542 ? ((sel & SAVRES_LR) ? "_savegpr0_" : "_savegpr1_")
25543 : ((sel & SAVRES_LR) ? "_restgpr0_" : "_restgpr1_"));
25544 else if ((sel & SAVRES_REG) == SAVRES_FPR)
25546 #if defined (POWERPC_LINUX) || defined (POWERPC_FREEBSD)
25547 if ((sel & SAVRES_LR))
25548 prefix = ((sel & SAVRES_SAVE) ? "_savefpr_" : "_restfpr_");
25549 else
25550 #endif
25552 prefix = (sel & SAVRES_SAVE) ? SAVE_FP_PREFIX : RESTORE_FP_PREFIX;
25553 suffix = (sel & SAVRES_SAVE) ? SAVE_FP_SUFFIX : RESTORE_FP_SUFFIX;
25556 else if ((sel & SAVRES_REG) == SAVRES_VR)
25557 prefix = (sel & SAVRES_SAVE) ? "_savevr_" : "_restvr_";
25558 else
25559 abort ();
25562 if (DEFAULT_ABI == ABI_DARWIN)
25564 /* The Darwin approach is (slightly) different, in order to be
25565 compatible with code generated by the system toolchain. There is a
25566 single symbol for the start of save sequence, and the code here
25567 embeds an offset into that code on the basis of the first register
25568 to be saved. */
25569 prefix = (sel & SAVRES_SAVE) ? "save" : "rest" ;
25570 if ((sel & SAVRES_REG) == SAVRES_GPR)
25571 sprintf (savres_routine_name, "*%sGPR%s%s%.0d ; %s r%d-r31", prefix,
25572 ((sel & SAVRES_LR) ? "x" : ""), (regno == 13 ? "" : "+"),
25573 (regno - 13) * 4, prefix, regno);
25574 else if ((sel & SAVRES_REG) == SAVRES_FPR)
25575 sprintf (savres_routine_name, "*%sFP%s%.0d ; %s f%d-f31", prefix,
25576 (regno == 14 ? "" : "+"), (regno - 14) * 4, prefix, regno);
25577 else if ((sel & SAVRES_REG) == SAVRES_VR)
25578 sprintf (savres_routine_name, "*%sVEC%s%.0d ; %s v%d-v31", prefix,
25579 (regno == 20 ? "" : "+"), (regno - 20) * 8, prefix, regno);
25580 else
25581 abort ();
25583 else
25584 sprintf (savres_routine_name, "%s%d%s", prefix, regno, suffix);
25586 return savres_routine_name;
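/* Example results (illustrative): 32-bit SVR4 restoring from GPR 29
   with the LR variant yields "_restgpr_29_x"; 64-bit ELF yields
   "_savegpr0_29" or "_savegpr1_29" depending on SAVRES_LR; Darwin
   instead emits an offset into a single entry point, e.g.
   "*saveGPR+64 ; save r29-r31".  */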
25589 /* Return an RTL SYMBOL_REF for an out-of-line register save/restore routine.
25590 SEL selects the register class and whether we save or restore. */
25592 static rtx
25593 rs6000_savres_routine_sym (rs6000_stack_t *info, int sel)
25595 int regno = ((sel & SAVRES_REG) == SAVRES_GPR
25596 ? info->first_gp_reg_save
25597 : (sel & SAVRES_REG) == SAVRES_FPR
25598 ? info->first_fp_reg_save - 32
25599 : (sel & SAVRES_REG) == SAVRES_VR
25600 ? info->first_altivec_reg_save - FIRST_ALTIVEC_REGNO
25601 : -1);
25602 rtx sym;
25603 int select = sel;
25605 /* Don't generate bogus routine names. */
25606 gcc_assert (FIRST_SAVRES_REGISTER <= regno
25607 && regno <= LAST_SAVRES_REGISTER
25608 && select >= 0 && select <= 12);
25610 sym = savres_routine_syms[regno-FIRST_SAVRES_REGISTER][select];
25612 if (sym == NULL)
25614 char *name;
25616 name = rs6000_savres_routine_name (regno, sel);
25618 sym = savres_routine_syms[regno-FIRST_SAVRES_REGISTER][select]
25619 = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
25620 SYMBOL_REF_FLAGS (sym) |= SYMBOL_FLAG_FUNCTION;
25623 return sym;
25626 /* Emit a sequence of insns, including a stack tie if needed, for
25627 resetting the stack pointer. If UPDT_REGNO is not 1, then don't
25628 reset the stack pointer, but move the base of the frame into
25629 reg UPDT_REGNO for use by out-of-line register restore routines. */
25631 static rtx
25632 rs6000_emit_stack_reset (rtx frame_reg_rtx, HOST_WIDE_INT frame_off,
25633 unsigned updt_regno)
25635 /* If there is nothing to do, don't do anything. */
25636 if (frame_off == 0 && REGNO (frame_reg_rtx) == updt_regno)
25637 return NULL_RTX;
25639 rtx updt_reg_rtx = gen_rtx_REG (Pmode, updt_regno);
25641 /* This blockage is needed so that sched doesn't decide to move
25642 the sp change before the register restores. */
25643 if (DEFAULT_ABI == ABI_V4)
25644 return emit_insn (gen_stack_restore_tie (updt_reg_rtx, frame_reg_rtx,
25645 GEN_INT (frame_off)));
25647 /* If we are restoring registers out-of-line, we will be using the
25648 "exit" variants of the restore routines, which will reset the
25649 stack for us. But we do need to point updt_reg into the
25650 right place for those routines. */
25651 if (frame_off != 0)
25652 return emit_insn (gen_add3_insn (updt_reg_rtx,
25653 frame_reg_rtx, GEN_INT (frame_off)));
25654 else
25655 return emit_move_insn (updt_reg_rtx, frame_reg_rtx);
25657 return NULL_RTX;
25660 /* Return the register number used as a pointer by out-of-line
25661 save/restore functions. */
25663 static inline unsigned
25664 ptr_regno_for_savres (int sel)
25666 if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
25667 return (sel & SAVRES_REG) == SAVRES_FPR || (sel & SAVRES_LR) ? 1 : 12;
25668 return DEFAULT_ABI == ABI_DARWIN && (sel & SAVRES_REG) == SAVRES_FPR ? 1 : 11;
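/* In other words: on AIX/ELFv2 the pointer is r1 for FPR routines and
   for any LR-saving variant, r12 otherwise; on Darwin it is r1 for
   FPR routines and r11 otherwise; on SVR4 it is always r11.  */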
25671 /* Construct a parallel rtx describing the effect of a call to an
25672 out-of-line register save/restore routine, and emit the insn
25673 or jump_insn as appropriate. */
25675 static rtx_insn *
25676 rs6000_emit_savres_rtx (rs6000_stack_t *info,
25677 rtx frame_reg_rtx, int save_area_offset, int lr_offset,
25678 machine_mode reg_mode, int sel)
25680 int i;
25681 int offset, start_reg, end_reg, n_regs, use_reg;
25682 int reg_size = GET_MODE_SIZE (reg_mode);
25683 rtx sym;
25684 rtvec p;
25685 rtx par;
25686 rtx_insn *insn;
25688 offset = 0;
25689 start_reg = ((sel & SAVRES_REG) == SAVRES_GPR
25690 ? info->first_gp_reg_save
25691 : (sel & SAVRES_REG) == SAVRES_FPR
25692 ? info->first_fp_reg_save
25693 : (sel & SAVRES_REG) == SAVRES_VR
25694 ? info->first_altivec_reg_save
25695 : -1);
25696 end_reg = ((sel & SAVRES_REG) == SAVRES_GPR
25697 ? 32
25698 : (sel & SAVRES_REG) == SAVRES_FPR
25699 ? 64
25700 : (sel & SAVRES_REG) == SAVRES_VR
25701 ? LAST_ALTIVEC_REGNO + 1
25702 : -1);
25703 n_regs = end_reg - start_reg;
25704 p = rtvec_alloc (3 + ((sel & SAVRES_LR) ? 1 : 0)
25705 + ((sel & SAVRES_REG) == SAVRES_VR ? 1 : 0)
25706 + n_regs);
25708 if (!(sel & SAVRES_SAVE) && (sel & SAVRES_LR))
25709 RTVEC_ELT (p, offset++) = ret_rtx;
25711 RTVEC_ELT (p, offset++)
25712 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (Pmode, LR_REGNO));
25714 sym = rs6000_savres_routine_sym (info, sel);
25715 RTVEC_ELT (p, offset++) = gen_rtx_USE (VOIDmode, sym);
25717 use_reg = ptr_regno_for_savres (sel);
25718 if ((sel & SAVRES_REG) == SAVRES_VR)
25720 /* Vector regs are saved/restored using [reg+reg] addressing. */
25721 RTVEC_ELT (p, offset++)
25722 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (Pmode, use_reg));
25723 RTVEC_ELT (p, offset++)
25724 = gen_rtx_USE (VOIDmode, gen_rtx_REG (Pmode, 0));
25726 else
25727 RTVEC_ELT (p, offset++)
25728 = gen_rtx_USE (VOIDmode, gen_rtx_REG (Pmode, use_reg));
25730 for (i = 0; i < end_reg - start_reg; i++)
25731 RTVEC_ELT (p, i + offset)
25732 = gen_frame_set (gen_rtx_REG (reg_mode, start_reg + i),
25733 frame_reg_rtx, save_area_offset + reg_size * i,
25734 (sel & SAVRES_SAVE) != 0);
25736 if ((sel & SAVRES_SAVE) && (sel & SAVRES_LR))
25737 RTVEC_ELT (p, i + offset)
25738 = gen_frame_store (gen_rtx_REG (Pmode, 0), frame_reg_rtx, lr_offset);
25740 par = gen_rtx_PARALLEL (VOIDmode, p);
25742 if (!(sel & SAVRES_SAVE) && (sel & SAVRES_LR))
25744 insn = emit_jump_insn (par);
25745 JUMP_LABEL (insn) = ret_rtx;
25747 else
25748 insn = emit_insn (par);
25749 return insn;
25752 /* Emit prologue code to store CR fields that need to be saved into REG. This
25753 function should only be called when moving the non-volatile CRs to REG, it
25754 is not a general purpose routine to move the entire set of CRs to REG.
25755 Specifically, gen_prologue_movesi_from_cr() does not contain uses of the
25756 volatile CRs. */
25758 static void
25759 rs6000_emit_prologue_move_from_cr (rtx reg)
25761 /* Only the ELFv2 ABI allows storing only selected fields. */
25762 if (DEFAULT_ABI == ABI_ELFv2 && TARGET_MFCRF)
25764 int i, cr_reg[8], count = 0;
25766 /* Collect CR fields that must be saved. */
25767 for (i = 0; i < 8; i++)
25768 if (save_reg_p (CR0_REGNO + i))
25769 cr_reg[count++] = i;
25771 /* If it's just a single one, use mfcrf. */
25772 if (count == 1)
25774 rtvec p = rtvec_alloc (1);
25775 rtvec r = rtvec_alloc (2);
25776 RTVEC_ELT (r, 0) = gen_rtx_REG (CCmode, CR0_REGNO + cr_reg[0]);
25777 RTVEC_ELT (r, 1) = GEN_INT (1 << (7 - cr_reg[0]));
25778 RTVEC_ELT (p, 0)
25779 = gen_rtx_SET (reg,
25780 gen_rtx_UNSPEC (SImode, r, UNSPEC_MOVESI_FROM_CR));
25782 emit_insn (gen_rtx_PARALLEL (VOIDmode, p));
25783 return;
25786 /* ??? It might be better to handle count == 2 / 3 cases here
25787 as well, using logical operations to combine the values. */
25790 emit_insn (gen_prologue_movesi_from_cr (reg));
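/* The single-field case above corresponds to one mfocrf (mfcrf)
   instruction; the 1 << (7 - cr_reg[0]) operand is the FXM mask bit
   for that field, so saving only CR2, say, uses a mask of 0x20.  */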
25793 /* Return whether the split-stack arg pointer (r12) is used. */
25795 static bool
25796 split_stack_arg_pointer_used_p (void)
25798 /* If the pseudo holding the arg pointer is no longer a pseudo,
25799 then the arg pointer is used. */
25800 if (cfun->machine->split_stack_arg_pointer != NULL_RTX
25801 && (!REG_P (cfun->machine->split_stack_arg_pointer)
25802 || (REGNO (cfun->machine->split_stack_arg_pointer)
25803 < FIRST_PSEUDO_REGISTER)))
25804 return true;
25806 /* Unfortunately we also need to do some code scanning, since
25807 r12 may have been substituted for the pseudo. */
25808 rtx_insn *insn;
25809 basic_block bb = ENTRY_BLOCK_PTR_FOR_FN (cfun)->next_bb;
25810 FOR_BB_INSNS (bb, insn)
25811 if (NONDEBUG_INSN_P (insn))
25813 /* A call destroys r12. */
25814 if (CALL_P (insn))
25815 return false;
25817 df_ref use;
25818 FOR_EACH_INSN_USE (use, insn)
25820 rtx x = DF_REF_REG (use);
25821 if (REG_P (x) && REGNO (x) == 12)
25822 return true;
25824 df_ref def;
25825 FOR_EACH_INSN_DEF (def, insn)
25827 rtx x = DF_REF_REG (def);
25828 if (REG_P (x) && REGNO (x) == 12)
25829 return false;
25832 return bitmap_bit_p (DF_LR_OUT (bb), 12);
25835 /* Return whether we need to emit an ELFv2 global entry point prologue. */
25837 static bool
25838 rs6000_global_entry_point_needed_p (void)
25840 /* Only needed for the ELFv2 ABI. */
25841 if (DEFAULT_ABI != ABI_ELFv2)
25842 return false;
25844 /* With -msingle-pic-base, we assume the whole program shares the same
25845 TOC, so no global entry point prologues are needed anywhere. */
25846 if (TARGET_SINGLE_PIC_BASE)
25847 return false;
25849 /* Ensure we have a global entry point for thunks. ??? We could
25850 avoid that if the target routine doesn't need a global entry point,
25851 but we do not know whether this is the case at this point. */
25852 if (cfun->is_thunk)
25853 return true;
25855 /* For regular functions, rs6000_emit_prologue sets this flag if the
25856 routine ever uses the TOC pointer. */
25857 return cfun->machine->r2_setup_needed;
25860 /* Implement TARGET_SHRINK_WRAP_GET_SEPARATE_COMPONENTS. */
25861 static sbitmap
25862 rs6000_get_separate_components (void)
25864 rs6000_stack_t *info = rs6000_stack_info ();
25866 if (WORLD_SAVE_P (info))
25867 return NULL;
25869 gcc_assert (!(info->savres_strategy & SAVE_MULTIPLE)
25870 && !(info->savres_strategy & REST_MULTIPLE));
25872 /* Component 0 is the save/restore of LR (done via GPR0).
25873 Component 2 is the save of the TOC (GPR2).
25874 Components 13..31 are the save/restore of GPR13..GPR31.
25875 Components 46..63 are the save/restore of FPR14..FPR31. */
25877 cfun->machine->n_components = 64;
25879 sbitmap components = sbitmap_alloc (cfun->machine->n_components);
25880 bitmap_clear (components);
25882 int reg_size = TARGET_32BIT ? 4 : 8;
25883 int fp_reg_size = 8;
25885 /* The GPRs we need saved to the frame. */
25886 if ((info->savres_strategy & SAVE_INLINE_GPRS)
25887 && (info->savres_strategy & REST_INLINE_GPRS))
25889 int offset = info->gp_save_offset;
25890 if (info->push_p)
25891 offset += info->total_size;
25893 for (unsigned regno = info->first_gp_reg_save; regno < 32; regno++)
25895 if (IN_RANGE (offset, -0x8000, 0x7fff)
25896 && save_reg_p (regno))
25897 bitmap_set_bit (components, regno);
25899 offset += reg_size;
25903 /* Don't mess with the hard frame pointer. */
25904 if (frame_pointer_needed)
25905 bitmap_clear_bit (components, HARD_FRAME_POINTER_REGNUM);
25907 /* Don't mess with the fixed TOC register. */
25908 if ((TARGET_TOC && TARGET_MINIMAL_TOC)
25909 || (flag_pic == 1 && DEFAULT_ABI == ABI_V4)
25910 || (flag_pic && DEFAULT_ABI == ABI_DARWIN))
25911 bitmap_clear_bit (components, RS6000_PIC_OFFSET_TABLE_REGNUM);
25913 /* The FPRs we need saved to the frame. */
25914 if ((info->savres_strategy & SAVE_INLINE_FPRS)
25915 && (info->savres_strategy & REST_INLINE_FPRS))
25917 int offset = info->fp_save_offset;
25918 if (info->push_p)
25919 offset += info->total_size;
25921 for (unsigned regno = info->first_fp_reg_save; regno < 64; regno++)
25923 if (IN_RANGE (offset, -0x8000, 0x7fff) && save_reg_p (regno))
25924 bitmap_set_bit (components, regno);
25926 offset += fp_reg_size;
25930 /* Optimize LR save and restore if we can. This is component 0. Any
25931 out-of-line register save/restore routines need LR. */
25932 if (info->lr_save_p
25933 && !(flag_pic && (DEFAULT_ABI == ABI_V4 || DEFAULT_ABI == ABI_DARWIN))
25934 && (info->savres_strategy & SAVE_INLINE_GPRS)
25935 && (info->savres_strategy & REST_INLINE_GPRS)
25936 && (info->savres_strategy & SAVE_INLINE_FPRS)
25937 && (info->savres_strategy & REST_INLINE_FPRS)
25938 && (info->savres_strategy & SAVE_INLINE_VRS)
25939 && (info->savres_strategy & REST_INLINE_VRS))
25941 int offset = info->lr_save_offset;
25942 if (info->push_p)
25943 offset += info->total_size;
25944 if (IN_RANGE (offset, -0x8000, 0x7fff))
25945 bitmap_set_bit (components, 0);
25948 /* Optimize saving the TOC. This is component 2. */
25949 if (cfun->machine->save_toc_in_prologue)
25950 bitmap_set_bit (components, 2);
25952 return components;
25955 /* Implement TARGET_SHRINK_WRAP_COMPONENTS_FOR_BB. */
25956 static sbitmap
25957 rs6000_components_for_bb (basic_block bb)
25959 rs6000_stack_t *info = rs6000_stack_info ();
25961 bitmap in = DF_LIVE_IN (bb);
25962 bitmap gen = &DF_LIVE_BB_INFO (bb)->gen;
25963 bitmap kill = &DF_LIVE_BB_INFO (bb)->kill;
25965 sbitmap components = sbitmap_alloc (cfun->machine->n_components);
25966 bitmap_clear (components);
25968 /* A register is used in a bb if it is in the IN, GEN, or KILL sets. */
25970 /* GPRs. */
25971 for (unsigned regno = info->first_gp_reg_save; regno < 32; regno++)
25972 if (bitmap_bit_p (in, regno)
25973 || bitmap_bit_p (gen, regno)
25974 || bitmap_bit_p (kill, regno))
25975 bitmap_set_bit (components, regno);
25977 /* FPRs. */
25978 for (unsigned regno = info->first_fp_reg_save; regno < 64; regno++)
25979 if (bitmap_bit_p (in, regno)
25980 || bitmap_bit_p (gen, regno)
25981 || bitmap_bit_p (kill, regno))
25982 bitmap_set_bit (components, regno);
25984 /* The link register. */
25985 if (bitmap_bit_p (in, LR_REGNO)
25986 || bitmap_bit_p (gen, LR_REGNO)
25987 || bitmap_bit_p (kill, LR_REGNO))
25988 bitmap_set_bit (components, 0);
25990 /* The TOC save. */
25991 if (bitmap_bit_p (in, TOC_REGNUM)
25992 || bitmap_bit_p (gen, TOC_REGNUM)
25993 || bitmap_bit_p (kill, TOC_REGNUM))
25994 bitmap_set_bit (components, 2);
25996 return components;
25999 /* Implement TARGET_SHRINK_WRAP_DISQUALIFY_COMPONENTS. */
26000 static void
26001 rs6000_disqualify_components (sbitmap components, edge e,
26002 sbitmap edge_components, bool /*is_prologue*/)
26004 /* Our LR pro/epilogue code moves LR via R0, so R0 had better not be
26005 live where we want to place that code. */
26006 if (bitmap_bit_p (edge_components, 0)
26007 && bitmap_bit_p (DF_LIVE_IN (e->dest), 0))
26009 if (dump_file)
26010 fprintf (dump_file, "Disqualifying LR because GPR0 is live "
26011 "on entry to bb %d\n", e->dest->index);
26012 bitmap_clear_bit (components, 0);
26016 /* Implement TARGET_SHRINK_WRAP_EMIT_PROLOGUE_COMPONENTS. */
26017 static void
26018 rs6000_emit_prologue_components (sbitmap components)
26020 rs6000_stack_t *info = rs6000_stack_info ();
26021 rtx ptr_reg = gen_rtx_REG (Pmode, frame_pointer_needed
26022 ? HARD_FRAME_POINTER_REGNUM
26023 : STACK_POINTER_REGNUM);
26025 machine_mode reg_mode = Pmode;
26026 int reg_size = TARGET_32BIT ? 4 : 8;
26027 machine_mode fp_reg_mode = TARGET_HARD_FLOAT ? DFmode : SFmode;
26028 int fp_reg_size = 8;
26030 /* Prologue for LR. */
26031 if (bitmap_bit_p (components, 0))
26033 rtx lr = gen_rtx_REG (reg_mode, LR_REGNO);
26034 rtx reg = gen_rtx_REG (reg_mode, 0);
26035 rtx_insn *insn = emit_move_insn (reg, lr);
26036 RTX_FRAME_RELATED_P (insn) = 1;
26037 add_reg_note (insn, REG_CFA_REGISTER, gen_rtx_SET (reg, lr));
26039 int offset = info->lr_save_offset;
26040 if (info->push_p)
26041 offset += info->total_size;
26043 insn = emit_insn (gen_frame_store (reg, ptr_reg, offset));
26044 RTX_FRAME_RELATED_P (insn) = 1;
26045 rtx mem = copy_rtx (SET_DEST (single_set (insn)));
26046 add_reg_note (insn, REG_CFA_OFFSET, gen_rtx_SET (mem, lr));
26049 /* Prologue for TOC. */
26050 if (bitmap_bit_p (components, 2))
26052 rtx reg = gen_rtx_REG (reg_mode, TOC_REGNUM);
26053 rtx sp_reg = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
26054 emit_insn (gen_frame_store (reg, sp_reg, RS6000_TOC_SAVE_SLOT));
26057 /* Prologue for the GPRs. */
26058 int offset = info->gp_save_offset;
26059 if (info->push_p)
26060 offset += info->total_size;
26062 for (int i = info->first_gp_reg_save; i < 32; i++)
26064 if (bitmap_bit_p (components, i))
26066 rtx reg = gen_rtx_REG (reg_mode, i);
26067 rtx_insn *insn = emit_insn (gen_frame_store (reg, ptr_reg, offset));
26068 RTX_FRAME_RELATED_P (insn) = 1;
26069 rtx set = copy_rtx (single_set (insn));
26070 add_reg_note (insn, REG_CFA_OFFSET, set);
26073 offset += reg_size;
26076 /* Prologue for the FPRs. */
26077 offset = info->fp_save_offset;
26078 if (info->push_p)
26079 offset += info->total_size;
26081 for (int i = info->first_fp_reg_save; i < 64; i++)
26083 if (bitmap_bit_p (components, i))
26085 rtx reg = gen_rtx_REG (fp_reg_mode, i);
26086 rtx_insn *insn = emit_insn (gen_frame_store (reg, ptr_reg, offset));
26087 RTX_FRAME_RELATED_P (insn) = 1;
26088 rtx set = copy_rtx (single_set (insn));
26089 add_reg_note (insn, REG_CFA_OFFSET, set);
26092 offset += fp_reg_size;
26096 /* Implement TARGET_SHRINK_WRAP_EMIT_EPILOGUE_COMPONENTS. */
26097 static void
26098 rs6000_emit_epilogue_components (sbitmap components)
26100 rs6000_stack_t *info = rs6000_stack_info ();
26101 rtx ptr_reg = gen_rtx_REG (Pmode, frame_pointer_needed
26102 ? HARD_FRAME_POINTER_REGNUM
26103 : STACK_POINTER_REGNUM);
26105 machine_mode reg_mode = Pmode;
26106 int reg_size = TARGET_32BIT ? 4 : 8;
26108 machine_mode fp_reg_mode = TARGET_HARD_FLOAT ? DFmode : SFmode;
26109 int fp_reg_size = 8;
26111 /* Epilogue for the FPRs. */
26112 int offset = info->fp_save_offset;
26113 if (info->push_p)
26114 offset += info->total_size;
26116 for (int i = info->first_fp_reg_save; i < 64; i++)
26118 if (bitmap_bit_p (components, i))
26120 rtx reg = gen_rtx_REG (fp_reg_mode, i);
26121 rtx_insn *insn = emit_insn (gen_frame_load (reg, ptr_reg, offset));
26122 RTX_FRAME_RELATED_P (insn) = 1;
26123 add_reg_note (insn, REG_CFA_RESTORE, reg);
26126 offset += fp_reg_size;
26129 /* Epilogue for the GPRs. */
26130 offset = info->gp_save_offset;
26131 if (info->push_p)
26132 offset += info->total_size;
26134 for (int i = info->first_gp_reg_save; i < 32; i++)
26136 if (bitmap_bit_p (components, i))
26138 rtx reg = gen_rtx_REG (reg_mode, i);
26139 rtx_insn *insn = emit_insn (gen_frame_load (reg, ptr_reg, offset));
26140 RTX_FRAME_RELATED_P (insn) = 1;
26141 add_reg_note (insn, REG_CFA_RESTORE, reg);
26144 offset += reg_size;
26147 /* Epilogue for LR. */
26148 if (bitmap_bit_p (components, 0))
26150 int offset = info->lr_save_offset;
26151 if (info->push_p)
26152 offset += info->total_size;
26154 rtx reg = gen_rtx_REG (reg_mode, 0);
26155 rtx_insn *insn = emit_insn (gen_frame_load (reg, ptr_reg, offset));
26157 rtx lr = gen_rtx_REG (Pmode, LR_REGNO);
26158 insn = emit_move_insn (lr, reg);
26159 RTX_FRAME_RELATED_P (insn) = 1;
26160 add_reg_note (insn, REG_CFA_RESTORE, lr);
26164 /* Implement TARGET_SHRINK_WRAP_SET_HANDLED_COMPONENTS. */
26165 static void
26166 rs6000_set_handled_components (sbitmap components)
26168 rs6000_stack_t *info = rs6000_stack_info ();
26170 for (int i = info->first_gp_reg_save; i < 32; i++)
26171 if (bitmap_bit_p (components, i))
26172 cfun->machine->gpr_is_wrapped_separately[i] = true;
26174 for (int i = info->first_fp_reg_save; i < 64; i++)
26175 if (bitmap_bit_p (components, i))
26176 cfun->machine->fpr_is_wrapped_separately[i - 32] = true;
26178 if (bitmap_bit_p (components, 0))
26179 cfun->machine->lr_is_wrapped_separately = true;
26181 if (bitmap_bit_p (components, 2))
26182 cfun->machine->toc_is_wrapped_separately = true;
26185 /* VRSAVE is a bit vector representing which AltiVec registers
26186 are used. The OS uses this to determine which vector
26187 registers to save on a context switch. We need to save
26188 VRSAVE on the stack frame, add whatever AltiVec registers we
26189 used in this function, and do the corresponding magic in the
26190 epilogue. */
26191 static void
26192 emit_vrsave_prologue (rs6000_stack_t *info, int save_regno,
26193 HOST_WIDE_INT frame_off, rtx frame_reg_rtx)
26195 /* Get VRSAVE into a GPR. */
26196 rtx reg = gen_rtx_REG (SImode, save_regno);
26197 rtx vrsave = gen_rtx_REG (SImode, VRSAVE_REGNO);
26198 if (TARGET_MACHO)
26199 emit_insn (gen_get_vrsave_internal (reg));
26200 else
26201 emit_insn (gen_rtx_SET (reg, vrsave));
26203 /* Save VRSAVE. */
26204 int offset = info->vrsave_save_offset + frame_off;
26205 emit_insn (gen_frame_store (reg, frame_reg_rtx, offset));
26207 /* Include the registers in the mask. */
26208 emit_insn (gen_iorsi3 (reg, reg, GEN_INT (info->vrsave_mask)));
26210 emit_insn (generate_set_vrsave (reg, info, 0));
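/* On ELF targets the sequence above amounts to (illustrative; OFF and
   MASK stand for vrsave_save_offset and vrsave_mask, and the OR may
   take one or two instructions depending on the mask):

	mfvrsave r11		# copy VRSAVE into a GPR
	stw r11,OFF(r1)		# save the caller's value in the frame
	oris r11,r11,MASK	# or in this function's AltiVec regs
	mtvrsave r11		# install the updated mask
  */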
26213 /* Set up the arg pointer (r12) for -fsplit-stack code. If __morestack was
26214 called, it left the arg pointer to the old stack in r29. Otherwise, the
26215 arg pointer is the top of the current frame. */
26216 static void
26217 emit_split_stack_prologue (rs6000_stack_t *info, rtx_insn *sp_adjust,
26218 HOST_WIDE_INT frame_off, rtx frame_reg_rtx)
26220 cfun->machine->split_stack_argp_used = true;
26222 if (sp_adjust)
26224 rtx r12 = gen_rtx_REG (Pmode, 12);
26225 rtx sp_reg_rtx = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
26226 rtx set_r12 = gen_rtx_SET (r12, sp_reg_rtx);
26227 emit_insn_before (set_r12, sp_adjust);
26229 else if (frame_off != 0 || REGNO (frame_reg_rtx) != 12)
26231 rtx r12 = gen_rtx_REG (Pmode, 12);
26232 if (frame_off == 0)
26233 emit_move_insn (r12, frame_reg_rtx);
26234 else
26235 emit_insn (gen_add3_insn (r12, frame_reg_rtx, GEN_INT (frame_off)));
26238 if (info->push_p)
26240 rtx r12 = gen_rtx_REG (Pmode, 12);
26241 rtx r29 = gen_rtx_REG (Pmode, 29);
26242 rtx cr7 = gen_rtx_REG (CCUNSmode, CR7_REGNO);
26243 rtx not_more = gen_label_rtx ();
26244 rtx jump;
26246 jump = gen_rtx_IF_THEN_ELSE (VOIDmode,
26247 gen_rtx_GEU (VOIDmode, cr7, const0_rtx),
26248 gen_rtx_LABEL_REF (VOIDmode, not_more),
26249 pc_rtx);
26250 jump = emit_jump_insn (gen_rtx_SET (pc_rtx, jump));
26251 JUMP_LABEL (jump) = not_more;
26252 LABEL_NUSES (not_more) += 1;
26253 emit_move_insn (r12, r29);
26254 emit_label (not_more);
26258 /* Emit function prologue as insns. */
26260 void
26261 rs6000_emit_prologue (void)
26263 rs6000_stack_t *info = rs6000_stack_info ();
26264 machine_mode reg_mode = Pmode;
26265 int reg_size = TARGET_32BIT ? 4 : 8;
26266 machine_mode fp_reg_mode = TARGET_HARD_FLOAT ? DFmode : SFmode;
26267 int fp_reg_size = 8;
26268 rtx sp_reg_rtx = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
26269 rtx frame_reg_rtx = sp_reg_rtx;
26270 unsigned int cr_save_regno;
26271 rtx cr_save_rtx = NULL_RTX;
26272 rtx_insn *insn;
26273 int strategy;
26274 int using_static_chain_p = (cfun->static_chain_decl != NULL_TREE
26275 && df_regs_ever_live_p (STATIC_CHAIN_REGNUM)
26276 && call_used_regs[STATIC_CHAIN_REGNUM]);
26277 int using_split_stack = (flag_split_stack
26278 && (lookup_attribute ("no_split_stack",
26279 DECL_ATTRIBUTES (cfun->decl))
26280 == NULL));
26282 /* Offset to top of frame for frame_reg and sp respectively. */
26283 HOST_WIDE_INT frame_off = 0;
26284 HOST_WIDE_INT sp_off = 0;
26285 /* sp_adjust is the stack adjusting instruction, tracked so that the
26286 insn setting up the split-stack arg pointer can be emitted just
26287 prior to it, when r12 is not used here for other purposes. */
26288 rtx_insn *sp_adjust = 0;
26290 #if CHECKING_P
26291 /* Track and check usage of r0, r11, r12. */
26292 int reg_inuse = using_static_chain_p ? 1 << 11 : 0;
26293 #define START_USE(R) do \
26295 gcc_assert ((reg_inuse & (1 << (R))) == 0); \
26296 reg_inuse |= 1 << (R); \
26297 } while (0)
26298 #define END_USE(R) do \
26300 gcc_assert ((reg_inuse & (1 << (R))) != 0); \
26301 reg_inuse &= ~(1 << (R)); \
26302 } while (0)
26303 #define NOT_INUSE(R) do \
26305 gcc_assert ((reg_inuse & (1 << (R))) == 0); \
26306 } while (0)
26307 #else
26308 #define START_USE(R) do {} while (0)
26309 #define END_USE(R) do {} while (0)
26310 #define NOT_INUSE(R) do {} while (0)
26311 #endif
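/* Example: bracketing a temporary use of r0 as

	START_USE (0);
	... emit insns using r0 ...
	END_USE (0);

   lets a checking build assert that no two prologue snippets claim
   the same scratch register at the same time; NOT_INUSE (R) checks
   that R is free without claiming it.  */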
26313 if (DEFAULT_ABI == ABI_ELFv2
26314 && !TARGET_SINGLE_PIC_BASE)
26316 cfun->machine->r2_setup_needed = df_regs_ever_live_p (TOC_REGNUM);
26318 /* With -mminimal-toc we may generate an extra use of r2 below. */
26319 if (TARGET_TOC && TARGET_MINIMAL_TOC
26320 && !constant_pool_empty_p ())
26321 cfun->machine->r2_setup_needed = true;
26325 if (flag_stack_usage_info)
26326 current_function_static_stack_size = info->total_size;
26328 if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
26330 HOST_WIDE_INT size = info->total_size;
26332 if (crtl->is_leaf && !cfun->calls_alloca)
26334 if (size > PROBE_INTERVAL && size > get_stack_check_protect ())
26335 rs6000_emit_probe_stack_range (get_stack_check_protect (),
26336 size - get_stack_check_protect ());
26338 else if (size > 0)
26339 rs6000_emit_probe_stack_range (get_stack_check_protect (), size);
26342 if (TARGET_FIX_AND_CONTINUE)
26344 /* gdb on darwin arranges to forward a function from the old
26345 address by modifying the first 5 instructions of the function
26346 to branch to the overriding function. This is necessary to
26347 permit function pointers that point to the old function to
26348 actually forward to the new function. */
26349 emit_insn (gen_nop ());
26350 emit_insn (gen_nop ());
26351 emit_insn (gen_nop ());
26352 emit_insn (gen_nop ());
26353 emit_insn (gen_nop ());
26356 /* Handle world saves specially here. */
26357 if (WORLD_SAVE_P (info))
26359 int i, j, sz;
26360 rtx treg;
26361 rtvec p;
26362 rtx reg0;
26364 /* save_world expects lr in r0. */
26365 reg0 = gen_rtx_REG (Pmode, 0);
26366 if (info->lr_save_p)
26368 insn = emit_move_insn (reg0,
26369 gen_rtx_REG (Pmode, LR_REGNO));
26370 RTX_FRAME_RELATED_P (insn) = 1;
26373 /* The SAVE_WORLD and RESTORE_WORLD routines make a number of
26374 assumptions about the offsets of various bits of the stack
26375 frame. */
26376 gcc_assert (info->gp_save_offset == -220
26377 && info->fp_save_offset == -144
26378 && info->lr_save_offset == 8
26379 && info->cr_save_offset == 4
26380 && info->push_p
26381 && info->lr_save_p
26382 && (!crtl->calls_eh_return
26383 || info->ehrd_offset == -432)
26384 && info->vrsave_save_offset == -224
26385 && info->altivec_save_offset == -416);
26387 treg = gen_rtx_REG (SImode, 11);
26388 emit_move_insn (treg, GEN_INT (-info->total_size));
26390 /* SAVE_WORLD takes the caller's LR in R0 and the frame size
26391 in R11. It also clobbers R12, so beware! */
26393 /* Preserve CR2 for save_world prologues */
26394 sz = 5;
26395 sz += 32 - info->first_gp_reg_save;
26396 sz += 64 - info->first_fp_reg_save;
26397 sz += LAST_ALTIVEC_REGNO - info->first_altivec_reg_save + 1;
26398 p = rtvec_alloc (sz);
26399 j = 0;
26400 RTVEC_ELT (p, j++) = gen_rtx_CLOBBER (VOIDmode,
26401 gen_rtx_REG (SImode,
26402 LR_REGNO));
26403 RTVEC_ELT (p, j++) = gen_rtx_USE (VOIDmode,
26404 gen_rtx_SYMBOL_REF (Pmode,
26405 "*save_world"));
26406 /* We do floats first so that the instruction pattern matches
26407 properly. */
26408 for (i = 0; i < 64 - info->first_fp_reg_save; i++)
26409 RTVEC_ELT (p, j++)
26410 = gen_frame_store (gen_rtx_REG (TARGET_HARD_FLOAT ? DFmode : SFmode,
26411 info->first_fp_reg_save + i),
26412 frame_reg_rtx,
26413 info->fp_save_offset + frame_off + 8 * i);
26414 for (i = 0; info->first_altivec_reg_save + i <= LAST_ALTIVEC_REGNO; i++)
26415 RTVEC_ELT (p, j++)
26416 = gen_frame_store (gen_rtx_REG (V4SImode,
26417 info->first_altivec_reg_save + i),
26418 frame_reg_rtx,
26419 info->altivec_save_offset + frame_off + 16 * i);
26420 for (i = 0; i < 32 - info->first_gp_reg_save; i++)
26421 RTVEC_ELT (p, j++)
26422 = gen_frame_store (gen_rtx_REG (reg_mode, info->first_gp_reg_save + i),
26423 frame_reg_rtx,
26424 info->gp_save_offset + frame_off + reg_size * i);
26426 /* CR register traditionally saved as CR2. */
26427 RTVEC_ELT (p, j++)
26428 = gen_frame_store (gen_rtx_REG (SImode, CR2_REGNO),
26429 frame_reg_rtx, info->cr_save_offset + frame_off);
25430 /* Explain the use of R0. */
26431 if (info->lr_save_p)
26432 RTVEC_ELT (p, j++)
26433 = gen_frame_store (reg0,
26434 frame_reg_rtx, info->lr_save_offset + frame_off);
26435 /* Explain what happens to the stack pointer. */
26437 rtx newval = gen_rtx_PLUS (Pmode, sp_reg_rtx, treg);
26438 RTVEC_ELT (p, j++) = gen_rtx_SET (sp_reg_rtx, newval);
26441 insn = emit_insn (gen_rtx_PARALLEL (VOIDmode, p));
26442 rs6000_frame_related (insn, frame_reg_rtx, sp_off - frame_off,
26443 treg, GEN_INT (-info->total_size));
26444 sp_off = frame_off = info->total_size;
26447 strategy = info->savres_strategy;
26449 /* For V.4, update stack before we do any saving and set back pointer. */
26450 if (! WORLD_SAVE_P (info)
26451 && info->push_p
26452 && (DEFAULT_ABI == ABI_V4
26453 || crtl->calls_eh_return))
26455 bool need_r11 = (!(strategy & SAVE_INLINE_FPRS)
26456 || !(strategy & SAVE_INLINE_GPRS)
26457 || !(strategy & SAVE_INLINE_VRS));
26458 int ptr_regno = -1;
26459 rtx ptr_reg = NULL_RTX;
26460 int ptr_off = 0;
26462 if (info->total_size < 32767)
26463 frame_off = info->total_size;
26464 else if (need_r11)
26465 ptr_regno = 11;
26466 else if (info->cr_save_p
26467 || info->lr_save_p
26468 || info->first_fp_reg_save < 64
26469 || info->first_gp_reg_save < 32
26470 || info->altivec_size != 0
26471 || info->vrsave_size != 0
26472 || crtl->calls_eh_return)
26473 ptr_regno = 12;
26474 else
26476 /* The prologue won't be saving any regs so there is no need
26477 to set up a frame register to access any frame save area.
26478 We also won't be using frame_off anywhere below, but set
26479 the correct value anyway to protect against future
26480 changes to this function. */
26481 frame_off = info->total_size;
26483 if (ptr_regno != -1)
26485 /* Set up the frame offset to that needed by the first
26486 out-of-line save function. */
26487 START_USE (ptr_regno);
26488 ptr_reg = gen_rtx_REG (Pmode, ptr_regno);
26489 frame_reg_rtx = ptr_reg;
26490 if (!(strategy & SAVE_INLINE_FPRS) && info->fp_size != 0)
26491 gcc_checking_assert (info->fp_save_offset + info->fp_size == 0);
26492 else if (!(strategy & SAVE_INLINE_GPRS) && info->first_gp_reg_save < 32)
26493 ptr_off = info->gp_save_offset + info->gp_size;
26494 else if (!(strategy & SAVE_INLINE_VRS) && info->altivec_size != 0)
26495 ptr_off = info->altivec_save_offset + info->altivec_size;
26496 frame_off = -ptr_off;
26498 sp_adjust = rs6000_emit_allocate_stack (info->total_size,
26499 ptr_reg, ptr_off);
26500 if (REGNO (frame_reg_rtx) == 12)
26501 sp_adjust = 0;
26502 sp_off = info->total_size;
26503 if (frame_reg_rtx != sp_reg_rtx)
26504 rs6000_emit_stack_tie (frame_reg_rtx, false);
26507 /* If we use the link register, get it into r0. */
26508 if (!WORLD_SAVE_P (info) && info->lr_save_p
26509 && !cfun->machine->lr_is_wrapped_separately)
26511 rtx addr, reg, mem;
26513 reg = gen_rtx_REG (Pmode, 0);
26514 START_USE (0);
26515 insn = emit_move_insn (reg, gen_rtx_REG (Pmode, LR_REGNO));
26516 RTX_FRAME_RELATED_P (insn) = 1;
26518 if (!(strategy & (SAVE_NOINLINE_GPRS_SAVES_LR
26519 | SAVE_NOINLINE_FPRS_SAVES_LR)))
26521 addr = gen_rtx_PLUS (Pmode, frame_reg_rtx,
26522 GEN_INT (info->lr_save_offset + frame_off));
26523 mem = gen_rtx_MEM (Pmode, addr);
26524 /* This should not be of rs6000_sr_alias_set, because of
26525 __builtin_return_address. */
26527 insn = emit_move_insn (mem, reg);
26528 rs6000_frame_related (insn, frame_reg_rtx, sp_off - frame_off,
26529 NULL_RTX, NULL_RTX);
26530 END_USE (0);
26534 /* If we need to save CR, put it into r12 or r11. Choose r12 except when
26535 r12 will be needed by out-of-line gpr restore. */
26536 cr_save_regno = ((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
26537 && !(strategy & (SAVE_INLINE_GPRS
26538 | SAVE_NOINLINE_GPRS_SAVES_LR))
26539 ? 11 : 12);
26540 if (!WORLD_SAVE_P (info)
26541 && info->cr_save_p
26542 && REGNO (frame_reg_rtx) != cr_save_regno
26543 && !(using_static_chain_p && cr_save_regno == 11)
26544 && !(using_split_stack && cr_save_regno == 12 && sp_adjust))
26546 cr_save_rtx = gen_rtx_REG (SImode, cr_save_regno);
26547 START_USE (cr_save_regno);
26548 rs6000_emit_prologue_move_from_cr (cr_save_rtx);
26551 /* Do any required saving of fpr's. If only one or two to save, do
26552 it ourselves. Otherwise, call function. */
26553 if (!WORLD_SAVE_P (info) && (strategy & SAVE_INLINE_FPRS))
26555 int offset = info->fp_save_offset + frame_off;
26556 for (int i = info->first_fp_reg_save; i < 64; i++)
26558 if (save_reg_p (i)
26559 && !cfun->machine->fpr_is_wrapped_separately[i - 32])
26560 emit_frame_save (frame_reg_rtx, fp_reg_mode, i, offset,
26561 sp_off - frame_off);
26563 offset += fp_reg_size;
26566 else if (!WORLD_SAVE_P (info) && info->first_fp_reg_save != 64)
26568 bool lr = (strategy & SAVE_NOINLINE_FPRS_SAVES_LR) != 0;
26569 int sel = SAVRES_SAVE | SAVRES_FPR | (lr ? SAVRES_LR : 0);
26570 unsigned ptr_regno = ptr_regno_for_savres (sel);
26571 rtx ptr_reg = frame_reg_rtx;
26573 if (REGNO (frame_reg_rtx) == ptr_regno)
26574 gcc_checking_assert (frame_off == 0);
26575 else
26577 ptr_reg = gen_rtx_REG (Pmode, ptr_regno);
26578 NOT_INUSE (ptr_regno);
26579 emit_insn (gen_add3_insn (ptr_reg,
26580 frame_reg_rtx, GEN_INT (frame_off)));
26582 insn = rs6000_emit_savres_rtx (info, ptr_reg,
26583 info->fp_save_offset,
26584 info->lr_save_offset,
26585 DFmode, sel);
26586 rs6000_frame_related (insn, ptr_reg, sp_off,
26587 NULL_RTX, NULL_RTX);
26588 if (lr)
26589 END_USE (0);
26592 /* Save GPRs. This is done as a PARALLEL if we are using
26593 the store-multiple instructions. */
26594 if (!WORLD_SAVE_P (info) && !(strategy & SAVE_INLINE_GPRS))
26596 bool lr = (strategy & SAVE_NOINLINE_GPRS_SAVES_LR) != 0;
26597 int sel = SAVRES_SAVE | SAVRES_GPR | (lr ? SAVRES_LR : 0);
26598 unsigned ptr_regno = ptr_regno_for_savres (sel);
26599 rtx ptr_reg = frame_reg_rtx;
26600 bool ptr_set_up = REGNO (ptr_reg) == ptr_regno;
26601 int end_save = info->gp_save_offset + info->gp_size;
26602 int ptr_off;
26604 if (ptr_regno == 12)
26605 sp_adjust = 0;
26606 if (!ptr_set_up)
26607 ptr_reg = gen_rtx_REG (Pmode, ptr_regno);
26609 /* Need to adjust r11 (r12) if we saved any FPRs. */
26610 if (end_save + frame_off != 0)
26612 rtx offset = GEN_INT (end_save + frame_off);
26614 if (ptr_set_up)
26615 frame_off = -end_save;
26616 else
26617 NOT_INUSE (ptr_regno);
26618 emit_insn (gen_add3_insn (ptr_reg, frame_reg_rtx, offset));
26620 else if (!ptr_set_up)
26622 NOT_INUSE (ptr_regno);
26623 emit_move_insn (ptr_reg, frame_reg_rtx);
26625 ptr_off = -end_save;
26626 insn = rs6000_emit_savres_rtx (info, ptr_reg,
26627 info->gp_save_offset + ptr_off,
26628 info->lr_save_offset + ptr_off,
26629 reg_mode, sel);
26630 rs6000_frame_related (insn, ptr_reg, sp_off - ptr_off,
26631 NULL_RTX, NULL_RTX);
26632 if (lr)
26633 END_USE (0);
26635 else if (!WORLD_SAVE_P (info) && (strategy & SAVE_MULTIPLE))
26637 rtvec p;
26638 int i;
26639 p = rtvec_alloc (32 - info->first_gp_reg_save);
26640 for (i = 0; i < 32 - info->first_gp_reg_save; i++)
26641 RTVEC_ELT (p, i)
26642 = gen_frame_store (gen_rtx_REG (reg_mode, info->first_gp_reg_save + i),
26643 frame_reg_rtx,
26644 info->gp_save_offset + frame_off + reg_size * i);
26645 insn = emit_insn (gen_rtx_PARALLEL (VOIDmode, p));
26646 rs6000_frame_related (insn, frame_reg_rtx, sp_off - frame_off,
26647 NULL_RTX, NULL_RTX);
26649 else if (!WORLD_SAVE_P (info))
26651 int offset = info->gp_save_offset + frame_off;
26652 for (int i = info->first_gp_reg_save; i < 32; i++)
26654 if (save_reg_p (i)
26655 && !cfun->machine->gpr_is_wrapped_separately[i])
26656 emit_frame_save (frame_reg_rtx, reg_mode, i, offset,
26657 sp_off - frame_off);
26659 offset += reg_size;
26663 if (crtl->calls_eh_return)
26665 unsigned int i;
26666 rtvec p;
26668 for (i = 0; ; ++i)
26670 unsigned int regno = EH_RETURN_DATA_REGNO (i);
26671 if (regno == INVALID_REGNUM)
26672 break;
26675 p = rtvec_alloc (i);
26677 for (i = 0; ; ++i)
26679 unsigned int regno = EH_RETURN_DATA_REGNO (i);
26680 if (regno == INVALID_REGNUM)
26681 break;
26683 rtx set
26684 = gen_frame_store (gen_rtx_REG (reg_mode, regno),
26685 sp_reg_rtx,
26686 info->ehrd_offset + sp_off + reg_size * (int) i);
26687 RTVEC_ELT (p, i) = set;
26688 RTX_FRAME_RELATED_P (set) = 1;
26691 insn = emit_insn (gen_blockage ());
26692 RTX_FRAME_RELATED_P (insn) = 1;
26693 add_reg_note (insn, REG_FRAME_RELATED_EXPR, gen_rtx_PARALLEL (VOIDmode, p));
26696 /* In AIX ABI we need to make sure r2 is really saved. */
26697 if (TARGET_AIX && crtl->calls_eh_return)
26699 rtx tmp_reg, tmp_reg_si, hi, lo, compare_result, toc_save_done, jump;
26700 rtx join_insn, note;
26701 rtx_insn *save_insn;
26702 long toc_restore_insn;
26704 tmp_reg = gen_rtx_REG (Pmode, 11);
26705 tmp_reg_si = gen_rtx_REG (SImode, 11);
26706 if (using_static_chain_p)
26708 START_USE (0);
26709 emit_move_insn (gen_rtx_REG (Pmode, 0), tmp_reg);
26711 else
26712 START_USE (11);
26713 emit_move_insn (tmp_reg, gen_rtx_REG (Pmode, LR_REGNO));
25714 /* Peek at the instruction to which this function returns.  If it's
26715 restoring r2, then we know we've already saved r2. We can't
26716 unconditionally save r2 because the value we have will already
26717 be updated if we arrived at this function via a plt call or
26718 toc adjusting stub. */
26719 emit_move_insn (tmp_reg_si, gen_rtx_MEM (SImode, tmp_reg));
26720 toc_restore_insn = ((TARGET_32BIT ? 0x80410000 : 0xE8410000)
26721 + RS6000_TOC_SAVE_SLOT);
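/* 0x80410000 and 0xE8410000 are the encodings of "lwz r2,0(r1)" and
   "ld r2,0(r1)"; adding RS6000_TOC_SAVE_SLOT supplies the
   displacement, giving the exact TOC-restore instruction we expect
   to find at the return address.  */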
26722 hi = gen_int_mode (toc_restore_insn & ~0xffff, SImode);
26723 emit_insn (gen_xorsi3 (tmp_reg_si, tmp_reg_si, hi));
26724 compare_result = gen_rtx_REG (CCUNSmode, CR0_REGNO);
26725 validate_condition_mode (EQ, CCUNSmode);
26726 lo = gen_int_mode (toc_restore_insn & 0xffff, SImode);
26727 emit_insn (gen_rtx_SET (compare_result,
26728 gen_rtx_COMPARE (CCUNSmode, tmp_reg_si, lo)));
26729 toc_save_done = gen_label_rtx ();
26730 jump = gen_rtx_IF_THEN_ELSE (VOIDmode,
26731 gen_rtx_EQ (VOIDmode, compare_result,
26732 const0_rtx),
26733 gen_rtx_LABEL_REF (VOIDmode, toc_save_done),
26734 pc_rtx);
26735 jump = emit_jump_insn (gen_rtx_SET (pc_rtx, jump));
26736 JUMP_LABEL (jump) = toc_save_done;
26737 LABEL_NUSES (toc_save_done) += 1;
26739 save_insn = emit_frame_save (frame_reg_rtx, reg_mode,
26740 TOC_REGNUM, frame_off + RS6000_TOC_SAVE_SLOT,
26741 sp_off - frame_off);
26743 emit_label (toc_save_done);
26745 /* ??? If we leave SAVE_INSN as marked as saving R2, then we'll
26746 have a CFG that has different saves along different paths.
26747 Move the note to a dummy blockage insn, which describes that
26748 R2 is unconditionally saved after the label. */
26749 /* ??? An alternate representation might be a special insn pattern
25750 containing both the branch and the store. That might give the
25751 code that minimizes the number of DW_CFA_advance opcodes more
25752 freedom in placing the annotations. */
26753 note = find_reg_note (save_insn, REG_FRAME_RELATED_EXPR, NULL);
26754 if (note)
26755 remove_note (save_insn, note);
26756 else
26757 note = alloc_reg_note (REG_FRAME_RELATED_EXPR,
26758 copy_rtx (PATTERN (save_insn)), NULL_RTX);
26759 RTX_FRAME_RELATED_P (save_insn) = 0;
26761 join_insn = emit_insn (gen_blockage ());
26762 REG_NOTES (join_insn) = note;
26763 RTX_FRAME_RELATED_P (join_insn) = 1;
26765 if (using_static_chain_p)
26767 emit_move_insn (tmp_reg, gen_rtx_REG (Pmode, 0));
26768 END_USE (0);
26770 else
26771 END_USE (11);
26774 /* Save CR if we use any that must be preserved. */
26775 if (!WORLD_SAVE_P (info) && info->cr_save_p)
26777 rtx addr = gen_rtx_PLUS (Pmode, frame_reg_rtx,
26778 GEN_INT (info->cr_save_offset + frame_off));
26779 rtx mem = gen_frame_mem (SImode, addr);
26781 /* If we didn't copy cr before, do so now using r0. */
26782 if (cr_save_rtx == NULL_RTX)
26784 START_USE (0);
26785 cr_save_rtx = gen_rtx_REG (SImode, 0);
26786 rs6000_emit_prologue_move_from_cr (cr_save_rtx);
26789 /* Saving CR requires a two-instruction sequence: one instruction
26790 to move the CR to a general-purpose register, and a second
26791 instruction that stores the GPR to memory.
26793 We do not emit any DWARF CFI records for the first of these,
26794 because we cannot properly represent the fact that CR is saved in
26795 a register. One reason is that we cannot express that multiple
26796 CR fields are saved; another reason is that on 64-bit, the size
26797 of the CR register in DWARF (4 bytes) differs from the size of
26798 a general-purpose register.
26800 This means if any intervening instruction were to clobber one of
26801 the call-saved CR fields, we'd have incorrect CFI. To prevent
26802 this from happening, we mark the store to memory as a use of
26803 those CR fields, which prevents any such instruction from being
26804 scheduled in between the two instructions. */
26805 rtx crsave_v[9];
26806 int n_crsave = 0;
26807 int i;
26809 crsave_v[n_crsave++] = gen_rtx_SET (mem, cr_save_rtx);
26810 for (i = 0; i < 8; i++)
26811 if (save_reg_p (CR0_REGNO + i))
26812 crsave_v[n_crsave++]
26813 = gen_rtx_USE (VOIDmode, gen_rtx_REG (CCmode, CR0_REGNO + i));
26815 insn = emit_insn (gen_rtx_PARALLEL (VOIDmode,
26816 gen_rtvec_v (n_crsave, crsave_v)));
26817 END_USE (REGNO (cr_save_rtx));
26819 /* Now, there's no way that dwarf2out_frame_debug_expr is going to
26820 understand '(unspec:SI [(reg:CC 68) ...] UNSPEC_MOVESI_FROM_CR)',
26821 so we need to construct a frame expression manually. */
26822 RTX_FRAME_RELATED_P (insn) = 1;
26824 /* Update address to be stack-pointer relative, like
26825 rs6000_frame_related would do. */
26826 addr = gen_rtx_PLUS (Pmode, gen_rtx_REG (Pmode, STACK_POINTER_REGNUM),
26827 GEN_INT (info->cr_save_offset + sp_off));
26828 mem = gen_frame_mem (SImode, addr);
26830 if (DEFAULT_ABI == ABI_ELFv2)
26832 /* In the ELFv2 ABI we generate separate CFI records for each
26833 CR field that was actually saved. They all point to the
26834 same 32-bit stack slot. */
26835 rtx crframe[8];
26836 int n_crframe = 0;
26838 for (i = 0; i < 8; i++)
26839 if (save_reg_p (CR0_REGNO + i))
26841 crframe[n_crframe]
26842 = gen_rtx_SET (mem, gen_rtx_REG (SImode, CR0_REGNO + i));
26844 RTX_FRAME_RELATED_P (crframe[n_crframe]) = 1;
26845 n_crframe++;
26848 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
26849 gen_rtx_PARALLEL (VOIDmode,
26850 gen_rtvec_v (n_crframe, crframe)));
26852 else
26854 /* In other ABIs, by convention, we use a single CR regnum to
26855 represent the fact that all call-saved CR fields are saved.
26856 We use CR2_REGNO to be compatible with gcc-2.95 on Linux. */
26857 rtx set = gen_rtx_SET (mem, gen_rtx_REG (SImode, CR2_REGNO));
26858 add_reg_note (insn, REG_FRAME_RELATED_EXPR, set);
26862 /* In the ELFv2 ABI we need to save all call-saved CR fields into
26863 *separate* slots if the routine calls __builtin_eh_return, so
26864 that they can be independently restored by the unwinder. */
26865 if (DEFAULT_ABI == ABI_ELFv2 && crtl->calls_eh_return)
26867 int i, cr_off = info->ehcr_offset;
26868 rtx crsave;
26870 /* ??? We might get better performance by using multiple mfocrf
26871 instructions. */
26872 crsave = gen_rtx_REG (SImode, 0);
26873 emit_insn (gen_prologue_movesi_from_cr (crsave));
26875 for (i = 0; i < 8; i++)
26876 if (!call_used_regs[CR0_REGNO + i])
26878 rtvec p = rtvec_alloc (2);
26879 RTVEC_ELT (p, 0)
26880 = gen_frame_store (crsave, frame_reg_rtx, cr_off + frame_off);
26881 RTVEC_ELT (p, 1)
26882 = gen_rtx_USE (VOIDmode, gen_rtx_REG (CCmode, CR0_REGNO + i));
26884 insn = emit_insn (gen_rtx_PARALLEL (VOIDmode, p));
26886 RTX_FRAME_RELATED_P (insn) = 1;
26887 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
26888 gen_frame_store (gen_rtx_REG (SImode, CR0_REGNO + i),
26889 sp_reg_rtx, cr_off + sp_off));
26891 cr_off += reg_size;
26895 /* If we are emitting stack probes, but allocate no stack, then
26896 just note that in the dump file. */
26897 if (flag_stack_clash_protection
26898 && dump_file
26899 && !info->push_p)
26900 dump_stack_clash_frame_info (NO_PROBE_NO_FRAME, false);
26902 /* Update stack and set back pointer unless this is V.4,
26903 for which it was done previously. */
26904 if (!WORLD_SAVE_P (info) && info->push_p
26905 && !(DEFAULT_ABI == ABI_V4 || crtl->calls_eh_return))
26907 rtx ptr_reg = NULL;
26908 int ptr_off = 0;
26910 /* If saving altivec regs we need to be able to address all save
26911 locations using a 16-bit offset. */
26912 if ((strategy & SAVE_INLINE_VRS) == 0
26913 || (info->altivec_size != 0
26914 && (info->altivec_save_offset + info->altivec_size - 16
26915 + info->total_size - frame_off) > 32767)
26916 || (info->vrsave_size != 0
26917 && (info->vrsave_save_offset
26918 + info->total_size - frame_off) > 32767))
26920 int sel = SAVRES_SAVE | SAVRES_VR;
26921 unsigned ptr_regno = ptr_regno_for_savres (sel);
26923 if (using_static_chain_p
26924 && ptr_regno == STATIC_CHAIN_REGNUM)
26925 ptr_regno = 12;
26926 if (REGNO (frame_reg_rtx) != ptr_regno)
26927 START_USE (ptr_regno);
26928 ptr_reg = gen_rtx_REG (Pmode, ptr_regno);
26929 frame_reg_rtx = ptr_reg;
26930 ptr_off = info->altivec_save_offset + info->altivec_size;
26931 frame_off = -ptr_off;
26933 else if (REGNO (frame_reg_rtx) == 1)
26934 frame_off = info->total_size;
26935 sp_adjust = rs6000_emit_allocate_stack (info->total_size,
26936 ptr_reg, ptr_off);
26937 if (REGNO (frame_reg_rtx) == 12)
26938 sp_adjust = 0;
26939 sp_off = info->total_size;
26940 if (frame_reg_rtx != sp_reg_rtx)
26941 rs6000_emit_stack_tie (frame_reg_rtx, false);
26944 /* Set frame pointer, if needed. */
26945 if (frame_pointer_needed)
26947 insn = emit_move_insn (gen_rtx_REG (Pmode, HARD_FRAME_POINTER_REGNUM),
26948 sp_reg_rtx);
26949 RTX_FRAME_RELATED_P (insn) = 1;
26952 /* Save AltiVec registers if needed. Save here because the red zone does
26953 not always include AltiVec registers. */
26954 if (!WORLD_SAVE_P (info)
26955 && info->altivec_size != 0 && (strategy & SAVE_INLINE_VRS) == 0)
26957 int end_save = info->altivec_save_offset + info->altivec_size;
26958 int ptr_off;
26959 /* Oddly, the vector save/restore functions point r0 at the end
26960 of the save area, then use r11 or r12 to load offsets for
26961 [reg+reg] addressing. */
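/* For illustration only; this mirrors the out-of-line routines in
libgcc rather than code emitted inline here. With r0 pointing at
the end of the save area, each vector slot is reached through a
negative offset held in r11 or r12, roughly:
li 11,-16
stvx 31,11,0     # vr31 stored at r0-16
li 11,-32
stvx 30,11,0     # vr30 stored at r0-32  */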
26962 rtx ptr_reg = gen_rtx_REG (Pmode, 0);
26963 int scratch_regno = ptr_regno_for_savres (SAVRES_SAVE | SAVRES_VR);
26964 rtx scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
26966 gcc_checking_assert (scratch_regno == 11 || scratch_regno == 12);
26967 NOT_INUSE (0);
26968 if (scratch_regno == 12)
26969 sp_adjust = 0;
26970 if (end_save + frame_off != 0)
26972 rtx offset = GEN_INT (end_save + frame_off);
26974 emit_insn (gen_add3_insn (ptr_reg, frame_reg_rtx, offset));
26976 else
26977 emit_move_insn (ptr_reg, frame_reg_rtx);
26979 ptr_off = -end_save;
26980 insn = rs6000_emit_savres_rtx (info, scratch_reg,
26981 info->altivec_save_offset + ptr_off,
26982 0, V4SImode, SAVRES_SAVE | SAVRES_VR);
26983 rs6000_frame_related (insn, scratch_reg, sp_off - ptr_off,
26984 NULL_RTX, NULL_RTX);
26985 if (REGNO (frame_reg_rtx) == REGNO (scratch_reg))
26987 /* The oddity mentioned above clobbered our frame reg. */
26988 emit_move_insn (frame_reg_rtx, ptr_reg);
26989 frame_off = ptr_off;
26992 else if (!WORLD_SAVE_P (info)
26993 && info->altivec_size != 0)
26995 int i;
26997 for (i = info->first_altivec_reg_save; i <= LAST_ALTIVEC_REGNO; ++i)
26998 if (info->vrsave_mask & ALTIVEC_REG_BIT (i))
27000 rtx areg, savereg, mem;
27001 HOST_WIDE_INT offset;
27003 offset = (info->altivec_save_offset + frame_off
27004 + 16 * (i - info->first_altivec_reg_save));
27006 savereg = gen_rtx_REG (V4SImode, i);
27008 if (TARGET_P9_VECTOR && quad_address_offset_p (offset))
27010 mem = gen_frame_mem (V4SImode,
27011 gen_rtx_PLUS (Pmode, frame_reg_rtx,
27012 GEN_INT (offset)));
27013 insn = emit_insn (gen_rtx_SET (mem, savereg));
27014 areg = NULL_RTX;
27016 else
27018 NOT_INUSE (0);
27019 areg = gen_rtx_REG (Pmode, 0);
27020 emit_move_insn (areg, GEN_INT (offset));
27022 /* AltiVec addressing mode is [reg+reg]. */
27023 mem = gen_frame_mem (V4SImode,
27024 gen_rtx_PLUS (Pmode, frame_reg_rtx, areg));
27026 /* Rather than emitting a generic move, force use of the stvx
27027 instruction, which we always want on ISA 2.07 (power8) systems.
27028 In particular we don't want xxpermdi/stxvd2x for little
27029 endian. */
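/* I.e. we prefer the single insn
stvx vS,rA,rB
over the little-endian vector-move expansion, which would be
roughly (illustrative mnemonics)
xxpermdi vS,vS,vS,2
stxvd2x vS,rA,rB
where the extra element swap is exactly what we want to avoid. */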
27030 insn = emit_insn (gen_altivec_stvx_v4si_internal (mem, savereg));
27033 rs6000_frame_related (insn, frame_reg_rtx, sp_off - frame_off,
27034 areg, GEN_INT (offset));
27038 /* VRSAVE is a bit vector representing which AltiVec registers
27039 are used. The OS uses this to determine which vector
27040 registers to save on a context switch. We need to save
27041 VRSAVE on the stack frame, add whatever AltiVec registers we
27042 used in this function, and do the corresponding magic in the
27043 epilogue. */
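/* Rough sketch of that "magic" (illustrative; the real insns come
from emit_vrsave_prologue, save_regno is assumed to be r11, and
OFF/MASK stand for the save offset and vrsave_mask):
mfspr 11,256          # read VRSAVE (SPR 256)
stw 11,OFF(1)         # save the old value in the frame
oris 11,11,MASK@h     # or in the AltiVec regs this function uses
ori 11,11,MASK@l
mtspr 256,11  */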
27045 if (!WORLD_SAVE_P (info) && info->vrsave_size != 0)
27047 /* Get VRSAVE into a GPR. Note that ABI_V4 and ABI_DARWIN might
27048 be using r12 as frame_reg_rtx and r11 as the static chain
27049 pointer for nested functions. */
27050 int save_regno = 12;
27051 if ((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
27052 && !using_static_chain_p)
27053 save_regno = 11;
27054 else if (using_split_stack || REGNO (frame_reg_rtx) == 12)
27056 save_regno = 11;
27057 if (using_static_chain_p)
27058 save_regno = 0;
27060 NOT_INUSE (save_regno);
27062 emit_vrsave_prologue (info, save_regno, frame_off, frame_reg_rtx);
27065 /* If we are using RS6000_PIC_OFFSET_TABLE_REGNUM, we need to set it up. */
27066 if (!TARGET_SINGLE_PIC_BASE
27067 && ((TARGET_TOC && TARGET_MINIMAL_TOC
27068 && !constant_pool_empty_p ())
27069 || (DEFAULT_ABI == ABI_V4
27070 && (flag_pic == 1 || (flag_pic && TARGET_SECURE_PLT))
27071 && df_regs_ever_live_p (RS6000_PIC_OFFSET_TABLE_REGNUM))))
27073 /* If emit_load_toc_table will use the link register, we need to save
27074 it. We use R12 for this purpose because emit_load_toc_table
27075 can use register 0. This allows us to use a plain 'blr' to return
27076 from the procedure more often. */
27077 int save_LR_around_toc_setup = (TARGET_ELF
27078 && DEFAULT_ABI == ABI_V4
27079 && flag_pic
27080 && ! info->lr_save_p
27081 && EDGE_COUNT (EXIT_BLOCK_PTR_FOR_FN (cfun)->preds) > 0);
27082 if (save_LR_around_toc_setup)
27084 rtx lr = gen_rtx_REG (Pmode, LR_REGNO);
27085 rtx tmp = gen_rtx_REG (Pmode, 12);
27087 sp_adjust = 0;
27088 insn = emit_move_insn (tmp, lr);
27089 RTX_FRAME_RELATED_P (insn) = 1;
27091 rs6000_emit_load_toc_table (TRUE);
27093 insn = emit_move_insn (lr, tmp);
27094 add_reg_note (insn, REG_CFA_RESTORE, lr);
27095 RTX_FRAME_RELATED_P (insn) = 1;
27097 else
27098 rs6000_emit_load_toc_table (TRUE);
27101 #if TARGET_MACHO
27102 if (!TARGET_SINGLE_PIC_BASE
27103 && DEFAULT_ABI == ABI_DARWIN
27104 && flag_pic && crtl->uses_pic_offset_table)
27106 rtx lr = gen_rtx_REG (Pmode, LR_REGNO);
27107 rtx src = gen_rtx_SYMBOL_REF (Pmode, MACHOPIC_FUNCTION_BASE_NAME);
27109 /* Save and restore LR locally around this call (in R0). */
27110 if (!info->lr_save_p)
27111 emit_move_insn (gen_rtx_REG (Pmode, 0), lr);
27113 emit_insn (gen_load_macho_picbase (src));
27115 emit_move_insn (gen_rtx_REG (Pmode,
27116 RS6000_PIC_OFFSET_TABLE_REGNUM),
27117 lr);
27119 if (!info->lr_save_p)
27120 emit_move_insn (lr, gen_rtx_REG (Pmode, 0));
27122 #endif
27124 /* If we need to, save the TOC register after doing the stack setup.
27125 Do not emit eh frame info for this save. The unwinder wants info,
27126 conceptually attached to instructions in this function, about
27127 register values in the caller of this function. This R2 may have
27128 already been changed from the value in the caller.
27129 We don't attempt to write accurate DWARF EH frame info for R2
27130 because code emitted by gcc for a (non-pointer) function call
27131 doesn't save and restore R2. Instead, R2 is managed out-of-line
27132 by a linker generated plt call stub when the function resides in
27133 a shared library. This behavior is costly to describe in DWARF,
27134 both in terms of the size of DWARF info and the time taken in the
27135 unwinder to interpret it. R2 changes, apart from the
27136 calls_eh_return case earlier in this function, are handled by
27137 linux-unwind.h frob_update_context. */
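/* For background only: an illustrative ELFv2 linker call stub, not
code emitted here. A TOC-saving plt stub looks roughly like
std 2,24(1)                # save caller's TOC pointer
addis 12,2,<plt-slot>@ha
ld 12,<plt-slot>@l(12)
mtctr 12
bctr
and the caller reloads r2 from its TOC save slot after the call
returns. */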
27138 if (rs6000_save_toc_in_prologue_p ()
27139 && !cfun->machine->toc_is_wrapped_separately)
27141 rtx reg = gen_rtx_REG (reg_mode, TOC_REGNUM);
27142 emit_insn (gen_frame_store (reg, sp_reg_rtx, RS6000_TOC_SAVE_SLOT));
27145 /* Set up the arg pointer (r12) for -fsplit-stack code. */
27146 if (using_split_stack && split_stack_arg_pointer_used_p ())
27147 emit_split_stack_prologue (info, sp_adjust, frame_off, frame_reg_rtx);
27150 /* Output .extern statements for the save/restore routines we use. */
27152 static void
27153 rs6000_output_savres_externs (FILE *file)
27155 rs6000_stack_t *info = rs6000_stack_info ();
27157 if (TARGET_DEBUG_STACK)
27158 debug_stack_info (info);
27160 /* Write .extern for any function we will call to save and restore
27161 fp values. */
27162 if (info->first_fp_reg_save < 64
27163 && !TARGET_MACHO
27164 && !TARGET_ELF)
27166 char *name;
27167 int regno = info->first_fp_reg_save - 32;
27169 if ((info->savres_strategy & SAVE_INLINE_FPRS) == 0)
27171 bool lr = (info->savres_strategy & SAVE_NOINLINE_FPRS_SAVES_LR) != 0;
27172 int sel = SAVRES_SAVE | SAVRES_FPR | (lr ? SAVRES_LR : 0);
27173 name = rs6000_savres_routine_name (regno, sel);
27174 fprintf (file, "\t.extern %s\n", name);
27176 if ((info->savres_strategy & REST_INLINE_FPRS) == 0)
27178 bool lr = (info->savres_strategy
27179 & REST_NOINLINE_FPRS_DOESNT_RESTORE_LR) == 0;
27180 int sel = SAVRES_FPR | (lr ? SAVRES_LR : 0);
27181 name = rs6000_savres_routine_name (regno, sel);
27182 fprintf (file, "\t.extern %s\n", name);
27187 /* Write function prologue. */
27189 static void
27190 rs6000_output_function_prologue (FILE *file)
27192 if (!cfun->is_thunk)
27193 rs6000_output_savres_externs (file);
27195 /* ELFv2 ABI r2 setup code and local entry point. This must follow
27196 immediately after the global entry point label. */
27197 if (rs6000_global_entry_point_needed_p ())
27199 const char *name = XSTR (XEXP (DECL_RTL (current_function_decl), 0), 0);
27201 (*targetm.asm_out.internal_label) (file, "LCF", rs6000_pic_labelno);
27203 if (TARGET_CMODEL != CMODEL_LARGE)
27205 /* In the small and medium code models, we assume the TOC is less
27206 than 2 GB away from the text section, so it can be computed via the
27207 following two-instruction sequence. */
27208 char buf[256];
27210 ASM_GENERATE_INTERNAL_LABEL (buf, "LCF", rs6000_pic_labelno);
27211 fprintf (file, "0:\taddis 2,12,.TOC.-");
27212 assemble_name (file, buf);
27213 fprintf (file, "@ha\n");
27214 fprintf (file, "\taddi 2,2,.TOC.-");
27215 assemble_name (file, buf);
27216 fprintf (file, "@l\n");
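/* The result is roughly (assuming rs6000_pic_labelno is 0):
0:	addis 2,12,.TOC.-.LCF0@ha
addi 2,2,.TOC.-.LCF0@l
i.e. the TOC pointer is computed from the global entry address,
which the ELFv2 ABI guarantees is in r12. */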
27218 else
27220 /* In the large code model, we allow arbitrary offsets between the
27221 TOC and the text section, so we have to load the offset from
27222 memory. The data field is emitted directly before the global
27223 entry point in rs6000_elf_declare_function_name. */
27224 char buf[256];
27226 #ifdef HAVE_AS_ENTRY_MARKERS
27227 /* If supported by the linker, emit a marker relocation. If the
27228 total code size of the final executable or shared library
27229 happens to fit into 2 GB after all, the linker will replace
27230 this code sequence with the sequence for the small or medium
27231 code model. */
27232 fprintf (file, "\t.reloc .,R_PPC64_ENTRY\n");
27233 #endif
27234 fprintf (file, "\tld 2,");
27235 ASM_GENERATE_INTERNAL_LABEL (buf, "LCL", rs6000_pic_labelno);
27236 assemble_name (file, buf);
27237 fprintf (file, "-");
27238 ASM_GENERATE_INTERNAL_LABEL (buf, "LCF", rs6000_pic_labelno);
27239 assemble_name (file, buf);
27240 fprintf (file, "(12)\n");
27241 fprintf (file, "\tadd 2,2,12\n");
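/* Net effect, roughly (again for rs6000_pic_labelno == 0):
ld 2,.LCL0-.LCF0(12)    # TOC offset stored before the global entry
add 2,2,12              # TOC pointer = entry address + offset  */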
27244 fputs ("\t.localentry\t", file);
27245 assemble_name (file, name);
27246 fputs (",.-", file);
27247 assemble_name (file, name);
27248 fputs ("\n", file);
27251 /* Output -mprofile-kernel code. This needs to be done here instead of
27252 in output_function_profile since it must go after the ELFv2 ABI
27253 local entry point. */
27254 if (TARGET_PROFILE_KERNEL && crtl->profile)
27256 gcc_assert (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2);
27257 gcc_assert (!TARGET_32BIT);
27259 asm_fprintf (file, "\tmflr %s\n", reg_names[0]);
27261 /* In the ELFv2 ABI we have no compiler stack word. It must be
27262 the responsibility of _mcount to preserve the static chain
27263 register if required. */
27264 if (DEFAULT_ABI != ABI_ELFv2
27265 && cfun->static_chain_decl != NULL)
27267 asm_fprintf (file, "\tstd %s,24(%s)\n",
27268 reg_names[STATIC_CHAIN_REGNUM], reg_names[1]);
27269 fprintf (file, "\tbl %s\n", RS6000_MCOUNT);
27270 asm_fprintf (file, "\tld %s,24(%s)\n",
27271 reg_names[STATIC_CHAIN_REGNUM], reg_names[1]);
27273 else
27274 fprintf (file, "\tbl %s\n", RS6000_MCOUNT);
27277 rs6000_pic_labelno++;
27280 /* -mprofile-kernel code calls mcount before the function prologue,
27281 so a profiled leaf function should stay a leaf function. */
27282 static bool
27283 rs6000_keep_leaf_when_profiled ()
27285 return TARGET_PROFILE_KERNEL;
27288 /* Non-zero if vmx regs are restored before the frame pop, zero if
27289 we restore after the pop when possible. */
27290 #define ALWAYS_RESTORE_ALTIVEC_BEFORE_POP 0
27292 /* Restoring cr is a two step process: loading a reg from the frame
27293 save, then moving the reg to cr. For ABI_V4 we must let the
27294 unwinder know that the stack location is no longer valid at or
27295 before the stack deallocation, but we can't emit a cfa_restore for
27296 cr at the stack deallocation like we do for other registers.
27297 The trouble is that it is possible for the move to cr to be
27298 scheduled after the stack deallocation. So say exactly where cr
27299 is located on each of the two insns. */
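/* Illustrative shape of the two steps and their unwind notes (the
register and mask are examples only):
lwz 12,OFF(1)      with note REG_CFA_REGISTER (set (reg 12) cr2)
mtcrf 0x20,12      with note REG_CFA_RESTORE cr2
so the unwinder always knows where cr lives, even if the mtcrf is
scheduled past the stack deallocation. */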
27301 static rtx
27302 load_cr_save (int regno, rtx frame_reg_rtx, int offset, bool exit_func)
27304 rtx mem = gen_frame_mem_offset (SImode, frame_reg_rtx, offset);
27305 rtx reg = gen_rtx_REG (SImode, regno);
27306 rtx_insn *insn = emit_move_insn (reg, mem);
27308 if (!exit_func && DEFAULT_ABI == ABI_V4)
27310 rtx cr = gen_rtx_REG (SImode, CR2_REGNO);
27311 rtx set = gen_rtx_SET (reg, cr);
27313 add_reg_note (insn, REG_CFA_REGISTER, set);
27314 RTX_FRAME_RELATED_P (insn) = 1;
27316 return reg;
27319 /* Reload CR from REG. */
27321 static void
27322 restore_saved_cr (rtx reg, int using_mfcr_multiple, bool exit_func)
27324 int count = 0;
27325 int i;
27327 if (using_mfcr_multiple)
27329 for (i = 0; i < 8; i++)
27330 if (save_reg_p (CR0_REGNO + i))
27331 count++;
27332 gcc_assert (count);
27335 if (using_mfcr_multiple && count > 1)
27337 rtx_insn *insn;
27338 rtvec p;
27339 int ndx;
27341 p = rtvec_alloc (count);
27343 ndx = 0;
27344 for (i = 0; i < 8; i++)
27345 if (save_reg_p (CR0_REGNO + i))
27347 rtvec r = rtvec_alloc (2);
27348 RTVEC_ELT (r, 0) = reg;
27349 RTVEC_ELT (r, 1) = GEN_INT (1 << (7-i));
27350 RTVEC_ELT (p, ndx) =
27351 gen_rtx_SET (gen_rtx_REG (CCmode, CR0_REGNO + i),
27352 gen_rtx_UNSPEC (CCmode, r, UNSPEC_MOVESI_TO_CR));
27353 ndx++;
27355 insn = emit_insn (gen_rtx_PARALLEL (VOIDmode, p));
27356 gcc_assert (ndx == count);
27358 /* For the ELFv2 ABI we generate a CFA_RESTORE for each
27359 CR field separately. */
27360 if (!exit_func && DEFAULT_ABI == ABI_ELFv2 && flag_shrink_wrap)
27362 for (i = 0; i < 8; i++)
27363 if (save_reg_p (CR0_REGNO + i))
27364 add_reg_note (insn, REG_CFA_RESTORE,
27365 gen_rtx_REG (SImode, CR0_REGNO + i));
27367 RTX_FRAME_RELATED_P (insn) = 1;
27370 else
27371 for (i = 0; i < 8; i++)
27372 if (save_reg_p (CR0_REGNO + i))
27374 rtx insn = emit_insn (gen_movsi_to_cr_one
27375 (gen_rtx_REG (CCmode, CR0_REGNO + i), reg));
27377 /* For the ELFv2 ABI we generate a CFA_RESTORE for each
27378 CR field separately, attached to the insn that in fact
27379 restores this particular CR field. */
27380 if (!exit_func && DEFAULT_ABI == ABI_ELFv2 && flag_shrink_wrap)
27382 add_reg_note (insn, REG_CFA_RESTORE,
27383 gen_rtx_REG (SImode, CR0_REGNO + i));
27385 RTX_FRAME_RELATED_P (insn) = 1;
27389 /* For other ABIs, we just generate a single CFA_RESTORE for CR2. */
27390 if (!exit_func && DEFAULT_ABI != ABI_ELFv2
27391 && (DEFAULT_ABI == ABI_V4 || flag_shrink_wrap))
27393 rtx_insn *insn = get_last_insn ();
27394 rtx cr = gen_rtx_REG (SImode, CR2_REGNO);
27396 add_reg_note (insn, REG_CFA_RESTORE, cr);
27397 RTX_FRAME_RELATED_P (insn) = 1;
27401 /* Like cr, the move to lr instruction can be scheduled after the
27402 stack deallocation, but unlike cr, its stack frame save is still
27403 valid. So we only need to emit the cfa_restore on the correct
27404 instruction. */
27406 static void
27407 load_lr_save (int regno, rtx frame_reg_rtx, int offset)
27409 rtx mem = gen_frame_mem_offset (Pmode, frame_reg_rtx, offset);
27410 rtx reg = gen_rtx_REG (Pmode, regno);
27412 emit_move_insn (reg, mem);
27415 static void
27416 restore_saved_lr (int regno, bool exit_func)
27418 rtx reg = gen_rtx_REG (Pmode, regno);
27419 rtx lr = gen_rtx_REG (Pmode, LR_REGNO);
27420 rtx_insn *insn = emit_move_insn (lr, reg);
27422 if (!exit_func && flag_shrink_wrap)
27424 add_reg_note (insn, REG_CFA_RESTORE, lr);
27425 RTX_FRAME_RELATED_P (insn) = 1;
27429 static rtx
27430 add_crlr_cfa_restore (const rs6000_stack_t *info, rtx cfa_restores)
27432 if (DEFAULT_ABI == ABI_ELFv2)
27434 int i;
27435 for (i = 0; i < 8; i++)
27436 if (save_reg_p (CR0_REGNO + i))
27438 rtx cr = gen_rtx_REG (SImode, CR0_REGNO + i);
27439 cfa_restores = alloc_reg_note (REG_CFA_RESTORE, cr,
27440 cfa_restores);
27443 else if (info->cr_save_p)
27444 cfa_restores = alloc_reg_note (REG_CFA_RESTORE,
27445 gen_rtx_REG (SImode, CR2_REGNO),
27446 cfa_restores);
27448 if (info->lr_save_p)
27449 cfa_restores = alloc_reg_note (REG_CFA_RESTORE,
27450 gen_rtx_REG (Pmode, LR_REGNO),
27451 cfa_restores);
27452 return cfa_restores;
27455 /* Return true if OFFSET from stack pointer can be clobbered by signals.
27456 V.4 doesn't have any stack cushion; the AIX ABIs have 220 or 288 bytes
27457 below the stack pointer that are not clobbered by signals. */
27459 static inline bool
27460 offset_below_red_zone_p (HOST_WIDE_INT offset)
27462 return offset < (DEFAULT_ABI == ABI_V4
27463 ? 0
27464 : TARGET_32BIT ? -220 : -288);
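/* Example: on 64-bit AIX/ELFv2 a save slot at sp-300 lies below the
288-byte red zone, so a signal handler may clobber it once the
frame is popped; a slot at sp-200 remains valid. */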
27467 /* Append CFA_RESTORES to any existing REG_NOTES on the last insn. */
27469 static void
27470 emit_cfa_restores (rtx cfa_restores)
27472 rtx_insn *insn = get_last_insn ();
27473 rtx *loc = &REG_NOTES (insn);
27475 while (*loc)
27476 loc = &XEXP (*loc, 1);
27477 *loc = cfa_restores;
27478 RTX_FRAME_RELATED_P (insn) = 1;
27481 /* Emit function epilogue as insns. */
27483 void
27484 rs6000_emit_epilogue (int sibcall)
27486 rs6000_stack_t *info;
27487 int restoring_GPRs_inline;
27488 int restoring_FPRs_inline;
27489 int using_load_multiple;
27490 int using_mtcr_multiple;
27491 int use_backchain_to_restore_sp;
27492 int restore_lr;
27493 int strategy;
27494 HOST_WIDE_INT frame_off = 0;
27495 rtx sp_reg_rtx = gen_rtx_REG (Pmode, 1);
27496 rtx frame_reg_rtx = sp_reg_rtx;
27497 rtx cfa_restores = NULL_RTX;
27498 rtx insn;
27499 rtx cr_save_reg = NULL_RTX;
27500 machine_mode reg_mode = Pmode;
27501 int reg_size = TARGET_32BIT ? 4 : 8;
27502 machine_mode fp_reg_mode = TARGET_HARD_FLOAT ? DFmode : SFmode;
27503 int fp_reg_size = 8;
27504 int i;
27505 bool exit_func;
27506 unsigned ptr_regno;
27508 info = rs6000_stack_info ();
27510 strategy = info->savres_strategy;
27511 using_load_multiple = strategy & REST_MULTIPLE;
27512 restoring_FPRs_inline = sibcall || (strategy & REST_INLINE_FPRS);
27513 restoring_GPRs_inline = sibcall || (strategy & REST_INLINE_GPRS);
27514 using_mtcr_multiple = (rs6000_tune == PROCESSOR_PPC601
27515 || rs6000_tune == PROCESSOR_PPC603
27516 || rs6000_tune == PROCESSOR_PPC750
27517 || optimize_size);
27518 /* Restore via the backchain when we have a large frame, since this
27519 is more efficient than an addis, addi pair. The second condition
27520 here will not trigger at the moment; we don't actually need a
27521 frame pointer for alloca, but the generic parts of the compiler
27522 give us one anyway. */
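/* I.e. a single load through the back chain word at 0(r1), roughly
ld 11,0(1)
rather than reconstructing the old stack pointer with a pair like
addis 11,1,<hi>
addi 11,11,<lo>
(illustrative; the registers actually used are chosen below). */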
27523 use_backchain_to_restore_sp = (info->total_size + (info->lr_save_p
27524 ? info->lr_save_offset
27525 : 0) > 32767
27526 || (cfun->calls_alloca
27527 && !frame_pointer_needed));
27528 restore_lr = (info->lr_save_p
27529 && (restoring_FPRs_inline
27530 || (strategy & REST_NOINLINE_FPRS_DOESNT_RESTORE_LR))
27531 && (restoring_GPRs_inline
27532 || info->first_fp_reg_save < 64)
27533 && !cfun->machine->lr_is_wrapped_separately);
27536 if (WORLD_SAVE_P (info))
27538 int i, j;
27539 char rname[30];
27540 const char *alloc_rname;
27541 rtvec p;
27543 /* eh_rest_world_r10 will return to the location saved in the LR
27544 stack slot (which is not likely to be our caller).
27545 Input: R10 -- stack adjustment. Clobbers R0, R11, R12, R7, R8.
27546 rest_world is similar, except any R10 parameter is ignored.
27547 The exception-handling stuff that was here in 2.95 is no
27548 longer necessary. */
27550 p = rtvec_alloc (9
27551 + 32 - info->first_gp_reg_save
27552 + LAST_ALTIVEC_REGNO + 1 - info->first_altivec_reg_save
27553 + 63 + 1 - info->first_fp_reg_save);
27555 strcpy (rname, ((crtl->calls_eh_return) ?
27556 "*eh_rest_world_r10" : "*rest_world"));
27557 alloc_rname = ggc_strdup (rname);
27559 j = 0;
27560 RTVEC_ELT (p, j++) = ret_rtx;
27561 RTVEC_ELT (p, j++)
27562 = gen_rtx_USE (VOIDmode, gen_rtx_SYMBOL_REF (Pmode, alloc_rname));
27563 /* The instruction pattern requires a clobber here;
27564 it is shared with the restVEC helper. */
27565 RTVEC_ELT (p, j++)
27566 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (Pmode, 11));
27569 /* CR register traditionally saved as CR2. */
27570 rtx reg = gen_rtx_REG (SImode, CR2_REGNO);
27571 RTVEC_ELT (p, j++)
27572 = gen_frame_load (reg, frame_reg_rtx, info->cr_save_offset);
27573 if (flag_shrink_wrap)
27575 cfa_restores = alloc_reg_note (REG_CFA_RESTORE,
27576 gen_rtx_REG (Pmode, LR_REGNO),
27577 cfa_restores);
27578 cfa_restores = alloc_reg_note (REG_CFA_RESTORE, reg, cfa_restores);
27582 for (i = 0; i < 32 - info->first_gp_reg_save; i++)
27584 rtx reg = gen_rtx_REG (reg_mode, info->first_gp_reg_save + i);
27585 RTVEC_ELT (p, j++)
27586 = gen_frame_load (reg,
27587 frame_reg_rtx, info->gp_save_offset + reg_size * i);
27588 if (flag_shrink_wrap
27589 && save_reg_p (info->first_gp_reg_save + i))
27590 cfa_restores = alloc_reg_note (REG_CFA_RESTORE, reg, cfa_restores);
27592 for (i = 0; info->first_altivec_reg_save + i <= LAST_ALTIVEC_REGNO; i++)
27594 rtx reg = gen_rtx_REG (V4SImode, info->first_altivec_reg_save + i);
27595 RTVEC_ELT (p, j++)
27596 = gen_frame_load (reg,
27597 frame_reg_rtx, info->altivec_save_offset + 16 * i);
27598 if (flag_shrink_wrap
27599 && save_reg_p (info->first_altivec_reg_save + i))
27600 cfa_restores = alloc_reg_note (REG_CFA_RESTORE, reg, cfa_restores);
27602 for (i = 0; info->first_fp_reg_save + i <= 63; i++)
27604 rtx reg = gen_rtx_REG (TARGET_HARD_FLOAT ? DFmode : SFmode,
27605 info->first_fp_reg_save + i);
27606 RTVEC_ELT (p, j++)
27607 = gen_frame_load (reg, frame_reg_rtx, info->fp_save_offset + 8 * i);
27608 if (flag_shrink_wrap
27609 && save_reg_p (info->first_fp_reg_save + i))
27610 cfa_restores = alloc_reg_note (REG_CFA_RESTORE, reg, cfa_restores);
27612 RTVEC_ELT (p, j++)
27613 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (Pmode, 0));
27614 RTVEC_ELT (p, j++)
27615 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (SImode, 12));
27616 RTVEC_ELT (p, j++)
27617 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (SImode, 7));
27618 RTVEC_ELT (p, j++)
27619 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (SImode, 8));
27620 RTVEC_ELT (p, j++)
27621 = gen_rtx_USE (VOIDmode, gen_rtx_REG (SImode, 10));
27622 insn = emit_jump_insn (gen_rtx_PARALLEL (VOIDmode, p));
27624 if (flag_shrink_wrap)
27626 REG_NOTES (insn) = cfa_restores;
27627 add_reg_note (insn, REG_CFA_DEF_CFA, sp_reg_rtx);
27628 RTX_FRAME_RELATED_P (insn) = 1;
27630 return;
27633 /* frame_reg_rtx + frame_off points to the top of this stack frame. */
27634 if (info->push_p)
27635 frame_off = info->total_size;
27637 /* Restore AltiVec registers if we must do so before adjusting the
27638 stack. */
27639 if (info->altivec_size != 0
27640 && (ALWAYS_RESTORE_ALTIVEC_BEFORE_POP
27641 || (DEFAULT_ABI != ABI_V4
27642 && offset_below_red_zone_p (info->altivec_save_offset))))
27644 int i;
27645 int scratch_regno = ptr_regno_for_savres (SAVRES_VR);
27647 gcc_checking_assert (scratch_regno == 11 || scratch_regno == 12);
27648 if (use_backchain_to_restore_sp)
27650 int frame_regno = 11;
27652 if ((strategy & REST_INLINE_VRS) == 0)
27654 /* Of r11 and r12, select the one not clobbered by an
27655 out-of-line restore function for the frame register. */
27656 frame_regno = 11 + 12 - scratch_regno;
27658 frame_reg_rtx = gen_rtx_REG (Pmode, frame_regno);
27659 emit_move_insn (frame_reg_rtx,
27660 gen_rtx_MEM (Pmode, sp_reg_rtx));
27661 frame_off = 0;
27663 else if (frame_pointer_needed)
27664 frame_reg_rtx = hard_frame_pointer_rtx;
27666 if ((strategy & REST_INLINE_VRS) == 0)
27668 int end_save = info->altivec_save_offset + info->altivec_size;
27669 int ptr_off;
27670 rtx ptr_reg = gen_rtx_REG (Pmode, 0);
27671 rtx scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
27673 if (end_save + frame_off != 0)
27675 rtx offset = GEN_INT (end_save + frame_off);
27677 emit_insn (gen_add3_insn (ptr_reg, frame_reg_rtx, offset));
27679 else
27680 emit_move_insn (ptr_reg, frame_reg_rtx);
27682 ptr_off = -end_save;
27683 insn = rs6000_emit_savres_rtx (info, scratch_reg,
27684 info->altivec_save_offset + ptr_off,
27685 0, V4SImode, SAVRES_VR);
27687 else
27689 for (i = info->first_altivec_reg_save; i <= LAST_ALTIVEC_REGNO; ++i)
27690 if (info->vrsave_mask & ALTIVEC_REG_BIT (i))
27692 rtx addr, areg, mem, insn;
27693 rtx reg = gen_rtx_REG (V4SImode, i);
27694 HOST_WIDE_INT offset
27695 = (info->altivec_save_offset + frame_off
27696 + 16 * (i - info->first_altivec_reg_save));
27698 if (TARGET_P9_VECTOR && quad_address_offset_p (offset))
27700 mem = gen_frame_mem (V4SImode,
27701 gen_rtx_PLUS (Pmode, frame_reg_rtx,
27702 GEN_INT (offset)));
27703 insn = gen_rtx_SET (reg, mem);
27705 else
27707 areg = gen_rtx_REG (Pmode, 0);
27708 emit_move_insn (areg, GEN_INT (offset));
27710 /* AltiVec addressing mode is [reg+reg]. */
27711 addr = gen_rtx_PLUS (Pmode, frame_reg_rtx, areg);
27712 mem = gen_frame_mem (V4SImode, addr);
27714 /* Rather than emitting a generic move, force use of the
27715 lvx instruction, which we always want. In particular we
27716 don't want lxvd2x/xxpermdi for little endian. */
27717 insn = gen_altivec_lvx_v4si_internal (reg, mem);
27720 (void) emit_insn (insn);
27724 for (i = info->first_altivec_reg_save; i <= LAST_ALTIVEC_REGNO; ++i)
27725 if (((strategy & REST_INLINE_VRS) == 0
27726 || (info->vrsave_mask & ALTIVEC_REG_BIT (i)) != 0)
27727 && (flag_shrink_wrap
27728 || (offset_below_red_zone_p
27729 (info->altivec_save_offset
27730 + 16 * (i - info->first_altivec_reg_save))))
27731 && save_reg_p (i))
27733 rtx reg = gen_rtx_REG (V4SImode, i);
27734 cfa_restores = alloc_reg_note (REG_CFA_RESTORE, reg, cfa_restores);
27738 /* Restore VRSAVE if we must do so before adjusting the stack. */
27739 if (info->vrsave_size != 0
27740 && (ALWAYS_RESTORE_ALTIVEC_BEFORE_POP
27741 || (DEFAULT_ABI != ABI_V4
27742 && offset_below_red_zone_p (info->vrsave_save_offset))))
27744 rtx reg;
27746 if (frame_reg_rtx == sp_reg_rtx)
27748 if (use_backchain_to_restore_sp)
27750 frame_reg_rtx = gen_rtx_REG (Pmode, 11);
27751 emit_move_insn (frame_reg_rtx,
27752 gen_rtx_MEM (Pmode, sp_reg_rtx));
27753 frame_off = 0;
27755 else if (frame_pointer_needed)
27756 frame_reg_rtx = hard_frame_pointer_rtx;
27759 reg = gen_rtx_REG (SImode, 12);
27760 emit_insn (gen_frame_load (reg, frame_reg_rtx,
27761 info->vrsave_save_offset + frame_off));
27763 emit_insn (generate_set_vrsave (reg, info, 1));
27766 insn = NULL_RTX;
27767 /* If we have a large stack frame, restore the old stack pointer
27768 using the backchain. */
27769 if (use_backchain_to_restore_sp)
27771 if (frame_reg_rtx == sp_reg_rtx)
27773 /* Under V.4, don't reset the stack pointer until after we're done
27774 loading the saved registers. */
27775 if (DEFAULT_ABI == ABI_V4)
27776 frame_reg_rtx = gen_rtx_REG (Pmode, 11);
27778 insn = emit_move_insn (frame_reg_rtx,
27779 gen_rtx_MEM (Pmode, sp_reg_rtx));
27780 frame_off = 0;
27782 else if (ALWAYS_RESTORE_ALTIVEC_BEFORE_POP
27783 && DEFAULT_ABI == ABI_V4)
27784 /* frame_reg_rtx has been set up by the altivec restore. */
27786 else
27788 insn = emit_move_insn (sp_reg_rtx, frame_reg_rtx);
27789 frame_reg_rtx = sp_reg_rtx;
27792 /* If we have a frame pointer, we can restore the old stack pointer
27793 from it. */
27794 else if (frame_pointer_needed)
27796 frame_reg_rtx = sp_reg_rtx;
27797 if (DEFAULT_ABI == ABI_V4)
27798 frame_reg_rtx = gen_rtx_REG (Pmode, 11);
27799 /* Prevent reordering memory accesses against stack pointer restore. */
27800 else if (cfun->calls_alloca
27801 || offset_below_red_zone_p (-info->total_size))
27802 rs6000_emit_stack_tie (frame_reg_rtx, true);
27804 insn = emit_insn (gen_add3_insn (frame_reg_rtx, hard_frame_pointer_rtx,
27805 GEN_INT (info->total_size)));
27806 frame_off = 0;
27808 else if (info->push_p
27809 && DEFAULT_ABI != ABI_V4
27810 && !crtl->calls_eh_return)
27812 /* Prevent reordering memory accesses against stack pointer restore. */
27813 if (cfun->calls_alloca
27814 || offset_below_red_zone_p (-info->total_size))
27815 rs6000_emit_stack_tie (frame_reg_rtx, false);
27816 insn = emit_insn (gen_add3_insn (sp_reg_rtx, sp_reg_rtx,
27817 GEN_INT (info->total_size)));
27818 frame_off = 0;
27820 if (insn && frame_reg_rtx == sp_reg_rtx)
27822 if (cfa_restores)
27824 REG_NOTES (insn) = cfa_restores;
27825 cfa_restores = NULL_RTX;
27827 add_reg_note (insn, REG_CFA_DEF_CFA, sp_reg_rtx);
27828 RTX_FRAME_RELATED_P (insn) = 1;
27831 /* Restore AltiVec registers if we have not done so already. */
27832 if (!ALWAYS_RESTORE_ALTIVEC_BEFORE_POP
27833 && info->altivec_size != 0
27834 && (DEFAULT_ABI == ABI_V4
27835 || !offset_below_red_zone_p (info->altivec_save_offset)))
27837 int i;
27839 if ((strategy & REST_INLINE_VRS) == 0)
27841 int end_save = info->altivec_save_offset + info->altivec_size;
27842 int ptr_off;
27843 rtx ptr_reg = gen_rtx_REG (Pmode, 0);
27844 int scratch_regno = ptr_regno_for_savres (SAVRES_VR);
27845 rtx scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
27847 if (end_save + frame_off != 0)
27849 rtx offset = GEN_INT (end_save + frame_off);
27851 emit_insn (gen_add3_insn (ptr_reg, frame_reg_rtx, offset));
27853 else
27854 emit_move_insn (ptr_reg, frame_reg_rtx);
27856 ptr_off = -end_save;
27857 insn = rs6000_emit_savres_rtx (info, scratch_reg,
27858 info->altivec_save_offset + ptr_off,
27859 0, V4SImode, SAVRES_VR);
27860 if (REGNO (frame_reg_rtx) == REGNO (scratch_reg))
27862 /* Frame reg was clobbered by out-of-line save. Restore it
27863 from ptr_reg, and if we are calling out-of-line gpr or
27864 fpr restore set up the correct pointer and offset. */
27865 unsigned newptr_regno = 1;
27866 if (!restoring_GPRs_inline)
27868 bool lr = info->gp_save_offset + info->gp_size == 0;
27869 int sel = SAVRES_GPR | (lr ? SAVRES_LR : 0);
27870 newptr_regno = ptr_regno_for_savres (sel);
27871 end_save = info->gp_save_offset + info->gp_size;
27873 else if (!restoring_FPRs_inline)
27875 bool lr = !(strategy & REST_NOINLINE_FPRS_DOESNT_RESTORE_LR);
27876 int sel = SAVRES_FPR | (lr ? SAVRES_LR : 0);
27877 newptr_regno = ptr_regno_for_savres (sel);
27878 end_save = info->fp_save_offset + info->fp_size;
27881 if (newptr_regno != 1 && REGNO (frame_reg_rtx) != newptr_regno)
27882 frame_reg_rtx = gen_rtx_REG (Pmode, newptr_regno);
27884 if (end_save + ptr_off != 0)
27886 rtx offset = GEN_INT (end_save + ptr_off);
27888 frame_off = -end_save;
27889 if (TARGET_32BIT)
27890 emit_insn (gen_addsi3_carry (frame_reg_rtx,
27891 ptr_reg, offset));
27892 else
27893 emit_insn (gen_adddi3_carry (frame_reg_rtx,
27894 ptr_reg, offset));
27896 else
27898 frame_off = ptr_off;
27899 emit_move_insn (frame_reg_rtx, ptr_reg);
27903 else
27905 for (i = info->first_altivec_reg_save; i <= LAST_ALTIVEC_REGNO; ++i)
27906 if (info->vrsave_mask & ALTIVEC_REG_BIT (i))
27908 rtx addr, areg, mem, insn;
27909 rtx reg = gen_rtx_REG (V4SImode, i);
27910 HOST_WIDE_INT offset
27911 = (info->altivec_save_offset + frame_off
27912 + 16 * (i - info->first_altivec_reg_save));
27914 if (TARGET_P9_VECTOR && quad_address_offset_p (offset))
27916 mem = gen_frame_mem (V4SImode,
27917 gen_rtx_PLUS (Pmode, frame_reg_rtx,
27918 GEN_INT (offset)));
27919 insn = gen_rtx_SET (reg, mem);
27921 else
27923 areg = gen_rtx_REG (Pmode, 0);
27924 emit_move_insn (areg, GEN_INT (offset));
27926 /* AltiVec addressing mode is [reg+reg]. */
27927 addr = gen_rtx_PLUS (Pmode, frame_reg_rtx, areg);
27928 mem = gen_frame_mem (V4SImode, addr);
27930 /* Rather than emitting a generic move, force use of the
27931 lvx instruction, which we always want. In particular we
27932 don't want lxvd2x/xxpermdi for little endian. */
27933 insn = gen_altivec_lvx_v4si_internal (reg, mem);
27936 (void) emit_insn (insn);
27940 for (i = info->first_altivec_reg_save; i <= LAST_ALTIVEC_REGNO; ++i)
27941 if (((strategy & REST_INLINE_VRS) == 0
27942 || (info->vrsave_mask & ALTIVEC_REG_BIT (i)) != 0)
27943 && (DEFAULT_ABI == ABI_V4 || flag_shrink_wrap)
27944 && save_reg_p (i))
27946 rtx reg = gen_rtx_REG (V4SImode, i);
27947 cfa_restores = alloc_reg_note (REG_CFA_RESTORE, reg, cfa_restores);
27951 /* Restore VRSAVE if we have not done so already. */
27952 if (!ALWAYS_RESTORE_ALTIVEC_BEFORE_POP
27953 && info->vrsave_size != 0
27954 && (DEFAULT_ABI == ABI_V4
27955 || !offset_below_red_zone_p (info->vrsave_save_offset)))
27957 rtx reg;
27959 reg = gen_rtx_REG (SImode, 12);
27960 emit_insn (gen_frame_load (reg, frame_reg_rtx,
27961 info->vrsave_save_offset + frame_off));
27963 emit_insn (generate_set_vrsave (reg, info, 1));
27966 /* If we exit by an out-of-line restore function on ABI_V4 then that
27967 function will deallocate the stack, so we don't need to worry
27968 about the unwinder restoring cr from an invalid stack frame
27969 location. */
27970 exit_func = (!restoring_FPRs_inline
27971 || (!restoring_GPRs_inline
27972 && info->first_fp_reg_save == 64));
27974 /* In the ELFv2 ABI we need to restore all call-saved CR fields from
27975 *separate* slots if the routine calls __builtin_eh_return, so
27976 that they can be independently restored by the unwinder. */
27977 if (DEFAULT_ABI == ABI_ELFv2 && crtl->calls_eh_return)
27979 int i, cr_off = info->ehcr_offset;
27981 for (i = 0; i < 8; i++)
27982 if (!call_used_regs[CR0_REGNO + i])
27984 rtx reg = gen_rtx_REG (SImode, 0);
27985 emit_insn (gen_frame_load (reg, frame_reg_rtx,
27986 cr_off + frame_off));
27988 insn = emit_insn (gen_movsi_to_cr_one
27989 (gen_rtx_REG (CCmode, CR0_REGNO + i), reg));
27991 if (!exit_func && flag_shrink_wrap)
27993 add_reg_note (insn, REG_CFA_RESTORE,
27994 gen_rtx_REG (SImode, CR0_REGNO + i));
27996 RTX_FRAME_RELATED_P (insn) = 1;
27999 cr_off += reg_size;
28003 /* Get the old lr if we saved it. If we are restoring registers
28004 out-of-line, then the out-of-line routines can do this for us. */
28005 if (restore_lr && restoring_GPRs_inline)
28006 load_lr_save (0, frame_reg_rtx, info->lr_save_offset + frame_off);
28008 /* Get the old cr if we saved it. */
28009 if (info->cr_save_p)
28011 unsigned cr_save_regno = 12;
28013 if (!restoring_GPRs_inline)
28015 /* Ensure we don't use the register used by the out-of-line
28016 gpr register restore below. */
28017 bool lr = info->gp_save_offset + info->gp_size == 0;
28018 int sel = SAVRES_GPR | (lr ? SAVRES_LR : 0);
28019 int gpr_ptr_regno = ptr_regno_for_savres (sel);
28021 if (gpr_ptr_regno == 12)
28022 cr_save_regno = 11;
28023 gcc_checking_assert (REGNO (frame_reg_rtx) != cr_save_regno);
28025 else if (REGNO (frame_reg_rtx) == 12)
28026 cr_save_regno = 11;
28028 cr_save_reg = load_cr_save (cr_save_regno, frame_reg_rtx,
28029 info->cr_save_offset + frame_off,
28030 exit_func);
28033 /* Set LR here to try to overlap restores below. */
28034 if (restore_lr && restoring_GPRs_inline)
28035 restore_saved_lr (0, exit_func);
28037 /* Load exception handler data registers, if needed. */
28038 if (crtl->calls_eh_return)
28040 unsigned int i, regno;
28042 if (TARGET_AIX)
28044 rtx reg = gen_rtx_REG (reg_mode, 2);
28045 emit_insn (gen_frame_load (reg, frame_reg_rtx,
28046 frame_off + RS6000_TOC_SAVE_SLOT));
28049 for (i = 0; ; ++i)
28051 rtx mem;
28053 regno = EH_RETURN_DATA_REGNO (i);
28054 if (regno == INVALID_REGNUM)
28055 break;
28057 mem = gen_frame_mem_offset (reg_mode, frame_reg_rtx,
28058 info->ehrd_offset + frame_off
28059 + reg_size * (int) i);
28061 emit_move_insn (gen_rtx_REG (reg_mode, regno), mem);
28065 /* Restore GPRs. This is done as a PARALLEL if we are using
28066 the load-multiple instructions. */
28067 if (!restoring_GPRs_inline)
28069 /* We are jumping to an out-of-line function. */
28070 rtx ptr_reg;
28071 int end_save = info->gp_save_offset + info->gp_size;
28072 bool can_use_exit = end_save == 0;
28073 int sel = SAVRES_GPR | (can_use_exit ? SAVRES_LR : 0);
28074 int ptr_off;
28076 /* Emit stack reset code if we need it. */
28077 ptr_regno = ptr_regno_for_savres (sel);
28078 ptr_reg = gen_rtx_REG (Pmode, ptr_regno);
28079 if (can_use_exit)
28080 rs6000_emit_stack_reset (frame_reg_rtx, frame_off, ptr_regno);
28081 else if (end_save + frame_off != 0)
28082 emit_insn (gen_add3_insn (ptr_reg, frame_reg_rtx,
28083 GEN_INT (end_save + frame_off)));
28084 else if (REGNO (frame_reg_rtx) != ptr_regno)
28085 emit_move_insn (ptr_reg, frame_reg_rtx);
28086 if (REGNO (frame_reg_rtx) == ptr_regno)
28087 frame_off = -end_save;
28089 if (can_use_exit && info->cr_save_p)
28090 restore_saved_cr (cr_save_reg, using_mtcr_multiple, true);
28092 ptr_off = -end_save;
28093 rs6000_emit_savres_rtx (info, ptr_reg,
28094 info->gp_save_offset + ptr_off,
28095 info->lr_save_offset + ptr_off,
28096 reg_mode, sel);
28098 else if (using_load_multiple)
28100 rtvec p;
28101 p = rtvec_alloc (32 - info->first_gp_reg_save);
28102 for (i = 0; i < 32 - info->first_gp_reg_save; i++)
28103 RTVEC_ELT (p, i)
28104 = gen_frame_load (gen_rtx_REG (reg_mode, info->first_gp_reg_save + i),
28105 frame_reg_rtx,
28106 info->gp_save_offset + frame_off + reg_size * i);
28107 emit_insn (gen_rtx_PARALLEL (VOIDmode, p));
28109 else
28111 int offset = info->gp_save_offset + frame_off;
28112 for (i = info->first_gp_reg_save; i < 32; i++)
28114 if (save_reg_p (i)
28115 && !cfun->machine->gpr_is_wrapped_separately[i])
28117 rtx reg = gen_rtx_REG (reg_mode, i);
28118 emit_insn (gen_frame_load (reg, frame_reg_rtx, offset));
28121 offset += reg_size;
28125 if (DEFAULT_ABI == ABI_V4 || flag_shrink_wrap)
28127 /* If the frame pointer was used then we can't delay emitting
28128 a REG_CFA_DEF_CFA note. This must happen on the insn that
28129 restores the frame pointer, r31. We may have already emitted
28130 a REG_CFA_DEF_CFA note, but that's OK; a duplicate is
28131 discarded by dwarf2cfi.c/dwarf2out.c, and in any case would
28132 be harmless if emitted. */
28133 if (frame_pointer_needed)
28135 insn = get_last_insn ();
28136 add_reg_note (insn, REG_CFA_DEF_CFA,
28137 plus_constant (Pmode, frame_reg_rtx, frame_off));
28138 RTX_FRAME_RELATED_P (insn) = 1;
28141 /* Set up cfa_restores. We always need these when
28142 shrink-wrapping. If not shrink-wrapping then we only need
28143 the cfa_restore when the stack location is no longer valid.
28144 The cfa_restores must be emitted on or before the insn that
28145 invalidates the stack, and of course must not be emitted
28146 before the insn that actually does the restore. The latter
28147 is why it is a bad idea to emit the cfa_restores as a group
28148 on the last instruction here that actually does a restore:
28149 that insn may be reordered with respect to others doing
28150 restores. */
28151 if (flag_shrink_wrap
28152 && !restoring_GPRs_inline
28153 && info->first_fp_reg_save == 64)
28154 cfa_restores = add_crlr_cfa_restore (info, cfa_restores);
28156 for (i = info->first_gp_reg_save; i < 32; i++)
28157 if (save_reg_p (i)
28158 && !cfun->machine->gpr_is_wrapped_separately[i])
28160 rtx reg = gen_rtx_REG (reg_mode, i);
28161 cfa_restores = alloc_reg_note (REG_CFA_RESTORE, reg, cfa_restores);
28165 if (!restoring_GPRs_inline
28166 && info->first_fp_reg_save == 64)
28168 /* We are jumping to an out-of-line function. */
28169 if (cfa_restores)
28170 emit_cfa_restores (cfa_restores);
28171 return;
28174 if (restore_lr && !restoring_GPRs_inline)
28176 load_lr_save (0, frame_reg_rtx, info->lr_save_offset + frame_off);
28177 restore_saved_lr (0, exit_func);
28180 /* Restore fpr's if we need to do it without calling a function. */
28181 if (restoring_FPRs_inline)
28183 int offset = info->fp_save_offset + frame_off;
28184 for (i = info->first_fp_reg_save; i < 64; i++)
28186 if (save_reg_p (i)
28187 && !cfun->machine->fpr_is_wrapped_separately[i - 32])
28189 rtx reg = gen_rtx_REG (fp_reg_mode, i);
28190 emit_insn (gen_frame_load (reg, frame_reg_rtx, offset));
28191 if (DEFAULT_ABI == ABI_V4 || flag_shrink_wrap)
28192 cfa_restores = alloc_reg_note (REG_CFA_RESTORE, reg,
28193 cfa_restores);
28196 offset += fp_reg_size;
28200 /* If we saved cr, restore it here. Just those that were used. */
28201 if (info->cr_save_p)
28202 restore_saved_cr (cr_save_reg, using_mtcr_multiple, exit_func);
28204 /* If this is V.4, unwind the stack pointer after all of the loads
28205 have been done, or set up r11 if we are restoring fp out of line. */
28206 ptr_regno = 1;
28207 if (!restoring_FPRs_inline)
28209 bool lr = (strategy & REST_NOINLINE_FPRS_DOESNT_RESTORE_LR) == 0;
28210 int sel = SAVRES_FPR | (lr ? SAVRES_LR : 0);
28211 ptr_regno = ptr_regno_for_savres (sel);
28214 insn = rs6000_emit_stack_reset (frame_reg_rtx, frame_off, ptr_regno);
28215 if (REGNO (frame_reg_rtx) == ptr_regno)
28216 frame_off = 0;
28218 if (insn && restoring_FPRs_inline)
28220 if (cfa_restores)
28222 REG_NOTES (insn) = cfa_restores;
28223 cfa_restores = NULL_RTX;
28225 add_reg_note (insn, REG_CFA_DEF_CFA, sp_reg_rtx);
28226 RTX_FRAME_RELATED_P (insn) = 1;
28229 if (crtl->calls_eh_return)
28231 rtx sa = EH_RETURN_STACKADJ_RTX;
28232 emit_insn (gen_add3_insn (sp_reg_rtx, sp_reg_rtx, sa));
28235 if (!sibcall && restoring_FPRs_inline)
28237 if (cfa_restores)
28239 /* We can't hang the cfa_restores off a simple return,
28240 since the shrink-wrap code sometimes uses an existing
28241 return. This means there might be a path from
28242 pre-prologue code to this return, and dwarf2cfi code
28243 wants the eh_frame unwinder state to be the same on
28244 all paths to any point. So we need to emit the
28245 cfa_restores before the return. For -m64 we really
28246 don't need epilogue cfa_restores at all, except for
28247 this irritating dwarf2cfi with shrink-wrap
28248 requirement; the stack red zone means eh_frame info
28249 from the prologue telling the unwinder to restore
28250 from the stack is perfectly good right to the end of
28251 the function. */
28252 emit_insn (gen_blockage ());
28253 emit_cfa_restores (cfa_restores);
28254 cfa_restores = NULL_RTX;
28257 emit_jump_insn (targetm.gen_simple_return ());
28260 if (!sibcall && !restoring_FPRs_inline)
28262 bool lr = (strategy & REST_NOINLINE_FPRS_DOESNT_RESTORE_LR) == 0;
28263 rtvec p = rtvec_alloc (3 + !!lr + 64 - info->first_fp_reg_save);
28264 int elt = 0;
28265 RTVEC_ELT (p, elt++) = ret_rtx;
28266 if (lr)
28267 RTVEC_ELT (p, elt++)
28268 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (Pmode, LR_REGNO));
28270 /* We have to restore more than two FP registers, so branch to the
28271 restore function. It will return to our caller. */
28272 int i;
28273 int reg;
28274 rtx sym;
28276 if (flag_shrink_wrap)
28277 cfa_restores = add_crlr_cfa_restore (info, cfa_restores);
28279 sym = rs6000_savres_routine_sym (info, SAVRES_FPR | (lr ? SAVRES_LR : 0));
28280 RTVEC_ELT (p, elt++) = gen_rtx_USE (VOIDmode, sym);
28281 reg = (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)? 1 : 11;
28282 RTVEC_ELT (p, elt++) = gen_rtx_USE (VOIDmode, gen_rtx_REG (Pmode, reg));
28284 for (i = 0; i < 64 - info->first_fp_reg_save; i++)
28286 rtx reg = gen_rtx_REG (DFmode, info->first_fp_reg_save + i);
28288 RTVEC_ELT (p, elt++)
28289 = gen_frame_load (reg, sp_reg_rtx, info->fp_save_offset + 8 * i);
28290 if (flag_shrink_wrap
28291 && save_reg_p (info->first_fp_reg_save + i))
28292 cfa_restores = alloc_reg_note (REG_CFA_RESTORE, reg, cfa_restores);
28295 emit_jump_insn (gen_rtx_PARALLEL (VOIDmode, p));
28298 if (cfa_restores)
28300 if (sibcall)
28301 /* Ensure the cfa_restores are hung off an insn that won't
28302 be reordered above other restores. */
28303 emit_insn (gen_blockage ());
28305 emit_cfa_restores (cfa_restores);
28309 /* Write function epilogue. */
28311 static void
28312 rs6000_output_function_epilogue (FILE *file)
28314 #if TARGET_MACHO
28315 macho_branch_islands ();
28318 rtx_insn *insn = get_last_insn ();
28319 rtx_insn *deleted_debug_label = NULL;
28321 /* Mach-O doesn't support labels at the end of objects, so if
28322 it looks like we might want one, take special action.
28324 First, collect any sequence of deleted debug labels. */
28325 while (insn
28326 && NOTE_P (insn)
28327 && NOTE_KIND (insn) != NOTE_INSN_DELETED_LABEL)
28329 /* For NOTE_INSN_DELETED_DEBUG_LABEL notes, don't insert a nop;
28330 instead set their CODE_LABEL_NUMBER to -1, since otherwise there
28331 would be code generation differences between -g and -g0. */
28333 if (NOTE_P (insn) && NOTE_KIND (insn) == NOTE_INSN_DELETED_DEBUG_LABEL)
28334 deleted_debug_label = insn;
28335 insn = PREV_INSN (insn);
28338 /* Second, if we have:
28339 label:
28340 barrier
28341 then this needs to be detected, so skip past the barrier. */
28343 if (insn && BARRIER_P (insn))
28344 insn = PREV_INSN (insn);
28346 /* Up to now we've only seen notes or barriers. */
28347 if (insn)
28349 if (LABEL_P (insn)
28350 || (NOTE_P (insn)
28351 && NOTE_KIND (insn) == NOTE_INSN_DELETED_LABEL))
28352 /* Trailing label: <barrier>. */
28353 fputs ("\tnop\n", file);
28354 else
28356 /* Lastly, see if we have a completely empty function body. */
28357 while (insn && ! INSN_P (insn))
28358 insn = PREV_INSN (insn);
28359 /* If we don't find any insns, we've got an empty function body;
28360 i.e. completely empty, without a return or branch. This is
28361 taken as the case where a function body has been removed
28362 because it contains an inline __builtin_unreachable(). GCC
28363 states that reaching __builtin_unreachable() means UB so we're
28364 not obliged to do anything special; however, we want
28365 non-zero-sized function bodies. To meet this, and help the
28366 user out, let's trap the case. */
28367 if (insn == NULL)
28368 fputs ("\ttrap\n", file);
28371 else if (deleted_debug_label)
28372 for (insn = deleted_debug_label; insn; insn = NEXT_INSN (insn))
28373 if (NOTE_KIND (insn) == NOTE_INSN_DELETED_DEBUG_LABEL)
28374 CODE_LABEL_NUMBER (insn) = -1;
28376 #endif
28378 /* Output a traceback table here. See /usr/include/sys/debug.h for info
28379 on its format.
28381 We don't output a traceback table if -finhibit-size-directive was
28382 used. The documentation for -finhibit-size-directive reads
28383 ``don't output a @code{.size} assembler directive, or anything
28384 else that would cause trouble if the function is split in the
28385 middle, and the two halves are placed at locations far apart in
28386 memory.'' The traceback table has this property, since it
28387 includes the offset from the start of the function to the
28388 traceback table itself.
28390 System V.4 PowerPC (and the embedded ABI derived from it) uses a
28391 different traceback table. */
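/* Illustrative start of a traceback table as emitted below (field
values vary per function):
.long 0            # all-zero word marking the start of the tbtab
.byte 0,0,...      # format type 0, language code, then flag bytes
followed by the optional fields: parameter info word, offset from
the function start, function name length and name, and the alloca
register byte. */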
28392 if ((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
28393 && ! flag_inhibit_size_directive
28394 && rs6000_traceback != traceback_none && !cfun->is_thunk)
28396 const char *fname = NULL;
28397 const char *language_string = lang_hooks.name;
28398 int fixed_parms = 0, float_parms = 0, parm_info = 0;
28399 int i;
28400 int optional_tbtab;
28401 rs6000_stack_t *info = rs6000_stack_info ();
28403 if (rs6000_traceback == traceback_full)
28404 optional_tbtab = 1;
28405 else if (rs6000_traceback == traceback_part)
28406 optional_tbtab = 0;
28407 else
28408 optional_tbtab = !optimize_size && !TARGET_ELF;
28410 if (optional_tbtab)
28412 fname = XSTR (XEXP (DECL_RTL (current_function_decl), 0), 0);
28413 while (*fname == '.') /* V.4 encodes . in the name */
28414 fname++;
28416 /* Need label immediately before tbtab, so we can compute
28417 its offset from the function start. */
28418 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "LT");
28419 ASM_OUTPUT_LABEL (file, fname);
28422 /* The .tbtab pseudo-op can only be used for the first eight
28423 expressions, since it can't handle the possibly variable
28424 length fields that follow. However, if you omit the optional
28425 fields, the assembler outputs zeros for all optional fields
28426 anyway, giving each variable length field its minimum length
28427 (as defined in sys/debug.h). Thus we cannot use the .tbtab
28428 pseudo-op at all. */
28430 /* An all-zero word flags the start of the tbtab, for debuggers
28431 that have to find it by searching forward from the entry
28432 point or from the current pc. */
28433 fputs ("\t.long 0\n", file);
28435 /* Tbtab format type. Use format type 0. */
28436 fputs ("\t.byte 0,", file);
28438 /* Language type. Unfortunately, there does not seem to be any
28439 official way to discover the language being compiled, so we
28440 use language_string.
28441 C is 0. Fortran is 1. Pascal is 2. Ada is 3. C++ is 9.
28442 Java is 13. Objective-C is 14. Objective-C++ isn't assigned
28443 a number, so for now use 9. LTO, Go and JIT aren't assigned numbers
28444 either, so for now use 0. */
28445 if (lang_GNU_C ()
28446 || ! strcmp (language_string, "GNU GIMPLE")
28447 || ! strcmp (language_string, "GNU Go")
28448 || ! strcmp (language_string, "libgccjit"))
28449 i = 0;
28450 else if (! strcmp (language_string, "GNU F77")
28451 || lang_GNU_Fortran ())
28452 i = 1;
28453 else if (! strcmp (language_string, "GNU Pascal"))
28454 i = 2;
28455 else if (! strcmp (language_string, "GNU Ada"))
28456 i = 3;
28457 else if (lang_GNU_CXX ()
28458 || ! strcmp (language_string, "GNU Objective-C++"))
28459 i = 9;
28460 else if (! strcmp (language_string, "GNU Java"))
28461 i = 13;
28462 else if (! strcmp (language_string, "GNU Objective-C"))
28463 i = 14;
28464 else
28465 gcc_unreachable ();
28466 fprintf (file, "%d,", i);
28468 /* 8 single bit fields: global linkage (not set for C extern linkage,
28469 apparently a PL/I convention?), out-of-line epilogue/prologue, offset
28470 from start of procedure stored in tbtab, internal function, function
28471 has controlled storage, function has no toc, function uses fp,
28472 function logs/aborts fp operations. */
28473 /* Assume that fp operations are used if any fp reg must be saved. */
28474 fprintf (file, "%d,",
28475 (optional_tbtab << 5) | ((info->first_fp_reg_save != 64) << 1));
28477 /* 6 bitfields: function is interrupt handler, name present in
28478 proc table, function calls alloca, on condition directives
28479 (controls stack walks, 3 bits), saves condition reg, saves
28480 link reg. */
28481 /* The `function calls alloca' bit seems to be set whenever reg 31 is
28482 set up as a frame pointer, even when there is no alloca call. */
28483 fprintf (file, "%d,",
28484 ((optional_tbtab << 6)
28485 | ((optional_tbtab & frame_pointer_needed) << 5)
28486 | (info->cr_save_p << 1)
28487 | (info->lr_save_p)));
28489 /* 3 bitfields: saves backchain, fixup code, number of fpr saved
28490 (6 bits). */
28491 fprintf (file, "%d,",
28492 (info->push_p << 7) | (64 - info->first_fp_reg_save));
28494 /* 2 bitfields: spare bits (2 bits), number of gpr saved (6 bits). */
28495 fprintf (file, "%d,", (32 - first_reg_to_save ()));
28497 if (optional_tbtab)
28499 /* Compute the parameter info from the function decl argument
28500 list. */
28501 tree decl;
28502 int next_parm_info_bit = 31;
28504 for (decl = DECL_ARGUMENTS (current_function_decl);
28505 decl; decl = DECL_CHAIN (decl))
28507 rtx parameter = DECL_INCOMING_RTL (decl);
28508 machine_mode mode = GET_MODE (parameter);
28510 if (GET_CODE (parameter) == REG)
28512 if (SCALAR_FLOAT_MODE_P (mode))
28514 int bits;
28516 float_parms++;
28518 switch (mode)
28520 case E_SFmode:
28521 case E_SDmode:
28522 bits = 0x2;
28523 break;
28525 case E_DFmode:
28526 case E_DDmode:
28527 case E_TFmode:
28528 case E_TDmode:
28529 case E_IFmode:
28530 case E_KFmode:
28531 bits = 0x3;
28532 break;
28534 default:
28535 gcc_unreachable ();
28538 /* If only one bit will fit, don't or in this entry. */
28539 if (next_parm_info_bit > 0)
28540 parm_info |= (bits << (next_parm_info_bit - 1));
28541 next_parm_info_bit -= 2;
28543 else
28545 fixed_parms += ((GET_MODE_SIZE (mode)
28546 + (UNITS_PER_WORD - 1))
28547 / UNITS_PER_WORD);
28548 next_parm_info_bit -= 1;
28554 /* Number of fixed point parameters. */
28555 /* This is actually the number of words of fixed point parameters; thus
28556 an 8 byte struct counts as 2; and thus the maximum value is 8. */
28557 fprintf (file, "%d,", fixed_parms);
28559 /* 2 bitfields: number of floating point parameters (7 bits), parameters
28560 all on stack. */
28561 /* This is actually the number of fp registers that hold parameters;
28562 and thus the maximum value is 13. */
28563 /* Set parameters on stack bit if parameters are not in their original
28564 registers, regardless of whether they are on the stack? Xlc
28565 seems to set the bit when not optimizing. */
28566 fprintf (file, "%d\n", ((float_parms << 1) | (! optimize)));
28568 if (optional_tbtab)
28570 /* Optional fields follow. Some are variable length. */
28572 /* Parameter types, left adjusted bit fields: 0 fixed, 10 single
28573 float, 11 double float. */
28574 /* There is an entry for each parameter in a register, in the order
28575 that they occur in the parameter list. Any intervening arguments
28576 on the stack are ignored. If the list overflows a long (max
28577 possible length 34 bits) then completely leave off all elements
28578 that don't fit. */
28579 /* Only emit this long if there was at least one parameter. */
28580 if (fixed_parms || float_parms)
28581 fprintf (file, "\t.long %d\n", parm_info);
28583 /* Offset from start of code to tb table. */
28584 fputs ("\t.long ", file);
28585 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "LT");
28586 RS6000_OUTPUT_BASENAME (file, fname);
28587 putc ('-', file);
28588 rs6000_output_function_entry (file, fname);
28589 putc ('\n', file);
28591 /* Interrupt handler mask. */
28592 /* Omit this long, since we never set the interrupt handler bit
28593 above. */
28595 /* Number of CTL (controlled storage) anchors. */
28596 /* Omit this long, since the has_ctl bit is never set above. */
28598 /* Displacement into stack of each CTL anchor. */
28599 /* Omit this list of longs, because there are no CTL anchors. */
28601 /* Length of function name. */
28602 if (*fname == '*')
28603 ++fname;
28604 fprintf (file, "\t.short %d\n", (int) strlen (fname));
28606 /* Function name. */
28607 assemble_string (fname, strlen (fname));
28609 /* Register for alloca automatic storage; this is always reg 31.
28610 Only emit this if the alloca bit was set above. */
28611 if (frame_pointer_needed)
28612 fputs ("\t.byte 31\n", file);
28614 fputs ("\t.align 2\n", file);
28618 /* Arrange to define .LCTOC1 label, if not already done. */
28619 if (need_toc_init)
28621 need_toc_init = 0;
28622 if (!toc_initialized)
28624 switch_to_section (toc_section);
28625 switch_to_section (current_function_section ());
28630 /* -fsplit-stack support. */
28632 /* A SYMBOL_REF for __morestack. */
28633 static GTY(()) rtx morestack_ref;
28635 static rtx
28636 gen_add3_const (rtx rt, rtx ra, long c)
28638 if (TARGET_64BIT)
28639 return gen_adddi3 (rt, ra, GEN_INT (c));
28640 else
28641 return gen_addsi3 (rt, ra, GEN_INT (c));
28644 /* Emit -fsplit-stack prologue, which goes before the regular function
28645 prologue (at local entry point in the case of ELFv2). */
28647 void
28648 rs6000_expand_split_stack_prologue (void)
28650 rs6000_stack_t *info = rs6000_stack_info ();
28651 unsigned HOST_WIDE_INT allocate;
28652 long alloc_hi, alloc_lo;
28653 rtx r0, r1, r12, lr, ok_label, compare, jump, call_fusage;
28654 rtx_insn *insn;
28656 gcc_assert (flag_split_stack && reload_completed);
28658 if (!info->push_p)
28659 return;
28661 if (global_regs[29])
28663 error ("%qs uses register r29", "-fsplit-stack");
28664 inform (DECL_SOURCE_LOCATION (global_regs_decl[29]),
28665 "conflicts with %qD", global_regs_decl[29]);
28668 allocate = info->total_size;
28669 if (allocate > (unsigned HOST_WIDE_INT) 1 << 31)
28671 sorry ("Stack frame larger than 2G is not supported for -fsplit-stack");
28672 return;
28674 if (morestack_ref == NULL_RTX)
28676 morestack_ref = gen_rtx_SYMBOL_REF (Pmode, "__morestack");
28677 SYMBOL_REF_FLAGS (morestack_ref) |= (SYMBOL_FLAG_LOCAL
28678 | SYMBOL_FLAG_FUNCTION);
28681 r0 = gen_rtx_REG (Pmode, 0);
28682 r1 = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
28683 r12 = gen_rtx_REG (Pmode, 12);
28684 emit_insn (gen_load_split_stack_limit (r0));
28685 /* Always emit two insns here to calculate the requested stack,
28686 so that the linker can edit them when adjusting size for calling
28687 non-split-stack code. */
28688 alloc_hi = (-allocate + 0x8000) & ~0xffffL;
28689 alloc_lo = -allocate - alloc_hi;
28690 if (alloc_hi != 0)
28692 emit_insn (gen_add3_const (r12, r1, alloc_hi));
28693 if (alloc_lo != 0)
28694 emit_insn (gen_add3_const (r12, r12, alloc_lo));
28695 else
28696 emit_insn (gen_nop ());
28698 else
28700 emit_insn (gen_add3_const (r12, r1, alloc_lo));
28701 emit_insn (gen_nop ());
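/* Worked example (illustrative, assumed frame size): for
   allocate == 0x12345, alloc_hi == -0x10000 and alloc_lo == -0x2345,
   so the pair above becomes roughly "addis r12,r1,-1" followed by
   "addi r12,r12,-9029".  By construction alloc_lo always fits in
   the signed 16-bit immediate of the second add.  */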
28704 compare = gen_rtx_REG (CCUNSmode, CR7_REGNO);
28705 emit_insn (gen_rtx_SET (compare, gen_rtx_COMPARE (CCUNSmode, r12, r0)));
28706 ok_label = gen_label_rtx ();
28707 jump = gen_rtx_IF_THEN_ELSE (VOIDmode,
28708 gen_rtx_GEU (VOIDmode, compare, const0_rtx),
28709 gen_rtx_LABEL_REF (VOIDmode, ok_label),
28710 pc_rtx);
28711 insn = emit_jump_insn (gen_rtx_SET (pc_rtx, jump));
28712 JUMP_LABEL (insn) = ok_label;
28713 /* Mark the jump as very likely to be taken. */
28714 add_reg_br_prob_note (insn, profile_probability::very_likely ());
28716 lr = gen_rtx_REG (Pmode, LR_REGNO);
28717 insn = emit_move_insn (r0, lr);
28718 RTX_FRAME_RELATED_P (insn) = 1;
28719 insn = emit_insn (gen_frame_store (r0, r1, info->lr_save_offset));
28720 RTX_FRAME_RELATED_P (insn) = 1;
28722 insn = emit_call_insn (gen_call (gen_rtx_MEM (SImode, morestack_ref),
28723 const0_rtx, const0_rtx));
28724 call_fusage = NULL_RTX;
28725 use_reg (&call_fusage, r12);
28726 /* Say the call uses r0, even though it doesn't, to stop regrename
28727 from twiddling with the insns saving lr, trashing args for cfun.
28728 The insns restoring lr are similarly protected by making
28729 split_stack_return use r0. */
28730 use_reg (&call_fusage, r0);
28731 add_function_usage_to (insn, call_fusage);
28732 /* Indicate that this function can't jump to non-local gotos. */
28733 make_reg_eh_region_note_nothrow_nononlocal (insn);
28734 emit_insn (gen_frame_load (r0, r1, info->lr_save_offset));
28735 insn = emit_move_insn (lr, r0);
28736 add_reg_note (insn, REG_CFA_RESTORE, lr);
28737 RTX_FRAME_RELATED_P (insn) = 1;
28738 emit_insn (gen_split_stack_return ());
28740 emit_label (ok_label);
28741 LABEL_NUSES (ok_label) = 1;
28744 /* Return the internal arg pointer used for function incoming
28745 arguments. When -fsplit-stack, the arg pointer is r12 so we need
28746 to copy it to a pseudo in order for it to be preserved over calls
28747 and suchlike. We'd really like to use a pseudo here for the
28748 internal arg pointer but data-flow analysis is not prepared to
28749 accept pseudos as live at the beginning of a function. */
28751 static rtx
28752 rs6000_internal_arg_pointer (void)
28754 if (flag_split_stack
28755 && (lookup_attribute ("no_split_stack", DECL_ATTRIBUTES (cfun->decl))
28756 == NULL))
28759 if (cfun->machine->split_stack_arg_pointer == NULL_RTX)
28761 rtx pat;
28763 cfun->machine->split_stack_arg_pointer = gen_reg_rtx (Pmode);
28764 REG_POINTER (cfun->machine->split_stack_arg_pointer) = 1;
28766 /* Put the pseudo initialization right after the note at the
28767 beginning of the function. */
28768 pat = gen_rtx_SET (cfun->machine->split_stack_arg_pointer,
28769 gen_rtx_REG (Pmode, 12));
28770 push_topmost_sequence ();
28771 emit_insn_after (pat, get_insns ());
28772 pop_topmost_sequence ();
28774 rtx ret = plus_constant (Pmode, cfun->machine->split_stack_arg_pointer,
28775 FIRST_PARM_OFFSET (current_function_decl));
28776 return copy_to_reg (ret);
28778 return virtual_incoming_args_rtx;
28781 /* We may have to tell the dataflow pass that the split stack prologue
28782 is initializing a register. */
28784 static void
28785 rs6000_live_on_entry (bitmap regs)
28787 if (flag_split_stack)
28788 bitmap_set_bit (regs, 12);
28791 /* Emit -fsplit-stack dynamic stack allocation space check. */
28793 void
28794 rs6000_split_stack_space_check (rtx size, rtx label)
28796 rtx sp = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
28797 rtx limit = gen_reg_rtx (Pmode);
28798 rtx requested = gen_reg_rtx (Pmode);
28799 rtx cmp = gen_reg_rtx (CCUNSmode);
28800 rtx jump;
28802 emit_insn (gen_load_split_stack_limit (limit));
28803 if (CONST_INT_P (size))
28804 emit_insn (gen_add3_insn (requested, sp, GEN_INT (-INTVAL (size))));
28805 else
28807 size = force_reg (Pmode, size);
28808 emit_move_insn (requested, gen_rtx_MINUS (Pmode, sp, size));
28810 emit_insn (gen_rtx_SET (cmp, gen_rtx_COMPARE (CCUNSmode, requested, limit)));
28811 jump = gen_rtx_IF_THEN_ELSE (VOIDmode,
28812 gen_rtx_GEU (VOIDmode, cmp, const0_rtx),
28813 gen_rtx_LABEL_REF (VOIDmode, label),
28814 pc_rtx);
28815 jump = emit_jump_insn (gen_rtx_SET (pc_rtx, jump));
28816 JUMP_LABEL (jump) = label;
28819 /* A C compound statement that outputs the assembler code for a thunk
28820 function, used to implement C++ virtual function calls with
28821 multiple inheritance. The thunk acts as a wrapper around a virtual
28822 function, adjusting the implicit object parameter before handing
28823 control off to the real function.
28825 First, emit code to add the integer DELTA to the location that
28826 contains the incoming first argument. Assume that this argument
28827 contains a pointer, and is the one used to pass the `this' pointer
28828 in C++. This is the incoming argument *before* the function
28829 prologue, e.g. `%o0' on a sparc. The addition must preserve the
28830 values of all other incoming arguments.
28832 After the addition, emit code to jump to FUNCTION, which is a
28833 `FUNCTION_DECL'. This is a direct pure jump, not a call, and does
28834 not touch the return address. Hence returning from FUNCTION will
28835 return to whoever called the current `thunk'.
28837 The effect must be as if FUNCTION had been called directly with the
28838 adjusted first argument. This macro is responsible for emitting
28839 all of the code for a thunk function; output_function_prologue()
28840 and output_function_epilogue() are not invoked.
28842 The THUNK_FNDECL is redundant. (DELTA and FUNCTION have already
28843 been extracted from it.) It might possibly be useful on some
28844 targets, but probably not.
28846 If you do not define this macro, the target-independent code in the
28847 C++ frontend will generate a less efficient heavyweight thunk that
28848 calls FUNCTION instead of jumping to it. The generic approach does
28849 not support varargs. */
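/* Illustrative C++ sketch (not from this file) of what the thunk
   accomplishes:

     struct A { virtual void f (); };
     struct B { virtual void g (); };
     struct C : A, B { void g (); };

   A call to g() through a B* that points into a C object passes a
   `this' aimed at the B subobject; the thunk applies DELTA (and, for
   virtual bases, an extra adjustment loaded via VCALL_OFFSET) so that
   C::g receives the pointer it expects, then tail-jumps to it.  */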
28851 static void
28852 rs6000_output_mi_thunk (FILE *file, tree thunk_fndecl ATTRIBUTE_UNUSED,
28853 HOST_WIDE_INT delta, HOST_WIDE_INT vcall_offset,
28854 tree function)
28856 rtx this_rtx, funexp;
28857 rtx_insn *insn;
28859 reload_completed = 1;
28860 epilogue_completed = 1;
28862 /* Mark the end of the (empty) prologue. */
28863 emit_note (NOTE_INSN_PROLOGUE_END);
28865 /* Find the "this" pointer. If the function returns a structure,
28866 the structure return pointer is in r3. */
28867 if (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function))
28868 this_rtx = gen_rtx_REG (Pmode, 4);
28869 else
28870 this_rtx = gen_rtx_REG (Pmode, 3);
28872 /* Apply the constant offset, if required. */
28873 if (delta)
28874 emit_insn (gen_add3_insn (this_rtx, this_rtx, GEN_INT (delta)));
28876 /* Apply the offset from the vtable, if required. */
28877 if (vcall_offset)
28879 rtx vcall_offset_rtx = GEN_INT (vcall_offset);
28880 rtx tmp = gen_rtx_REG (Pmode, 12);
28882 emit_move_insn (tmp, gen_rtx_MEM (Pmode, this_rtx));
28883 if (((unsigned HOST_WIDE_INT) vcall_offset) + 0x8000 >= 0x10000)
28885 emit_insn (gen_add3_insn (tmp, tmp, vcall_offset_rtx));
28886 emit_move_insn (tmp, gen_rtx_MEM (Pmode, tmp));
28888 else
28890 rtx loc = gen_rtx_PLUS (Pmode, tmp, vcall_offset_rtx);
28892 emit_move_insn (tmp, gen_rtx_MEM (Pmode, loc));
28894 emit_insn (gen_add3_insn (this_rtx, this_rtx, tmp));
28897 /* Generate a tail call to the target function. */
28898 if (!TREE_USED (function))
28900 assemble_external (function);
28901 TREE_USED (function) = 1;
28903 funexp = XEXP (DECL_RTL (function), 0);
28904 funexp = gen_rtx_MEM (FUNCTION_MODE, funexp);
28906 #if TARGET_MACHO
28907 if (MACHOPIC_INDIRECT)
28908 funexp = machopic_indirect_call_target (funexp);
28909 #endif
28911 /* gen_sibcall expects reload to convert scratch pseudo to LR so we must
28912 generate sibcall RTL explicitly. */
28913 insn = emit_call_insn (
28914 gen_rtx_PARALLEL (VOIDmode,
28915 gen_rtvec (3,
28916 gen_rtx_CALL (VOIDmode,
28917 funexp, const0_rtx),
28918 gen_rtx_USE (VOIDmode, const0_rtx),
28919 simple_return_rtx)));
28920 SIBLING_CALL_P (insn) = 1;
28921 emit_barrier ();
28923 /* Run just enough of rest_of_compilation to get the insns emitted.
28924 There's not really enough bulk here to make other passes such as
28925 instruction scheduling worthwhile. Note that use_thunk calls
28926 assemble_start_function and assemble_end_function. */
28927 insn = get_insns ();
28928 shorten_branches (insn);
28929 final_start_function (insn, file, 1);
28930 final (insn, file, 1);
28931 final_end_function ();
28933 reload_completed = 0;
28934 epilogue_completed = 0;
28937 /* A quick summary of the various types of 'constant-pool tables'
28938 under PowerPC:
28940 Target Flags Name One table per
28941 AIX (none) AIX TOC object file
28942 AIX -mfull-toc AIX TOC object file
28943 AIX -mminimal-toc AIX minimal TOC translation unit
28944 SVR4/EABI (none) SVR4 SDATA object file
28945 SVR4/EABI -fpic SVR4 pic object file
28946 SVR4/EABI -fPIC SVR4 PIC translation unit
28947 SVR4/EABI -mrelocatable EABI TOC function
28948 SVR4/EABI -maix AIX TOC object file
28949 SVR4/EABI -maix -mminimal-toc
28950 AIX minimal TOC translation unit
28952 Name Reg. Set by entries contains:
28953 made by addrs? fp? sum?
28955 AIX TOC 2 crt0 as Y option option
28956 AIX minimal TOC 30 prolog gcc Y Y option
28957 SVR4 SDATA 13 crt0 gcc N Y N
28958 SVR4 pic 30 prolog ld Y not yet N
28959 SVR4 PIC 30 prolog gcc Y option option
28960 EABI TOC 30 prolog gcc Y option option
28964 /* Hash functions for the hash table. */
28966 static unsigned
28967 rs6000_hash_constant (rtx k)
28969 enum rtx_code code = GET_CODE (k);
28970 machine_mode mode = GET_MODE (k);
28971 unsigned result = (code << 3) ^ mode;
28972 const char *format;
28973 int flen, fidx;
28975 format = GET_RTX_FORMAT (code);
28976 flen = strlen (format);
28977 fidx = 0;
28979 switch (code)
28981 case LABEL_REF:
28982 return result * 1231 + (unsigned) INSN_UID (XEXP (k, 0));
28984 case CONST_WIDE_INT:
28986 int i;
28987 flen = CONST_WIDE_INT_NUNITS (k);
28988 for (i = 0; i < flen; i++)
28989 result = result * 613 + CONST_WIDE_INT_ELT (k, i);
28990 return result;
28993 case CONST_DOUBLE:
28994 if (mode != VOIDmode)
28995 return real_hash (CONST_DOUBLE_REAL_VALUE (k)) * result;
28996 flen = 2;
28997 break;
28999 case CODE_LABEL:
29000 fidx = 3;
29001 break;
29003 default:
29004 break;
29007 for (; fidx < flen; fidx++)
29008 switch (format[fidx])
29010 case 's':
29012 unsigned i, len;
29013 const char *str = XSTR (k, fidx);
29014 len = strlen (str);
29015 result = result * 613 + len;
29016 for (i = 0; i < len; i++)
29017 result = result * 613 + (unsigned) str[i];
29018 break;
29020 case 'u':
29021 case 'e':
29022 result = result * 1231 + rs6000_hash_constant (XEXP (k, fidx));
29023 break;
29024 case 'i':
29025 case 'n':
29026 result = result * 613 + (unsigned) XINT (k, fidx);
29027 break;
29028 case 'w':
29029 if (sizeof (unsigned) >= sizeof (HOST_WIDE_INT))
29030 result = result * 613 + (unsigned) XWINT (k, fidx);
29031 else
29033 size_t i;
29034 for (i = 0; i < sizeof (HOST_WIDE_INT) / sizeof (unsigned); i++)
29035 result = result * 613 + (unsigned) (XWINT (k, fidx)
29036 >> CHAR_BIT * i);
29038 break;
29039 case '0':
29040 break;
29041 default:
29042 gcc_unreachable ();
29045 return result;
29048 hashval_t
29049 toc_hasher::hash (toc_hash_struct *thc)
29051 return rs6000_hash_constant (thc->key) ^ thc->key_mode;
29054 /* Compare H1 and H2 for equivalence. */
29056 bool
29057 toc_hasher::equal (toc_hash_struct *h1, toc_hash_struct *h2)
29059 rtx r1 = h1->key;
29060 rtx r2 = h2->key;
29062 if (h1->key_mode != h2->key_mode)
29063 return 0;
29065 return rtx_equal_p (r1, r2);
29068 /* These are the names given by the C++ front-end to vtables, and
29069 vtable-like objects. Ideally, this logic should not be here;
29070 instead, there should be some programmatic way of inquiring as
29071 to whether or not an object is a vtable. */
29073 #define VTABLE_NAME_P(NAME) \
29074 (strncmp ("_vt.", (NAME), strlen ("_vt.")) == 0 \
29075 || strncmp ("_ZTV", (NAME), strlen ("_ZTV")) == 0 \
29076 || strncmp ("_ZTT", (NAME), strlen ("_ZTT")) == 0 \
29077 || strncmp ("_ZTI", (NAME), strlen ("_ZTI")) == 0 \
29078 || strncmp ("_ZTC", (NAME), strlen ("_ZTC")) == 0)
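/* For reference: "_vt." is the old GNU v2 vtable prefix, while the
   Itanium C++ ABI mangles vtables as "_ZTV", VTTs as "_ZTT",
   typeinfo objects as "_ZTI" and construction vtables as "_ZTC";
   e.g. "_ZTV1A" names the vtable of class A.  */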
29080 #ifdef NO_DOLLAR_IN_LABEL
29081 /* Return a GGC-allocated character string translating dollar signs in
29082 input NAME to underscores. Used by XCOFF ASM_OUTPUT_LABELREF. */
29084 const char *
29085 rs6000_xcoff_strip_dollar (const char *name)
29087 char *strip, *p;
29088 const char *q;
29089 size_t len;
29091 q = (const char *) strchr (name, '$');
29093 if (q == 0 || q == name)
29094 return name;
29096 len = strlen (name);
29097 strip = XALLOCAVEC (char, len + 1);
29098 strcpy (strip, name);
29099 p = strip + (q - name);
29100 while (p)
29102 *p = '_';
29103 p = strchr (p + 1, '$');
29106 return ggc_alloc_string (strip, len);
29108 #endif
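/* Illustrative example for the function above: a label such as
   "f$g$h" comes back as "f_g_h", while a name that starts with '$'
   (or contains no '$' at all) is returned unchanged.  */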
29110 void
29111 rs6000_output_symbol_ref (FILE *file, rtx x)
29113 const char *name = XSTR (x, 0);
29115 /* Currently C++ toc references to vtables can be emitted before it
29116 is decided whether the vtable is public or private. If this is
29117 the case, then the linker will eventually complain that there is
29118 a reference to an unknown section. Thus, for vtables only,
29119 we emit the TOC reference to reference the identifier and not the
29120 symbol. */
29121 if (VTABLE_NAME_P (name))
29123 RS6000_OUTPUT_BASENAME (file, name);
29125 else
29126 assemble_name (file, name);
29129 /* Output a TOC entry. We derive the entry name from what is being
29130 written. */
29132 void
29133 output_toc (FILE *file, rtx x, int labelno, machine_mode mode)
29135 char buf[256];
29136 const char *name = buf;
29137 rtx base = x;
29138 HOST_WIDE_INT offset = 0;
29140 gcc_assert (!TARGET_NO_TOC);
29142 /* When the linker won't eliminate them, don't output duplicate
29143 TOC entries (this happens on AIX if there is any kind of TOC,
29144 and on SVR4 under -fPIC or -mrelocatable). Don't do this for
29145 CODE_LABELs. */
29146 if (TARGET_TOC && GET_CODE (x) != LABEL_REF)
29148 struct toc_hash_struct *h;
29150 /* Create toc_hash_table. This can't be done at TARGET_OPTION_OVERRIDE
29151 time because GGC is not initialized at that point. */
29152 if (toc_hash_table == NULL)
29153 toc_hash_table = hash_table<toc_hasher>::create_ggc (1021);
29155 h = ggc_alloc<toc_hash_struct> ();
29156 h->key = x;
29157 h->key_mode = mode;
29158 h->labelno = labelno;
29160 toc_hash_struct **found = toc_hash_table->find_slot (h, INSERT);
29161 if (*found == NULL)
29162 *found = h;
29163 else /* This is indeed a duplicate.
29164 Set this label equal to that label. */
29166 fputs ("\t.set ", file);
29167 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "LC");
29168 fprintf (file, "%d,", labelno);
29169 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "LC");
29170 fprintf (file, "%d\n", ((*found)->labelno));
29172 #ifdef HAVE_AS_TLS
29173 if (TARGET_XCOFF && GET_CODE (x) == SYMBOL_REF
29174 && (SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_GLOBAL_DYNAMIC
29175 || SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC))
29177 fputs ("\t.set ", file);
29178 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "LCM");
29179 fprintf (file, "%d,", labelno);
29180 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "LCM");
29181 fprintf (file, "%d\n", ((*found)->labelno));
29183 #endif
29184 return;
29188 /* If we're going to put a double constant in the TOC, make sure it's
29189 aligned properly when strict alignment is on. */
29190 if ((CONST_DOUBLE_P (x) || CONST_WIDE_INT_P (x))
29191 && STRICT_ALIGNMENT
29192 && GET_MODE_BITSIZE (mode) >= 64
29193 && ! (TARGET_NO_FP_IN_TOC && ! TARGET_MINIMAL_TOC)) {
29194 ASM_OUTPUT_ALIGN (file, 3);
29197 (*targetm.asm_out.internal_label) (file, "LC", labelno);
29199 /* Handle FP constants specially. Note that if we have a minimal
29200 TOC, things we put here aren't actually in the TOC, so we can allow
29201 FP constants. */
29202 if (GET_CODE (x) == CONST_DOUBLE
29203 && (GET_MODE (x) == TFmode || GET_MODE (x) == TDmode
29204 || GET_MODE (x) == IFmode || GET_MODE (x) == KFmode))
29206 long k[4];
29208 if (DECIMAL_FLOAT_MODE_P (GET_MODE (x)))
29209 REAL_VALUE_TO_TARGET_DECIMAL128 (*CONST_DOUBLE_REAL_VALUE (x), k);
29210 else
29211 REAL_VALUE_TO_TARGET_LONG_DOUBLE (*CONST_DOUBLE_REAL_VALUE (x), k);
29213 if (TARGET_64BIT)
29215 if (TARGET_ELF || TARGET_MINIMAL_TOC)
29216 fputs (DOUBLE_INT_ASM_OP, file);
29217 else
29218 fprintf (file, "\t.tc FT_%lx_%lx_%lx_%lx[TC],",
29219 k[0] & 0xffffffff, k[1] & 0xffffffff,
29220 k[2] & 0xffffffff, k[3] & 0xffffffff);
29221 fprintf (file, "0x%lx%08lx,0x%lx%08lx\n",
29222 k[WORDS_BIG_ENDIAN ? 0 : 1] & 0xffffffff,
29223 k[WORDS_BIG_ENDIAN ? 1 : 0] & 0xffffffff,
29224 k[WORDS_BIG_ENDIAN ? 2 : 3] & 0xffffffff,
29225 k[WORDS_BIG_ENDIAN ? 3 : 2] & 0xffffffff);
29226 return;
29228 else
29230 if (TARGET_ELF || TARGET_MINIMAL_TOC)
29231 fputs ("\t.long ", file);
29232 else
29233 fprintf (file, "\t.tc FT_%lx_%lx_%lx_%lx[TC],",
29234 k[0] & 0xffffffff, k[1] & 0xffffffff,
29235 k[2] & 0xffffffff, k[3] & 0xffffffff);
29236 fprintf (file, "0x%lx,0x%lx,0x%lx,0x%lx\n",
29237 k[0] & 0xffffffff, k[1] & 0xffffffff,
29238 k[2] & 0xffffffff, k[3] & 0xffffffff);
29239 return;
29242 else if (GET_CODE (x) == CONST_DOUBLE
29243 && (GET_MODE (x) == DFmode || GET_MODE (x) == DDmode))
29245 long k[2];
29247 if (DECIMAL_FLOAT_MODE_P (GET_MODE (x)))
29248 REAL_VALUE_TO_TARGET_DECIMAL64 (*CONST_DOUBLE_REAL_VALUE (x), k);
29249 else
29250 REAL_VALUE_TO_TARGET_DOUBLE (*CONST_DOUBLE_REAL_VALUE (x), k);
29252 if (TARGET_64BIT)
29254 if (TARGET_ELF || TARGET_MINIMAL_TOC)
29255 fputs (DOUBLE_INT_ASM_OP, file);
29256 else
29257 fprintf (file, "\t.tc FD_%lx_%lx[TC],",
29258 k[0] & 0xffffffff, k[1] & 0xffffffff);
29259 fprintf (file, "0x%lx%08lx\n",
29260 k[WORDS_BIG_ENDIAN ? 0 : 1] & 0xffffffff,
29261 k[WORDS_BIG_ENDIAN ? 1 : 0] & 0xffffffff);
29262 return;
29264 else
29266 if (TARGET_ELF || TARGET_MINIMAL_TOC)
29267 fputs ("\t.long ", file);
29268 else
29269 fprintf (file, "\t.tc FD_%lx_%lx[TC],",
29270 k[0] & 0xffffffff, k[1] & 0xffffffff);
29271 fprintf (file, "0x%lx,0x%lx\n",
29272 k[0] & 0xffffffff, k[1] & 0xffffffff);
29273 return;
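/* Illustrative example: for the DFmode constant 1.0 on 64-bit AIX
   (big-endian, full TOC), the branch above emits

       .tc FD_3ff00000_0[TC],0x3ff0000000000000

   since 1.0 encodes as k[0] = 0x3ff00000, k[1] = 0x0.  */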
29276 else if (GET_CODE (x) == CONST_DOUBLE
29277 && (GET_MODE (x) == SFmode || GET_MODE (x) == SDmode))
29279 long l;
29281 if (DECIMAL_FLOAT_MODE_P (GET_MODE (x)))
29282 REAL_VALUE_TO_TARGET_DECIMAL32 (*CONST_DOUBLE_REAL_VALUE (x), l);
29283 else
29284 REAL_VALUE_TO_TARGET_SINGLE (*CONST_DOUBLE_REAL_VALUE (x), l);
29286 if (TARGET_64BIT)
29288 if (TARGET_ELF || TARGET_MINIMAL_TOC)
29289 fputs (DOUBLE_INT_ASM_OP, file);
29290 else
29291 fprintf (file, "\t.tc FS_%lx[TC],", l & 0xffffffff);
29292 if (WORDS_BIG_ENDIAN)
29293 fprintf (file, "0x%lx00000000\n", l & 0xffffffff);
29294 else
29295 fprintf (file, "0x%lx\n", l & 0xffffffff);
29296 return;
29298 else
29300 if (TARGET_ELF || TARGET_MINIMAL_TOC)
29301 fputs ("\t.long ", file);
29302 else
29303 fprintf (file, "\t.tc FS_%lx[TC],", l & 0xffffffff);
29304 fprintf (file, "0x%lx\n", l & 0xffffffff);
29305 return;
29308 else if (GET_MODE (x) == VOIDmode && GET_CODE (x) == CONST_INT)
29310 unsigned HOST_WIDE_INT low;
29311 HOST_WIDE_INT high;
29313 low = INTVAL (x) & 0xffffffff;
29314 high = (HOST_WIDE_INT) INTVAL (x) >> 32;
29316 /* TOC entries are always Pmode-sized, so when big-endian
29317 smaller integer constants in the TOC need to be padded.
29318 (This is still a win over putting the constants in
29319 a separate constant pool, because then we'd have
29320 to have both a TOC entry _and_ the actual constant.)
29322 For a 32-bit target, CONST_INT values are loaded and shifted
29323 entirely within `low' and can be stored in one TOC entry. */
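/* Worked example (illustrative): for the DImode constant
   0x123456789 on a 64-bit big-endian target, low = 0x23456789 and
   high = 0x1, and the AIX full-TOC path below emits

       .tc ID_1_23456789[TC],0x123456789  */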
29325 /* It would be easy to make this work, but it doesn't now. */
29326 gcc_assert (!TARGET_64BIT || POINTER_SIZE >= GET_MODE_BITSIZE (mode));
29328 if (WORDS_BIG_ENDIAN && POINTER_SIZE > GET_MODE_BITSIZE (mode))
29330 low |= high << 32;
29331 low <<= POINTER_SIZE - GET_MODE_BITSIZE (mode);
29332 high = (HOST_WIDE_INT) low >> 32;
29333 low &= 0xffffffff;
29336 if (TARGET_64BIT)
29338 if (TARGET_ELF || TARGET_MINIMAL_TOC)
29339 fputs (DOUBLE_INT_ASM_OP, file);
29340 else
29341 fprintf (file, "\t.tc ID_%lx_%lx[TC],",
29342 (long) high & 0xffffffff, (long) low & 0xffffffff);
29343 fprintf (file, "0x%lx%08lx\n",
29344 (long) high & 0xffffffff, (long) low & 0xffffffff);
29345 return;
29347 else
29349 if (POINTER_SIZE < GET_MODE_BITSIZE (mode))
29351 if (TARGET_ELF || TARGET_MINIMAL_TOC)
29352 fputs ("\t.long ", file);
29353 else
29354 fprintf (file, "\t.tc ID_%lx_%lx[TC],",
29355 (long) high & 0xffffffff, (long) low & 0xffffffff);
29356 fprintf (file, "0x%lx,0x%lx\n",
29357 (long) high & 0xffffffff, (long) low & 0xffffffff);
29359 else
29361 if (TARGET_ELF || TARGET_MINIMAL_TOC)
29362 fputs ("\t.long ", file);
29363 else
29364 fprintf (file, "\t.tc IS_%lx[TC],", (long) low & 0xffffffff);
29365 fprintf (file, "0x%lx\n", (long) low & 0xffffffff);
29367 return;
29371 if (GET_CODE (x) == CONST)
29373 gcc_assert (GET_CODE (XEXP (x, 0)) == PLUS
29374 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT);
29376 base = XEXP (XEXP (x, 0), 0);
29377 offset = INTVAL (XEXP (XEXP (x, 0), 1));
29380 switch (GET_CODE (base))
29382 case SYMBOL_REF:
29383 name = XSTR (base, 0);
29384 break;
29386 case LABEL_REF:
29387 ASM_GENERATE_INTERNAL_LABEL (buf, "L",
29388 CODE_LABEL_NUMBER (XEXP (base, 0)));
29389 break;
29391 case CODE_LABEL:
29392 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (base));
29393 break;
29395 default:
29396 gcc_unreachable ();
29399 if (TARGET_ELF || TARGET_MINIMAL_TOC)
29400 fputs (TARGET_32BIT ? "\t.long " : DOUBLE_INT_ASM_OP, file);
29401 else
29403 fputs ("\t.tc ", file);
29404 RS6000_OUTPUT_BASENAME (file, name);
29406 if (offset < 0)
29407 fprintf (file, ".N" HOST_WIDE_INT_PRINT_UNSIGNED, - offset);
29408 else if (offset)
29409 fprintf (file, ".P" HOST_WIDE_INT_PRINT_UNSIGNED, offset);
29411 /* Mark large TOC symbols on AIX with [TE] so they are mapped
29412 after other TOC symbols, reducing overflow of small TOC access
29413 to [TC] symbols. */
29414 fputs (TARGET_XCOFF && TARGET_CMODEL != CMODEL_SMALL
29415 ? "[TE]," : "[TC],", file);
29418 /* Currently C++ toc references to vtables can be emitted before it
29419 is decided whether the vtable is public or private. If this is
29420 the case, then the linker will eventually complain that there is
29421 a TOC reference to an unknown section. Thus, for vtables only,
29422 we emit the TOC reference to reference the symbol and not the
29423 section. */
29424 if (VTABLE_NAME_P (name))
29426 RS6000_OUTPUT_BASENAME (file, name);
29427 if (offset < 0)
29428 fprintf (file, HOST_WIDE_INT_PRINT_DEC, offset);
29429 else if (offset > 0)
29430 fprintf (file, "+" HOST_WIDE_INT_PRINT_DEC, offset);
29432 else
29433 output_addr_const (file, x);
29435 #if HAVE_AS_TLS
29436 if (TARGET_XCOFF && GET_CODE (base) == SYMBOL_REF)
29438 switch (SYMBOL_REF_TLS_MODEL (base))
29440 case 0:
29441 break;
29442 case TLS_MODEL_LOCAL_EXEC:
29443 fputs ("@le", file);
29444 break;
29445 case TLS_MODEL_INITIAL_EXEC:
29446 fputs ("@ie", file);
29447 break;
29448 /* Use global-dynamic for local-dynamic. */
29449 case TLS_MODEL_GLOBAL_DYNAMIC:
29450 case TLS_MODEL_LOCAL_DYNAMIC:
29451 putc ('\n', file);
29452 (*targetm.asm_out.internal_label) (file, "LCM", labelno);
29453 fputs ("\t.tc .", file);
29454 RS6000_OUTPUT_BASENAME (file, name);
29455 fputs ("[TC],", file);
29456 output_addr_const (file, x);
29457 fputs ("@m", file);
29458 break;
29459 default:
29460 gcc_unreachable ();
29463 #endif
29465 putc ('\n', file);
29468 /* Output an assembler pseudo-op to write an ASCII string of N characters
29469 starting at P to FILE.
29471 On the RS/6000, we have to do this using the .byte operation and
29472 write out special characters outside the quoted string.
29473 Also, the assembler is broken; very long strings are truncated,
29474 so we must artificially break them up early. */
29476 void
29477 output_ascii (FILE *file, const char *p, int n)
29479 char c;
29480 int i, count_string;
29481 const char *for_string = "\t.byte \"";
29482 const char *for_decimal = "\t.byte ";
29483 const char *to_close = NULL;
29485 count_string = 0;
29486 for (i = 0; i < n; i++)
29488 c = *p++;
29489 if (c >= ' ' && c < 0177)
29491 if (for_string)
29492 fputs (for_string, file);
29493 putc (c, file);
29495 /* Write two quotes to get one. */
29496 if (c == '"')
29498 putc (c, file);
29499 ++count_string;
29502 for_string = NULL;
29503 for_decimal = "\"\n\t.byte ";
29504 to_close = "\"\n";
29505 ++count_string;
29507 if (count_string >= 512)
29509 fputs (to_close, file);
29511 for_string = "\t.byte \"";
29512 for_decimal = "\t.byte ";
29513 to_close = NULL;
29514 count_string = 0;
29517 else
29519 if (for_decimal)
29520 fputs (for_decimal, file);
29521 fprintf (file, "%d", c);
29523 for_string = "\n\t.byte \"";
29524 for_decimal = ", ";
29525 to_close = "\n";
29526 count_string = 0;
29530 /* Now close the string if we have written one. Then end the line. */
29531 if (to_close)
29532 fputs (to_close, file);
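/* Illustrative example: output_ascii (file, "Hi\n", 3) emits

	.byte "Hi"
	.byte 10
*/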
29535 /* Generate a unique section name for FILENAME for a section type
29536 represented by SECTION_DESC. Output goes into BUF.
29538 SECTION_DESC can be any string, as long as it is different for each
29539 possible section type.
29541 We name the section in the same manner as xlc. The name begins with an
29542 underscore followed by the filename (after stripping any leading directory
29543 names) with the last period replaced by the string SECTION_DESC. If
29544 FILENAME does not contain a period, SECTION_DESC is appended to the end of
29545 the name. */
29547 void
29548 rs6000_gen_section_name (char **buf, const char *filename,
29549 const char *section_desc)
29551 const char *q, *after_last_slash, *last_period = 0;
29552 char *p;
29553 int len;
29555 after_last_slash = filename;
29556 for (q = filename; *q; q++)
29558 if (*q == '/')
29559 after_last_slash = q + 1;
29560 else if (*q == '.')
29561 last_period = q;
29564 len = strlen (after_last_slash) + strlen (section_desc) + 2;
29565 *buf = (char *) xmalloc (len);
29567 p = *buf;
29568 *p++ = '_';
29570 for (q = after_last_slash; *q; q++)
29572 if (q == last_period)
29574 strcpy (p, section_desc);
29575 p += strlen (section_desc);
29576 break;
29579 else if (ISALNUM (*q))
29580 *p++ = *q;
29583 if (last_period == 0)
29584 strcpy (p, section_desc);
29585 else
29586 *p = '\0';
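/* Illustrative example: rs6000_gen_section_name (&buf, "src/foo.c",
   ".bss_") yields "_foo.bss_"; the directory prefix is dropped and
   the final period is replaced by the SECTION_DESC string.  */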
29589 /* Emit profile function. */
29591 void
29592 output_profile_hook (int labelno ATTRIBUTE_UNUSED)
29594 /* Non-standard profiling for kernels, which just saves LR then calls
29595 _mcount without worrying about arg saves. The idea is to change
29596 the function prologue as little as possible as it isn't easy to
29597 account for arg save/restore code added just for _mcount. */
29598 if (TARGET_PROFILE_KERNEL)
29599 return;
29601 if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
29603 #ifndef NO_PROFILE_COUNTERS
29604 # define NO_PROFILE_COUNTERS 0
29605 #endif
29606 if (NO_PROFILE_COUNTERS)
29607 emit_library_call (init_one_libfunc (RS6000_MCOUNT),
29608 LCT_NORMAL, VOIDmode);
29609 else
29611 char buf[30];
29612 const char *label_name;
29613 rtx fun;
29615 ASM_GENERATE_INTERNAL_LABEL (buf, "LP", labelno);
29616 label_name = ggc_strdup ((*targetm.strip_name_encoding) (buf));
29617 fun = gen_rtx_SYMBOL_REF (Pmode, label_name);
29619 emit_library_call (init_one_libfunc (RS6000_MCOUNT),
29620 LCT_NORMAL, VOIDmode, fun, Pmode);
29623 else if (DEFAULT_ABI == ABI_DARWIN)
29625 const char *mcount_name = RS6000_MCOUNT;
29626 int caller_addr_regno = LR_REGNO;
29628 /* Be conservative and always set this, at least for now. */
29629 crtl->uses_pic_offset_table = 1;
29631 #if TARGET_MACHO
29632 /* For PIC code, set up a stub and collect the caller's address
29633 from r0, which is where the prologue puts it. */
29634 if (MACHOPIC_INDIRECT
29635 && crtl->uses_pic_offset_table)
29636 caller_addr_regno = 0;
29637 #endif
29638 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, mcount_name),
29639 LCT_NORMAL, VOIDmode,
29640 gen_rtx_REG (Pmode, caller_addr_regno), Pmode);
29644 /* Write function profiler code. */
29646 void
29647 output_function_profiler (FILE *file, int labelno)
29649 char buf[100];
29651 switch (DEFAULT_ABI)
29653 default:
29654 gcc_unreachable ();
29656 case ABI_V4:
29657 if (!TARGET_32BIT)
29659 warning (0, "no profiling of 64-bit code for this ABI");
29660 return;
29662 ASM_GENERATE_INTERNAL_LABEL (buf, "LP", labelno);
29663 fprintf (file, "\tmflr %s\n", reg_names[0]);
29664 if (NO_PROFILE_COUNTERS)
29666 asm_fprintf (file, "\tstw %s,4(%s)\n",
29667 reg_names[0], reg_names[1]);
29669 else if (TARGET_SECURE_PLT && flag_pic)
29671 if (TARGET_LINK_STACK)
29673 char name[32];
29674 get_ppc476_thunk_name (name);
29675 asm_fprintf (file, "\tbl %s\n", name);
29677 else
29678 asm_fprintf (file, "\tbcl 20,31,1f\n1:\n");
29679 asm_fprintf (file, "\tstw %s,4(%s)\n",
29680 reg_names[0], reg_names[1]);
29681 asm_fprintf (file, "\tmflr %s\n", reg_names[12]);
29682 asm_fprintf (file, "\taddis %s,%s,",
29683 reg_names[12], reg_names[12]);
29684 assemble_name (file, buf);
29685 asm_fprintf (file, "-1b@ha\n\tla %s,", reg_names[0]);
29686 assemble_name (file, buf);
29687 asm_fprintf (file, "-1b@l(%s)\n", reg_names[12]);
29689 else if (flag_pic == 1)
29691 fputs ("\tbl _GLOBAL_OFFSET_TABLE_@local-4\n", file);
29692 asm_fprintf (file, "\tstw %s,4(%s)\n",
29693 reg_names[0], reg_names[1]);
29694 asm_fprintf (file, "\tmflr %s\n", reg_names[12]);
29695 asm_fprintf (file, "\tlwz %s,", reg_names[0]);
29696 assemble_name (file, buf);
29697 asm_fprintf (file, "@got(%s)\n", reg_names[12]);
29699 else if (flag_pic > 1)
29701 asm_fprintf (file, "\tstw %s,4(%s)\n",
29702 reg_names[0], reg_names[1]);
29703 /* Now, we need to get the address of the label. */
29704 if (TARGET_LINK_STACK)
29706 char name[32];
29707 get_ppc476_thunk_name (name);
29708 asm_fprintf (file, "\tbl %s\n\tb 1f\n\t.long ", name);
29709 assemble_name (file, buf);
29710 fputs ("-.\n1:", file);
29711 asm_fprintf (file, "\tmflr %s\n", reg_names[11]);
29712 asm_fprintf (file, "\taddi %s,%s,4\n",
29713 reg_names[11], reg_names[11]);
29715 else
29717 fputs ("\tbcl 20,31,1f\n\t.long ", file);
29718 assemble_name (file, buf);
29719 fputs ("-.\n1:", file);
29720 asm_fprintf (file, "\tmflr %s\n", reg_names[11]);
29722 asm_fprintf (file, "\tlwz %s,0(%s)\n",
29723 reg_names[0], reg_names[11]);
29724 asm_fprintf (file, "\tadd %s,%s,%s\n",
29725 reg_names[0], reg_names[0], reg_names[11]);
29727 else
29729 asm_fprintf (file, "\tlis %s,", reg_names[12]);
29730 assemble_name (file, buf);
29731 fputs ("@ha\n", file);
29732 asm_fprintf (file, "\tstw %s,4(%s)\n",
29733 reg_names[0], reg_names[1]);
29734 asm_fprintf (file, "\tla %s,", reg_names[0]);
29735 assemble_name (file, buf);
29736 asm_fprintf (file, "@l(%s)\n", reg_names[12]);
29739 /* ABI_V4 saves the static chain reg with ASM_OUTPUT_REG_PUSH. */
29740 fprintf (file, "\tbl %s%s\n",
29741 RS6000_MCOUNT, flag_pic ? "@plt" : "");
29742 break;
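/* Illustrative example (assumed non-PIC V4 output; the exact label
   name depends on the target headers): the sequence emitted above is
   roughly

	mflr 0
	lis 12,.LP0@ha
	stw 0,4(1)
	la 0,.LP0@l(12)
	bl _mcount  */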
29744 case ABI_AIX:
29745 case ABI_ELFv2:
29746 case ABI_DARWIN:
29747 /* Don't do anything, done in output_profile_hook (). */
29748 break;
29754 /* The following variable holds the last issued insn. */
29756 static rtx_insn *last_scheduled_insn;
29758 /* The following variable helps to balance issuing of load and
29759 store instructions. */
29761 static int load_store_pendulum;
29763 /* The following variable helps pair divide insns during scheduling. */
29764 static int divide_cnt;
29765 /* The following variable helps pair and alternate vector and vector load
29766 insns during scheduling. */
29767 static int vec_pairing;
29770 /* Power4 load update and store update instructions are cracked into a
29771 load or store and an integer insn which are executed in the same cycle.
29772 Branches have their own dispatch slot which does not count against the
29773 GCC issue rate, but it changes the program flow so there are no other
29774 instructions to issue in this cycle. */
29776 static int
29777 rs6000_variable_issue_1 (rtx_insn *insn, int more)
29779 last_scheduled_insn = insn;
29780 if (GET_CODE (PATTERN (insn)) == USE
29781 || GET_CODE (PATTERN (insn)) == CLOBBER)
29783 cached_can_issue_more = more;
29784 return cached_can_issue_more;
29787 if (insn_terminates_group_p (insn, current_group))
29789 cached_can_issue_more = 0;
29790 return cached_can_issue_more;
29793 /* If the insn is not recognized, it has no reservation; do not count it against the issue rate. */
29794 if (recog_memoized (insn) < 0)
29795 return more;
29797 if (rs6000_sched_groups)
29799 if (is_microcoded_insn (insn))
29800 cached_can_issue_more = 0;
29801 else if (is_cracked_insn (insn))
29802 cached_can_issue_more = more > 2 ? more - 2 : 0;
29803 else
29804 cached_can_issue_more = more - 1;
29806 return cached_can_issue_more;
29809 if (rs6000_tune == PROCESSOR_CELL && is_nonpipeline_insn (insn))
29810 return 0;
29812 cached_can_issue_more = more - 1;
29813 return cached_can_issue_more;
29816 static int
29817 rs6000_variable_issue (FILE *stream, int verbose, rtx_insn *insn, int more)
29819 int r = rs6000_variable_issue_1 (insn, more);
29820 if (verbose)
29821 fprintf (stream, "// rs6000_variable_issue (more = %d) = %d\n", more, r);
29822 return r;
29825 /* Adjust the cost of a scheduling dependency. Return the new cost of
29826 a dependency LINK or INSN on DEP_INSN. COST is the current cost. */
29828 static int
29829 rs6000_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep_insn, int cost,
29830 unsigned int)
29832 enum attr_type attr_type;
29834 if (recog_memoized (insn) < 0 || recog_memoized (dep_insn) < 0)
29835 return cost;
29837 switch (dep_type)
29839 case REG_DEP_TRUE:
29841 /* Data dependency; DEP_INSN writes a register that INSN reads
29842 some cycles later. */
29844 /* Separate a load from a narrower, dependent store. */
29845 if ((rs6000_sched_groups || rs6000_tune == PROCESSOR_POWER9)
29846 && GET_CODE (PATTERN (insn)) == SET
29847 && GET_CODE (PATTERN (dep_insn)) == SET
29848 && GET_CODE (XEXP (PATTERN (insn), 1)) == MEM
29849 && GET_CODE (XEXP (PATTERN (dep_insn), 0)) == MEM
29850 && (GET_MODE_SIZE (GET_MODE (XEXP (PATTERN (insn), 1)))
29851 > GET_MODE_SIZE (GET_MODE (XEXP (PATTERN (dep_insn), 0)))))
29852 return cost + 14;
29854 attr_type = get_attr_type (insn);
29856 switch (attr_type)
29858 case TYPE_JMPREG:
29859 /* Tell the first scheduling pass about the latency between
29860 a mtctr and bctr (and mtlr and br/blr). The first
29861 scheduling pass will not know about this latency since
29862 the mtctr instruction, which has the latency associated
29863 to it, will be generated by reload. */
29864 return 4;
29865 case TYPE_BRANCH:
29866 /* Leave some extra cycles between a compare and its
29867 dependent branch, to inhibit expensive mispredicts. */
29868 if ((rs6000_tune == PROCESSOR_PPC603
29869 || rs6000_tune == PROCESSOR_PPC604
29870 || rs6000_tune == PROCESSOR_PPC604e
29871 || rs6000_tune == PROCESSOR_PPC620
29872 || rs6000_tune == PROCESSOR_PPC630
29873 || rs6000_tune == PROCESSOR_PPC750
29874 || rs6000_tune == PROCESSOR_PPC7400
29875 || rs6000_tune == PROCESSOR_PPC7450
29876 || rs6000_tune == PROCESSOR_PPCE5500
29877 || rs6000_tune == PROCESSOR_PPCE6500
29878 || rs6000_tune == PROCESSOR_POWER4
29879 || rs6000_tune == PROCESSOR_POWER5
29880 || rs6000_tune == PROCESSOR_POWER7
29881 || rs6000_tune == PROCESSOR_POWER8
29882 || rs6000_tune == PROCESSOR_POWER9
29883 || rs6000_tune == PROCESSOR_CELL)
29884 && recog_memoized (dep_insn)
29885 && (INSN_CODE (dep_insn) >= 0))
29887 switch (get_attr_type (dep_insn))
29889 case TYPE_CMP:
29890 case TYPE_FPCOMPARE:
29891 case TYPE_CR_LOGICAL:
29892 return cost + 2;
29893 case TYPE_EXTS:
29894 case TYPE_MUL:
29895 if (get_attr_dot (dep_insn) == DOT_YES)
29896 return cost + 2;
29897 else
29898 break;
29899 case TYPE_SHIFT:
29900 if (get_attr_dot (dep_insn) == DOT_YES
29901 && get_attr_var_shift (dep_insn) == VAR_SHIFT_NO)
29902 return cost + 2;
29903 else
29904 break;
29905 default:
29906 break;
29908 break;
29910 case TYPE_STORE:
29911 case TYPE_FPSTORE:
29912 if ((rs6000_tune == PROCESSOR_POWER6)
29913 && recog_memoized (dep_insn)
29914 && (INSN_CODE (dep_insn) >= 0))
29917 if (GET_CODE (PATTERN (insn)) != SET)
29918 /* If this happens, we have to extend this to schedule
29919 optimally. Return default for now. */
29920 return cost;
29922 /* Adjust the cost for the case where the value written
29923 by a fixed point operation is used as the address
29924 gen value on a store. */
29925 switch (get_attr_type (dep_insn))
29927 case TYPE_LOAD:
29928 case TYPE_CNTLZ:
29930 if (! rs6000_store_data_bypass_p (dep_insn, insn))
29931 return get_attr_sign_extend (dep_insn)
29932 == SIGN_EXTEND_YES ? 6 : 4;
29933 break;
29935 case TYPE_SHIFT:
29937 if (! rs6000_store_data_bypass_p (dep_insn, insn))
29938 return get_attr_var_shift (dep_insn) == VAR_SHIFT_YES ?
29939 6 : 3;
29940 break;
29942 case TYPE_INTEGER:
29943 case TYPE_ADD:
29944 case TYPE_LOGICAL:
29945 case TYPE_EXTS:
29946 case TYPE_INSERT:
29948 if (! rs6000_store_data_bypass_p (dep_insn, insn))
29949 return 3;
29950 break;
29952 case TYPE_STORE:
29953 case TYPE_FPLOAD:
29954 case TYPE_FPSTORE:
29956 if (get_attr_update (dep_insn) == UPDATE_YES
29957 && ! rs6000_store_data_bypass_p (dep_insn, insn))
29958 return 3;
29959 break;
29961 case TYPE_MUL:
29963 if (! rs6000_store_data_bypass_p (dep_insn, insn))
29964 return 17;
29965 break;
29967 case TYPE_DIV:
29969 if (! rs6000_store_data_bypass_p (dep_insn, insn))
29970 return get_attr_size (dep_insn) == SIZE_32 ? 45 : 57;
29971 break;
29973 default:
29974 break;
29977 break;
29979 case TYPE_LOAD:
29980 if ((rs6000_tune == PROCESSOR_POWER6)
29981 && recog_memoized (dep_insn)
29982 && (INSN_CODE (dep_insn) >= 0))
29985 /* Adjust the cost for the case where the value written
29986 by a fixed point instruction is used within the address
29987 gen portion of a subsequent load(u)(x). */
29988 switch (get_attr_type (dep_insn))
29990 case TYPE_LOAD:
29991 case TYPE_CNTLZ:
29993 if (set_to_load_agen (dep_insn, insn))
29994 return get_attr_sign_extend (dep_insn)
29995 == SIGN_EXTEND_YES ? 6 : 4;
29996 break;
29998 case TYPE_SHIFT:
30000 if (set_to_load_agen (dep_insn, insn))
30001 return get_attr_var_shift (dep_insn) == VAR_SHIFT_YES ?
30002 6 : 3;
30003 break;
30005 case TYPE_INTEGER:
30006 case TYPE_ADD:
30007 case TYPE_LOGICAL:
30008 case TYPE_EXTS:
30009 case TYPE_INSERT:
30011 if (set_to_load_agen (dep_insn, insn))
30012 return 3;
30013 break;
30015 case TYPE_STORE:
30016 case TYPE_FPLOAD:
30017 case TYPE_FPSTORE:
30019 if (get_attr_update (dep_insn) == UPDATE_YES
30020 && set_to_load_agen (dep_insn, insn))
30021 return 3;
30022 break;
30024 case TYPE_MUL:
30026 if (set_to_load_agen (dep_insn, insn))
30027 return 17;
30028 break;
30030 case TYPE_DIV:
30032 if (set_to_load_agen (dep_insn, insn))
30033 return get_attr_size (dep_insn) == SIZE_32 ? 45 : 57;
30034 break;
30036 default:
30037 break;
30040 break;
30042 case TYPE_FPLOAD:
30043 if ((rs6000_tune == PROCESSOR_POWER6)
30044 && get_attr_update (insn) == UPDATE_NO
30045 && recog_memoized (dep_insn)
30046 && (INSN_CODE (dep_insn) >= 0)
30047 && (get_attr_type (dep_insn) == TYPE_MFFGPR))
30048 return 2;
30050 default:
30051 break;
30054 /* Fall out to return default cost. */
30056 break;
30058 case REG_DEP_OUTPUT:
30059 /* Output dependency; DEP_INSN writes a register that INSN writes some
30060 cycles later. */
30061 if ((rs6000_tune == PROCESSOR_POWER6)
30062 && recog_memoized (dep_insn)
30063 && (INSN_CODE (dep_insn) >= 0))
30065 attr_type = get_attr_type (insn);
30067 switch (attr_type)
30069 case TYPE_FP:
30070 case TYPE_FPSIMPLE:
30071 if (get_attr_type (dep_insn) == TYPE_FP
30072 || get_attr_type (dep_insn) == TYPE_FPSIMPLE)
30073 return 1;
30074 break;
30075 case TYPE_FPLOAD:
30076 if (get_attr_update (insn) == UPDATE_NO
30077 && get_attr_type (dep_insn) == TYPE_MFFGPR)
30078 return 2;
30079 break;
30080 default:
30081 break;
30084 /* Fall through, no cost for output dependency. */
30085 /* FALLTHRU */
30087 case REG_DEP_ANTI:
30088 /* Anti dependency; DEP_INSN reads a register that INSN writes some
30089 cycles later. */
30090 return 0;
30092 default:
30093 gcc_unreachable ();
30096 return cost;
30099 /* Debug version of rs6000_adjust_cost. */
30101 static int
30102 rs6000_debug_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep_insn,
30103 int cost, unsigned int dw)
30105 int ret = rs6000_adjust_cost (insn, dep_type, dep_insn, cost, dw);
30107 if (ret != cost)
30109 const char *dep;
30111 switch (dep_type)
30113 default: dep = "unknown dependency"; break;
30114 case REG_DEP_TRUE: dep = "data dependency"; break;
30115 case REG_DEP_OUTPUT: dep = "output dependency"; break;
30116 case REG_DEP_ANTI: dep = "anti dependency"; break;
30119 fprintf (stderr,
30120 "\nrs6000_adjust_cost, final cost = %d, orig cost = %d, "
30121 "%s, insn:\n", ret, cost, dep);
30123 debug_rtx (insn);
30126 return ret;
30129 /* The function returns true if INSN is microcoded.
30130 Return false otherwise. */
30132 static bool
30133 is_microcoded_insn (rtx_insn *insn)
30135 if (!insn || !NONDEBUG_INSN_P (insn)
30136 || GET_CODE (PATTERN (insn)) == USE
30137 || GET_CODE (PATTERN (insn)) == CLOBBER)
30138 return false;
30140 if (rs6000_tune == PROCESSOR_CELL)
30141 return get_attr_cell_micro (insn) == CELL_MICRO_ALWAYS;
30143 if (rs6000_sched_groups
30144 && (rs6000_tune == PROCESSOR_POWER4 || rs6000_tune == PROCESSOR_POWER5))
30146 enum attr_type type = get_attr_type (insn);
30147 if ((type == TYPE_LOAD
30148 && get_attr_update (insn) == UPDATE_YES
30149 && get_attr_sign_extend (insn) == SIGN_EXTEND_YES)
30150 || ((type == TYPE_LOAD || type == TYPE_STORE)
30151 && get_attr_update (insn) == UPDATE_YES
30152 && get_attr_indexed (insn) == INDEXED_YES)
30153 || type == TYPE_MFCR)
30154 return true;
30157 return false;
30160 /* The function returns true if INSN is cracked into 2 instructions
30161 by the processor (and therefore occupies 2 issue slots). */
30163 static bool
30164 is_cracked_insn (rtx_insn *insn)
30166 if (!insn || !NONDEBUG_INSN_P (insn)
30167 || GET_CODE (PATTERN (insn)) == USE
30168 || GET_CODE (PATTERN (insn)) == CLOBBER)
30169 return false;
30171 if (rs6000_sched_groups
30172 && (rs6000_tune == PROCESSOR_POWER4 || rs6000_tune == PROCESSOR_POWER5))
30174 enum attr_type type = get_attr_type (insn);
30175 if ((type == TYPE_LOAD
30176 && get_attr_sign_extend (insn) == SIGN_EXTEND_YES
30177 && get_attr_update (insn) == UPDATE_NO)
30178 || (type == TYPE_LOAD
30179 && get_attr_sign_extend (insn) == SIGN_EXTEND_NO
30180 && get_attr_update (insn) == UPDATE_YES
30181 && get_attr_indexed (insn) == INDEXED_NO)
30182 || (type == TYPE_STORE
30183 && get_attr_update (insn) == UPDATE_YES
30184 && get_attr_indexed (insn) == INDEXED_NO)
30185 || ((type == TYPE_FPLOAD || type == TYPE_FPSTORE)
30186 && get_attr_update (insn) == UPDATE_YES)
30187 || (type == TYPE_CR_LOGICAL
30188 && get_attr_cr_logical_3op (insn) == CR_LOGICAL_3OP_YES)
30189 || (type == TYPE_EXTS
30190 && get_attr_dot (insn) == DOT_YES)
30191 || (type == TYPE_SHIFT
30192 && get_attr_dot (insn) == DOT_YES
30193 && get_attr_var_shift (insn) == VAR_SHIFT_NO)
30194 || (type == TYPE_MUL
30195 && get_attr_dot (insn) == DOT_YES)
30196 || type == TYPE_DIV
30197 || (type == TYPE_INSERT
30198 && get_attr_size (insn) == SIZE_32))
30199 return true;
30202 return false;
30205 /* The function returns true if INSN can be issued only from
30206 the branch slot. */
30208 static bool
30209 is_branch_slot_insn (rtx_insn *insn)
30211 if (!insn || !NONDEBUG_INSN_P (insn)
30212 || GET_CODE (PATTERN (insn)) == USE
30213 || GET_CODE (PATTERN (insn)) == CLOBBER)
30214 return false;
30216 if (rs6000_sched_groups)
30218 enum attr_type type = get_attr_type (insn);
30219 if (type == TYPE_BRANCH || type == TYPE_JMPREG)
30220 return true;
30221 return false;
30224 return false;
30227 /* The function returns true if OUT_INSN sets a value that is
30228 used in the address generation computation of IN_INSN. */
30229 static bool
30230 set_to_load_agen (rtx_insn *out_insn, rtx_insn *in_insn)
30232 rtx out_set, in_set;
30234 /* For performance reasons, only handle the simple case where
30235 both insns are a single_set. */
30236 out_set = single_set (out_insn);
30237 if (out_set)
30239 in_set = single_set (in_insn);
30240 if (in_set)
30241 return reg_mentioned_p (SET_DEST (out_set), SET_SRC (in_set));
30244 return false;
30247 /* Try to determine base/offset/size parts of the given MEM.
30248 Return true if successful, false if all the values couldn't
30249 be determined.
30251 This function only looks for REG or REG+CONST address forms.
30252 REG+REG address form will return false. */
30254 static bool
30255 get_memref_parts (rtx mem, rtx *base, HOST_WIDE_INT *offset,
30256 HOST_WIDE_INT *size)
30258 rtx addr_rtx;
30259 if (MEM_SIZE_KNOWN_P (mem))
30260 *size = MEM_SIZE (mem);
30261 else
30262 return false;
30264 addr_rtx = (XEXP (mem, 0));
30265 if (GET_CODE (addr_rtx) == PRE_MODIFY)
30266 addr_rtx = XEXP (addr_rtx, 1);
30268 *offset = 0;
30269 while (GET_CODE (addr_rtx) == PLUS
30270 && CONST_INT_P (XEXP (addr_rtx, 1)))
30272 *offset += INTVAL (XEXP (addr_rtx, 1));
30273 addr_rtx = XEXP (addr_rtx, 0);
30275 if (!REG_P (addr_rtx))
30276 return false;
30278 *base = addr_rtx;
30279 return true;
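/* Illustrative example: for (mem:SI (plus:DI (plus:DI (reg:DI 9)
   (const_int 16)) (const_int 4))) with a known size of 4 bytes, the
   function above returns base = (reg:DI 9), offset = 20, size = 4.  */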
30282 /* Return true if the target storage location of MEM1 is adjacent
30283 to the target storage location of MEM2. */
30286 static bool
30287 adjacent_mem_locations (rtx mem1, rtx mem2)
30289 rtx reg1, reg2;
30290 HOST_WIDE_INT off1, size1, off2, size2;
30292 if (get_memref_parts (mem1, &reg1, &off1, &size1)
30293 && get_memref_parts (mem2, &reg2, &off2, &size2))
30294 return ((REGNO (reg1) == REGNO (reg2))
30295 && ((off1 + size1 == off2)
30296 || (off2 + size2 == off1)));
30298 return false;
30301 /* This function returns true if it can be determined that the two MEM
30302 locations overlap by at least 1 byte based on base reg/offset/size. */
30304 static bool
30305 mem_locations_overlap (rtx mem1, rtx mem2)
30307 rtx reg1, reg2;
30308 HOST_WIDE_INT off1, size1, off2, size2;
30310 if (get_memref_parts (mem1, &reg1, &off1, &size1)
30311 && get_memref_parts (mem2, &reg2, &off2, &size2))
30312 return ((REGNO (reg1) == REGNO (reg2))
30313 && (((off1 <= off2) && (off1 + size1 > off2))
30314 || ((off2 <= off1) && (off2 + size2 > off1))));
30316 return false;
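/* Illustrative examples: an 8-byte store at r9+0 and an 8-byte store
   at r9+8 are adjacent (0 + 8 == 8); a 4-byte access at r9+0 and a
   4-byte access at r9+2 overlap (0 <= 2 && 0 + 4 > 2).  */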
30319 /* A C statement (sans semicolon) to update the integer scheduling
30320 priority INSN_PRIORITY (INSN). Increase the priority to execute the
30321 INSN earlier, reduce the priority to execute INSN later. Do not
30322 define this macro if you do not need to adjust the scheduling
30323 priorities of insns. */
30325 static int
30326 rs6000_adjust_priority (rtx_insn *insn ATTRIBUTE_UNUSED, int priority)
30328 rtx load_mem, str_mem;
30329 /* On machines (like the 750) which have asymmetric integer units,
30330 where one integer unit can do multiply and divides and the other
30331 can't, reduce the priority of multiply/divide so it is scheduled
30332 before other integer operations. */
30334 #if 0
30335 if (! INSN_P (insn))
30336 return priority;
30338 if (GET_CODE (PATTERN (insn)) == USE)
30339 return priority;
30341 switch (rs6000_tune)
30342 case PROCESSOR_PPC750:
30343 switch (get_attr_type (insn))
30345 default:
30346 break;
30348 case TYPE_MUL:
30349 case TYPE_DIV:
30350 fprintf (stderr, "priority was %#x (%d) before adjustment\n",
30351 priority, priority);
30352 if (priority >= 0 && priority < 0x01000000)
30353 priority >>= 3;
30354 break;
30357 #endif
30359 if (insn_must_be_first_in_group (insn)
30360 && reload_completed
30361 && current_sched_info->sched_max_insns_priority
30362 && rs6000_sched_restricted_insns_priority)
30365 /* Prioritize insns that can be dispatched only in the first
30366 dispatch slot. */
30367 if (rs6000_sched_restricted_insns_priority == 1)
30368 /* Attach highest priority to insn. This means that in
30369 haifa-sched.c:ready_sort(), dispatch-slot restriction considerations
30370 precede 'priority' (critical path) considerations. */
30371 return current_sched_info->sched_max_insns_priority;
30372 else if (rs6000_sched_restricted_insns_priority == 2)
30373 /* Increase priority of insn by a minimal amount. This means that in
30374 haifa-sched.c:ready_sort(), only 'priority' (critical path)
30375 considerations precede dispatch-slot restriction considerations. */
30376 return (priority + 1);
30379 if (rs6000_tune == PROCESSOR_POWER6
30380 && ((load_store_pendulum == -2 && is_load_insn (insn, &load_mem))
30381 || (load_store_pendulum == 2 && is_store_insn (insn, &str_mem))))
30382 /* Attach highest priority to insn if the scheduler has just issued two
30383 stores and this instruction is a load, or two loads and this instruction
30384 is a store. Power6 wants loads and stores scheduled alternately
30385 when possible. */
30386 return current_sched_info->sched_max_insns_priority;
30388 return priority;
30391 /* Return true if the instruction is nonpipelined on the Cell. */
30392 static bool
30393 is_nonpipeline_insn (rtx_insn *insn)
30395 enum attr_type type;
30396 if (!insn || !NONDEBUG_INSN_P (insn)
30397 || GET_CODE (PATTERN (insn)) == USE
30398 || GET_CODE (PATTERN (insn)) == CLOBBER)
30399 return false;
30401 type = get_attr_type (insn);
30402 if (type == TYPE_MUL
30403 || type == TYPE_DIV
30404 || type == TYPE_SDIV
30405 || type == TYPE_DDIV
30406 || type == TYPE_SSQRT
30407 || type == TYPE_DSQRT
30408 || type == TYPE_MFCR
30409 || type == TYPE_MFCRF
30410 || type == TYPE_MFJMPR)
30412 return true;
30414 return false;
30418 /* Return how many instructions the machine can issue per cycle. */
30420 static int
30421 rs6000_issue_rate (void)
30423 /* Unless scheduling for register pressure, use issue rate of 1 for
30424 first scheduling pass to decrease degradation. */
30425 if (!reload_completed && !flag_sched_pressure)
30426 return 1;
30428 switch (rs6000_tune)
30429 case PROCESSOR_RS64A:
30430 case PROCESSOR_PPC601: /* ? */
30431 case PROCESSOR_PPC7450:
30432 return 3;
30433 case PROCESSOR_PPC440:
30434 case PROCESSOR_PPC603:
30435 case PROCESSOR_PPC750:
30436 case PROCESSOR_PPC7400:
30437 case PROCESSOR_PPC8540:
30438 case PROCESSOR_PPC8548:
30439 case PROCESSOR_CELL:
30440 case PROCESSOR_PPCE300C2:
30441 case PROCESSOR_PPCE300C3:
30442 case PROCESSOR_PPCE500MC:
30443 case PROCESSOR_PPCE500MC64:
30444 case PROCESSOR_PPCE5500:
30445 case PROCESSOR_PPCE6500:
30446 case PROCESSOR_TITAN:
30447 return 2;
30448 case PROCESSOR_PPC476:
30449 case PROCESSOR_PPC604:
30450 case PROCESSOR_PPC604e:
30451 case PROCESSOR_PPC620:
30452 case PROCESSOR_PPC630:
30453 return 4;
30454 case PROCESSOR_POWER4:
30455 case PROCESSOR_POWER5:
30456 case PROCESSOR_POWER6:
30457 case PROCESSOR_POWER7:
30458 return 5;
30459 case PROCESSOR_POWER8:
30460 return 7;
30461 case PROCESSOR_POWER9:
30462 return 6;
30463 default:
30464 return 1;
30468 /* Return how many instructions to look ahead for better insn
30469 scheduling. */
30471 static int
30472 rs6000_use_sched_lookahead (void)
30474 switch (rs6000_tune)
30476 case PROCESSOR_PPC8540:
30477 case PROCESSOR_PPC8548:
30478 return 4;
30480 case PROCESSOR_CELL:
30481 return (reload_completed ? 8 : 0);
30483 default:
30484 return 0;
30488 /* We are choosing insn from the ready queue. Return zero if INSN can be
30489 chosen. */
30490 static int
30491 rs6000_use_sched_lookahead_guard (rtx_insn *insn, int ready_index)
30493 if (ready_index == 0)
30494 return 0;
30496 if (rs6000_tune != PROCESSOR_CELL)
30497 return 0;
30499 gcc_assert (insn != NULL_RTX && INSN_P (insn));
30501 if (!reload_completed
30502 || is_nonpipeline_insn (insn)
30503 || is_microcoded_insn (insn))
30504 return 1;
30506 return 0;
30509 /* Determine if PAT refers to memory. If so, set MEM_REF to the MEM rtx
30510 and return true. */
30512 static bool
30513 find_mem_ref (rtx pat, rtx *mem_ref)
30515 const char * fmt;
30516 int i, j;
30518 /* stack_tie does not produce any real memory traffic. */
30519 if (tie_operand (pat, VOIDmode))
30520 return false;
30522 if (GET_CODE (pat) == MEM)
30524 *mem_ref = pat;
30525 return true;
30528 /* Recursively process the pattern. */
30529 fmt = GET_RTX_FORMAT (GET_CODE (pat));
30531 for (i = GET_RTX_LENGTH (GET_CODE (pat)) - 1; i >= 0; i--)
30533 if (fmt[i] == 'e')
30535 if (find_mem_ref (XEXP (pat, i), mem_ref))
30536 return true;
30538 else if (fmt[i] == 'E')
30539 for (j = XVECLEN (pat, i) - 1; j >= 0; j--)
30541 if (find_mem_ref (XVECEXP (pat, i, j), mem_ref))
30542 return true;
30546 return false;
30549 /* Determine if PAT is a PATTERN of a load insn. */
30551 static bool
30552 is_load_insn1 (rtx pat, rtx *load_mem)
30554 if (!pat)
30555 return false;
30557 if (GET_CODE (pat) == SET)
30558 return find_mem_ref (SET_SRC (pat), load_mem);
30560 if (GET_CODE (pat) == PARALLEL)
30562 int i;
30564 for (i = 0; i < XVECLEN (pat, 0); i++)
30565 if (is_load_insn1 (XVECEXP (pat, 0, i), load_mem))
30566 return true;
30569 return false;
30572 /* Determine if INSN loads from memory. */
30574 static bool
30575 is_load_insn (rtx insn, rtx *load_mem)
30577 if (!insn || !INSN_P (insn))
30578 return false;
30580 if (CALL_P (insn))
30581 return false;
30583 return is_load_insn1 (PATTERN (insn), load_mem);
30586 /* Determine if PAT is a PATTERN of a store insn. */
30588 static bool
30589 is_store_insn1 (rtx pat, rtx *str_mem)
30591 if (!pat)
30592 return false;
30594 if (GET_CODE (pat) == SET)
30595 return find_mem_ref (SET_DEST (pat), str_mem);
30597 if (GET_CODE (pat) == PARALLEL)
30599 int i;
30601 for (i = 0; i < XVECLEN (pat, 0); i++)
30602 if (is_store_insn1 (XVECEXP (pat, 0, i), str_mem))
30603 return true;
30606 return false;
30609 /* Determine if INSN stores to memory. */
30611 static bool
30612 is_store_insn (rtx insn, rtx *str_mem)
30614 if (!insn || !INSN_P (insn))
30615 return false;
30617 return is_store_insn1 (PATTERN (insn), str_mem);
30620 /* Return whether TYPE is a Power9 pairable vector instruction type. */
30622 static bool
30623 is_power9_pairable_vec_type (enum attr_type type)
30625 switch (type)
30627 case TYPE_VECSIMPLE:
30628 case TYPE_VECCOMPLEX:
30629 case TYPE_VECDIV:
30630 case TYPE_VECCMP:
30631 case TYPE_VECPERM:
30632 case TYPE_VECFLOAT:
30633 case TYPE_VECFDIV:
30634 case TYPE_VECDOUBLE:
30635 return true;
30636 default:
30637 break;
30639 return false;
30642 /* Returns whether the dependence between INSN and NEXT is considered
30643 costly by the given target. */
30645 static bool
30646 rs6000_is_costly_dependence (dep_t dep, int cost, int distance)
30648 rtx insn;
30649 rtx next;
30650 rtx load_mem, str_mem;
30652 /* If the flag is not enabled - no dependence is considered costly;
30653 allow all dependent insns in the same group.
30654 This is the most aggressive option. */
30655 if (rs6000_sched_costly_dep == no_dep_costly)
30656 return false;
30658 /* If the flag is set to 1 - a dependence is always considered costly;
30659 do not allow dependent instructions in the same group.
30660 This is the most conservative option. */
30661 if (rs6000_sched_costly_dep == all_deps_costly)
30662 return true;
30664 insn = DEP_PRO (dep);
30665 next = DEP_CON (dep);
30667 if (rs6000_sched_costly_dep == store_to_load_dep_costly
30668 && is_load_insn (next, &load_mem)
30669 && is_store_insn (insn, &str_mem))
30670 /* Prevent load after store in the same group. */
30671 return true;
30673 if (rs6000_sched_costly_dep == true_store_to_load_dep_costly
30674 && is_load_insn (next, &load_mem)
30675 && is_store_insn (insn, &str_mem)
30676 && DEP_TYPE (dep) == REG_DEP_TRUE
30677       && mem_locations_overlap (str_mem, load_mem))
30678 /* Prevent load after store in the same group if it is a true
30679 dependence. */
30680 return true;
30682 /* The flag is set to X; dependences with latency >= X are considered costly,
30683 and will not be scheduled in the same group. */
30684 if (rs6000_sched_costly_dep <= max_dep_latency
30685 && ((cost - distance) >= (int)rs6000_sched_costly_dep))
30686 return true;
30688 return false;
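/* Worked example (illustrative): with a numeric -msched-costly-dep
   value of 2, a dependence of cost 3 between insns 0 groups apart gives
   (3 - 0) >= 2, so the insns are kept in separate groups; at distance 2
   the same dependence gives (3 - 2) < 2 and may share a group.  */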
30691 /* Return the next insn after INSN that is found before TAIL is reached,
30692 skipping any "non-active" insns - insns that will not actually occupy
30693 an issue slot. Return NULL_RTX if such an insn is not found. */
30695 static rtx_insn *
30696 get_next_active_insn (rtx_insn *insn, rtx_insn *tail)
30698 if (insn == NULL_RTX || insn == tail)
30699 return NULL;
30701 while (1)
30703 insn = NEXT_INSN (insn);
30704 if (insn == NULL_RTX || insn == tail)
30705 return NULL;
30707 if (CALL_P (insn)
30708 || JUMP_P (insn) || JUMP_TABLE_DATA_P (insn)
30709 || (NONJUMP_INSN_P (insn)
30710 && GET_CODE (PATTERN (insn)) != USE
30711 && GET_CODE (PATTERN (insn)) != CLOBBER
30712 && INSN_CODE (insn) != CODE_FOR_stack_tie))
30713 break;
30715 return insn;
30718 /* Do Power9 specific sched_reorder2 reordering of ready list. */
30720 static int
30721 power9_sched_reorder2 (rtx_insn **ready, int lastpos)
30723 int pos;
30724 int i;
30725 rtx_insn *tmp;
30726 enum attr_type type, type2;
30728 type = get_attr_type (last_scheduled_insn);
30730 /* Try to issue fixed point divides back-to-back in pairs so they will be
30731 routed to separate execution units and execute in parallel. */
30732 if (type == TYPE_DIV && divide_cnt == 0)
30734 /* First divide has been scheduled. */
30735 divide_cnt = 1;
30737 /* Scan the ready list looking for another divide, if found move it
30738 to the end of the list so it is chosen next. */
30739 pos = lastpos;
30740 while (pos >= 0)
30742 if (recog_memoized (ready[pos]) >= 0
30743 && get_attr_type (ready[pos]) == TYPE_DIV)
30745 tmp = ready[pos];
30746 for (i = pos; i < lastpos; i++)
30747 ready[i] = ready[i + 1];
30748 ready[lastpos] = tmp;
30749 break;
30751 pos--;
30754 else
30756 /* Last insn was the 2nd divide or not a divide, reset the counter. */
30757 divide_cnt = 0;
30759 /* The best dispatch throughput for vector and vector load insns can be
30760 achieved by interleaving a vector and vector load such that they'll
30761 dispatch to the same superslice. If this pairing cannot be achieved
30762 then it is best to pair vector insns together and vector load insns
30763 together.
30765 To aid in this pairing, vec_pairing maintains the current state with
30766 the following values:
30768 0 : Initial state, no vecload/vector pairing has been started.
30770 1 : A vecload or vector insn has been issued and a candidate for
30771 pairing has been found and moved to the end of the ready
30772 list. */
30773 if (type == TYPE_VECLOAD)
30775 /* Issued a vecload. */
30776 if (vec_pairing == 0)
30778 int vecload_pos = -1;
30779 /* We issued a single vecload, look for a vector insn to pair it
30780 with. If one isn't found, try to pair another vecload. */
30781 pos = lastpos;
30782 while (pos >= 0)
30784 if (recog_memoized (ready[pos]) >= 0)
30786 type2 = get_attr_type (ready[pos]);
30787 if (is_power9_pairable_vec_type (type2))
30789 /* Found a vector insn to pair with, move it to the
30790 end of the ready list so it is scheduled next. */
30791 tmp = ready[pos];
30792 for (i = pos; i < lastpos; i++)
30793 ready[i] = ready[i + 1];
30794 ready[lastpos] = tmp;
30795 vec_pairing = 1;
30796 return cached_can_issue_more;
30798 else if (type2 == TYPE_VECLOAD && vecload_pos == -1)
30799 /* Remember position of first vecload seen. */
30800 vecload_pos = pos;
30802 pos--;
30804 if (vecload_pos >= 0)
30806 /* Didn't find a vector to pair with but did find a vecload,
30807 move it to the end of the ready list. */
30808 tmp = ready[vecload_pos];
30809 for (i = vecload_pos; i < lastpos; i++)
30810 ready[i] = ready[i + 1];
30811 ready[lastpos] = tmp;
30812 vec_pairing = 1;
30813 return cached_can_issue_more;
30817 else if (is_power9_pairable_vec_type (type))
30819 /* Issued a vector operation. */
30820 if (vec_pairing == 0)
30822 int vec_pos = -1;
30823 /* We issued a single vector insn, look for a vecload to pair it
30824 with. If one isn't found, try to pair another vector. */
30825 pos = lastpos;
30826 while (pos >= 0)
30828 if (recog_memoized (ready[pos]) >= 0)
30830 type2 = get_attr_type (ready[pos]);
30831 if (type2 == TYPE_VECLOAD)
30833 /* Found a vecload insn to pair with, move it to the
30834 end of the ready list so it is scheduled next. */
30835 tmp = ready[pos];
30836 for (i = pos; i < lastpos; i++)
30837 ready[i] = ready[i + 1];
30838 ready[lastpos] = tmp;
30839 vec_pairing = 1;
30840 return cached_can_issue_more;
30842 else if (is_power9_pairable_vec_type (type2)
30843 && vec_pos == -1)
30844 /* Remember position of first vector insn seen. */
30845 vec_pos = pos;
30847 pos--;
30849 if (vec_pos >= 0)
30851 /* Didn't find a vecload to pair with but did find a vector
30852 insn, move it to the end of the ready list. */
30853 tmp = ready[vec_pos];
30854 for (i = vec_pos; i < lastpos; i++)
30855 ready[i] = ready[i + 1];
30856 ready[lastpos] = tmp;
30857 vec_pairing = 1;
30858 return cached_can_issue_more;
30863 /* We've either finished a vec/vecload pair, couldn't find an insn to
30864      continue the current pair, or the last insn had nothing to do
30865 with pairing. In any case, reset the state. */
30866 vec_pairing = 0;
30869 return cached_can_issue_more;
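#if 0
/* Editor's sketch (not part of GCC) of the rotate-to-end idiom repeated
   above: move READY[POS] to READY[LASTPOS], shifting the entries in
   between down one slot, so the scheduler issues the moved insn next.  */
static void
rotate_ready_to_end (rtx_insn **ready, int pos, int lastpos)
{
  rtx_insn *tmp = ready[pos];
  for (int i = pos; i < lastpos; i++)
    ready[i] = ready[i + 1];
  ready[lastpos] = tmp;
}
#endif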
30872 /* We are about to begin issuing insns for this clock cycle. */
30874 static int
30875 rs6000_sched_reorder (FILE *dump ATTRIBUTE_UNUSED, int sched_verbose,
30876 rtx_insn **ready ATTRIBUTE_UNUSED,
30877 int *pn_ready ATTRIBUTE_UNUSED,
30878 int clock_var ATTRIBUTE_UNUSED)
30880 int n_ready = *pn_ready;
30882 if (sched_verbose)
30883 fprintf (dump, "// rs6000_sched_reorder :\n");
30885 /* Reorder the ready list, if the second to last ready insn
30886      is a nonpipeline insn.  */
30887 if (rs6000_tune == PROCESSOR_CELL && n_ready > 1)
30889 if (is_nonpipeline_insn (ready[n_ready - 1])
30890 && (recog_memoized (ready[n_ready - 2]) > 0))
30891 /* Simply swap first two insns. */
30892 std::swap (ready[n_ready - 1], ready[n_ready - 2]);
30895 if (rs6000_tune == PROCESSOR_POWER6)
30896 load_store_pendulum = 0;
30898 return rs6000_issue_rate ();
30901 /* Like rs6000_sched_reorder, but called after issuing each insn. */
30903 static int
30904 rs6000_sched_reorder2 (FILE *dump, int sched_verbose, rtx_insn **ready,
30905 int *pn_ready, int clock_var ATTRIBUTE_UNUSED)
30907 if (sched_verbose)
30908 fprintf (dump, "// rs6000_sched_reorder2 :\n");
30910 /* For Power6, we need to handle some special cases to try and keep the
30911 store queue from overflowing and triggering expensive flushes.
30913 This code monitors how load and store instructions are being issued
30914 and skews the ready list one way or the other to increase the likelihood
30915 that a desired instruction is issued at the proper time.
30917 A couple of things are done. First, we maintain a "load_store_pendulum"
30918 to track the current state of load/store issue.
30920 - If the pendulum is at zero, then no loads or stores have been
30921 issued in the current cycle so we do nothing.
30923 - If the pendulum is 1, then a single load has been issued in this
30924 cycle and we attempt to locate another load in the ready list to
30925 issue with it.
30927 - If the pendulum is -2, then two stores have already been
30928 issued in this cycle, so we increase the priority of the first load
30929      in the ready list to increase its likelihood of being chosen first
30930 in the next cycle.
30932 - If the pendulum is -1, then a single store has been issued in this
30933 cycle and we attempt to locate another store in the ready list to
30934 issue with it, preferring a store to an adjacent memory location to
30935 facilitate store pairing in the store queue.
30937 - If the pendulum is 2, then two loads have already been
30938 issued in this cycle, so we increase the priority of the first store
30939      in the ready list to increase its likelihood of being chosen first
30940 in the next cycle.
30942 - If the pendulum < -2 or > 2, then do nothing.
30944 Note: This code covers the most common scenarios. There exist non
30945 load/store instructions which make use of the LSU and which
30946 would need to be accounted for to strictly model the behavior
30947 of the machine. Those instructions are currently unaccounted
30948            for to help minimize compile time overhead of this code.  */
30950 if (rs6000_tune == PROCESSOR_POWER6 && last_scheduled_insn)
30952 int pos;
30953 int i;
30954 rtx_insn *tmp;
30955 rtx load_mem, str_mem;
30957 if (is_store_insn (last_scheduled_insn, &str_mem))
30958 /* Issuing a store, swing the load_store_pendulum to the left */
30959 load_store_pendulum--;
30960 else if (is_load_insn (last_scheduled_insn, &load_mem))
30961 /* Issuing a load, swing the load_store_pendulum to the right */
30962 load_store_pendulum++;
30963 else
30964 return cached_can_issue_more;
30966 /* If the pendulum is balanced, or there is only one instruction on
30967 the ready list, then all is well, so return. */
30968 if ((load_store_pendulum == 0) || (*pn_ready <= 1))
30969 return cached_can_issue_more;
30971 if (load_store_pendulum == 1)
30973 /* A load has been issued in this cycle. Scan the ready list
30974 for another load to issue with it */
30975 pos = *pn_ready-1;
30977 while (pos >= 0)
30979 if (is_load_insn (ready[pos], &load_mem))
30981 /* Found a load. Move it to the head of the ready list,
30982                  and adjust its priority so that it is more likely to
30983 stay there */
30984 tmp = ready[pos];
30985 for (i=pos; i<*pn_ready-1; i++)
30986 ready[i] = ready[i + 1];
30987 ready[*pn_ready-1] = tmp;
30989 if (!sel_sched_p () && INSN_PRIORITY_KNOWN (tmp))
30990 INSN_PRIORITY (tmp)++;
30991 break;
30993 pos--;
30996 else if (load_store_pendulum == -2)
30998 /* Two stores have been issued in this cycle. Increase the
30999 priority of the first load in the ready list to favor it for
31000 issuing in the next cycle. */
31001 pos = *pn_ready-1;
31003 while (pos >= 0)
31005 if (is_load_insn (ready[pos], &load_mem)
31006 && !sel_sched_p ()
31007 && INSN_PRIORITY_KNOWN (ready[pos]))
31009 INSN_PRIORITY (ready[pos])++;
31011 /* Adjust the pendulum to account for the fact that a load
31012 was found and increased in priority. This is to prevent
31013 increasing the priority of multiple loads */
31014 load_store_pendulum--;
31016 break;
31018 pos--;
31021 else if (load_store_pendulum == -1)
31023 /* A store has been issued in this cycle. Scan the ready list for
31024 another store to issue with it, preferring a store to an adjacent
31025 memory location */
31026 int first_store_pos = -1;
31028 pos = *pn_ready-1;
31030 while (pos >= 0)
31032 if (is_store_insn (ready[pos], &str_mem))
31034 rtx str_mem2;
31035 /* Maintain the index of the first store found on the
31036 list */
31037 if (first_store_pos == -1)
31038 first_store_pos = pos;
31040 if (is_store_insn (last_scheduled_insn, &str_mem2)
31041 && adjacent_mem_locations (str_mem, str_mem2))
31043 /* Found an adjacent store. Move it to the head of the
31044                      ready list, and adjust its priority so that it is
31045 more likely to stay there */
31046 tmp = ready[pos];
31047 for (i=pos; i<*pn_ready-1; i++)
31048 ready[i] = ready[i + 1];
31049 ready[*pn_ready-1] = tmp;
31051 if (!sel_sched_p () && INSN_PRIORITY_KNOWN (tmp))
31052 INSN_PRIORITY (tmp)++;
31054 first_store_pos = -1;
31056 break;
31059 pos--;
31062 if (first_store_pos >= 0)
31064 /* An adjacent store wasn't found, but a non-adjacent store was,
31065 so move the non-adjacent store to the front of the ready
31066 list, and adjust its priority so that it is more likely to
31067 stay there. */
31068 tmp = ready[first_store_pos];
31069 for (i=first_store_pos; i<*pn_ready-1; i++)
31070 ready[i] = ready[i + 1];
31071 ready[*pn_ready-1] = tmp;
31072 if (!sel_sched_p () && INSN_PRIORITY_KNOWN (tmp))
31073 INSN_PRIORITY (tmp)++;
31076 else if (load_store_pendulum == 2)
31078 /* Two loads have been issued in this cycle. Increase the priority
31079 of the first store in the ready list to favor it for issuing in
31080 the next cycle. */
31081 pos = *pn_ready-1;
31083 while (pos >= 0)
31085 if (is_store_insn (ready[pos], &str_mem)
31086 && !sel_sched_p ()
31087 && INSN_PRIORITY_KNOWN (ready[pos]))
31089 INSN_PRIORITY (ready[pos])++;
31091 /* Adjust the pendulum to account for the fact that a store
31092 was found and increased in priority. This is to prevent
31093 increasing the priority of multiple stores */
31094 load_store_pendulum++;
31096 break;
31098 pos--;
31103 /* Do Power9 dependent reordering if necessary. */
31104 if (rs6000_tune == PROCESSOR_POWER9 && last_scheduled_insn
31105 && recog_memoized (last_scheduled_insn) >= 0)
31106 return power9_sched_reorder2 (ready, *pn_ready - 1);
31108 return cached_can_issue_more;
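/* Illustrative pendulum trace (comment only): two stores issued in one
   cycle drive the pendulum from 0 to -2; the first load on the ready
   list then gets a priority bump and the pendulum is pushed to -3 so no
   further loads are bumped that cycle.  Two loads mirror this on the
   positive side (+2, then +3 after bumping a store).  */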
31111 /* Return whether the presence of INSN causes a dispatch group termination
31112 of group WHICH_GROUP.
31114 If WHICH_GROUP == current_group, this function will return true if INSN
31115    causes the termination of the current group (i.e., the dispatch group to
31116 which INSN belongs). This means that INSN will be the last insn in the
31117 group it belongs to.
31119 If WHICH_GROUP == previous_group, this function will return true if INSN
31120    causes the termination of the previous group (i.e., the dispatch group that
31121 precedes the group to which INSN belongs). This means that INSN will be
31122    the first insn in the group it belongs to.  */
31124 static bool
31125 insn_terminates_group_p (rtx_insn *insn, enum group_termination which_group)
31127 bool first, last;
31129 if (! insn)
31130 return false;
31132 first = insn_must_be_first_in_group (insn);
31133 last = insn_must_be_last_in_group (insn);
31135 if (first && last)
31136 return true;
31138 if (which_group == current_group)
31139 return last;
31140 else if (which_group == previous_group)
31141 return first;
31143 return false;
31147 static bool
31148 insn_must_be_first_in_group (rtx_insn *insn)
31150 enum attr_type type;
31152 if (!insn
31153 || NOTE_P (insn)
31154 || DEBUG_INSN_P (insn)
31155 || GET_CODE (PATTERN (insn)) == USE
31156 || GET_CODE (PATTERN (insn)) == CLOBBER)
31157 return false;
31159 switch (rs6000_tune)
31161 case PROCESSOR_POWER5:
31162 if (is_cracked_insn (insn))
31163 return true;
31164 /* FALLTHRU */
31165 case PROCESSOR_POWER4:
31166 if (is_microcoded_insn (insn))
31167 return true;
31169 if (!rs6000_sched_groups)
31170 return false;
31172 type = get_attr_type (insn);
31174 switch (type)
31176 case TYPE_MFCR:
31177 case TYPE_MFCRF:
31178 case TYPE_MTCR:
31179 case TYPE_CR_LOGICAL:
31180 case TYPE_MTJMPR:
31181 case TYPE_MFJMPR:
31182 case TYPE_DIV:
31183 case TYPE_LOAD_L:
31184 case TYPE_STORE_C:
31185 case TYPE_ISYNC:
31186 case TYPE_SYNC:
31187 return true;
31188 default:
31189 break;
31191 break;
31192 case PROCESSOR_POWER6:
31193 type = get_attr_type (insn);
31195 switch (type)
31197 case TYPE_EXTS:
31198 case TYPE_CNTLZ:
31199 case TYPE_TRAP:
31200 case TYPE_MUL:
31201 case TYPE_INSERT:
31202 case TYPE_FPCOMPARE:
31203 case TYPE_MFCR:
31204 case TYPE_MTCR:
31205 case TYPE_MFJMPR:
31206 case TYPE_MTJMPR:
31207 case TYPE_ISYNC:
31208 case TYPE_SYNC:
31209 case TYPE_LOAD_L:
31210 case TYPE_STORE_C:
31211 return true;
31212 case TYPE_SHIFT:
31213 if (get_attr_dot (insn) == DOT_NO
31214 || get_attr_var_shift (insn) == VAR_SHIFT_NO)
31215 return true;
31216 else
31217 break;
31218 case TYPE_DIV:
31219 if (get_attr_size (insn) == SIZE_32)
31220 return true;
31221 else
31222 break;
31223 case TYPE_LOAD:
31224 case TYPE_STORE:
31225 case TYPE_FPLOAD:
31226 case TYPE_FPSTORE:
31227 if (get_attr_update (insn) == UPDATE_YES)
31228 return true;
31229 else
31230 break;
31231 default:
31232 break;
31234 break;
31235 case PROCESSOR_POWER7:
31236 type = get_attr_type (insn);
31238 switch (type)
31240 case TYPE_CR_LOGICAL:
31241 case TYPE_MFCR:
31242 case TYPE_MFCRF:
31243 case TYPE_MTCR:
31244 case TYPE_DIV:
31245 case TYPE_ISYNC:
31246 case TYPE_LOAD_L:
31247 case TYPE_STORE_C:
31248 case TYPE_MFJMPR:
31249 case TYPE_MTJMPR:
31250 return true;
31251 case TYPE_MUL:
31252 case TYPE_SHIFT:
31253 case TYPE_EXTS:
31254 if (get_attr_dot (insn) == DOT_YES)
31255 return true;
31256 else
31257 break;
31258 case TYPE_LOAD:
31259 if (get_attr_sign_extend (insn) == SIGN_EXTEND_YES
31260 || get_attr_update (insn) == UPDATE_YES)
31261 return true;
31262 else
31263 break;
31264 case TYPE_STORE:
31265 case TYPE_FPLOAD:
31266 case TYPE_FPSTORE:
31267 if (get_attr_update (insn) == UPDATE_YES)
31268 return true;
31269 else
31270 break;
31271 default:
31272 break;
31274 break;
31275 case PROCESSOR_POWER8:
31276 type = get_attr_type (insn);
31278 switch (type)
31280 case TYPE_CR_LOGICAL:
31281 case TYPE_MFCR:
31282 case TYPE_MFCRF:
31283 case TYPE_MTCR:
31284 case TYPE_SYNC:
31285 case TYPE_ISYNC:
31286 case TYPE_LOAD_L:
31287 case TYPE_STORE_C:
31288 case TYPE_VECSTORE:
31289 case TYPE_MFJMPR:
31290 case TYPE_MTJMPR:
31291 return true;
31292 case TYPE_SHIFT:
31293 case TYPE_EXTS:
31294 case TYPE_MUL:
31295 if (get_attr_dot (insn) == DOT_YES)
31296 return true;
31297 else
31298 break;
31299 case TYPE_LOAD:
31300 if (get_attr_sign_extend (insn) == SIGN_EXTEND_YES
31301 || get_attr_update (insn) == UPDATE_YES)
31302 return true;
31303 else
31304 break;
31305 case TYPE_STORE:
31306 if (get_attr_update (insn) == UPDATE_YES
31307 && get_attr_indexed (insn) == INDEXED_YES)
31308 return true;
31309 else
31310 break;
31311 default:
31312 break;
31314 break;
31315 default:
31316 break;
31319 return false;
31322 static bool
31323 insn_must_be_last_in_group (rtx_insn *insn)
31325 enum attr_type type;
31327 if (!insn
31328 || NOTE_P (insn)
31329 || DEBUG_INSN_P (insn)
31330 || GET_CODE (PATTERN (insn)) == USE
31331 || GET_CODE (PATTERN (insn)) == CLOBBER)
31332 return false;
31334   switch (rs6000_tune)
31335 case PROCESSOR_POWER4:
31336 case PROCESSOR_POWER5:
31337 if (is_microcoded_insn (insn))
31338 return true;
31340 if (is_branch_slot_insn (insn))
31341 return true;
31343 break;
31344 case PROCESSOR_POWER6:
31345 type = get_attr_type (insn);
31347 switch (type)
31349 case TYPE_EXTS:
31350 case TYPE_CNTLZ:
31351 case TYPE_TRAP:
31352 case TYPE_MUL:
31353 case TYPE_FPCOMPARE:
31354 case TYPE_MFCR:
31355 case TYPE_MTCR:
31356 case TYPE_MFJMPR:
31357 case TYPE_MTJMPR:
31358 case TYPE_ISYNC:
31359 case TYPE_SYNC:
31360 case TYPE_LOAD_L:
31361 case TYPE_STORE_C:
31362 return true;
31363 case TYPE_SHIFT:
31364 if (get_attr_dot (insn) == DOT_NO
31365 || get_attr_var_shift (insn) == VAR_SHIFT_NO)
31366 return true;
31367 else
31368 break;
31369 case TYPE_DIV:
31370 if (get_attr_size (insn) == SIZE_32)
31371 return true;
31372 else
31373 break;
31374 default:
31375 break;
31377 break;
31378 case PROCESSOR_POWER7:
31379 type = get_attr_type (insn);
31381 switch (type)
31383 case TYPE_ISYNC:
31384 case TYPE_SYNC:
31385 case TYPE_LOAD_L:
31386 case TYPE_STORE_C:
31387 return true;
31388 case TYPE_LOAD:
31389 if (get_attr_sign_extend (insn) == SIGN_EXTEND_YES
31390 && get_attr_update (insn) == UPDATE_YES)
31391 return true;
31392 else
31393 break;
31394 case TYPE_STORE:
31395 if (get_attr_update (insn) == UPDATE_YES
31396 && get_attr_indexed (insn) == INDEXED_YES)
31397 return true;
31398 else
31399 break;
31400 default:
31401 break;
31403 break;
31404 case PROCESSOR_POWER8:
31405 type = get_attr_type (insn);
31407 switch (type)
31409 case TYPE_MFCR:
31410 case TYPE_MTCR:
31411 case TYPE_ISYNC:
31412 case TYPE_SYNC:
31413 case TYPE_LOAD_L:
31414 case TYPE_STORE_C:
31415 return true;
31416 case TYPE_LOAD:
31417 if (get_attr_sign_extend (insn) == SIGN_EXTEND_YES
31418 && get_attr_update (insn) == UPDATE_YES)
31419 return true;
31420 else
31421 break;
31422 case TYPE_STORE:
31423 if (get_attr_update (insn) == UPDATE_YES
31424 && get_attr_indexed (insn) == INDEXED_YES)
31425 return true;
31426 else
31427 break;
31428 default:
31429 break;
31431 break;
31432 default:
31433 break;
31436 return false;
31439 /* Return true if it is recommended to keep NEXT_INSN "far" (in a separate
31440 dispatch group) from the insns in GROUP_INSNS. Return false otherwise. */
31442 static bool
31443 is_costly_group (rtx *group_insns, rtx next_insn)
31445 int i;
31446 int issue_rate = rs6000_issue_rate ();
31448 for (i = 0; i < issue_rate; i++)
31450 sd_iterator_def sd_it;
31451 dep_t dep;
31452 rtx insn = group_insns[i];
31454 if (!insn)
31455 continue;
31457 FOR_EACH_DEP (insn, SD_LIST_RES_FORW, sd_it, dep)
31459 rtx next = DEP_CON (dep);
31461 if (next == next_insn
31462 && rs6000_is_costly_dependence (dep, dep_cost (dep), 0))
31463 return true;
31467 return false;
31470 /* Utility of the function redefine_groups.
31471 Check if it is too costly to schedule NEXT_INSN together with GROUP_INSNS
31472 in the same dispatch group. If so, insert nops before NEXT_INSN, in order
31473    to keep it "far" (in a separate group) from GROUP_INSNS, according to
31474 one of the following schemes, depending on the value of the flag
31475    -minsert-sched-nops=X:
31476 (1) X == sched_finish_regroup_exact: insert exactly as many nops as needed
31477 in order to force NEXT_INSN into a separate group.
31478 (2) X < sched_finish_regroup_exact: insert exactly X nops.
31479 GROUP_END, CAN_ISSUE_MORE and GROUP_COUNT record the state after nop
31480 insertion (has a group just ended, how many vacant issue slots remain in the
31481 last group, and how many dispatch groups were encountered so far). */
31483 static int
31484 force_new_group (int sched_verbose, FILE *dump, rtx *group_insns,
31485 rtx_insn *next_insn, bool *group_end, int can_issue_more,
31486 int *group_count)
31488 rtx nop;
31489 bool force;
31490 int issue_rate = rs6000_issue_rate ();
31491 bool end = *group_end;
31492 int i;
31494 if (next_insn == NULL_RTX || DEBUG_INSN_P (next_insn))
31495 return can_issue_more;
31497 if (rs6000_sched_insert_nops > sched_finish_regroup_exact)
31498 return can_issue_more;
31500 force = is_costly_group (group_insns, next_insn);
31501 if (!force)
31502 return can_issue_more;
31504 if (sched_verbose > 6)
31505     fprintf (dump, "force: group count = %d, can_issue_more = %d\n",
31506              *group_count, can_issue_more);
31508 if (rs6000_sched_insert_nops == sched_finish_regroup_exact)
31510 if (*group_end)
31511 can_issue_more = 0;
31513 /* Since only a branch can be issued in the last issue_slot, it is
31514 sufficient to insert 'can_issue_more - 1' nops if next_insn is not
31515 a branch. If next_insn is a branch, we insert 'can_issue_more' nops;
31516 in this case the last nop will start a new group and the branch
31517 will be forced to the new group. */
31518 if (can_issue_more && !is_branch_slot_insn (next_insn))
31519 can_issue_more--;
31521 /* Do we have a special group ending nop? */
31522 if (rs6000_tune == PROCESSOR_POWER6 || rs6000_tune == PROCESSOR_POWER7
31523 || rs6000_tune == PROCESSOR_POWER8)
31525 nop = gen_group_ending_nop ();
31526 emit_insn_before (nop, next_insn);
31527 can_issue_more = 0;
31529 else
31530 while (can_issue_more > 0)
31532 nop = gen_nop ();
31533 emit_insn_before (nop, next_insn);
31534 can_issue_more--;
31537 *group_end = true;
31538 return 0;
31541 if (rs6000_sched_insert_nops < sched_finish_regroup_exact)
31543 int n_nops = rs6000_sched_insert_nops;
31545 /* Nops can't be issued from the branch slot, so the effective
31546 issue_rate for nops is 'issue_rate - 1'. */
31547 if (can_issue_more == 0)
31548 can_issue_more = issue_rate;
31549 can_issue_more--;
31550 if (can_issue_more == 0)
31552 can_issue_more = issue_rate - 1;
31553 (*group_count)++;
31554 end = true;
31555 for (i = 0; i < issue_rate; i++)
31557 group_insns[i] = 0;
31561 while (n_nops > 0)
31563 nop = gen_nop ();
31564 emit_insn_before (nop, next_insn);
31565 if (can_issue_more == issue_rate - 1) /* new group begins */
31566 end = false;
31567 can_issue_more--;
31568 if (can_issue_more == 0)
31570 can_issue_more = issue_rate - 1;
31571 (*group_count)++;
31572 end = true;
31573 for (i = 0; i < issue_rate; i++)
31575 group_insns[i] = 0;
31578 n_nops--;
31581 /* Scale back relative to 'issue_rate' (instead of 'issue_rate - 1'). */
31582 can_issue_more++;
31584 /* Is next_insn going to start a new group? */
31585 *group_end
31586 = (end
31587 || (can_issue_more == 1 && !is_branch_slot_insn (next_insn))
31588 || (can_issue_more <= 2 && is_cracked_insn (next_insn))
31589 || (can_issue_more < issue_rate &&
31590 insn_terminates_group_p (next_insn, previous_group)));
31591 if (*group_end && end)
31592 (*group_count)--;
31594 if (sched_verbose > 6)
31595 fprintf (dump, "done force: group count = %d, can_issue_more = %d\n",
31596 *group_count, can_issue_more);
31597 return can_issue_more;
31600 return can_issue_more;
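/* Worked example (illustrative) of the fixed-count scheme above: with
   issue_rate == 4 and -minsert-sched-nops=2, entering with
   can_issue_more == 0 resets it to 4 and pre-decrements it to 3 (nops
   cannot go in the branch slot); emitting the two nops leaves 1, which
   the final scale-back turns into can_issue_more == 2.  */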
31603 /* This function tries to synch the dispatch groups that the compiler "sees"
31604 with the dispatch groups that the processor dispatcher is expected to
31605 form in practice. It tries to achieve this synchronization by forcing the
31606 estimated processor grouping on the compiler (as opposed to the function
31607    'pad_groups' which tries to force the scheduler's grouping on the processor).
31609 The function scans the insn sequence between PREV_HEAD_INSN and TAIL and
31610 examines the (estimated) dispatch groups that will be formed by the processor
31611 dispatcher. It marks these group boundaries to reflect the estimated
31612 processor grouping, overriding the grouping that the scheduler had marked.
31613 Depending on the value of the flag '-minsert-sched-nops' this function can
31614 force certain insns into separate groups or force a certain distance between
31615 them by inserting nops, for example, if there exists a "costly dependence"
31616 between the insns.
31618 The function estimates the group boundaries that the processor will form as
31619 follows: It keeps track of how many vacant issue slots are available after
31620 each insn. A subsequent insn will start a new group if one of the following
31621 4 cases applies:
31622 - no more vacant issue slots remain in the current dispatch group.
31623 - only the last issue slot, which is the branch slot, is vacant, but the next
31624 insn is not a branch.
31625    - only the last 2 or fewer issue slots, including the branch slot, are vacant,
31626 which means that a cracked insn (which occupies two issue slots) can't be
31627 issued in this group.
31628    - fewer than 'issue_rate' slots are vacant, and the next insn always needs to
31629 start a new group. */
31631 static int
31632 redefine_groups (FILE *dump, int sched_verbose, rtx_insn *prev_head_insn,
31633 rtx_insn *tail)
31635 rtx_insn *insn, *next_insn;
31636 int issue_rate;
31637 int can_issue_more;
31638 int slot, i;
31639 bool group_end;
31640 int group_count = 0;
31641 rtx *group_insns;
31643 /* Initialize. */
31644 issue_rate = rs6000_issue_rate ();
31645 group_insns = XALLOCAVEC (rtx, issue_rate);
31646 for (i = 0; i < issue_rate; i++)
31648 group_insns[i] = 0;
31650 can_issue_more = issue_rate;
31651 slot = 0;
31652 insn = get_next_active_insn (prev_head_insn, tail);
31653 group_end = false;
31655 while (insn != NULL_RTX)
31657 slot = (issue_rate - can_issue_more);
31658 group_insns[slot] = insn;
31659 can_issue_more =
31660 rs6000_variable_issue (dump, sched_verbose, insn, can_issue_more);
31661 if (insn_terminates_group_p (insn, current_group))
31662 can_issue_more = 0;
31664 next_insn = get_next_active_insn (insn, tail);
31665 if (next_insn == NULL_RTX)
31666 return group_count + 1;
31668 /* Is next_insn going to start a new group? */
31669 group_end
31670 = (can_issue_more == 0
31671 || (can_issue_more == 1 && !is_branch_slot_insn (next_insn))
31672 || (can_issue_more <= 2 && is_cracked_insn (next_insn))
31673 || (can_issue_more < issue_rate &&
31674 insn_terminates_group_p (next_insn, previous_group)));
31676 can_issue_more = force_new_group (sched_verbose, dump, group_insns,
31677 next_insn, &group_end, can_issue_more,
31678 &group_count);
31680 if (group_end)
31682 group_count++;
31683 can_issue_more = 0;
31684 for (i = 0; i < issue_rate; i++)
31686 group_insns[i] = 0;
31690 if (GET_MODE (next_insn) == TImode && can_issue_more)
31691 PUT_MODE (next_insn, VOIDmode);
31692 else if (!can_issue_more && GET_MODE (next_insn) != TImode)
31693 PUT_MODE (next_insn, TImode);
31695 insn = next_insn;
31696 if (can_issue_more == 0)
31697 can_issue_more = issue_rate;
31698 } /* while */
31700 return group_count;
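/* Note (illustrative): the scheduler's convention is that the first insn
   of a dispatch group carries TImode; the PUT_MODE calls above rewrite
   those marks so the modes reflect the processor's estimated grouping
   rather than the scheduler's.  */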
31703 /* Scan the insn sequence between PREV_HEAD_INSN and TAIL and examine the
31704 dispatch group boundaries that the scheduler had marked. Pad with nops
31705 any dispatch groups which have vacant issue slots, in order to force the
31706 scheduler's grouping on the processor dispatcher. The function
31707 returns the number of dispatch groups found. */
31709 static int
31710 pad_groups (FILE *dump, int sched_verbose, rtx_insn *prev_head_insn,
31711 rtx_insn *tail)
31713 rtx_insn *insn, *next_insn;
31714 rtx nop;
31715 int issue_rate;
31716 int can_issue_more;
31717 int group_end;
31718 int group_count = 0;
31720 /* Initialize issue_rate. */
31721 issue_rate = rs6000_issue_rate ();
31722 can_issue_more = issue_rate;
31724 insn = get_next_active_insn (prev_head_insn, tail);
31725 next_insn = get_next_active_insn (insn, tail);
31727 while (insn != NULL_RTX)
31729 can_issue_more =
31730 rs6000_variable_issue (dump, sched_verbose, insn, can_issue_more);
31732 group_end = (next_insn == NULL_RTX || GET_MODE (next_insn) == TImode);
31734 if (next_insn == NULL_RTX)
31735 break;
31737 if (group_end)
31739 /* If the scheduler had marked group termination at this location
31740 (between insn and next_insn), and neither insn nor next_insn will
31741 force group termination, pad the group with nops to force group
31742 termination. */
31743 if (can_issue_more
31744 && (rs6000_sched_insert_nops == sched_finish_pad_groups)
31745 && !insn_terminates_group_p (insn, current_group)
31746 && !insn_terminates_group_p (next_insn, previous_group))
31748 if (!is_branch_slot_insn (next_insn))
31749 can_issue_more--;
31751 while (can_issue_more)
31753 nop = gen_nop ();
31754 emit_insn_before (nop, next_insn);
31755 can_issue_more--;
31759 can_issue_more = issue_rate;
31760 group_count++;
31763 insn = next_insn;
31764 next_insn = get_next_active_insn (insn, tail);
31767 return group_count;
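/* Worked example (illustrative): with issue_rate == 4 and a scheduler
   mark after two issued insns, can_issue_more == 2; when the next insn
   is not a branch the branch slot is left unpadded and a single nop is
   emitted, so only the branch slot stays vacant and the dispatcher must
   start a new group.  */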
31770 /* We're beginning a new block. Initialize data structures as necessary. */
31772 static void
31773 rs6000_sched_init (FILE *dump ATTRIBUTE_UNUSED,
31774 int sched_verbose ATTRIBUTE_UNUSED,
31775 int max_ready ATTRIBUTE_UNUSED)
31777 last_scheduled_insn = NULL;
31778 load_store_pendulum = 0;
31779 divide_cnt = 0;
31780 vec_pairing = 0;
31783 /* The following function is called at the end of scheduling a BB.
31784    After reload, it inserts nops to enforce insn group bundling.  */
31786 static void
31787 rs6000_sched_finish (FILE *dump, int sched_verbose)
31789 int n_groups;
31791 if (sched_verbose)
31792 fprintf (dump, "=== Finishing schedule.\n");
31794 if (reload_completed && rs6000_sched_groups)
31796 /* Do not run sched_finish hook when selective scheduling enabled. */
31797 if (sel_sched_p ())
31798 return;
31800 if (rs6000_sched_insert_nops == sched_finish_none)
31801 return;
31803 if (rs6000_sched_insert_nops == sched_finish_pad_groups)
31804 n_groups = pad_groups (dump, sched_verbose,
31805 current_sched_info->prev_head,
31806 current_sched_info->next_tail);
31807 else
31808 n_groups = redefine_groups (dump, sched_verbose,
31809 current_sched_info->prev_head,
31810 current_sched_info->next_tail);
31812 if (sched_verbose >= 6)
31814 fprintf (dump, "ngroups = %d\n", n_groups);
31815 print_rtl (dump, current_sched_info->prev_head);
31816 fprintf (dump, "Done finish_sched\n");
31821 struct rs6000_sched_context
31823 short cached_can_issue_more;
31824 rtx_insn *last_scheduled_insn;
31825 int load_store_pendulum;
31826 int divide_cnt;
31827 int vec_pairing;
31830 typedef struct rs6000_sched_context rs6000_sched_context_def;
31831 typedef rs6000_sched_context_def *rs6000_sched_context_t;
31833 /* Allocate store for new scheduling context. */
31834 static void *
31835 rs6000_alloc_sched_context (void)
31837 return xmalloc (sizeof (rs6000_sched_context_def));
31840 /* If CLEAN_P is true, initialize _SC with clean data;
31841    otherwise initialize it from the global context.  */
31842 static void
31843 rs6000_init_sched_context (void *_sc, bool clean_p)
31845 rs6000_sched_context_t sc = (rs6000_sched_context_t) _sc;
31847 if (clean_p)
31849 sc->cached_can_issue_more = 0;
31850 sc->last_scheduled_insn = NULL;
31851 sc->load_store_pendulum = 0;
31852 sc->divide_cnt = 0;
31853 sc->vec_pairing = 0;
31855 else
31857 sc->cached_can_issue_more = cached_can_issue_more;
31858 sc->last_scheduled_insn = last_scheduled_insn;
31859 sc->load_store_pendulum = load_store_pendulum;
31860 sc->divide_cnt = divide_cnt;
31861 sc->vec_pairing = vec_pairing;
31865 /* Sets the global scheduling context to the one pointed to by _SC. */
31866 static void
31867 rs6000_set_sched_context (void *_sc)
31869 rs6000_sched_context_t sc = (rs6000_sched_context_t) _sc;
31871 gcc_assert (sc != NULL);
31873 cached_can_issue_more = sc->cached_can_issue_more;
31874 last_scheduled_insn = sc->last_scheduled_insn;
31875 load_store_pendulum = sc->load_store_pendulum;
31876 divide_cnt = sc->divide_cnt;
31877 vec_pairing = sc->vec_pairing;
31880 /* Free _SC. */
31881 static void
31882 rs6000_free_sched_context (void *_sc)
31884 gcc_assert (_sc != NULL);
31886 free (_sc);
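#if 0
/* Editor's sketch (hypothetical driver, not GCC code) of how the
   selective scheduler is expected to use the four context hooks above.  */
static void
example_sched_context_usage (void)
{
  void *ctx = rs6000_alloc_sched_context ();
  rs6000_init_sched_context (ctx, true);  /* start from a clean state */
  /* ... scheduling happens, the global state changes ... */
  rs6000_set_sched_context (ctx);         /* load CTX back into the globals */
  rs6000_free_sched_context (ctx);
}
#endif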
31889 static bool
31890 rs6000_sched_can_speculate_insn (rtx_insn *insn)
31892 switch (get_attr_type (insn))
31894 case TYPE_DIV:
31895 case TYPE_SDIV:
31896 case TYPE_DDIV:
31897 case TYPE_VECDIV:
31898 case TYPE_SSQRT:
31899 case TYPE_DSQRT:
31900 return false;
31902 default:
31903 return true;
31907 /* Length in units of the trampoline for entering a nested function. */
31909 int
31910 rs6000_trampoline_size (void)
31912 int ret = 0;
31914 switch (DEFAULT_ABI)
31916 default:
31917 gcc_unreachable ();
31919 case ABI_AIX:
31920 ret = (TARGET_32BIT) ? 12 : 24;
31921 break;
31923 case ABI_ELFv2:
31924 gcc_assert (!TARGET_32BIT);
31925 ret = 32;
31926 break;
31928 case ABI_DARWIN:
31929 case ABI_V4:
31930 ret = (TARGET_32BIT) ? 40 : 48;
31931 break;
31934 return ret;
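/* Summary (illustrative) of the sizes returned above, in bytes:
   AIX 12/24 (32/64-bit, a 3-word function descriptor), ELFv2 32
   (64-bit only), V.4 and Darwin 40/48 (32/64-bit).  */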
31937 /* Emit RTL insns to initialize the variable parts of a trampoline.
31938 FNADDR is an RTX for the address of the function's pure code.
31939 CXT is an RTX for the static chain value for the function. */
31941 static void
31942 rs6000_trampoline_init (rtx m_tramp, tree fndecl, rtx cxt)
31944 int regsize = (TARGET_32BIT) ? 4 : 8;
31945 rtx fnaddr = XEXP (DECL_RTL (fndecl), 0);
31946 rtx ctx_reg = force_reg (Pmode, cxt);
31947 rtx addr = force_reg (Pmode, XEXP (m_tramp, 0));
31949 switch (DEFAULT_ABI)
31951 default:
31952 gcc_unreachable ();
31954     /* Under AIX, just build the 3-word function descriptor.  */
31955 case ABI_AIX:
31957 rtx fnmem, fn_reg, toc_reg;
31959 if (!TARGET_POINTERS_TO_NESTED_FUNCTIONS)
31960 error ("you cannot take the address of a nested function if you use "
31961 "the %qs option", "-mno-pointers-to-nested-functions");
31963 fnmem = gen_const_mem (Pmode, force_reg (Pmode, fnaddr));
31964 fn_reg = gen_reg_rtx (Pmode);
31965 toc_reg = gen_reg_rtx (Pmode);
31967 /* Macro to shorten the code expansions below. */
31968 # define MEM_PLUS(MEM, OFFSET) adjust_address (MEM, Pmode, OFFSET)
31970 m_tramp = replace_equiv_address (m_tramp, addr);
31972 emit_move_insn (fn_reg, MEM_PLUS (fnmem, 0));
31973 emit_move_insn (toc_reg, MEM_PLUS (fnmem, regsize));
31974 emit_move_insn (MEM_PLUS (m_tramp, 0), fn_reg);
31975 emit_move_insn (MEM_PLUS (m_tramp, regsize), toc_reg);
31976 emit_move_insn (MEM_PLUS (m_tramp, 2*regsize), ctx_reg);
31978 # undef MEM_PLUS
31980 break;
31982 /* Under V.4/eabi/darwin, __trampoline_setup does the real work. */
31983 case ABI_ELFv2:
31984 case ABI_DARWIN:
31985 case ABI_V4:
31986 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__trampoline_setup"),
31987 LCT_NORMAL, VOIDmode,
31988 addr, Pmode,
31989 GEN_INT (rs6000_trampoline_size ()), SImode,
31990 fnaddr, Pmode,
31991 ctx_reg, Pmode);
31992 break;
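/* Illustration of the AIX trampoline filled in above (64-bit,
   regsize == 8): tramp+0 holds the target's code address and tramp+8
   its TOC pointer, both copied from the function descriptor, while
   tramp+16 holds the static chain CXT.  */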
31997 /* Returns TRUE iff the target attribute indicated by ATTR_ID takes a plain
31998 identifier as an argument, so the front end shouldn't look it up. */
32000 static bool
32001 rs6000_attribute_takes_identifier_p (const_tree attr_id)
32003 return is_attribute_p ("altivec", attr_id);
32006 /* Handle the "altivec" attribute. The attribute may have
32007 arguments as follows:
32009 __attribute__((altivec(vector__)))
32010 __attribute__((altivec(pixel__))) (always followed by 'unsigned short')
32011 __attribute__((altivec(bool__))) (always followed by 'unsigned')
32013 and may appear more than once (e.g., 'vector bool char') in a
32014 given declaration. */
32016 static tree
32017 rs6000_handle_altivec_attribute (tree *node,
32018 tree name ATTRIBUTE_UNUSED,
32019 tree args,
32020 int flags ATTRIBUTE_UNUSED,
32021 bool *no_add_attrs)
32023 tree type = *node, result = NULL_TREE;
32024 machine_mode mode;
32025 int unsigned_p;
32026 char altivec_type
32027 = ((args && TREE_CODE (args) == TREE_LIST && TREE_VALUE (args)
32028 && TREE_CODE (TREE_VALUE (args)) == IDENTIFIER_NODE)
32029 ? *IDENTIFIER_POINTER (TREE_VALUE (args))
32030 : '?');
32032 while (POINTER_TYPE_P (type)
32033 || TREE_CODE (type) == FUNCTION_TYPE
32034 || TREE_CODE (type) == METHOD_TYPE
32035 || TREE_CODE (type) == ARRAY_TYPE)
32036 type = TREE_TYPE (type);
32038 mode = TYPE_MODE (type);
32040 /* Check for invalid AltiVec type qualifiers. */
32041 if (type == long_double_type_node)
32042 error ("use of %<long double%> in AltiVec types is invalid");
32043 else if (type == boolean_type_node)
32044 error ("use of boolean types in AltiVec types is invalid");
32045 else if (TREE_CODE (type) == COMPLEX_TYPE)
32046 error ("use of %<complex%> in AltiVec types is invalid");
32047 else if (DECIMAL_FLOAT_MODE_P (mode))
32048 error ("use of decimal floating point types in AltiVec types is invalid");
32049 else if (!TARGET_VSX)
32051 if (type == long_unsigned_type_node || type == long_integer_type_node)
32053 if (TARGET_64BIT)
32054 error ("use of %<long%> in AltiVec types is invalid for "
32055 "64-bit code without %qs", "-mvsx");
32056 else if (rs6000_warn_altivec_long)
32057 warning (0, "use of %<long%> in AltiVec types is deprecated; "
32058 "use %<int%>");
32060 else if (type == long_long_unsigned_type_node
32061 || type == long_long_integer_type_node)
32062 error ("use of %<long long%> in AltiVec types is invalid without %qs",
32063 "-mvsx");
32064 else if (type == double_type_node)
32065 error ("use of %<double%> in AltiVec types is invalid without %qs",
32066 "-mvsx");
32069 switch (altivec_type)
32071 case 'v':
32072 unsigned_p = TYPE_UNSIGNED (type);
32073 switch (mode)
32075 case E_TImode:
32076 result = (unsigned_p ? unsigned_V1TI_type_node : V1TI_type_node);
32077 break;
32078 case E_DImode:
32079 result = (unsigned_p ? unsigned_V2DI_type_node : V2DI_type_node);
32080 break;
32081 case E_SImode:
32082 result = (unsigned_p ? unsigned_V4SI_type_node : V4SI_type_node);
32083 break;
32084 case E_HImode:
32085 result = (unsigned_p ? unsigned_V8HI_type_node : V8HI_type_node);
32086 break;
32087 case E_QImode:
32088 result = (unsigned_p ? unsigned_V16QI_type_node : V16QI_type_node);
32089 break;
32090 case E_SFmode: result = V4SF_type_node; break;
32091 case E_DFmode: result = V2DF_type_node; break;
32092 /* If the user says 'vector int bool', we may be handed the 'bool'
32093 attribute _before_ the 'vector' attribute, and so select the
32094 proper type in the 'b' case below. */
32095 case E_V4SImode: case E_V8HImode: case E_V16QImode: case E_V4SFmode:
32096 case E_V2DImode: case E_V2DFmode:
32097 result = type;
32098 default: break;
32100 break;
32101 case 'b':
32102 switch (mode)
32104 case E_DImode: case E_V2DImode: result = bool_V2DI_type_node; break;
32105 case E_SImode: case E_V4SImode: result = bool_V4SI_type_node; break;
32106 case E_HImode: case E_V8HImode: result = bool_V8HI_type_node; break;
32107 case E_QImode: case E_V16QImode: result = bool_V16QI_type_node;
32108 default: break;
32110 break;
32111 case 'p':
32112 switch (mode)
32114 case E_V8HImode: result = pixel_V8HI_type_node;
32115 default: break;
32117 default: break;
32120 /* Propagate qualifiers attached to the element type
32121 onto the vector type. */
32122 if (result && result != type && TYPE_QUALS (type))
32123 result = build_qualified_type (result, TYPE_QUALS (type));
32125 *no_add_attrs = true; /* No need to hang on to the attribute. */
32127 if (result)
32128 *node = lang_hooks.types.reconstruct_complex_type (*node, result);
32130 return NULL_TREE;
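/* Examples (illustrative) of the forms handled above:
     __attribute__((altivec(vector__))) unsigned int   -> unsigned V4SI
     __attribute__((altivec(bool__)))   unsigned short -> bool V8HI
     __attribute__((altivec(pixel__)))  unsigned short -> pixel V8HI
   i.e. the 'vector unsigned int', 'vector bool short' and
   'vector pixel' types.  */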
32133 /* AltiVec defines five built-in scalar types that serve as vector
32134 elements; we must teach the compiler how to mangle them. The 128-bit
32135 floating point mangling is target-specific as well. */
32137 static const char *
32138 rs6000_mangle_type (const_tree type)
32140 type = TYPE_MAIN_VARIANT (type);
32142 if (TREE_CODE (type) != VOID_TYPE && TREE_CODE (type) != BOOLEAN_TYPE
32143 && TREE_CODE (type) != INTEGER_TYPE && TREE_CODE (type) != REAL_TYPE)
32144 return NULL;
32146 if (type == bool_char_type_node) return "U6__boolc";
32147 if (type == bool_short_type_node) return "U6__bools";
32148 if (type == pixel_type_node) return "u7__pixel";
32149 if (type == bool_int_type_node) return "U6__booli";
32150 if (type == bool_long_long_type_node) return "U6__boolx";
32152 if (SCALAR_FLOAT_TYPE_P (type) && FLOAT128_IBM_P (TYPE_MODE (type)))
32153 return "g";
32154 if (SCALAR_FLOAT_TYPE_P (type) && FLOAT128_IEEE_P (TYPE_MODE (type)))
32155 return ieee128_mangling_gcc_8_1 ? "U10__float128" : "u9__ieee128";
32157 /* For all other types, use the default mangling. */
32158 return NULL;
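/* Example manglings (illustrative): 'vector bool int' -> "U6__booli",
   'vector pixel' -> "u7__pixel", an IBM double-double 128-bit float ->
   "g", and an IEEE 128-bit float -> "u9__ieee128" (or the GCC 8.1
   compatibility name "U10__float128").  */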
32161 /* Handle a "longcall" or "shortcall" attribute; arguments as in
32162 struct attribute_spec.handler. */
32164 static tree
32165 rs6000_handle_longcall_attribute (tree *node, tree name,
32166 tree args ATTRIBUTE_UNUSED,
32167 int flags ATTRIBUTE_UNUSED,
32168 bool *no_add_attrs)
32170 if (TREE_CODE (*node) != FUNCTION_TYPE
32171 && TREE_CODE (*node) != FIELD_DECL
32172 && TREE_CODE (*node) != TYPE_DECL)
32174 warning (OPT_Wattributes, "%qE attribute only applies to functions",
32175 name);
32176 *no_add_attrs = true;
32179 return NULL_TREE;
32182 /* Set longcall attributes on all functions declared when
32183 rs6000_default_long_calls is true. */
32184 static void
32185 rs6000_set_default_type_attributes (tree type)
32187 if (rs6000_default_long_calls
32188 && (TREE_CODE (type) == FUNCTION_TYPE
32189 || TREE_CODE (type) == METHOD_TYPE))
32190 TYPE_ATTRIBUTES (type) = tree_cons (get_identifier ("longcall"),
32191 NULL_TREE,
32192 TYPE_ATTRIBUTES (type));
32194 #if TARGET_MACHO
32195 darwin_set_default_type_attributes (type);
32196 #endif
32199 /* Return a reference suitable for calling a function with the
32200 longcall attribute. */
32202 rtx
32203 rs6000_longcall_ref (rtx call_ref)
32205 const char *call_name;
32206 tree node;
32208 if (GET_CODE (call_ref) != SYMBOL_REF)
32209 return call_ref;
32211 /* System V adds '.' to the internal name, so skip them. */
32212 call_name = XSTR (call_ref, 0);
32213 if (*call_name == '.')
32215 while (*call_name == '.')
32216 call_name++;
32218 node = get_identifier (call_name);
32219 call_ref = gen_rtx_SYMBOL_REF (VOIDmode, IDENTIFIER_POINTER (node));
32222 return force_reg (Pmode, call_ref);
32225 #ifndef TARGET_USE_MS_BITFIELD_LAYOUT
32226 #define TARGET_USE_MS_BITFIELD_LAYOUT 0
32227 #endif
32229 /* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
32230 struct attribute_spec.handler. */
32231 static tree
32232 rs6000_handle_struct_attribute (tree *node, tree name,
32233 tree args ATTRIBUTE_UNUSED,
32234 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
32236 tree *type = NULL;
32237 if (DECL_P (*node))
32239 if (TREE_CODE (*node) == TYPE_DECL)
32240 type = &TREE_TYPE (*node);
32242 else
32243 type = node;
32245 if (!(type && (TREE_CODE (*type) == RECORD_TYPE
32246 || TREE_CODE (*type) == UNION_TYPE)))
32248 warning (OPT_Wattributes, "%qE attribute ignored", name);
32249 *no_add_attrs = true;
32252 else if ((is_attribute_p ("ms_struct", name)
32253 && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type)))
32254 || ((is_attribute_p ("gcc_struct", name)
32255 && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type)))))
32257 warning (OPT_Wattributes, "%qE incompatible attribute ignored",
32258 name);
32259 *no_add_attrs = true;
32262 return NULL_TREE;
32265 static bool
32266 rs6000_ms_bitfield_layout_p (const_tree record_type)
32268 return (TARGET_USE_MS_BITFIELD_LAYOUT &&
32269 !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
32270 || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type));
32273 #ifdef USING_ELFOS_H
32275 /* A get_unnamed_section callback, used for switching to toc_section. */
32277 static void
32278 rs6000_elf_output_toc_section_asm_op (const void *data ATTRIBUTE_UNUSED)
32280 if ((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
32281 && TARGET_MINIMAL_TOC)
32283 if (!toc_initialized)
32285 fprintf (asm_out_file, "%s\n", TOC_SECTION_ASM_OP);
32286 ASM_OUTPUT_ALIGN (asm_out_file, TARGET_64BIT ? 3 : 2);
32287 (*targetm.asm_out.internal_label) (asm_out_file, "LCTOC", 0);
32288 fprintf (asm_out_file, "\t.tc ");
32289 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (asm_out_file, "LCTOC1[TC],");
32290 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (asm_out_file, "LCTOC1");
32291 fprintf (asm_out_file, "\n");
32293 fprintf (asm_out_file, "%s\n", MINIMAL_TOC_SECTION_ASM_OP);
32294 ASM_OUTPUT_ALIGN (asm_out_file, TARGET_64BIT ? 3 : 2);
32295 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (asm_out_file, "LCTOC1");
32296 fprintf (asm_out_file, " = .+32768\n");
32297 toc_initialized = 1;
32299 else
32300 fprintf (asm_out_file, "%s\n", MINIMAL_TOC_SECTION_ASM_OP);
32302 else if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
32304 fprintf (asm_out_file, "%s\n", TOC_SECTION_ASM_OP);
32305 if (!toc_initialized)
32307 ASM_OUTPUT_ALIGN (asm_out_file, TARGET_64BIT ? 3 : 2);
32308 toc_initialized = 1;
32311 else
32313 fprintf (asm_out_file, "%s\n", MINIMAL_TOC_SECTION_ASM_OP);
32314 if (!toc_initialized)
32316 ASM_OUTPUT_ALIGN (asm_out_file, TARGET_64BIT ? 3 : 2);
32317 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (asm_out_file, "LCTOC1");
32318 fprintf (asm_out_file, " = .+32768\n");
32319 toc_initialized = 1;
32324 /* Implement TARGET_ASM_INIT_SECTIONS. */
32326 static void
32327 rs6000_elf_asm_init_sections (void)
32329 toc_section
32330 = get_unnamed_section (0, rs6000_elf_output_toc_section_asm_op, NULL);
32332 sdata2_section
32333 = get_unnamed_section (SECTION_WRITE, output_section_asm_op,
32334 SDATA2_SECTION_ASM_OP);
32337 /* Implement TARGET_SELECT_RTX_SECTION. */
32339 static section *
32340 rs6000_elf_select_rtx_section (machine_mode mode, rtx x,
32341 unsigned HOST_WIDE_INT align)
32343 if (ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (x, mode))
32344 return toc_section;
32345 else
32346 return default_elf_select_rtx_section (mode, x, align);
32349 /* For a SYMBOL_REF, set generic flags and then perform some
32350 target-specific processing.
32352 When the AIX ABI is requested on a non-AIX system, replace the
32353 function name with the real name (with a leading .) rather than the
32354 function descriptor name. This saves a lot of overriding code to
32355 read the prefixes. */
32357 static void rs6000_elf_encode_section_info (tree, rtx, int) ATTRIBUTE_UNUSED;
32358 static void
32359 rs6000_elf_encode_section_info (tree decl, rtx rtl, int first)
32361 default_encode_section_info (decl, rtl, first);
32363 if (first
32364 && TREE_CODE (decl) == FUNCTION_DECL
32365 && !TARGET_AIX
32366 && DEFAULT_ABI == ABI_AIX)
32368 rtx sym_ref = XEXP (rtl, 0);
32369 size_t len = strlen (XSTR (sym_ref, 0));
32370 char *str = XALLOCAVEC (char, len + 2);
32371 str[0] = '.';
32372 memcpy (str + 1, XSTR (sym_ref, 0), len + 1);
32373 XSTR (sym_ref, 0) = ggc_alloc_string (str, len + 1);
32377 static inline bool
32378 compare_section_name (const char *section, const char *templ)
32380 int len;
32382 len = strlen (templ);
32383 return (strncmp (section, templ, len) == 0
32384 && (section[len] == 0 || section[len] == '.'));
32387 bool
32388 rs6000_elf_in_small_data_p (const_tree decl)
32390 if (rs6000_sdata == SDATA_NONE)
32391 return false;
32393 /* We want to merge strings, so we never consider them small data. */
32394 if (TREE_CODE (decl) == STRING_CST)
32395 return false;
32397 /* Functions are never in the small data area. */
32398 if (TREE_CODE (decl) == FUNCTION_DECL)
32399 return false;
32401 if (TREE_CODE (decl) == VAR_DECL && DECL_SECTION_NAME (decl))
32403 const char *section = DECL_SECTION_NAME (decl);
32404 if (compare_section_name (section, ".sdata")
32405 || compare_section_name (section, ".sdata2")
32406 || compare_section_name (section, ".gnu.linkonce.s")
32407 || compare_section_name (section, ".sbss")
32408 || compare_section_name (section, ".sbss2")
32409 || compare_section_name (section, ".gnu.linkonce.sb")
32410 || strcmp (section, ".PPC.EMB.sdata0") == 0
32411 || strcmp (section, ".PPC.EMB.sbss0") == 0)
32412 return true;
32414 else
32416 /* If we are told not to put readonly data in sdata, then don't. */
32417 if (TREE_READONLY (decl) && rs6000_sdata != SDATA_EABI
32418 && !rs6000_readonly_in_sdata)
32419 return false;
32421 HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (decl));
32423 if (size > 0
32424 && size <= g_switch_value
32425 /* If it's not public, and we're not going to reference it there,
32426 there's no need to put it in the small data section. */
32427 && (rs6000_sdata != SDATA_DATA || TREE_PUBLIC (decl)))
32428 return true;
32431 return false;
32434 #endif /* USING_ELFOS_H */
32436 /* Implement TARGET_USE_BLOCKS_FOR_CONSTANT_P. */
32438 static bool
32439 rs6000_use_blocks_for_constant_p (machine_mode mode, const_rtx x)
32441 return !ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (x, mode);
32444 /* Do not place thread-local symbols refs in the object blocks. */
32446 static bool
32447 rs6000_use_blocks_for_decl_p (const_tree decl)
32449 return !DECL_THREAD_LOCAL_P (decl);
32452 /* Return a REG that occurs in ADDR with coefficient 1.
32453 ADDR can be effectively incremented by incrementing REG.
32455 r0 is special and we must not select it as an address
32456 register by this routine since our caller will try to
32457 increment the returned register via an "la" instruction. */
32459 rtx
32460 find_addr_reg (rtx addr)
32462 while (GET_CODE (addr) == PLUS)
32464 if (GET_CODE (XEXP (addr, 0)) == REG
32465 && REGNO (XEXP (addr, 0)) != 0)
32466 addr = XEXP (addr, 0);
32467 else if (GET_CODE (XEXP (addr, 1)) == REG
32468 && REGNO (XEXP (addr, 1)) != 0)
32469 addr = XEXP (addr, 1);
32470 else if (CONSTANT_P (XEXP (addr, 0)))
32471 addr = XEXP (addr, 1);
32472 else if (CONSTANT_P (XEXP (addr, 1)))
32473 addr = XEXP (addr, 0);
32474 else
32475 gcc_unreachable ();
32477 gcc_assert (GET_CODE (addr) == REG && REGNO (addr) != 0);
32478 return addr;
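/* Example (illustrative): for ADDR == (plus (reg 9) (const_int 16))
   the loop above selects (reg 9); r0 is rejected because "la" would
   read it as the constant zero rather than as a base register.  */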
32481 void
32482 rs6000_fatal_bad_address (rtx op)
32484 fatal_insn ("bad address", op);
32487 #if TARGET_MACHO
32489 typedef struct branch_island_d {
32490 tree function_name;
32491 tree label_name;
32492 int line_number;
32493 } branch_island;
32496 static vec<branch_island, va_gc> *branch_islands;
32498 /* Remember to generate a branch island for far calls to the given
32499 function. */
32501 static void
32502 add_compiler_branch_island (tree label_name, tree function_name,
32503 int line_number)
32505 branch_island bi = {function_name, label_name, line_number};
32506 vec_safe_push (branch_islands, bi);
32509 /* Generate far-jump branch islands for everything recorded in
32510 branch_islands. Invoked immediately after the last instruction of
32511 the epilogue has been emitted; the branch islands must be appended
32512 to, and contiguous with, the function body. Mach-O stubs are
32513 generated in machopic_output_stub(). */
32515 static void
32516 macho_branch_islands (void)
32518 char tmp_buf[512];
32520 while (!vec_safe_is_empty (branch_islands))
32522 branch_island *bi = &branch_islands->last ();
32523 const char *label = IDENTIFIER_POINTER (bi->label_name);
32524 const char *name = IDENTIFIER_POINTER (bi->function_name);
32525 char name_buf[512];
32526 /* Cheap copy of the details from the Darwin ASM_OUTPUT_LABELREF(). */
32527 if (name[0] == '*' || name[0] == '&')
32528 strcpy (name_buf, name+1);
32529 else
32531 name_buf[0] = '_';
32532 strcpy (name_buf+1, name);
32534 strcpy (tmp_buf, "\n");
32535 strcat (tmp_buf, label);
32536 #if defined (DBX_DEBUGGING_INFO) || defined (XCOFF_DEBUGGING_INFO)
32537 if (write_symbols == DBX_DEBUG || write_symbols == XCOFF_DEBUG)
32538 dbxout_stabd (N_SLINE, bi->line_number);
32539 #endif /* DBX_DEBUGGING_INFO || XCOFF_DEBUGGING_INFO */
32540 if (flag_pic)
32542 if (TARGET_LINK_STACK)
32544 char name[32];
32545 get_ppc476_thunk_name (name);
32546 strcat (tmp_buf, ":\n\tmflr r0\n\tbl ");
32547 strcat (tmp_buf, name);
32548 strcat (tmp_buf, "\n");
32549 strcat (tmp_buf, label);
32550 strcat (tmp_buf, "_pic:\n\tmflr r11\n");
32552 else
32554 strcat (tmp_buf, ":\n\tmflr r0\n\tbcl 20,31,");
32555 strcat (tmp_buf, label);
32556 strcat (tmp_buf, "_pic\n");
32557 strcat (tmp_buf, label);
32558 strcat (tmp_buf, "_pic:\n\tmflr r11\n");
32561 strcat (tmp_buf, "\taddis r11,r11,ha16(");
32562 strcat (tmp_buf, name_buf);
32563 strcat (tmp_buf, " - ");
32564 strcat (tmp_buf, label);
32565 strcat (tmp_buf, "_pic)\n");
32567 strcat (tmp_buf, "\tmtlr r0\n");
32569 strcat (tmp_buf, "\taddi r12,r11,lo16(");
32570 strcat (tmp_buf, name_buf);
32571 strcat (tmp_buf, " - ");
32572 strcat (tmp_buf, label);
32573 strcat (tmp_buf, "_pic)\n");
32575 strcat (tmp_buf, "\tmtctr r12\n\tbctr\n");
32577 else
32579 strcat (tmp_buf, ":\nlis r12,hi16(");
32580 strcat (tmp_buf, name_buf);
32581 strcat (tmp_buf, ")\n\tori r12,r12,lo16(");
32582 strcat (tmp_buf, name_buf);
32583 strcat (tmp_buf, ")\n\tmtctr r12\n\tbctr");
32585 output_asm_insn (tmp_buf, 0);
32586 #if defined (DBX_DEBUGGING_INFO) || defined (XCOFF_DEBUGGING_INFO)
32587 if (write_symbols == DBX_DEBUG || write_symbols == XCOFF_DEBUG)
32588 dbxout_stabd (N_SLINE, bi->line_number);
32589 #endif /* DBX_DEBUGGING_INFO || XCOFF_DEBUGGING_INFO */
32590 branch_islands->pop ();
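/* Shape of a non-PIC island emitted above (illustrative; "L42" and
   "_foo" are made-up names):
     L42:
         lis r12,hi16(_foo)
         ori r12,r12,lo16(_foo)
         mtctr r12
         bctr  */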
32594 /* NO_PREVIOUS_DEF checks in the list of branch islands whether the
32595    function name is already there or not.  */
32597 static int
32598 no_previous_def (tree function_name)
32600 branch_island *bi;
32601 unsigned ix;
32603 FOR_EACH_VEC_SAFE_ELT (branch_islands, ix, bi)
32604 if (function_name == bi->function_name)
32605 return 0;
32606 return 1;
32609 /* GET_PREV_LABEL gets the label name from the previous definition of
32610 the function. */
32612 static tree
32613 get_prev_label (tree function_name)
32615 branch_island *bi;
32616 unsigned ix;
32618 FOR_EACH_VEC_SAFE_ELT (branch_islands, ix, bi)
32619 if (function_name == bi->function_name)
32620 return bi->label_name;
32621 return NULL_TREE;
32624 /* INSN is either a function call or a millicode call. It may have an
32625 unconditional jump in its delay slot.
32627 CALL_DEST is the routine we are calling. */
32629 char *
32630 output_call (rtx_insn *insn, rtx *operands, int dest_operand_number,
32631 int cookie_operand_number)
32633 static char buf[256];
32634 if (darwin_emit_branch_islands
32635 && GET_CODE (operands[dest_operand_number]) == SYMBOL_REF
32636 && (INTVAL (operands[cookie_operand_number]) & CALL_LONG))
32638 tree labelname;
32639 tree funname = get_identifier (XSTR (operands[dest_operand_number], 0));
32641 if (no_previous_def (funname))
32643 rtx label_rtx = gen_label_rtx ();
32644 char *label_buf, temp_buf[256];
32645 ASM_GENERATE_INTERNAL_LABEL (temp_buf, "L",
32646 CODE_LABEL_NUMBER (label_rtx));
32647 label_buf = temp_buf[0] == '*' ? temp_buf + 1 : temp_buf;
32648 labelname = get_identifier (label_buf);
32649 add_compiler_branch_island (labelname, funname, insn_line (insn));
32651 else
32652 labelname = get_prev_label (funname);
32654 /* "jbsr foo, L42" is Mach-O for "Link as 'bl foo' if a 'bl'
32655 instruction will reach 'foo', otherwise link as 'bl L42'".
32656 "L42" should be a 'branch island', that will do a far jump to
32657 'foo'. Branch islands are generated in
32658 macho_branch_islands(). */
32659 sprintf (buf, "jbsr %%z%d,%.246s",
32660 dest_operand_number, IDENTIFIER_POINTER (labelname));
32662 else
32663 sprintf (buf, "bl %%z%d", dest_operand_number);
32664 return buf;
32667 /* Generate PIC and indirect symbol stubs. */
32669 void
32670 machopic_output_stub (FILE *file, const char *symb, const char *stub)
32672 unsigned int length;
32673 char *symbol_name, *lazy_ptr_name;
32674 char *local_label_0;
32675 static int label = 0;
32677 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
32678 symb = (*targetm.strip_name_encoding) (symb);
32681 length = strlen (symb);
32682 symbol_name = XALLOCAVEC (char, length + 32);
32683 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);
32685 lazy_ptr_name = XALLOCAVEC (char, length + 32);
32686 GEN_LAZY_PTR_NAME_FOR_SYMBOL (lazy_ptr_name, symb, length);
32688 if (flag_pic == 2)
32689 switch_to_section (darwin_sections[machopic_picsymbol_stub1_section]);
32690 else
32691 switch_to_section (darwin_sections[machopic_symbol_stub1_section]);
32693 if (flag_pic == 2)
32695 fprintf (file, "\t.align 5\n");
32697 fprintf (file, "%s:\n", stub);
32698 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
32700 label++;
32701 local_label_0 = XALLOCAVEC (char, sizeof ("\"L00000000000$spb\""));
32702 sprintf (local_label_0, "\"L%011d$spb\"", label);
32704 fprintf (file, "\tmflr r0\n");
32705 if (TARGET_LINK_STACK)
32707 char name[32];
32708 get_ppc476_thunk_name (name);
32709 fprintf (file, "\tbl %s\n", name);
32710 fprintf (file, "%s:\n\tmflr r11\n", local_label_0);
32712 else
32714 fprintf (file, "\tbcl 20,31,%s\n", local_label_0);
32715 fprintf (file, "%s:\n\tmflr r11\n", local_label_0);
32717 fprintf (file, "\taddis r11,r11,ha16(%s-%s)\n",
32718 lazy_ptr_name, local_label_0);
32719 fprintf (file, "\tmtlr r0\n");
32720 fprintf (file, "\t%s r12,lo16(%s-%s)(r11)\n",
32721 (TARGET_64BIT ? "ldu" : "lwzu"),
32722 lazy_ptr_name, local_label_0);
32723 fprintf (file, "\tmtctr r12\n");
32724 fprintf (file, "\tbctr\n");
32726 else
32728 fprintf (file, "\t.align 4\n");
32730 fprintf (file, "%s:\n", stub);
32731 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
32733 fprintf (file, "\tlis r11,ha16(%s)\n", lazy_ptr_name);
32734 fprintf (file, "\t%s r12,lo16(%s)(r11)\n",
32735 (TARGET_64BIT ? "ldu" : "lwzu"),
32736 lazy_ptr_name);
32737 fprintf (file, "\tmtctr r12\n");
32738 fprintf (file, "\tbctr\n");
32741 switch_to_section (darwin_sections[machopic_lazy_symbol_ptr_section]);
32742 fprintf (file, "%s:\n", lazy_ptr_name);
32743 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
32744 fprintf (file, "%sdyld_stub_binding_helper\n",
32745 (TARGET_64BIT ? DOUBLE_INT_ASM_OP : "\t.long\t"));
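/* For reference (not from the original source): for a hypothetical
   symbol _foo, the non-PIC (flag_pic != 2) path above prints a stub of
   roughly this shape on a 32-bit target:

       .align 4
   L_foo$stub:
       .indirect_symbol _foo
       lis r11,ha16(L_foo$lazy_ptr)
       lwzu r12,lo16(L_foo$lazy_ptr)(r11)
       mtctr r12
       bctr

   followed by the lazy pointer, which initially resolves through
   dyld_stub_binding_helper.  The exact stub and lazy-pointer label
   names are generated by the GEN_*_NAME_FOR_SYMBOL macros.  */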
32748 /* Legitimize PIC addresses. If the address is already
32749 position-independent, we return ORIG. Newly generated
32750 position-independent addresses go into a reg. This is REG if non
32751 zero, otherwise we allocate register(s) as necessary. */
32753 #define SMALL_INT(X) ((UINTVAL (X) + 0x8000) < 0x10000)
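/* A worked check of the macro above: adding 0x8000 biases the value so
   that the signed 16-bit range becomes a single unsigned interval;
   (UINTVAL (X) + 0x8000) < 0x10000 holds exactly when
   -0x8000 <= X <= 0x7fff, i.e. when X fits in a 16-bit signed
   displacement.  */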
32756 rs6000_machopic_legitimize_pic_address (rtx orig, machine_mode mode,
32757 rtx reg)
32759 rtx base, offset;
32761 if (reg == NULL && !reload_completed)
32762 reg = gen_reg_rtx (Pmode);
32764 if (GET_CODE (orig) == CONST)
32766 rtx reg_temp;
32768 if (GET_CODE (XEXP (orig, 0)) == PLUS
32769 && XEXP (XEXP (orig, 0), 0) == pic_offset_table_rtx)
32770 return orig;
32772 gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS);
32774 /* Use a different reg for the intermediate value, as
32775 it will be marked UNCHANGING. */
32776 reg_temp = !can_create_pseudo_p () ? reg : gen_reg_rtx (Pmode);
32777 base = rs6000_machopic_legitimize_pic_address (XEXP (XEXP (orig, 0), 0),
32778 Pmode, reg_temp);
32779 offset =
32780 rs6000_machopic_legitimize_pic_address (XEXP (XEXP (orig, 0), 1),
32781 Pmode, reg);
32783 if (GET_CODE (offset) == CONST_INT)
32785 if (SMALL_INT (offset))
32786 return plus_constant (Pmode, base, INTVAL (offset));
32787 else if (!reload_completed)
32788 offset = force_reg (Pmode, offset);
32789 else
32791 rtx mem = force_const_mem (Pmode, orig);
32792 return machopic_legitimize_pic_address (mem, Pmode, reg);
32795 return gen_rtx_PLUS (Pmode, base, offset);
32798 /* Fall back on generic machopic code. */
32799 return machopic_legitimize_pic_address (orig, mode, reg);
32802 /* Output a .machine directive for the Darwin assembler, and call
32803 the generic start_file routine. */
32805 static void
32806 rs6000_darwin_file_start (void)
32808 static const struct
32810 const char *arg;
32811 const char *name;
32812 HOST_WIDE_INT if_set;
32813 } mapping[] = {
32814 { "ppc64", "ppc64", MASK_64BIT },
32815 { "970", "ppc970", MASK_PPC_GPOPT | MASK_MFCRF | MASK_POWERPC64 },
32816 { "power4", "ppc970", 0 },
32817 { "G5", "ppc970", 0 },
32818 { "7450", "ppc7450", 0 },
32819 { "7400", "ppc7400", MASK_ALTIVEC },
32820 { "G4", "ppc7400", 0 },
32821 { "750", "ppc750", 0 },
32822 { "740", "ppc750", 0 },
32823 { "G3", "ppc750", 0 },
32824 { "604e", "ppc604e", 0 },
32825 { "604", "ppc604", 0 },
32826 { "603e", "ppc603", 0 },
32827 { "603", "ppc603", 0 },
32828 { "601", "ppc601", 0 },
32829 { NULL, "ppc", 0 } };
32830 const char *cpu_id = "";
32831 size_t i;
32833 rs6000_file_start ();
32834 darwin_file_start ();
32836 /* Determine the argument to -mcpu=. Default to G3 if not specified. */
32838 if (rs6000_default_cpu != 0 && rs6000_default_cpu[0] != '\0')
32839 cpu_id = rs6000_default_cpu;
32841 if (global_options_set.x_rs6000_cpu_index)
32842 cpu_id = processor_target_table[rs6000_cpu_index].name;
32844 /* Look through the mapping array. Pick the first name that either
32845 matches the argument, has a bit set in IF_SET that is also set
32846 in the target flags, or has a NULL name. */
32848 i = 0;
32849 while (mapping[i].arg != NULL
32850 && strcmp (mapping[i].arg, cpu_id) != 0
32851 && (mapping[i].if_set & rs6000_isa_flags) == 0)
32852 i++;
32854 fprintf (asm_out_file, "\t.machine %s\n", mapping[i].name);
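/* For example, -mcpu=G4 matches the "G4" entry in the table above and
   emits "\t.machine ppc7400", while a CPU name with no matching entry
   and no matching IF_SET bits falls through to the NULL sentinel and
   emits "\t.machine ppc".  */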
32857 #endif /* TARGET_MACHO */
32859 #if TARGET_ELF
32860 static int
32861 rs6000_elf_reloc_rw_mask (void)
32863 if (flag_pic)
32864 return 3;
32865 else if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
32866 return 2;
32867 else
32868 return 0;
32871 /* Record an element in the table of global constructors. SYMBOL is
32872 a SYMBOL_REF of the function to be called; PRIORITY is a number
32873 between 0 and MAX_INIT_PRIORITY.
32875 This differs from default_named_section_asm_out_constructor in
32876 that we have special handling for -mrelocatable. */
32878 static void rs6000_elf_asm_out_constructor (rtx, int) ATTRIBUTE_UNUSED;
32879 static void
32880 rs6000_elf_asm_out_constructor (rtx symbol, int priority)
32882 const char *section = ".ctors";
32883 char buf[18];
32885 if (priority != DEFAULT_INIT_PRIORITY)
32887 sprintf (buf, ".ctors.%.5u",
32888 /* Invert the numbering so the linker puts us in the proper
32889 order; constructors are run from right to left, and the
32890 linker sorts in increasing order. */
32891 MAX_INIT_PRIORITY - priority);
32892 section = buf;
32895 switch_to_section (get_section (section, SECTION_WRITE, NULL));
32896 assemble_align (POINTER_SIZE);
32898 if (DEFAULT_ABI == ABI_V4
32899 && (TARGET_RELOCATABLE || flag_pic > 1))
32901 fputs ("\t.long (", asm_out_file);
32902 output_addr_const (asm_out_file, symbol);
32903 fputs (")@fixup\n", asm_out_file);
32905 else
32906 assemble_integer (symbol, POINTER_SIZE / BITS_PER_UNIT, POINTER_SIZE, 1);
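/* A concrete example of the inversion above (illustrative only): with
   MAX_INIT_PRIORITY of 65535, a constructor of priority 101 lands in
   section ".ctors.65434".  The linker's ascending sort therefore
   places higher-priority entries later in .ctors, and because .ctors
   entries are run from right to left they execute first.  */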
32909 static void rs6000_elf_asm_out_destructor (rtx, int) ATTRIBUTE_UNUSED;
32910 static void
32911 rs6000_elf_asm_out_destructor (rtx symbol, int priority)
32913 const char *section = ".dtors";
32914 char buf[18];
32916 if (priority != DEFAULT_INIT_PRIORITY)
32918 sprintf (buf, ".dtors.%.5u",
32919 /* Invert the numbering so the linker puts us in the proper
32920    order; destructors are run from right to left, and the
32921 linker sorts in increasing order. */
32922 MAX_INIT_PRIORITY - priority);
32923 section = buf;
32926 switch_to_section (get_section (section, SECTION_WRITE, NULL));
32927 assemble_align (POINTER_SIZE);
32929 if (DEFAULT_ABI == ABI_V4
32930 && (TARGET_RELOCATABLE || flag_pic > 1))
32932 fputs ("\t.long (", asm_out_file);
32933 output_addr_const (asm_out_file, symbol);
32934 fputs (")@fixup\n", asm_out_file);
32936 else
32937 assemble_integer (symbol, POINTER_SIZE / BITS_PER_UNIT, POINTER_SIZE, 1);
32940 void
32941 rs6000_elf_declare_function_name (FILE *file, const char *name, tree decl)
32943 if (TARGET_64BIT && DEFAULT_ABI != ABI_ELFv2)
32945 fputs ("\t.section\t\".opd\",\"aw\"\n\t.align 3\n", file);
32946 ASM_OUTPUT_LABEL (file, name);
32947 fputs (DOUBLE_INT_ASM_OP, file);
32948 rs6000_output_function_entry (file, name);
32949 fputs (",.TOC.@tocbase,0\n\t.previous\n", file);
32950 if (DOT_SYMBOLS)
32952 fputs ("\t.size\t", file);
32953 assemble_name (file, name);
32954 fputs (",24\n\t.type\t.", file);
32955 assemble_name (file, name);
32956 fputs (",@function\n", file);
32957 if (TREE_PUBLIC (decl) && ! DECL_WEAK (decl))
32959 fputs ("\t.globl\t.", file);
32960 assemble_name (file, name);
32961 putc ('\n', file);
32964 else
32965 ASM_OUTPUT_TYPE_DIRECTIVE (file, name, "function");
32966 ASM_DECLARE_RESULT (file, DECL_RESULT (decl));
32967 rs6000_output_function_entry (file, name);
32968 fputs (":\n", file);
32969 return;
32972 int uses_toc;
32973 if (DEFAULT_ABI == ABI_V4
32974 && (TARGET_RELOCATABLE || flag_pic > 1)
32975 && !TARGET_SECURE_PLT
32976 && (!constant_pool_empty_p () || crtl->profile)
32977 && (uses_toc = uses_TOC ()))
32979 char buf[256];
32981 if (uses_toc == 2)
32982 switch_to_other_text_partition ();
32983 (*targetm.asm_out.internal_label) (file, "LCL", rs6000_pic_labelno);
32985 fprintf (file, "\t.long ");
32986 assemble_name (file, toc_label_name);
32987 need_toc_init = 1;
32988 putc ('-', file);
32989 ASM_GENERATE_INTERNAL_LABEL (buf, "LCF", rs6000_pic_labelno);
32990 assemble_name (file, buf);
32991 putc ('\n', file);
32992 if (uses_toc == 2)
32993 switch_to_other_text_partition ();
32996 ASM_OUTPUT_TYPE_DIRECTIVE (file, name, "function");
32997 ASM_DECLARE_RESULT (file, DECL_RESULT (decl));
32999 if (TARGET_CMODEL == CMODEL_LARGE && rs6000_global_entry_point_needed_p ())
33001 char buf[256];
33003 (*targetm.asm_out.internal_label) (file, "LCL", rs6000_pic_labelno);
33005 fprintf (file, "\t.quad .TOC.-");
33006 ASM_GENERATE_INTERNAL_LABEL (buf, "LCF", rs6000_pic_labelno);
33007 assemble_name (file, buf);
33008 putc ('\n', file);
33011 if (DEFAULT_ABI == ABI_AIX)
33013 const char *desc_name, *orig_name;
33015 orig_name = (*targetm.strip_name_encoding) (name);
33016 desc_name = orig_name;
33017 while (*desc_name == '.')
33018 desc_name++;
33020 if (TREE_PUBLIC (decl))
33021 fprintf (file, "\t.globl %s\n", desc_name);
33023 fprintf (file, "%s\n", MINIMAL_TOC_SECTION_ASM_OP);
33024 fprintf (file, "%s:\n", desc_name);
33025 fprintf (file, "\t.long %s\n", orig_name);
33026 fputs ("\t.long _GLOBAL_OFFSET_TABLE_\n", file);
33027 fputs ("\t.long 0\n", file);
33028 fprintf (file, "\t.previous\n");
33030 ASM_OUTPUT_LABEL (file, name);
33033 static void rs6000_elf_file_end (void) ATTRIBUTE_UNUSED;
33034 static void
33035 rs6000_elf_file_end (void)
33037 #ifdef HAVE_AS_GNU_ATTRIBUTE
33038 /* ??? The value emitted depends on options active at file end.
33039 Assume anyone using #pragma or attributes that might change
33040 options knows what they are doing. */
33041 if ((TARGET_64BIT || DEFAULT_ABI == ABI_V4)
33042 && rs6000_passes_float)
33044 int fp;
33046 if (TARGET_HARD_FLOAT)
33047 fp = 1;
33048 else
33049 fp = 2;
33050 if (rs6000_passes_long_double)
33052 if (!TARGET_LONG_DOUBLE_128)
33053 fp |= 2 * 4;
33054 else if (TARGET_IEEEQUAD)
33055 fp |= 3 * 4;
33056 else
33057 fp |= 1 * 4;
33059 fprintf (asm_out_file, "\t.gnu_attribute 4, %d\n", fp);
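/* Decoding the value computed above: the low two bits describe the
   scalar float ABI (1 = hard float, 2 = soft float) and the next two
   bits the long double format (1 = 128-bit IBM extended, 2 = 64-bit,
   3 = IEEE 128-bit).  For example, hard float with IBM long double
   emits ".gnu_attribute 4, 5".  */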
33061 if (TARGET_32BIT && DEFAULT_ABI == ABI_V4)
33063 if (rs6000_passes_vector)
33064 fprintf (asm_out_file, "\t.gnu_attribute 8, %d\n",
33065 (TARGET_ALTIVEC_ABI ? 2 : 1));
33066 if (rs6000_returns_struct)
33067 fprintf (asm_out_file, "\t.gnu_attribute 12, %d\n",
33068 aix_struct_return ? 2 : 1);
33070 #endif
33071 #if defined (POWERPC_LINUX) || defined (POWERPC_FREEBSD)
33072 if (TARGET_32BIT || DEFAULT_ABI == ABI_ELFv2)
33073 file_end_indicate_exec_stack ();
33074 #endif
33076 if (flag_split_stack)
33077 file_end_indicate_split_stack ();
33079 if (cpu_builtin_p)
33081 /* We have expanded a CPU builtin, so we need to emit a reference to
33082    the special symbol that LIBC uses to declare that it supports the
33083    AT_PLATFORM and AT_HWCAP/AT_HWCAP2-in-TCB feature.  */
33084 switch_to_section (data_section);
33085 fprintf (asm_out_file, "\t.align %u\n", TARGET_32BIT ? 2 : 3);
33086 fprintf (asm_out_file, "\t%s %s\n",
33087 TARGET_32BIT ? ".long" : ".quad", tcb_verification_symbol);
33090 #endif
33092 #if TARGET_XCOFF
33094 #ifndef HAVE_XCOFF_DWARF_EXTRAS
33095 #define HAVE_XCOFF_DWARF_EXTRAS 0
33096 #endif
33098 static enum unwind_info_type
33099 rs6000_xcoff_debug_unwind_info (void)
33101 return UI_NONE;
33104 static void
33105 rs6000_xcoff_asm_output_anchor (rtx symbol)
33107 char buffer[100];
33109 sprintf (buffer, "$ + " HOST_WIDE_INT_PRINT_DEC,
33110 SYMBOL_REF_BLOCK_OFFSET (symbol));
33111 fprintf (asm_out_file, "%s", SET_ASM_OP);
33112 RS6000_OUTPUT_BASENAME (asm_out_file, XSTR (symbol, 0));
33113 fprintf (asm_out_file, ",");
33114 RS6000_OUTPUT_BASENAME (asm_out_file, buffer);
33115 fprintf (asm_out_file, "\n");
33118 static void
33119 rs6000_xcoff_asm_globalize_label (FILE *stream, const char *name)
33121 fputs (GLOBAL_ASM_OP, stream);
33122 RS6000_OUTPUT_BASENAME (stream, name);
33123 putc ('\n', stream);
33126 /* A get_unnamed_decl callback, used for read-only sections.  DIRECTIVE
33127    points to the section string variable.  */
33129 static void
33130 rs6000_xcoff_output_readonly_section_asm_op (const void *directive)
33132 fprintf (asm_out_file, "\t.csect %s[RO],%s\n",
33133 *(const char *const *) directive,
33134 XCOFF_CSECT_DEFAULT_ALIGNMENT_STR);
33137 /* Likewise for read-write sections. */
33139 static void
33140 rs6000_xcoff_output_readwrite_section_asm_op (const void *directive)
33142 fprintf (asm_out_file, "\t.csect %s[RW],%s\n",
33143 *(const char *const *) directive,
33144 XCOFF_CSECT_DEFAULT_ALIGNMENT_STR);
33147 static void
33148 rs6000_xcoff_output_tls_section_asm_op (const void *directive)
33150 fprintf (asm_out_file, "\t.csect %s[TL],%s\n",
33151 *(const char *const *) directive,
33152 XCOFF_CSECT_DEFAULT_ALIGNMENT_STR);
33155 /* A get_unnamed_section callback, used for switching to toc_section. */
33157 static void
33158 rs6000_xcoff_output_toc_section_asm_op (const void *data ATTRIBUTE_UNUSED)
33160 if (TARGET_MINIMAL_TOC)
33162 /* toc_section is always selected at least once from
33163    rs6000_xcoff_file_start, so this is guaranteed to be
33164    defined exactly once in each file.  */
33165 if (!toc_initialized)
33167 fputs ("\t.toc\nLCTOC..1:\n", asm_out_file);
33168 fputs ("\t.tc toc_table[TC],toc_table[RW]\n", asm_out_file);
33169 toc_initialized = 1;
33171 fprintf (asm_out_file, "\t.csect toc_table[RW]%s\n",
33172 (TARGET_32BIT ? "" : ",3"));
33174 else
33175 fputs ("\t.toc\n", asm_out_file);
33178 /* Implement TARGET_ASM_INIT_SECTIONS. */
33180 static void
33181 rs6000_xcoff_asm_init_sections (void)
33183 read_only_data_section
33184 = get_unnamed_section (0, rs6000_xcoff_output_readonly_section_asm_op,
33185 &xcoff_read_only_section_name);
33187 private_data_section
33188 = get_unnamed_section (SECTION_WRITE,
33189 rs6000_xcoff_output_readwrite_section_asm_op,
33190 &xcoff_private_data_section_name);
33192 tls_data_section
33193 = get_unnamed_section (SECTION_TLS,
33194 rs6000_xcoff_output_tls_section_asm_op,
33195 &xcoff_tls_data_section_name);
33197 tls_private_data_section
33198 = get_unnamed_section (SECTION_TLS,
33199 rs6000_xcoff_output_tls_section_asm_op,
33200 &xcoff_private_data_section_name);
33202 read_only_private_data_section
33203 = get_unnamed_section (0, rs6000_xcoff_output_readonly_section_asm_op,
33204 &xcoff_private_data_section_name);
33206 toc_section
33207 = get_unnamed_section (0, rs6000_xcoff_output_toc_section_asm_op, NULL);
33209 readonly_data_section = read_only_data_section;
33212 static int
33213 rs6000_xcoff_reloc_rw_mask (void)
33215 return 3;
33218 static void
33219 rs6000_xcoff_asm_named_section (const char *name, unsigned int flags,
33220 tree decl ATTRIBUTE_UNUSED)
33222 int smclass;
33223 static const char * const suffix[5] = { "PR", "RO", "RW", "TL", "XO" };
33225 if (flags & SECTION_EXCLUDE)
33226 smclass = 4;
33227 else if (flags & SECTION_DEBUG)
33229 fprintf (asm_out_file, "\t.dwsect %s\n", name);
33230 return;
33232 else if (flags & SECTION_CODE)
33233 smclass = 0;
33234 else if (flags & SECTION_TLS)
33235 smclass = 3;
33236 else if (flags & SECTION_WRITE)
33237 smclass = 2;
33238 else
33239 smclass = 1;
33241 fprintf (asm_out_file, "\t.csect %s%s[%s],%u\n",
33242 (flags & SECTION_CODE) ? "." : "",
33243 name, suffix[smclass], flags & SECTION_ENTSIZE);
33246 #define IN_NAMED_SECTION(DECL) \
33247 ((TREE_CODE (DECL) == FUNCTION_DECL || TREE_CODE (DECL) == VAR_DECL) \
33248 && DECL_SECTION_NAME (DECL) != NULL)
33250 static section *
33251 rs6000_xcoff_select_section (tree decl, int reloc,
33252 unsigned HOST_WIDE_INT align)
33254 /* Place variables with alignment stricter than BIGGEST_ALIGNMENT into
33255    a named section.  */
33256 if (align > BIGGEST_ALIGNMENT)
33258 resolve_unique_section (decl, reloc, true);
33259 if (IN_NAMED_SECTION (decl))
33260 return get_named_section (decl, NULL, reloc);
33263 if (decl_readonly_section (decl, reloc))
33265 if (TREE_PUBLIC (decl))
33266 return read_only_data_section;
33267 else
33268 return read_only_private_data_section;
33270 else
33272 #if HAVE_AS_TLS
33273 if (TREE_CODE (decl) == VAR_DECL && DECL_THREAD_LOCAL_P (decl))
33275 if (TREE_PUBLIC (decl))
33276 return tls_data_section;
33277 else if (bss_initializer_p (decl))
33279 /* Convert to COMMON to emit in BSS. */
33280 DECL_COMMON (decl) = 1;
33281 return tls_comm_section;
33283 else
33284 return tls_private_data_section;
33286 else
33287 #endif
33288 if (TREE_PUBLIC (decl))
33289 return data_section;
33290 else
33291 return private_data_section;
33295 static void
33296 rs6000_xcoff_unique_section (tree decl, int reloc ATTRIBUTE_UNUSED)
33298 const char *name;
33300 /* Use select_section for private data and uninitialized data with
33301 alignment <= BIGGEST_ALIGNMENT. */
33302 if (!TREE_PUBLIC (decl)
33303 || DECL_COMMON (decl)
33304 || (DECL_INITIAL (decl) == NULL_TREE
33305 && DECL_ALIGN (decl) <= BIGGEST_ALIGNMENT)
33306 || DECL_INITIAL (decl) == error_mark_node
33307 || (flag_zero_initialized_in_bss
33308 && initializer_zerop (DECL_INITIAL (decl))))
33309 return;
33311 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
33312 name = (*targetm.strip_name_encoding) (name);
33313 set_decl_section_name (decl, name);
33316 /* Select the section for a constant in the constant pool.
33318 On RS/6000, all constants are in the private read-only data area.
33319 However, if this is being placed in the TOC it must be output as a
33320 toc entry. */
33322 static section *
33323 rs6000_xcoff_select_rtx_section (machine_mode mode, rtx x,
33324 unsigned HOST_WIDE_INT align ATTRIBUTE_UNUSED)
33326 if (ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (x, mode))
33327 return toc_section;
33328 else
33329 return read_only_private_data_section;
33332 /* Remove any trailing [DS] or the like from the symbol name. */
33334 static const char *
33335 rs6000_xcoff_strip_name_encoding (const char *name)
33337 size_t len;
33338 if (*name == '*')
33339 name++;
33340 len = strlen (name);
33341 if (name[len - 1] == ']')
33342 return ggc_alloc_string (name, len - 4);
33343 else
33344 return name;
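/* Illustrative example: "foo[DS]" is returned as "foo"; the len - 4
   computation assumes the trailing mapping class is always a
   two-letter code in brackets, as in "[DS]", "[RW]" or "[UA]".  */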
33347 /* Section attributes. AIX is always PIC. */
33349 static unsigned int
33350 rs6000_xcoff_section_type_flags (tree decl, const char *name, int reloc)
33352 unsigned int align;
33353 unsigned int flags = default_section_type_flags (decl, name, reloc);
33355 /* Align to at least UNIT size. */
33356 if ((flags & SECTION_CODE) != 0 || !decl || !DECL_P (decl))
33357 align = MIN_UNITS_PER_WORD;
33358 else
33359 /* Increase alignment of large objects if not already stricter. */
33360 align = MAX ((DECL_ALIGN (decl) / BITS_PER_UNIT),
33361 int_size_in_bytes (TREE_TYPE (decl)) > MIN_UNITS_PER_WORD
33362 ? UNITS_PER_FP_WORD : MIN_UNITS_PER_WORD);
33364 return flags | (exact_log2 (align) & SECTION_ENTSIZE);
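/* A quick example of the encoding above (not from the original
   source): on a 32-bit target, a 16-byte object aligned to 8 bytes
   yields align = 8, and exact_log2 (8) = 3 is stowed in the
   SECTION_ENTSIZE bits of the returned flags.  */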
33367 /* Output at beginning of assembler file.
33369 Initialize the section names for the RS/6000 at this point.
33371 Specify filename, including full path, to assembler.
33373 We want to go into the TOC section so at least one .toc will be emitted.
33374 Also, in order to output proper .bs/.es pairs, we need at least one static
33375 [RW] section emitted.
33377 Finally, declare mcount when profiling to make the assembler happy. */
33379 static void
33380 rs6000_xcoff_file_start (void)
33382 rs6000_gen_section_name (&xcoff_bss_section_name,
33383 main_input_filename, ".bss_");
33384 rs6000_gen_section_name (&xcoff_private_data_section_name,
33385 main_input_filename, ".rw_");
33386 rs6000_gen_section_name (&xcoff_read_only_section_name,
33387 main_input_filename, ".ro_");
33388 rs6000_gen_section_name (&xcoff_tls_data_section_name,
33389 main_input_filename, ".tls_");
33390 rs6000_gen_section_name (&xcoff_tbss_section_name,
33391 main_input_filename, ".tbss_[UL]");
33393 fputs ("\t.file\t", asm_out_file);
33394 output_quoted_string (asm_out_file, main_input_filename);
33395 fputc ('\n', asm_out_file);
33396 if (write_symbols != NO_DEBUG)
33397 switch_to_section (private_data_section);
33398 switch_to_section (toc_section);
33399 switch_to_section (text_section);
33400 if (profile_flag)
33401 fprintf (asm_out_file, "\t.extern %s\n", RS6000_MCOUNT);
33402 rs6000_file_start ();
33405 /* Output at end of assembler file.
33406 On the RS/6000, referencing data should automatically pull in text. */
33408 static void
33409 rs6000_xcoff_file_end (void)
33411 switch_to_section (text_section);
33412 fputs ("_section_.text:\n", asm_out_file);
33413 switch_to_section (data_section);
33414 fputs (TARGET_32BIT
33415 ? "\t.long _section_.text\n" : "\t.llong _section_.text\n",
33416 asm_out_file);
33419 struct declare_alias_data
33421 FILE *file;
33422 bool function_descriptor;
33425 /* Declare alias N.  A helper function for call_for_symbol_and_aliases.  */
33427 static bool
33428 rs6000_declare_alias (struct symtab_node *n, void *d)
33430 struct declare_alias_data *data = (struct declare_alias_data *)d;
33431 /* The main symbol is output specially, because the varasm machinery does
33432    part of the job for us; we do not need to declare .globl/.lglobl and such.  */
33433 if (!n->alias || n->weakref)
33434 return false;
33436 if (lookup_attribute ("ifunc", DECL_ATTRIBUTES (n->decl)))
33437 return false;
33439 /* Prevent assemble_alias from trying to use .set pseudo operation
33440 that does not behave as expected by the middle-end. */
33441 TREE_ASM_WRITTEN (n->decl) = true;
33443 const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (n->decl));
33444 char *buffer = (char *) alloca (strlen (name) + 2);
33445 char *p;
33446 int dollar_inside = 0;
33448 strcpy (buffer, name);
33449 p = strchr (buffer, '$');
33450 while (p) {
33451 *p = '_';
33452 dollar_inside++;
33453 p = strchr (p + 1, '$');
33455 if (TREE_PUBLIC (n->decl))
33457 if (!RS6000_WEAK || !DECL_WEAK (n->decl))
33459 if (dollar_inside) {
33460 if (data->function_descriptor)
33461 fprintf(data->file, "\t.rename .%s,\".%s\"\n", buffer, name);
33462 fprintf(data->file, "\t.rename %s,\"%s\"\n", buffer, name);
33464 if (data->function_descriptor)
33466 fputs ("\t.globl .", data->file);
33467 RS6000_OUTPUT_BASENAME (data->file, buffer);
33468 putc ('\n', data->file);
33470 fputs ("\t.globl ", data->file);
33471 RS6000_OUTPUT_BASENAME (data->file, buffer);
33472 putc ('\n', data->file);
33474 #ifdef ASM_WEAKEN_DECL
33475 else if (DECL_WEAK (n->decl) && !data->function_descriptor)
33476 ASM_WEAKEN_DECL (data->file, n->decl, name, NULL);
33477 #endif
33479 else
33481 if (dollar_inside)
33483 if (data->function_descriptor)
33484 fprintf(data->file, "\t.rename .%s,\".%s\"\n", buffer, name);
33485 fprintf(data->file, "\t.rename %s,\"%s\"\n", buffer, name);
33487 if (data->function_descriptor)
33489 fputs ("\t.lglobl .", data->file);
33490 RS6000_OUTPUT_BASENAME (data->file, buffer);
33491 putc ('\n', data->file);
33493 fputs ("\t.lglobl ", data->file);
33494 RS6000_OUTPUT_BASENAME (data->file, buffer);
33495 putc ('\n', data->file);
33497 if (data->function_descriptor)
33498 fputs (".", data->file);
33499 RS6000_OUTPUT_BASENAME (data->file, buffer);
33500 fputs (":\n", data->file);
33501 return false;
33505 #ifdef HAVE_GAS_HIDDEN
33506 /* Helper function to calculate visibility of a DECL
33507 and return the value as a const string. */
33509 static const char *
33510 rs6000_xcoff_visibility (tree decl)
33512 static const char * const visibility_types[] = {
33513 "", ",protected", ",hidden", ",internal"
33516 enum symbol_visibility vis = DECL_VISIBILITY (decl);
33518 if (TREE_CODE (decl) == FUNCTION_DECL
33519 && cgraph_node::get (decl)
33520 && cgraph_node::get (decl)->instrumentation_clone
33521 && cgraph_node::get (decl)->instrumented_version)
33522 vis = DECL_VISIBILITY (cgraph_node::get (decl)->instrumented_version->decl);
33524 return visibility_types[vis];
33526 #endif
33529 /* This macro produces the initial definition of a function name.
33530 On the RS/6000, we need to place an extra '.' in the function name and
33531 output the function descriptor.
33532 Dollar signs are converted to underscores.
33534 The csect for the function will have already been created when
33535 text_section was selected. We do have to go back to that csect, however.
33537 The third and fourth parameters to the .function pseudo-op (16 and 044)
33538 are placeholders which no longer have any use.
33540 Because AIX assembler's .set command has unexpected semantics, we output
33541 all aliases as alternative labels in front of the definition. */
33543 void
33544 rs6000_xcoff_declare_function_name (FILE *file, const char *name, tree decl)
33546 char *buffer = (char *) alloca (strlen (name) + 1);
33547 char *p;
33548 int dollar_inside = 0;
33549 struct declare_alias_data data = {file, false};
33551 strcpy (buffer, name);
33552 p = strchr (buffer, '$');
33553 while (p) {
33554 *p = '_';
33555 dollar_inside++;
33556 p = strchr (p + 1, '$');
33558 if (TREE_PUBLIC (decl))
33560 if (!RS6000_WEAK || !DECL_WEAK (decl))
33562 if (dollar_inside) {
33563 fprintf(file, "\t.rename .%s,\".%s\"\n", buffer, name);
33564 fprintf(file, "\t.rename %s,\"%s\"\n", buffer, name);
33566 fputs ("\t.globl .", file);
33567 RS6000_OUTPUT_BASENAME (file, buffer);
33568 #ifdef HAVE_GAS_HIDDEN
33569 fputs (rs6000_xcoff_visibility (decl), file);
33570 #endif
33571 putc ('\n', file);
33574 else
33576 if (dollar_inside) {
33577 fprintf(file, "\t.rename .%s,\".%s\"\n", buffer, name);
33578 fprintf(file, "\t.rename %s,\"%s\"\n", buffer, name);
33580 fputs ("\t.lglobl .", file);
33581 RS6000_OUTPUT_BASENAME (file, buffer);
33582 putc ('\n', file);
33584 fputs ("\t.csect ", file);
33585 RS6000_OUTPUT_BASENAME (file, buffer);
33586 fputs (TARGET_32BIT ? "[DS]\n" : "[DS],3\n", file);
33587 RS6000_OUTPUT_BASENAME (file, buffer);
33588 fputs (":\n", file);
33589 symtab_node::get (decl)->call_for_symbol_and_aliases (rs6000_declare_alias,
33590 &data, true);
33591 fputs (TARGET_32BIT ? "\t.long ." : "\t.llong .", file);
33592 RS6000_OUTPUT_BASENAME (file, buffer);
33593 fputs (", TOC[tc0], 0\n", file);
33594 in_section = NULL;
33595 switch_to_section (function_section (decl));
33596 putc ('.', file);
33597 RS6000_OUTPUT_BASENAME (file, buffer);
33598 fputs (":\n", file);
33599 data.function_descriptor = true;
33600 symtab_node::get (decl)->call_for_symbol_and_aliases (rs6000_declare_alias,
33601 &data, true);
33602 if (!DECL_IGNORED_P (decl))
33604 if (write_symbols == DBX_DEBUG || write_symbols == XCOFF_DEBUG)
33605 xcoffout_declare_function (file, decl, buffer);
33606 else if (write_symbols == DWARF2_DEBUG)
33608 name = (*targetm.strip_name_encoding) (name);
33609 fprintf (file, "\t.function .%s,.%s,2,0\n", name, name);
33612 return;
33616 /* Output assembly language to globalize a symbol from a DECL,
33617 possibly with visibility. */
33619 void
33620 rs6000_xcoff_asm_globalize_decl_name (FILE *stream, tree decl)
33622 const char *name = XSTR (XEXP (DECL_RTL (decl), 0), 0);
33623 fputs (GLOBAL_ASM_OP, stream);
33624 RS6000_OUTPUT_BASENAME (stream, name);
33625 #ifdef HAVE_GAS_HIDDEN
33626 fputs (rs6000_xcoff_visibility (decl), stream);
33627 #endif
33628 putc ('\n', stream);
33631 /* Output assembly language to define a symbol as COMMON from a DECL,
33632 possibly with visibility. */
33634 void
33635 rs6000_xcoff_asm_output_aligned_decl_common (FILE *stream,
33636 tree decl ATTRIBUTE_UNUSED,
33637 const char *name,
33638 unsigned HOST_WIDE_INT size,
33639 unsigned HOST_WIDE_INT align)
33641 unsigned HOST_WIDE_INT align2 = 2;
33643 if (align > 32)
33644 align2 = floor_log2 (align / BITS_PER_UNIT);
33645 else if (size > 4)
33646 align2 = 3;
33648 fputs (COMMON_ASM_OP, stream);
33649 RS6000_OUTPUT_BASENAME (stream, name);
33651 fprintf (stream,
33652 "," HOST_WIDE_INT_PRINT_UNSIGNED "," HOST_WIDE_INT_PRINT_UNSIGNED,
33653 size, align2);
33655 #ifdef HAVE_GAS_HIDDEN
33656 if (decl != NULL)
33657 fputs (rs6000_xcoff_visibility (decl), stream);
33658 #endif
33659 putc ('\n', stream);
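/* Worked example for the align2 computation above: ALIGN is in bits,
   so a 64-bit alignment gives floor_log2 (64 / 8) = 3 and the
   directive becomes ".comm name,size,3"; small objects of at most
   4 bytes keep the default log2 alignment of 2.  */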
33662 /* This macro produces the initial definition of an object (variable) name.
33663 Because AIX assembler's .set command has unexpected semantics, we output
33664 all aliases as alternative labels in front of the definition. */
33666 void
33667 rs6000_xcoff_declare_object_name (FILE *file, const char *name, tree decl)
33669 struct declare_alias_data data = {file, false};
33670 RS6000_OUTPUT_BASENAME (file, name);
33671 fputs (":\n", file);
33672 symtab_node::get_create (decl)->call_for_symbol_and_aliases (rs6000_declare_alias,
33673 &data, true);
33676 /* Override the default 'SYMBOL-.' syntax with the AIX-compatible 'SYMBOL-$'.  */
33678 void
33679 rs6000_asm_output_dwarf_pcrel (FILE *file, int size, const char *label)
33681 fputs (integer_asm_op (size, FALSE), file);
33682 assemble_name (file, label);
33683 fputs ("-$", file);
33686 /* Output a symbol offset relative to the dbase for the current object.
33687 We use __gcc_unwind_dbase as an arbitrary base for dbase and assume
33688 signed offsets.
33690 __gcc_unwind_dbase is embedded in all executables/libraries through
33691 libgcc/config/rs6000/crtdbase.S. */
33693 void
33694 rs6000_asm_output_dwarf_datarel (FILE *file, int size, const char *label)
33696 fputs (integer_asm_op (size, FALSE), file);
33697 assemble_name (file, label);
33698 fputs("-__gcc_unwind_dbase", file);
33701 #ifdef HAVE_AS_TLS
33702 static void
33703 rs6000_xcoff_encode_section_info (tree decl, rtx rtl, int first)
33705 rtx symbol;
33706 int flags;
33707 const char *symname;
33709 default_encode_section_info (decl, rtl, first);
33711 /* Careful not to prod global register variables. */
33712 if (!MEM_P (rtl))
33713 return;
33714 symbol = XEXP (rtl, 0);
33715 if (GET_CODE (symbol) != SYMBOL_REF)
33716 return;
33718 flags = SYMBOL_REF_FLAGS (symbol);
33720 if (TREE_CODE (decl) == VAR_DECL && DECL_THREAD_LOCAL_P (decl))
33721 flags &= ~SYMBOL_FLAG_HAS_BLOCK_INFO;
33723 SYMBOL_REF_FLAGS (symbol) = flags;
33725 /* Append mapping class to extern decls. */
33726 symname = XSTR (symbol, 0);
33727 if (decl /* sync condition with assemble_external () */
33728 && DECL_P (decl) && DECL_EXTERNAL (decl) && TREE_PUBLIC (decl)
33729 && ((TREE_CODE (decl) == VAR_DECL && !DECL_THREAD_LOCAL_P (decl))
33730 || TREE_CODE (decl) == FUNCTION_DECL)
33731 && symname[strlen (symname) - 1] != ']')
33733 char *newname = (char *) alloca (strlen (symname) + 5);
33734 strcpy (newname, symname);
33735 strcat (newname, (TREE_CODE (decl) == FUNCTION_DECL
33736 ? "[DS]" : "[UA]"));
33737 XSTR (symbol, 0) = ggc_strdup (newname);
33740 #endif /* HAVE_AS_TLS */
33741 #endif /* TARGET_XCOFF */
33743 void
33744 rs6000_asm_weaken_decl (FILE *stream, tree decl,
33745 const char *name, const char *val)
33747 fputs ("\t.weak\t", stream);
33748 RS6000_OUTPUT_BASENAME (stream, name);
33749 if (decl && TREE_CODE (decl) == FUNCTION_DECL
33750 && DEFAULT_ABI == ABI_AIX && DOT_SYMBOLS)
33752 if (TARGET_XCOFF)
33753 fputs ("[DS]", stream);
33754 #if TARGET_XCOFF && HAVE_GAS_HIDDEN
33755 if (TARGET_XCOFF)
33756 fputs (rs6000_xcoff_visibility (decl), stream);
33757 #endif
33758 fputs ("\n\t.weak\t.", stream);
33759 RS6000_OUTPUT_BASENAME (stream, name);
33761 #if TARGET_XCOFF && HAVE_GAS_HIDDEN
33762 if (TARGET_XCOFF)
33763 fputs (rs6000_xcoff_visibility (decl), stream);
33764 #endif
33765 fputc ('\n', stream);
33766 if (val)
33768 #ifdef ASM_OUTPUT_DEF
33769 ASM_OUTPUT_DEF (stream, name, val);
33770 #endif
33771 if (decl && TREE_CODE (decl) == FUNCTION_DECL
33772 && DEFAULT_ABI == ABI_AIX && DOT_SYMBOLS)
33774 fputs ("\t.set\t.", stream);
33775 RS6000_OUTPUT_BASENAME (stream, name);
33776 fputs (",.", stream);
33777 RS6000_OUTPUT_BASENAME (stream, val);
33778 fputc ('\n', stream);
33784 /* Return true if INSN should not be copied. */
33786 static bool
33787 rs6000_cannot_copy_insn_p (rtx_insn *insn)
33789 return recog_memoized (insn) >= 0
33790 && get_attr_cannot_copy (insn);
33793 /* Compute a (partial) cost for rtx X. Return true if the complete
33794 cost has been computed, and false if subexpressions should be
33795 scanned. In either case, *TOTAL contains the cost result. */
33797 static bool
33798 rs6000_rtx_costs (rtx x, machine_mode mode, int outer_code,
33799 int opno ATTRIBUTE_UNUSED, int *total, bool speed)
33801 int code = GET_CODE (x);
33803 switch (code)
33805 /* On the RS/6000, if it is valid in the insn, it is free. */
33806 case CONST_INT:
33807 if (((outer_code == SET
33808 || outer_code == PLUS
33809 || outer_code == MINUS)
33810 && (satisfies_constraint_I (x)
33811 || satisfies_constraint_L (x)))
33812 || (outer_code == AND
33813 && (satisfies_constraint_K (x)
33814 || (mode == SImode
33815 ? satisfies_constraint_L (x)
33816 : satisfies_constraint_J (x))))
33817 || ((outer_code == IOR || outer_code == XOR)
33818 && (satisfies_constraint_K (x)
33819 || (mode == SImode
33820 ? satisfies_constraint_L (x)
33821 : satisfies_constraint_J (x))))
33822 || outer_code == ASHIFT
33823 || outer_code == ASHIFTRT
33824 || outer_code == LSHIFTRT
33825 || outer_code == ROTATE
33826 || outer_code == ROTATERT
33827 || outer_code == ZERO_EXTRACT
33828 || (outer_code == MULT
33829 && satisfies_constraint_I (x))
33830 || ((outer_code == DIV || outer_code == UDIV
33831 || outer_code == MOD || outer_code == UMOD)
33832 && exact_log2 (INTVAL (x)) >= 0)
33833 || (outer_code == COMPARE
33834 && (satisfies_constraint_I (x)
33835 || satisfies_constraint_K (x)))
33836 || ((outer_code == EQ || outer_code == NE)
33837 && (satisfies_constraint_I (x)
33838 || satisfies_constraint_K (x)
33839 || (mode == SImode
33840 ? satisfies_constraint_L (x)
33841 : satisfies_constraint_J (x))))
33842 || (outer_code == GTU
33843 && satisfies_constraint_I (x))
33844 || (outer_code == LTU
33845 && satisfies_constraint_P (x)))
33847 *total = 0;
33848 return true;
33850 else if ((outer_code == PLUS
33851 && reg_or_add_cint_operand (x, VOIDmode))
33852 || (outer_code == MINUS
33853 && reg_or_sub_cint_operand (x, VOIDmode))
33854 || ((outer_code == SET
33855 || outer_code == IOR
33856 || outer_code == XOR)
33857 && (INTVAL (x)
33858 & ~ (unsigned HOST_WIDE_INT) 0xffffffff) == 0))
33860 *total = COSTS_N_INSNS (1);
33861 return true;
33863 /* FALLTHRU */
33865 case CONST_DOUBLE:
33866 case CONST_WIDE_INT:
33867 case CONST:
33868 case HIGH:
33869 case SYMBOL_REF:
33870 *total = !speed ? COSTS_N_INSNS (1) + 1 : COSTS_N_INSNS (2);
33871 return true;
33873 case MEM:
33874 /* When optimizing for size, MEM should be slightly more expensive
33875    than generating an address, e.g., (plus (reg) (const)).
33876 L1 cache latency is about two instructions. */
33877 *total = !speed ? COSTS_N_INSNS (1) + 1 : COSTS_N_INSNS (2);
33878 if (rs6000_slow_unaligned_access (mode, MEM_ALIGN (x)))
33879 *total += COSTS_N_INSNS (100);
33880 return true;
33882 case LABEL_REF:
33883 *total = 0;
33884 return true;
33886 case PLUS:
33887 case MINUS:
33888 if (FLOAT_MODE_P (mode))
33889 *total = rs6000_cost->fp;
33890 else
33891 *total = COSTS_N_INSNS (1);
33892 return false;
33894 case MULT:
33895 if (GET_CODE (XEXP (x, 1)) == CONST_INT
33896 && satisfies_constraint_I (XEXP (x, 1)))
33898 if (INTVAL (XEXP (x, 1)) >= -256
33899 && INTVAL (XEXP (x, 1)) <= 255)
33900 *total = rs6000_cost->mulsi_const9;
33901 else
33902 *total = rs6000_cost->mulsi_const;
33904 else if (mode == SFmode)
33905 *total = rs6000_cost->fp;
33906 else if (FLOAT_MODE_P (mode))
33907 *total = rs6000_cost->dmul;
33908 else if (mode == DImode)
33909 *total = rs6000_cost->muldi;
33910 else
33911 *total = rs6000_cost->mulsi;
33912 return false;
33914 case FMA:
33915 if (mode == SFmode)
33916 *total = rs6000_cost->fp;
33917 else
33918 *total = rs6000_cost->dmul;
33919 break;
33921 case DIV:
33922 case MOD:
33923 if (FLOAT_MODE_P (mode))
33925 *total = mode == DFmode ? rs6000_cost->ddiv
33926 : rs6000_cost->sdiv;
33927 return false;
33929 /* FALLTHRU */
33931 case UDIV:
33932 case UMOD:
33933 if (GET_CODE (XEXP (x, 1)) == CONST_INT
33934 && exact_log2 (INTVAL (XEXP (x, 1))) >= 0)
33936 if (code == DIV || code == MOD)
33937 /* Shift, addze */
33938 *total = COSTS_N_INSNS (2);
33939 else
33940 /* Shift */
33941 *total = COSTS_N_INSNS (1);
33943 else
33945 if (GET_MODE (XEXP (x, 1)) == DImode)
33946 *total = rs6000_cost->divdi;
33947 else
33948 *total = rs6000_cost->divsi;
33950 /* Add in shift and subtract for MOD unless we have a mod instruction. */
33951 if (!TARGET_MODULO && (code == MOD || code == UMOD))
33952 *total += COSTS_N_INSNS (2);
33953 return false;
33955 case CTZ:
33956 *total = COSTS_N_INSNS (TARGET_CTZ ? 1 : 4);
33957 return false;
33959 case FFS:
33960 *total = COSTS_N_INSNS (4);
33961 return false;
33963 case POPCOUNT:
33964 *total = COSTS_N_INSNS (TARGET_POPCNTD ? 1 : 6);
33965 return false;
33967 case PARITY:
33968 *total = COSTS_N_INSNS (TARGET_CMPB ? 2 : 6);
33969 return false;
33971 case NOT:
33972 if (outer_code == AND || outer_code == IOR || outer_code == XOR)
33973 *total = 0;
33974 else
33975 *total = COSTS_N_INSNS (1);
33976 return false;
33978 case AND:
33979 if (CONST_INT_P (XEXP (x, 1)))
33981 rtx left = XEXP (x, 0);
33982 rtx_code left_code = GET_CODE (left);
33984 /* rotate-and-mask: 1 insn. */
33985 if ((left_code == ROTATE
33986 || left_code == ASHIFT
33987 || left_code == LSHIFTRT)
33988 && rs6000_is_valid_shift_mask (XEXP (x, 1), left, mode))
33990 *total = rtx_cost (XEXP (left, 0), mode, left_code, 0, speed);
33991 if (!CONST_INT_P (XEXP (left, 1)))
33992 *total += rtx_cost (XEXP (left, 1), SImode, left_code, 1, speed);
33993 *total += COSTS_N_INSNS (1);
33994 return true;
33997 /* rotate-and-mask (no rotate), andi., andis.: 1 insn. */
33998 HOST_WIDE_INT val = INTVAL (XEXP (x, 1));
33999 if (rs6000_is_valid_and_mask (XEXP (x, 1), mode)
34000 || (val & 0xffff) == val
34001 || (val & 0xffff0000) == val
34002 || ((val & 0xffff) == 0 && mode == SImode))
34004 *total = rtx_cost (left, mode, AND, 0, speed);
34005 *total += COSTS_N_INSNS (1);
34006 return true;
34009 /* 2 insns. */
34010 if (rs6000_is_valid_2insn_and (XEXP (x, 1), mode))
34012 *total = rtx_cost (left, mode, AND, 0, speed);
34013 *total += COSTS_N_INSNS (2);
34014 return true;
34018 *total = COSTS_N_INSNS (1);
34019 return false;
34021 case IOR:
34022 /* FIXME */
34023 *total = COSTS_N_INSNS (1);
34024 return true;
34026 case CLZ:
34027 case XOR:
34028 case ZERO_EXTRACT:
34029 *total = COSTS_N_INSNS (1);
34030 return false;
34032 case ASHIFT:
34033 /* The EXTSWSLI instruction is a combined instruction. Don't count both
34034 the sign extend and shift separately within the insn. */
34035 if (TARGET_EXTSWSLI && mode == DImode
34036 && GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
34037 && GET_MODE (XEXP (XEXP (x, 0), 0)) == SImode)
34039 *total = 0;
34040 return false;
34042 /* fall through */
34044 case ASHIFTRT:
34045 case LSHIFTRT:
34046 case ROTATE:
34047 case ROTATERT:
34048 /* Handle mul_highpart. */
34049 if (outer_code == TRUNCATE
34050 && GET_CODE (XEXP (x, 0)) == MULT)
34052 if (mode == DImode)
34053 *total = rs6000_cost->muldi;
34054 else
34055 *total = rs6000_cost->mulsi;
34056 return true;
34058 else if (outer_code == AND)
34059 *total = 0;
34060 else
34061 *total = COSTS_N_INSNS (1);
34062 return false;
34064 case SIGN_EXTEND:
34065 case ZERO_EXTEND:
34066 if (GET_CODE (XEXP (x, 0)) == MEM)
34067 *total = 0;
34068 else
34069 *total = COSTS_N_INSNS (1);
34070 return false;
34072 case COMPARE:
34073 case NEG:
34074 case ABS:
34075 if (!FLOAT_MODE_P (mode))
34077 *total = COSTS_N_INSNS (1);
34078 return false;
34080 /* FALLTHRU */
34082 case FLOAT:
34083 case UNSIGNED_FLOAT:
34084 case FIX:
34085 case UNSIGNED_FIX:
34086 case FLOAT_TRUNCATE:
34087 *total = rs6000_cost->fp;
34088 return false;
34090 case FLOAT_EXTEND:
34091 if (mode == DFmode)
34092 *total = rs6000_cost->sfdf_convert;
34093 else
34094 *total = rs6000_cost->fp;
34095 return false;
34097 case UNSPEC:
34098 switch (XINT (x, 1))
34100 case UNSPEC_FRSP:
34101 *total = rs6000_cost->fp;
34102 return true;
34104 default:
34105 break;
34107 break;
34109 case CALL:
34110 case IF_THEN_ELSE:
34111 if (!speed)
34113 *total = COSTS_N_INSNS (1);
34114 return true;
34116 else if (FLOAT_MODE_P (mode) && TARGET_PPC_GFXOPT && TARGET_HARD_FLOAT)
34118 *total = rs6000_cost->fp;
34119 return false;
34121 break;
34123 case NE:
34124 case EQ:
34125 case GTU:
34126 case LTU:
34127 /* Carry bit requires mode == Pmode.
34128 NEG or PLUS already counted so only add one. */
34129 if (mode == Pmode
34130 && (outer_code == NEG || outer_code == PLUS))
34132 *total = COSTS_N_INSNS (1);
34133 return true;
34135 /* FALLTHRU */
34137 case GT:
34138 case LT:
34139 case UNORDERED:
34140 if (outer_code == SET)
34142 if (XEXP (x, 1) == const0_rtx)
34144 *total = COSTS_N_INSNS (2);
34145 return true;
34147 else
34149 *total = COSTS_N_INSNS (3);
34150 return false;
34153 /* CC COMPARE. */
34154 if (outer_code == COMPARE)
34156 *total = 0;
34157 return true;
34159 break;
34161 default:
34162 break;
34165 return false;
34168 /* Debug form of rs6000_rtx_costs that is selected if -mdebug=cost.  */
34170 static bool
34171 rs6000_debug_rtx_costs (rtx x, machine_mode mode, int outer_code,
34172 int opno, int *total, bool speed)
34174 bool ret = rs6000_rtx_costs (x, mode, outer_code, opno, total, speed);
34176 fprintf (stderr,
34177 "\nrs6000_rtx_costs, return = %s, mode = %s, outer_code = %s, "
34178 "opno = %d, total = %d, speed = %s, x:\n",
34179 ret ? "complete" : "scan inner",
34180 GET_MODE_NAME (mode),
34181 GET_RTX_NAME (outer_code),
34182 opno,
34183 *total,
34184 speed ? "true" : "false");
34186 debug_rtx (x);
34188 return ret;
34191 static int
34192 rs6000_insn_cost (rtx_insn *insn, bool speed)
34194 if (recog_memoized (insn) < 0)
34195 return 0;
34197 if (!speed)
34198 return get_attr_length (insn);
34200 int cost = get_attr_cost (insn);
34201 if (cost > 0)
34202 return cost;
34204 int n = get_attr_length (insn) / 4;
34205 enum attr_type type = get_attr_type (insn);
34207 switch (type)
34209 case TYPE_LOAD:
34210 case TYPE_FPLOAD:
34211 case TYPE_VECLOAD:
34212 cost = COSTS_N_INSNS (n + 1);
34213 break;
34215 case TYPE_MUL:
34216 switch (get_attr_size (insn))
34218 case SIZE_8:
34219 cost = COSTS_N_INSNS (n - 1) + rs6000_cost->mulsi_const9;
34220 break;
34221 case SIZE_16:
34222 cost = COSTS_N_INSNS (n - 1) + rs6000_cost->mulsi_const;
34223 break;
34224 case SIZE_32:
34225 cost = COSTS_N_INSNS (n - 1) + rs6000_cost->mulsi;
34226 break;
34227 case SIZE_64:
34228 cost = COSTS_N_INSNS (n - 1) + rs6000_cost->muldi;
34229 break;
34230 default:
34231 gcc_unreachable ();
34233 break;
34234 case TYPE_DIV:
34235 switch (get_attr_size (insn))
34237 case SIZE_32:
34238 cost = COSTS_N_INSNS (n - 1) + rs6000_cost->divsi;
34239 break;
34240 case SIZE_64:
34241 cost = COSTS_N_INSNS (n - 1) + rs6000_cost->divdi;
34242 break;
34243 default:
34244 gcc_unreachable ();
34246 break;
34248 case TYPE_FP:
34249 cost = n * rs6000_cost->fp;
34250 break;
34251 case TYPE_DMUL:
34252 cost = n * rs6000_cost->dmul;
34253 break;
34254 case TYPE_SDIV:
34255 cost = n * rs6000_cost->sdiv;
34256 break;
34257 case TYPE_DDIV:
34258 cost = n * rs6000_cost->ddiv;
34259 break;
34261 case TYPE_SYNC:
34262 case TYPE_LOAD_L:
34263 case TYPE_MFCR:
34264 case TYPE_MFCRF:
34265 cost = COSTS_N_INSNS (n + 2);
34266 break;
34268 default:
34269 cost = COSTS_N_INSNS (n);
34272 return cost;
34275 /* Debug form of ADDRESS_COST that is selected if -mdebug=cost. */
34277 static int
34278 rs6000_debug_address_cost (rtx x, machine_mode mode,
34279 addr_space_t as, bool speed)
34281 int ret = TARGET_ADDRESS_COST (x, mode, as, speed);
34283 fprintf (stderr, "\nrs6000_address_cost, return = %d, speed = %s, x:\n",
34284 ret, speed ? "true" : "false");
34285 debug_rtx (x);
34287 return ret;
34291 /* A C expression returning the cost of moving data from a register of class
34292 CLASS1 to one of CLASS2. */
34294 static int
34295 rs6000_register_move_cost (machine_mode mode,
34296 reg_class_t from, reg_class_t to)
34298 int ret;
34300 if (TARGET_DEBUG_COST)
34301 dbg_cost_ctrl++;
34303 /* Moves from/to GENERAL_REGS. */
34304 if (reg_classes_intersect_p (to, GENERAL_REGS)
34305 || reg_classes_intersect_p (from, GENERAL_REGS))
34307 reg_class_t rclass = from;
34309 if (! reg_classes_intersect_p (to, GENERAL_REGS))
34310 rclass = to;
34312 if (rclass == FLOAT_REGS || rclass == ALTIVEC_REGS || rclass == VSX_REGS)
34313 ret = (rs6000_memory_move_cost (mode, rclass, false)
34314 + rs6000_memory_move_cost (mode, GENERAL_REGS, false));
34316 /* It's more expensive to move CR_REGS than CR0_REGS because of the
34317 shift. */
34318 else if (rclass == CR_REGS)
34319 ret = 4;
34321 /* For those processors that have slow LR/CTR moves, make them more
34322    expensive than memory in order to bias spills to memory.  */
34323 else if ((rs6000_tune == PROCESSOR_POWER6
34324 || rs6000_tune == PROCESSOR_POWER7
34325 || rs6000_tune == PROCESSOR_POWER8
34326 || rs6000_tune == PROCESSOR_POWER9)
34327 && reg_classes_intersect_p (rclass, LINK_OR_CTR_REGS))
34328 ret = 6 * hard_regno_nregs (0, mode);
34330 else
34331 /* A move will cost one instruction per GPR moved. */
34332 ret = 2 * hard_regno_nregs (0, mode);
34335 /* If we have VSX, we can easily move between FPR or Altivec registers. */
34336 else if (VECTOR_MEM_VSX_P (mode)
34337 && reg_classes_intersect_p (to, VSX_REGS)
34338 && reg_classes_intersect_p (from, VSX_REGS))
34339 ret = 2 * hard_regno_nregs (FIRST_FPR_REGNO, mode);
34341 /* Moving between two similar registers is just one instruction. */
34342 else if (reg_classes_intersect_p (to, from))
34343 ret = (FLOAT128_2REG_P (mode)) ? 4 : 2;
34345 /* Everything else has to go through GENERAL_REGS. */
34346 else
34347 ret = (rs6000_register_move_cost (mode, GENERAL_REGS, to)
34348 + rs6000_register_move_cost (mode, from, GENERAL_REGS));
34350 if (TARGET_DEBUG_COST)
34352 if (dbg_cost_ctrl == 1)
34353 fprintf (stderr,
34354 "rs6000_register_move_cost:, ret=%d, mode=%s, from=%s, to=%s\n",
34355 ret, GET_MODE_NAME (mode), reg_class_names[from],
34356 reg_class_names[to]);
34357 dbg_cost_ctrl--;
34360 return ret;
34363 /* A C expression returning the cost of moving data of MODE from a register to
34364 or from memory. */
34366 static int
34367 rs6000_memory_move_cost (machine_mode mode, reg_class_t rclass,
34368 bool in ATTRIBUTE_UNUSED)
34370 int ret;
34372 if (TARGET_DEBUG_COST)
34373 dbg_cost_ctrl++;
34375 if (reg_classes_intersect_p (rclass, GENERAL_REGS))
34376 ret = 4 * hard_regno_nregs (0, mode);
34377 else if ((reg_classes_intersect_p (rclass, FLOAT_REGS)
34378 || reg_classes_intersect_p (rclass, VSX_REGS)))
34379 ret = 4 * hard_regno_nregs (32, mode);
34380 else if (reg_classes_intersect_p (rclass, ALTIVEC_REGS))
34381 ret = 4 * hard_regno_nregs (FIRST_ALTIVEC_REGNO, mode);
34382 else
34383 ret = 4 + rs6000_register_move_cost (mode, rclass, GENERAL_REGS);
34385 if (TARGET_DEBUG_COST)
34387 if (dbg_cost_ctrl == 1)
34388 fprintf (stderr,
34389 "rs6000_memory_move_cost: ret=%d, mode=%s, rclass=%s, in=%d\n",
34390 ret, GET_MODE_NAME (mode), reg_class_names[rclass], in);
34391 dbg_cost_ctrl--;
34394 return ret;
34397 /* Returns a code for a target-specific builtin that implements
34398 reciprocal of the function, or NULL_TREE if not available. */
34400 static tree
34401 rs6000_builtin_reciprocal (tree fndecl)
34403 switch (DECL_FUNCTION_CODE (fndecl))
34405 case VSX_BUILTIN_XVSQRTDP:
34406 if (!RS6000_RECIP_AUTO_RSQRTE_P (V2DFmode))
34407 return NULL_TREE;
34409 return rs6000_builtin_decls[VSX_BUILTIN_RSQRT_2DF];
34411 case VSX_BUILTIN_XVSQRTSP:
34412 if (!RS6000_RECIP_AUTO_RSQRTE_P (V4SFmode))
34413 return NULL_TREE;
34415 return rs6000_builtin_decls[VSX_BUILTIN_RSQRT_4SF];
34417 default:
34418 return NULL_TREE;
34422 /* Load up a constant. If the mode is a vector mode, splat the value across
34423 all of the vector elements. */
34425 static rtx
34426 rs6000_load_constant_and_splat (machine_mode mode, REAL_VALUE_TYPE dconst)
34428 rtx reg;
34430 if (mode == SFmode || mode == DFmode)
34432 rtx d = const_double_from_real_value (dconst, mode);
34433 reg = force_reg (mode, d);
34435 else if (mode == V4SFmode)
34437 rtx d = const_double_from_real_value (dconst, SFmode);
34438 rtvec v = gen_rtvec (4, d, d, d, d);
34439 reg = gen_reg_rtx (mode);
34440 rs6000_expand_vector_init (reg, gen_rtx_PARALLEL (mode, v));
34442 else if (mode == V2DFmode)
34444 rtx d = const_double_from_real_value (dconst, DFmode);
34445 rtvec v = gen_rtvec (2, d, d);
34446 reg = gen_reg_rtx (mode);
34447 rs6000_expand_vector_init (reg, gen_rtx_PARALLEL (mode, v));
34449 else
34450 gcc_unreachable ();
34452 return reg;
34455 /* Generate an FMA instruction. */
34457 static void
34458 rs6000_emit_madd (rtx target, rtx m1, rtx m2, rtx a)
34460 machine_mode mode = GET_MODE (target);
34461 rtx dst;
34463 dst = expand_ternary_op (mode, fma_optab, m1, m2, a, target, 0);
34464 gcc_assert (dst != NULL);
34466 if (dst != target)
34467 emit_move_insn (target, dst);
34470 /* Generate a FNMSUB instruction: dst = -fma(m1, m2, -a). */
34472 static void
34473 rs6000_emit_nmsub (rtx dst, rtx m1, rtx m2, rtx a)
34475 machine_mode mode = GET_MODE (dst);
34476 rtx r;
34478 /* This is a tad more complicated, since the fnma_optab is for
34479 a different expression: fma(-m1, m2, a), which is the same
34480 thing except in the case of signed zeros.
34482 Fortunately we know that if FMA is supported that FNMSUB is
34483 also supported in the ISA. Just expand it directly. */
34485 gcc_assert (optab_handler (fma_optab, mode) != CODE_FOR_nothing);
34487 r = gen_rtx_NEG (mode, a);
34488 r = gen_rtx_FMA (mode, m1, m2, r);
34489 r = gen_rtx_NEG (mode, r);
34490 emit_insn (gen_rtx_SET (dst, r));
34493 /* Newton-Raphson approximation of floating point divide DST = N/D. If NOTE_P,
34494 add a reg_note saying that this was a division. Support both scalar and
34495 vector divide. Assumes no trapping math and finite arguments. */
34497 void
34498 rs6000_emit_swdiv (rtx dst, rtx n, rtx d, bool note_p)
34500 machine_mode mode = GET_MODE (dst);
34501 rtx one, x0, e0, x1, xprev, eprev, xnext, enext, u, v;
34502 int i;
34504 /* Low precision estimates guarantee 5 bits of accuracy. High
34505 precision estimates guarantee 14 bits of accuracy. SFmode
34506 requires 23 bits of accuracy. DFmode requires 52 bits of
34507 accuracy. Each pass at least doubles the accuracy, leading
34508 to the following. */
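/* To spell the doubling argument out: a low-precision 5-bit estimate
   reaches 5 -> 10 -> 20 -> 40 bits after three passes, enough for
   SFmode's 23 bits, and one extra pass (80 bits) covers DFmode's 52.
   A high-precision 14-bit estimate needs only 14 -> 28 for SFmode and
   14 -> 28 -> 56 for DFmode, matching the pass counts below.  */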
34509 int passes = (TARGET_RECIP_PRECISION) ? 1 : 3;
34510 if (mode == DFmode || mode == V2DFmode)
34511 passes++;
34513 enum insn_code code = optab_handler (smul_optab, mode);
34514 insn_gen_fn gen_mul = GEN_FCN (code);
34516 gcc_assert (code != CODE_FOR_nothing);
34518 one = rs6000_load_constant_and_splat (mode, dconst1);
34520 /* x0 = 1./d estimate */
34521 x0 = gen_reg_rtx (mode);
34522 emit_insn (gen_rtx_SET (x0, gen_rtx_UNSPEC (mode, gen_rtvec (1, d),
34523 UNSPEC_FRES)));
34525 /* Each iteration but the last calculates x_(i+1) = x_i * (2 - d * x_i). */
34526 if (passes > 1) {
34528 /* e0 = 1. - d * x0 */
34529 e0 = gen_reg_rtx (mode);
34530 rs6000_emit_nmsub (e0, d, x0, one);
34532 /* x1 = x0 + e0 * x0 */
34533 x1 = gen_reg_rtx (mode);
34534 rs6000_emit_madd (x1, e0, x0, x0);
34536 for (i = 0, xprev = x1, eprev = e0; i < passes - 2;
34537 ++i, xprev = xnext, eprev = enext) {
34539 /* enext = eprev * eprev */
34540 enext = gen_reg_rtx (mode);
34541 emit_insn (gen_mul (enext, eprev, eprev));
34543 /* xnext = xprev + enext * xprev */
34544 xnext = gen_reg_rtx (mode);
34545 rs6000_emit_madd (xnext, enext, xprev, xprev);
34548 } else
34549 xprev = x0;
34551 /* The last iteration calculates x_(i+1) = n * x_i * (2 - d * x_i). */
34553 /* u = n * xprev */
34554 u = gen_reg_rtx (mode);
34555 emit_insn (gen_mul (u, n, xprev));
34557 /* v = n - (d * u) */
34558 v = gen_reg_rtx (mode);
34559 rs6000_emit_nmsub (v, d, u, n);
34561 /* dst = (v * xprev) + u */
34562 rs6000_emit_madd (dst, v, xprev, u);
34564 if (note_p)
34565 add_reg_note (get_last_insn (), REG_EQUAL, gen_rtx_DIV (mode, n, d));
34568 /* Goldschmidt's Algorithm for single/double-precision floating point
34569 sqrt and rsqrt. Assumes no trapping math and finite arguments. */
34571 void
34572 rs6000_emit_swsqrt (rtx dst, rtx src, bool recip)
34574 machine_mode mode = GET_MODE (src);
34575 rtx e = gen_reg_rtx (mode);
34576 rtx g = gen_reg_rtx (mode);
34577 rtx h = gen_reg_rtx (mode);
34579 /* Low precision estimates guarantee 5 bits of accuracy. High
34580 precision estimates guarantee 14 bits of accuracy. SFmode
34581 requires 23 bits of accuracy. DFmode requires 52 bits of
34582 accuracy. Each pass at least doubles the accuracy, leading
34583 to the following. */
34584 int passes = (TARGET_RECIP_PRECISION) ? 1 : 3;
34585 if (mode == DFmode || mode == V2DFmode)
34586 passes++;
34588 int i;
34589 rtx mhalf;
34590 enum insn_code code = optab_handler (smul_optab, mode);
34591 insn_gen_fn gen_mul = GEN_FCN (code);
34593 gcc_assert (code != CODE_FOR_nothing);
34595 mhalf = rs6000_load_constant_and_splat (mode, dconsthalf);
34597 /* e = rsqrt estimate */
34598 emit_insn (gen_rtx_SET (e, gen_rtx_UNSPEC (mode, gen_rtvec (1, src),
34599 UNSPEC_RSQRT)));
34601 /* If (src == 0.0) filter infinity to prevent NaN for sqrt(0.0). */
34602 if (!recip)
34604 rtx zero = force_reg (mode, CONST0_RTX (mode));
34606 if (mode == SFmode)
34608 rtx target = emit_conditional_move (e, GT, src, zero, mode,
34609 e, zero, mode, 0);
34610 if (target != e)
34611 emit_move_insn (e, target);
34613 else
34615 rtx cond = gen_rtx_GT (VOIDmode, e, zero);
34616 rs6000_emit_vector_cond_expr (e, e, zero, cond, src, zero);
34620 /* g = sqrt estimate. */
34621 emit_insn (gen_mul (g, e, src));
34622 /* h = 1/(2*sqrt) estimate. */
34623 emit_insn (gen_mul (h, e, mhalf));
34625 if (recip)
34627 if (passes == 1)
34629 rtx t = gen_reg_rtx (mode);
34630 rs6000_emit_nmsub (t, g, h, mhalf);
34631 /* Apply correction directly to 1/rsqrt estimate. */
34632 rs6000_emit_madd (dst, e, t, e);
34634 else
34636 for (i = 0; i < passes; i++)
34638 rtx t1 = gen_reg_rtx (mode);
34639 rtx g1 = gen_reg_rtx (mode);
34640 rtx h1 = gen_reg_rtx (mode);
34642 rs6000_emit_nmsub (t1, g, h, mhalf);
34643 rs6000_emit_madd (g1, g, t1, g);
34644 rs6000_emit_madd (h1, h, t1, h);
34646 g = g1;
34647 h = h1;
34649 /* Multiply by 2 for 1/rsqrt. */
34650 emit_insn (gen_add3_insn (dst, h, h));
34653 else
34655 rtx t = gen_reg_rtx (mode);
34656 rs6000_emit_nmsub (t, g, h, mhalf);
34657 rs6000_emit_madd (dst, g, t, g);
34660 return;
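/* Sketch of the Goldschmidt iteration used above, assuming exact
   arithmetic: with g approximating sqrt(x) and h approximating
   1/(2*sqrt(x)), the residual t = 1/2 - g*h measures their common
   relative error, and the coupled updates g' = g + g*t and
   h' = h + h*t roughly square that error on each pass, which is why
   each pass at least doubles the accuracy.  */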
34663 /* Emit popcount intrinsic on TARGET_POPCNTB (Power5) and TARGET_POPCNTD
34664 (Power7) targets. DST is the target, and SRC is the argument operand. */
34666 void
34667 rs6000_emit_popcount (rtx dst, rtx src)
34669 machine_mode mode = GET_MODE (dst);
34670 rtx tmp1, tmp2;
34672 /* Use the PPC ISA 2.06 popcnt{w,d} instruction if we can. */
34673 if (TARGET_POPCNTD)
34675 if (mode == SImode)
34676 emit_insn (gen_popcntdsi2 (dst, src));
34677 else
34678 emit_insn (gen_popcntddi2 (dst, src));
34679 return;
34682 tmp1 = gen_reg_rtx (mode);
34684 if (mode == SImode)
34686 emit_insn (gen_popcntbsi2 (tmp1, src));
34687 tmp2 = expand_mult (SImode, tmp1, GEN_INT (0x01010101),
34688 NULL_RTX, 0);
34689 tmp2 = force_reg (SImode, tmp2);
34690 emit_insn (gen_lshrsi3 (dst, tmp2, GEN_INT (24)));
34692 else
34694 emit_insn (gen_popcntbdi2 (tmp1, src));
34695 tmp2 = expand_mult (DImode, tmp1,
34696 GEN_INT ((HOST_WIDE_INT)
34697 0x01010101 << 32 | 0x01010101),
34698 NULL_RTX, 0);
34699 tmp2 = force_reg (DImode, tmp2);
34700 emit_insn (gen_lshrdi3 (dst, tmp2, GEN_INT (56)));
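/* Worked example for the popcntb path above (illustrative): for SImode
   src = 0xf0f00f0f, popcntb produces the per-byte counts 0x04040404;
   multiplying by 0x01010101 accumulates all byte sums into the most
   significant byte (0x10 = 16), which the shift right by 24 extracts.
   The DImode variant is identical with the 64-bit multiplier
   0x0101010101010101 and a shift by 56.  */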
34705 /* Emit parity intrinsic on TARGET_POPCNTB targets. DST is the
34706 target, and SRC is the argument operand. */
34708 void
34709 rs6000_emit_parity (rtx dst, rtx src)
34711 machine_mode mode = GET_MODE (dst);
34712 rtx tmp;
34714 tmp = gen_reg_rtx (mode);
34716 /* Use the PPC ISA 2.05 prtyw/prtyd instruction if we can. */
34717 if (TARGET_CMPB)
34719 if (mode == SImode)
34721 emit_insn (gen_popcntbsi2 (tmp, src));
34722 emit_insn (gen_paritysi2_cmpb (dst, tmp));
34724 else
34726 emit_insn (gen_popcntbdi2 (tmp, src));
34727 emit_insn (gen_paritydi2_cmpb (dst, tmp));
34729 return;
34732 if (mode == SImode)
34734 /* Is mult+shift >= shift+xor+shift+xor? */
34735 if (rs6000_cost->mulsi_const >= COSTS_N_INSNS (3))
34737 rtx tmp1, tmp2, tmp3, tmp4;
34739 tmp1 = gen_reg_rtx (SImode);
34740 emit_insn (gen_popcntbsi2 (tmp1, src));
34742 tmp2 = gen_reg_rtx (SImode);
34743 emit_insn (gen_lshrsi3 (tmp2, tmp1, GEN_INT (16)));
34744 tmp3 = gen_reg_rtx (SImode);
34745 emit_insn (gen_xorsi3 (tmp3, tmp1, tmp2));
34747 tmp4 = gen_reg_rtx (SImode);
34748 emit_insn (gen_lshrsi3 (tmp4, tmp3, GEN_INT (8)));
34749 emit_insn (gen_xorsi3 (tmp, tmp3, tmp4));
34751 else
34752 rs6000_emit_popcount (tmp, src);
34753 emit_insn (gen_andsi3 (dst, tmp, const1_rtx));
34755 else
34757 /* Is mult+shift >= shift+xor+shift+xor+shift+xor? */
34758 if (rs6000_cost->muldi >= COSTS_N_INSNS (5))
34760 rtx tmp1, tmp2, tmp3, tmp4, tmp5, tmp6;
34762 tmp1 = gen_reg_rtx (DImode);
34763 emit_insn (gen_popcntbdi2 (tmp1, src));
34765 tmp2 = gen_reg_rtx (DImode);
34766 emit_insn (gen_lshrdi3 (tmp2, tmp1, GEN_INT (32)));
34767 tmp3 = gen_reg_rtx (DImode);
34768 emit_insn (gen_xordi3 (tmp3, tmp1, tmp2));
34770 tmp4 = gen_reg_rtx (DImode);
34771 emit_insn (gen_lshrdi3 (tmp4, tmp3, GEN_INT (16)));
34772 tmp5 = gen_reg_rtx (DImode);
34773 emit_insn (gen_xordi3 (tmp5, tmp3, tmp4));
34775 tmp6 = gen_reg_rtx (DImode);
34776 emit_insn (gen_lshrdi3 (tmp6, tmp5, GEN_INT (8)));
34777 emit_insn (gen_xordi3 (tmp, tmp5, tmp6));
34779 else
34780 rs6000_emit_popcount (tmp, src);
34781 emit_insn (gen_anddi3 (dst, tmp, const1_rtx));
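/* Note on the shift/xor fallback above (illustrative): popcntb leaves a
   bit count in every byte, and parity only needs the low bit of the
   total.  XORing the value with shifted copies of itself folds the byte
   counts together while preserving that low bit, so after the fold
   steps the least significant byte holds the parity of all byte counts
   and the final AND with 1 extracts it.  */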
34785 /* Expand an Altivec constant permutation for little endian mode.
34786 OP0 and OP1 are the input vectors and TARGET is the output vector.
34787 SEL specifies the constant permutation vector.
34789 There are two issues: First, the two input operands must be
34790 swapped so that together they form a double-wide array in LE
34791 order. Second, the vperm instruction has surprising behavior
34792 in LE mode: it interprets the elements of the source vectors
34793 in BE mode ("left to right") and interprets the elements of
34794 the destination vector in LE mode ("right to left"). To
34795 correct for this, we must subtract each element of the permute
34796 control vector from 31.
34798 For example, suppose we want to concatenate vr10 = {0, 1, 2, 3}
34799 with vr11 = {4, 5, 6, 7} and extract {0, 2, 4, 6} using a vperm.
34800 We place {0,1,2,3,8,9,10,11,16,17,18,19,24,25,26,27} in vr12 to
34801 serve as the permute control vector. Then, in BE mode,
34803 vperm 9,10,11,12
34805 places the desired result in vr9. However, in LE mode the
34806 vector contents will be
34808 vr10 = 00000003 00000002 00000001 00000000
34809 vr11 = 00000007 00000006 00000005 00000004
34811 The result of the vperm using the same permute control vector is
34813 vr9 = 05000000 07000000 01000000 03000000
34815 That is, the leftmost 4 bytes of vr10 are interpreted as the
34816 source for the rightmost 4 bytes of vr9, and so on.
34818 If we change the permute control vector to
34820 vr12 = {31,30,29,28,23,22,21,20,15,14,13,12,7,6,5,4}
34822 and issue
34824 vperm 9,11,10,12
34826 we get the desired
34828 vr9 = 00000006 00000004 00000002 00000000. */
34830 static void
34831 altivec_expand_vec_perm_const_le (rtx target, rtx op0, rtx op1,
34832 const vec_perm_indices &sel)
34834 unsigned int i;
34835 rtx perm[16];
34836 rtx constv, unspec;
34838 /* Unpack and adjust the constant selector. */
34839 for (i = 0; i < 16; ++i)
34841 unsigned int elt = 31 - (sel[i] & 31);
34842 perm[i] = GEN_INT (elt);
34845 /* Expand to a permute, swapping the inputs and using the
34846 adjusted selector. */
34847 if (!REG_P (op0))
34848 op0 = force_reg (V16QImode, op0);
34849 if (!REG_P (op1))
34850 op1 = force_reg (V16QImode, op1);
34852 constv = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, perm));
34853 constv = force_reg (V16QImode, constv);
34854 unspec = gen_rtx_UNSPEC (V16QImode, gen_rtvec (3, op1, op0, constv),
34855 UNSPEC_VPERM);
34856 if (!REG_P (target))
34858 rtx tmp = gen_reg_rtx (V16QImode);
34859 emit_move_insn (tmp, unspec);
34860 unspec = tmp;
34863 emit_move_insn (target, unspec);
34866 /* Similarly to altivec_expand_vec_perm_const_le, we must adjust the
34867 permute control vector. But here it's not a constant, so we must
34868 generate a vector NAND or NOR to do the adjustment. */
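/* The adjustment works because vperm only reads the low five bits of
   each selector byte, and for those bits a full bitwise complement is
   the same as the subtract-from-31 fixup used in the constant case:
   (~e) & 31 == 31 - (e & 31).  For example, e = 3 gives ~e = 0xfc,
   whose low five bits are 28 == 31 - 3.  Computing NAND or NOR of the
   selector with itself is simply a one-instruction complement.  */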
34870 void
34871 altivec_expand_vec_perm_le (rtx operands[4])
34873 rtx notx, iorx, unspec;
34874 rtx target = operands[0];
34875 rtx op0 = operands[1];
34876 rtx op1 = operands[2];
34877 rtx sel = operands[3];
34878 rtx tmp = target;
34879 rtx norreg = gen_reg_rtx (V16QImode);
34880 machine_mode mode = GET_MODE (target);
34882 /* Get everything in regs so the pattern matches. */
34883 if (!REG_P (op0))
34884 op0 = force_reg (mode, op0);
34885 if (!REG_P (op1))
34886 op1 = force_reg (mode, op1);
34887 if (!REG_P (sel))
34888 sel = force_reg (V16QImode, sel);
34889 if (!REG_P (target))
34890 tmp = gen_reg_rtx (mode);
34892 if (TARGET_P9_VECTOR)
34894 unspec = gen_rtx_UNSPEC (mode, gen_rtvec (3, op1, op0, sel),
34895 UNSPEC_VPERMR);
34897 else
34899 /* Invert the selector with a VNAND if available, else a VNOR.
34900 The VNAND is preferred for future fusion opportunities. */
34901 notx = gen_rtx_NOT (V16QImode, sel);
34902 iorx = (TARGET_P8_VECTOR
34903 ? gen_rtx_IOR (V16QImode, notx, notx)
34904 : gen_rtx_AND (V16QImode, notx, notx));
34905 emit_insn (gen_rtx_SET (norreg, iorx));
34907 /* Permute with operands reversed and adjusted selector. */
34908 unspec = gen_rtx_UNSPEC (mode, gen_rtvec (3, op1, op0, norreg),
34909 UNSPEC_VPERM);
34912 /* Copy into target, possibly by way of a register. */
34913 if (!REG_P (target))
34915 emit_move_insn (tmp, unspec);
34916 unspec = tmp;
34919 emit_move_insn (target, unspec);
34922 /* Expand an Altivec constant permutation. Return true if we match
34923 an efficient implementation; false to fall back to VPERM.
34925 OP0 and OP1 are the input vectors and TARGET is the output vector.
34926 SEL specifies the constant permutation vector. */
34928 static bool
34929 altivec_expand_vec_perm_const (rtx target, rtx op0, rtx op1,
34930 const vec_perm_indices &sel)
34932 struct altivec_perm_insn {
34933 HOST_WIDE_INT mask;
34934 enum insn_code impl;
34935 unsigned char perm[16];
34937 static const struct altivec_perm_insn patterns[] = {
34938 { OPTION_MASK_ALTIVEC, CODE_FOR_altivec_vpkuhum_direct,
34939 { 1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31 } },
34940 { OPTION_MASK_ALTIVEC, CODE_FOR_altivec_vpkuwum_direct,
34941 { 2, 3, 6, 7, 10, 11, 14, 15, 18, 19, 22, 23, 26, 27, 30, 31 } },
34942 { OPTION_MASK_ALTIVEC,
34943 (BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrghb_direct
34944 : CODE_FOR_altivec_vmrglb_direct),
34945 { 0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23 } },
34946 { OPTION_MASK_ALTIVEC,
34947 (BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrghh_direct
34948 : CODE_FOR_altivec_vmrglh_direct),
34949 { 0, 1, 16, 17, 2, 3, 18, 19, 4, 5, 20, 21, 6, 7, 22, 23 } },
34950 { OPTION_MASK_ALTIVEC,
34951 (BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrghw_direct
34952 : CODE_FOR_altivec_vmrglw_direct),
34953 { 0, 1, 2, 3, 16, 17, 18, 19, 4, 5, 6, 7, 20, 21, 22, 23 } },
34954 { OPTION_MASK_ALTIVEC,
34955 (BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrglb_direct
34956 : CODE_FOR_altivec_vmrghb_direct),
34957 { 8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31 } },
34958 { OPTION_MASK_ALTIVEC,
34959 (BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrglh_direct
34960 : CODE_FOR_altivec_vmrghh_direct),
34961 { 8, 9, 24, 25, 10, 11, 26, 27, 12, 13, 28, 29, 14, 15, 30, 31 } },
34962 { OPTION_MASK_ALTIVEC,
34963 (BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrglw_direct
34964 : CODE_FOR_altivec_vmrghw_direct),
34965 { 8, 9, 10, 11, 24, 25, 26, 27, 12, 13, 14, 15, 28, 29, 30, 31 } },
34966 { OPTION_MASK_P8_VECTOR,
34967 (BYTES_BIG_ENDIAN ? CODE_FOR_p8_vmrgew_v4sf_direct
34968 : CODE_FOR_p8_vmrgow_v4sf_direct),
34969 { 0, 1, 2, 3, 16, 17, 18, 19, 8, 9, 10, 11, 24, 25, 26, 27 } },
34970 { OPTION_MASK_P8_VECTOR,
34971 (BYTES_BIG_ENDIAN ? CODE_FOR_p8_vmrgow_v4sf_direct
34972 : CODE_FOR_p8_vmrgew_v4sf_direct),
34973 { 4, 5, 6, 7, 20, 21, 22, 23, 12, 13, 14, 15, 28, 29, 30, 31 } }
34976 unsigned int i, j, elt, which;
34977 unsigned char perm[16];
34978 rtx x;
34979 bool one_vec;
34981 /* Unpack the constant selector. */
34982 for (i = which = 0; i < 16; ++i)
34984 elt = sel[i] & 31;
34985 which |= (elt < 16 ? 1 : 2);
34986 perm[i] = elt;
34989 /* Simplify the constant selector based on operands. */
34990 switch (which)
34992 default:
34993 gcc_unreachable ();
34995 case 3:
34996 one_vec = false;
34997 if (!rtx_equal_p (op0, op1))
34998 break;
34999 /* FALLTHRU */
35001 case 2:
35002 for (i = 0; i < 16; ++i)
35003 perm[i] &= 15;
35004 op0 = op1;
35005 one_vec = true;
35006 break;
35008 case 1:
35009 op1 = op0;
35010 one_vec = true;
35011 break;
35014 /* Look for splat patterns. */
35015 if (one_vec)
35017 elt = perm[0];
35019 for (i = 0; i < 16; ++i)
35020 if (perm[i] != elt)
35021 break;
35022 if (i == 16)
35024 if (!BYTES_BIG_ENDIAN)
35025 elt = 15 - elt;
35026 emit_insn (gen_altivec_vspltb_direct (target, op0, GEN_INT (elt)));
35027 return true;
35030 if (elt % 2 == 0)
35032 for (i = 0; i < 16; i += 2)
35033 if (perm[i] != elt || perm[i + 1] != elt + 1)
35034 break;
35035 if (i == 16)
35037 int field = BYTES_BIG_ENDIAN ? elt / 2 : 7 - elt / 2;
35038 x = gen_reg_rtx (V8HImode);
35039 emit_insn (gen_altivec_vsplth_direct (x, gen_lowpart (V8HImode, op0),
35040 GEN_INT (field)));
35041 emit_move_insn (target, gen_lowpart (V16QImode, x));
35042 return true;
35046 if (elt % 4 == 0)
35048 for (i = 0; i < 16; i += 4)
35049 if (perm[i] != elt
35050 || perm[i + 1] != elt + 1
35051 || perm[i + 2] != elt + 2
35052 || perm[i + 3] != elt + 3)
35053 break;
35054 if (i == 16)
35056 int field = BYTES_BIG_ENDIAN ? elt / 4 : 3 - elt / 4;
35057 x = gen_reg_rtx (V4SImode);
35058 emit_insn (gen_altivec_vspltw_direct (x, gen_lowpart (V4SImode, op0),
35059 GEN_INT (field)));
35060 emit_move_insn (target, gen_lowpart (V16QImode, x));
35061 return true;
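/* Illustrative splat cases for the checks above: a selector of sixteen
   5s becomes vspltb with element 5 on big-endian (15 - 5 = 10 on
   little-endian); the byte selector {4,5,4,5,...} repeating an aligned
   halfword becomes vsplth with field 4/2 = 2 on big-endian
   (7 - 2 = 5 on little-endian).  */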
35066 /* Look for merge and pack patterns. */
35067 for (j = 0; j < ARRAY_SIZE (patterns); ++j)
35069 bool swapped;
35071 if ((patterns[j].mask & rs6000_isa_flags) == 0)
35072 continue;
35074 elt = patterns[j].perm[0];
35075 if (perm[0] == elt)
35076 swapped = false;
35077 else if (perm[0] == elt + 16)
35078 swapped = true;
35079 else
35080 continue;
35081 for (i = 1; i < 16; ++i)
35083 elt = patterns[j].perm[i];
35084 if (swapped)
35085 elt = (elt >= 16 ? elt - 16 : elt + 16);
35086 else if (one_vec && elt >= 16)
35087 elt -= 16;
35088 if (perm[i] != elt)
35089 break;
35091 if (i == 16)
35093 enum insn_code icode = patterns[j].impl;
35094 machine_mode omode = insn_data[icode].operand[0].mode;
35095 machine_mode imode = insn_data[icode].operand[1].mode;
35097 /* For little-endian, don't use vpkuwum and vpkuhum if the
35098 underlying vector type is not V4SI and V8HI, respectively.
35099 For example, using vpkuwum with a V8HI picks up the even
35100 halfwords (BE numbering) when the even halfwords (LE
35101 numbering) are what we need. */
35102 if (!BYTES_BIG_ENDIAN
35103 && icode == CODE_FOR_altivec_vpkuwum_direct
35104 && ((GET_CODE (op0) == REG
35105 && GET_MODE (op0) != V4SImode)
35106 || (GET_CODE (op0) == SUBREG
35107 && GET_MODE (XEXP (op0, 0)) != V4SImode)))
35108 continue;
35109 if (!BYTES_BIG_ENDIAN
35110 && icode == CODE_FOR_altivec_vpkuhum_direct
35111 && ((GET_CODE (op0) == REG
35112 && GET_MODE (op0) != V8HImode)
35113 || (GET_CODE (op0) == SUBREG
35114 && GET_MODE (XEXP (op0, 0)) != V8HImode)))
35115 continue;
35117 /* For little-endian, the two input operands must be swapped
35118 (or swapped back) to ensure proper right-to-left numbering
35119 from 0 to 2N-1. */
35120 if (swapped ^ !BYTES_BIG_ENDIAN)
35121 std::swap (op0, op1);
35122 if (imode != V16QImode)
35124 op0 = gen_lowpart (imode, op0);
35125 op1 = gen_lowpart (imode, op1);
35127 if (omode == V16QImode)
35128 x = target;
35129 else
35130 x = gen_reg_rtx (omode);
35131 emit_insn (GEN_FCN (icode) (x, op0, op1));
35132 if (omode != V16QImode)
35133 emit_move_insn (target, gen_lowpart (V16QImode, x));
35134 return true;
35138 if (!BYTES_BIG_ENDIAN)
35140 altivec_expand_vec_perm_const_le (target, op0, op1, sel);
35141 return true;
35144 return false;
35147 /* Expand a VSX Permute Doubleword constant permutation.
35148 Return true if we match an efficient implementation. */
35150 static bool
35151 rs6000_expand_vec_perm_const_1 (rtx target, rtx op0, rtx op1,
35152 unsigned char perm0, unsigned char perm1)
35154 rtx x;
35156 /* If both selectors come from the same operand, fold to single op. */
35157 if ((perm0 & 2) == (perm1 & 2))
35159 if (perm0 & 2)
35160 op0 = op1;
35161 else
35162 op1 = op0;
35164 /* If both operands are equal, fold to simpler permutation. */
35165 if (rtx_equal_p (op0, op1))
35167 perm0 = perm0 & 1;
35168 perm1 = (perm1 & 1) + 2;
35170 /* If the first selector comes from the second operand, swap. */
35171 else if (perm0 & 2)
35173 if (perm1 & 2)
35174 return false;
35175 perm0 -= 2;
35176 perm1 += 2;
35177 std::swap (op0, op1);
35179 /* If the second selector does not come from the second operand, fail. */
35180 else if ((perm1 & 2) == 0)
35181 return false;
35183 /* Success! */
35184 if (target != NULL)
35186 machine_mode vmode, dmode;
35187 rtvec v;
35189 vmode = GET_MODE (target);
35190 gcc_assert (GET_MODE_NUNITS (vmode) == 2);
35191 dmode = mode_for_vector (GET_MODE_INNER (vmode), 4).require ();
35192 x = gen_rtx_VEC_CONCAT (dmode, op0, op1);
35193 v = gen_rtvec (2, GEN_INT (perm0), GEN_INT (perm1));
35194 x = gen_rtx_VEC_SELECT (vmode, x, gen_rtx_PARALLEL (VOIDmode, v));
35195 emit_insn (gen_rtx_SET (target, x));
35197 return true;
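/* Illustrative selector handling for the helper above: PERM0/PERM1
   index the four doublewords of the op0:op1 concatenation, so
   perm0 = 0, perm1 = 3 picks element 0 of op0 and element 1 of op1 and
   maps directly onto one xxpermdi.  For perm0 = 2, perm1 = 1 the first
   element comes from op1, so the operands are swapped and the selectors
   adjusted to 0 and 3 before emitting the insn.  */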
35200 /* Implement TARGET_VECTORIZE_VEC_PERM_CONST. */
35202 static bool
35203 rs6000_vectorize_vec_perm_const (machine_mode vmode, rtx target, rtx op0,
35204 rtx op1, const vec_perm_indices &sel)
35206 bool testing_p = !target;
35208 /* AltiVec (and thus VSX) can handle arbitrary permutations. */
35209 if (TARGET_ALTIVEC && testing_p)
35210 return true;
35212 /* Check for ps_merge* or xxpermdi insns. */
35213 if ((vmode == V2DFmode || vmode == V2DImode) && VECTOR_MEM_VSX_P (vmode))
35215 if (testing_p)
35217 op0 = gen_raw_REG (vmode, LAST_VIRTUAL_REGISTER + 1);
35218 op1 = gen_raw_REG (vmode, LAST_VIRTUAL_REGISTER + 2);
35220 if (rs6000_expand_vec_perm_const_1 (target, op0, op1, sel[0], sel[1]))
35221 return true;
35224 if (TARGET_ALTIVEC)
35226 /* Force the target-independent code to lower to V16QImode. */
35227 if (vmode != V16QImode)
35228 return false;
35229 if (altivec_expand_vec_perm_const (target, op0, op1, sel))
35230 return true;
35233 return false;
35236 /* A subroutine for rs6000_expand_extract_even & rs6000_expand_interleave.
35237 OP0 and OP1 are the input vectors and TARGET is the output vector.
35238 PERM specifies the constant permutation vector. */
35240 static void
35241 rs6000_do_expand_vec_perm (rtx target, rtx op0, rtx op1,
35242 machine_mode vmode, const vec_perm_builder &perm)
35244 rtx x = expand_vec_perm_const (vmode, op0, op1, perm, BLKmode, target);
35245 if (x != target)
35246 emit_move_insn (target, x);
35249 /* Expand an extract even operation. */
35251 void
35252 rs6000_expand_extract_even (rtx target, rtx op0, rtx op1)
35254 machine_mode vmode = GET_MODE (target);
35255 unsigned i, nelt = GET_MODE_NUNITS (vmode);
35256 vec_perm_builder perm (nelt, nelt, 1);
35258 for (i = 0; i < nelt; i++)
35259 perm.quick_push (i * 2);
35261 rs6000_do_expand_vec_perm (target, op0, op1, vmode, perm);
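/* Illustrative selector for the extract-even expansion: with V4SI
   inputs (nelt = 4) the builder pushes {0, 2, 4, 6}, i.e. the even
   elements of the op0:op1 concatenation.  */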
35264 /* Expand a vector interleave operation. */
35266 void
35267 rs6000_expand_interleave (rtx target, rtx op0, rtx op1, bool highp)
35269 machine_mode vmode = GET_MODE (target);
35270 unsigned i, high, nelt = GET_MODE_NUNITS (vmode);
35271 vec_perm_builder perm (nelt, nelt, 1);
35273 high = (highp ? 0 : nelt / 2);
35274 for (i = 0; i < nelt / 2; i++)
35276 perm.quick_push (i + high);
35277 perm.quick_push (i + nelt + high);
35280 rs6000_do_expand_vec_perm (target, op0, op1, vmode, perm);
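/* Illustrative selectors for the interleave expansion with nelt = 4:
   HIGHP pushes {0, 4, 1, 5}, pairing the first halves of op0 and op1,
   while !HIGHP pushes {2, 6, 3, 7}, pairing the second halves.  */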
35283 /* Scale a V2DF vector SRC by two to the SCALE and place in TGT. */
35284 void
35285 rs6000_scale_v2df (rtx tgt, rtx src, int scale)
35287 HOST_WIDE_INT hwi_scale (scale);
35288 REAL_VALUE_TYPE r_pow;
35289 rtvec v = rtvec_alloc (2);
35290 rtx elt;
35291 rtx scale_vec = gen_reg_rtx (V2DFmode);
35292 (void)real_powi (&r_pow, DFmode, &dconst2, hwi_scale);
35293 elt = const_double_from_real_value (r_pow, DFmode);
35294 RTVEC_ELT (v, 0) = elt;
35295 RTVEC_ELT (v, 1) = elt;
35296 rs6000_expand_vector_init (scale_vec, gen_rtx_PARALLEL (V2DFmode, v));
35297 emit_insn (gen_mulv2df3 (tgt, src, scale_vec));
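/* Illustrative use of the helper above: SCALE = 3 builds the splat
   vector {8.0, 8.0} (2**3 via real_powi) and emits one V2DF multiply,
   scaling both lanes of SRC by 8.  */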
35300 /* Return an RTX representing where to find the function value of a
35301 function returning MODE. */
35302 static rtx
35303 rs6000_complex_function_value (machine_mode mode)
35305 unsigned int regno;
35306 rtx r1, r2;
35307 machine_mode inner = GET_MODE_INNER (mode);
35308 unsigned int inner_bytes = GET_MODE_UNIT_SIZE (mode);
35310 if (TARGET_FLOAT128_TYPE
35311 && (mode == KCmode
35312 || (mode == TCmode && TARGET_IEEEQUAD)))
35313 regno = ALTIVEC_ARG_RETURN;
35315 else if (FLOAT_MODE_P (mode) && TARGET_HARD_FLOAT)
35316 regno = FP_ARG_RETURN;
35318 else
35320 regno = GP_ARG_RETURN;
35322 /* 32-bit is OK since it'll go in r3/r4. */
35323 if (TARGET_32BIT && inner_bytes >= 4)
35324 return gen_rtx_REG (mode, regno);
35327 if (inner_bytes >= 8)
35328 return gen_rtx_REG (mode, regno);
35330 r1 = gen_rtx_EXPR_LIST (inner, gen_rtx_REG (inner, regno),
35331 const0_rtx);
35332 r2 = gen_rtx_EXPR_LIST (inner, gen_rtx_REG (inner, regno + 1),
35333 GEN_INT (inner_bytes));
35334 return gen_rtx_PARALLEL (mode, gen_rtvec (2, r1, r2));
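/* Illustrative results (assuming hard float): a DCmode value has
   8-byte DFmode parts, so it is returned as a single mode-wide REG
   starting at FP_ARG_RETURN; a 64-bit SCmode value has 4-byte SFmode
   parts and is described as a PARALLEL placing the real part at byte
   offset 0 in FP_ARG_RETURN and the imaginary part at offset 4 in
   FP_ARG_RETURN + 1.  */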
35337 /* Return an rtx describing a return value of MODE as a PARALLEL
35338 in N_ELTS registers, each of mode ELT_MODE, starting at REGNO,
35339 stride REG_STRIDE. */
35341 static rtx
35342 rs6000_parallel_return (machine_mode mode,
35343 int n_elts, machine_mode elt_mode,
35344 unsigned int regno, unsigned int reg_stride)
35346 rtx par = gen_rtx_PARALLEL (mode, rtvec_alloc (n_elts));
35348 int i;
35349 for (i = 0; i < n_elts; i++)
35351 rtx r = gen_rtx_REG (elt_mode, regno);
35352 rtx off = GEN_INT (i * GET_MODE_SIZE (elt_mode));
35353 XVECEXP (par, 0, i) = gen_rtx_EXPR_LIST (VOIDmode, r, off);
35354 regno += reg_stride;
35357 return par;
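/* Illustrative PARALLEL from the helper above: splitting DImode into
   two SImode pieces at GP_ARG_RETURN with stride 1 yields
   (parallel [(expr_list (reg:SI 3) (const_int 0))
              (expr_list (reg:SI 4) (const_int 4))]),
   i.e. the first word at byte offset 0 in r3 and the second at
   offset 4 in r4.  */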
35360 /* Target hook for TARGET_FUNCTION_VALUE.
35362 An integer value is in r3 and a floating-point value is in fp1,
35363 unless -msoft-float. */
35365 static rtx
35366 rs6000_function_value (const_tree valtype,
35367 const_tree fn_decl_or_type ATTRIBUTE_UNUSED,
35368 bool outgoing ATTRIBUTE_UNUSED)
35370 machine_mode mode;
35371 unsigned int regno;
35372 machine_mode elt_mode;
35373 int n_elts;
35375 /* Special handling for structs in darwin64. */
35376 if (TARGET_MACHO
35377 && rs6000_darwin64_struct_check_p (TYPE_MODE (valtype), valtype))
35379 CUMULATIVE_ARGS valcum;
35380 rtx valret;
35382 valcum.words = 0;
35383 valcum.fregno = FP_ARG_MIN_REG;
35384 valcum.vregno = ALTIVEC_ARG_MIN_REG;
35385 /* Do a trial code generation as if this were going to be passed as
35386 an argument; if any part goes in memory, we return NULL. */
35387 valret = rs6000_darwin64_record_arg (&valcum, valtype, true, /* retval= */ true);
35388 if (valret)
35389 return valret;
35390 /* Otherwise fall through to standard ABI rules. */
35393 mode = TYPE_MODE (valtype);
35395 /* The ELFv2 ABI returns homogeneous VFP aggregates in registers. */
35396 if (rs6000_discover_homogeneous_aggregate (mode, valtype, &elt_mode, &n_elts))
35398 int first_reg, n_regs;
35400 if (SCALAR_FLOAT_MODE_NOT_VECTOR_P (elt_mode))
35402 /* _Decimal128 must use even/odd register pairs. */
35403 first_reg = (elt_mode == TDmode) ? FP_ARG_RETURN + 1 : FP_ARG_RETURN;
35404 n_regs = (GET_MODE_SIZE (elt_mode) + 7) >> 3;
35406 else
35408 first_reg = ALTIVEC_ARG_RETURN;
35409 n_regs = 1;
35412 return rs6000_parallel_return (mode, n_elts, elt_mode, first_reg, n_regs);
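/* Illustrative ELFv2 cases for the homogeneous-aggregate path above:
   a struct of four doubles returns with n_elts = 4, DFmode elements,
   and n_regs = 1, i.e. in f1 through f4; a struct of _Decimal128
   members starts at FP_ARG_RETURN + 1 with n_regs = 2, so each
   element lands on an even/odd FP register pair.  */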
35415 /* Some return value types need to be split in the -mpowerpc64, 32-bit ABI. */
35416 if (TARGET_32BIT && TARGET_POWERPC64)
35417 switch (mode)
35419 default:
35420 break;
35421 case E_DImode:
35422 case E_SCmode:
35423 case E_DCmode:
35424 case E_TCmode:
35425 int count = GET_MODE_SIZE (mode) / 4;
35426 return rs6000_parallel_return (mode, count, SImode, GP_ARG_RETURN, 1);
35429 if ((INTEGRAL_TYPE_P (valtype)
35430 && GET_MODE_BITSIZE (mode) < (TARGET_32BIT ? 32 : 64))
35431 || POINTER_TYPE_P (valtype))
35432 mode = TARGET_32BIT ? SImode : DImode;
35434 if (DECIMAL_FLOAT_MODE_P (mode) && TARGET_HARD_FLOAT)
35435 /* _Decimal128 must use an even/odd register pair. */
35436 regno = (mode == TDmode) ? FP_ARG_RETURN + 1 : FP_ARG_RETURN;
35437 else if (SCALAR_FLOAT_TYPE_P (valtype) && TARGET_HARD_FLOAT
35438 && !FLOAT128_VECTOR_P (mode))
35439 regno = FP_ARG_RETURN;
35440 else if (TREE_CODE (valtype) == COMPLEX_TYPE
35441 && targetm.calls.split_complex_arg)
35442 return rs6000_complex_function_value (mode);
35443 /* VSX is a superset of Altivec and adds V2DImode/V2DFmode. Since the same
35444 return register is used in both cases, and we won't see V2DImode/V2DFmode
35445 for pure altivec, combine the two cases. */
35446 else if ((TREE_CODE (valtype) == VECTOR_TYPE || FLOAT128_VECTOR_P (mode))
35447 && TARGET_ALTIVEC && TARGET_ALTIVEC_ABI
35448 && ALTIVEC_OR_VSX_VECTOR_MODE (mode))
35449 regno = ALTIVEC_ARG_RETURN;
35450 else
35451 regno = GP_ARG_RETURN;
35453 return gen_rtx_REG (mode, regno);
35456 /* Define how to find the value returned by a library function
35457 assuming the value has mode MODE. */
35458 rtx
35459 rs6000_libcall_value (machine_mode mode)
35461 unsigned int regno;
35463 /* Long long return values need to be split in the -mpowerpc64, 32-bit ABI. */
35464 if (TARGET_32BIT && TARGET_POWERPC64 && mode == DImode)
35465 return rs6000_parallel_return (mode, 2, SImode, GP_ARG_RETURN, 1);
35467 if (DECIMAL_FLOAT_MODE_P (mode) && TARGET_HARD_FLOAT)
35468 /* _Decimal128 must use an even/odd register pair. */
35469 regno = (mode == TDmode) ? FP_ARG_RETURN + 1 : FP_ARG_RETURN;
35470 else if (SCALAR_FLOAT_MODE_NOT_VECTOR_P (mode) && TARGET_HARD_FLOAT)
35471 regno = FP_ARG_RETURN;
35472 /* VSX is a superset of Altivec and adds V2DImode/V2DFmode. Since the same
35473 return register is used in both cases, and we won't see V2DImode/V2DFmode
35474 for pure altivec, combine the two cases. */
35475 else if (ALTIVEC_OR_VSX_VECTOR_MODE (mode)
35476 && TARGET_ALTIVEC && TARGET_ALTIVEC_ABI)
35477 regno = ALTIVEC_ARG_RETURN;
35478 else if (COMPLEX_MODE_P (mode) && targetm.calls.split_complex_arg)
35479 return rs6000_complex_function_value (mode);
35480 else
35481 regno = GP_ARG_RETURN;
35483 return gen_rtx_REG (mode, regno);
35486 /* Compute register pressure classes. We implement the target hook to avoid
35487 IRA picking something like NON_SPECIAL_REGS as a pressure class, which can
35488 lead to incorrect estimates of the number of available registers and therefore
35489 increased register pressure/spill. */
35490 static int
35491 rs6000_compute_pressure_classes (enum reg_class *pressure_classes)
35493 int n;
35495 n = 0;
35496 pressure_classes[n++] = GENERAL_REGS;
35497 if (TARGET_VSX)
35498 pressure_classes[n++] = VSX_REGS;
35499 else
35501 if (TARGET_ALTIVEC)
35502 pressure_classes[n++] = ALTIVEC_REGS;
35503 if (TARGET_HARD_FLOAT)
35504 pressure_classes[n++] = FLOAT_REGS;
35506 pressure_classes[n++] = CR_REGS;
35507 pressure_classes[n++] = SPECIAL_REGS;
35509 return n;
35512 /* Given FROM and TO register numbers, say whether this elimination is allowed.
35513 Frame pointer elimination is automatically handled.
35515 For the RS/6000, if frame pointer elimination is being done, we would like
35516 to convert ap into fp, not sp.
35518 We need r30 if -mminimal-toc was specified, and there are constant pool
35519 references. */
35521 static bool
35522 rs6000_can_eliminate (const int from, const int to)
35524 return (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM
35525 ? ! frame_pointer_needed
35526 : from == RS6000_PIC_OFFSET_TABLE_REGNUM
35527 ? ! TARGET_MINIMAL_TOC || TARGET_NO_TOC
35528 || constant_pool_empty_p ()
35529 : true);
35532 /* Define the offset between two registers, FROM to be eliminated and its
35533 replacement TO, at the start of a routine. */
35534 HOST_WIDE_INT
35535 rs6000_initial_elimination_offset (int from, int to)
35537 rs6000_stack_t *info = rs6000_stack_info ();
35538 HOST_WIDE_INT offset;
35540 if (from == HARD_FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
35541 offset = info->push_p ? 0 : -info->total_size;
35542 else if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
35544 offset = info->push_p ? 0 : -info->total_size;
35545 if (FRAME_GROWS_DOWNWARD)
35546 offset += info->fixed_size + info->vars_size + info->parm_size;
35548 else if (from == FRAME_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
35549 offset = FRAME_GROWS_DOWNWARD
35550 ? info->fixed_size + info->vars_size + info->parm_size
35551 : 0;
35552 else if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
35553 offset = info->total_size;
35554 else if (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
35555 offset = info->push_p ? info->total_size : 0;
35556 else if (from == RS6000_PIC_OFFSET_TABLE_REGNUM)
35557 offset = 0;
35558 else
35559 gcc_unreachable ();
35561 return offset;
35564 /* Fill in sizes of registers used by unwinder. */
35566 static void
35567 rs6000_init_dwarf_reg_sizes_extra (tree address)
35569 if (TARGET_MACHO && ! TARGET_ALTIVEC)
35571 int i;
35572 machine_mode mode = TYPE_MODE (char_type_node);
35573 rtx addr = expand_expr (address, NULL_RTX, VOIDmode, EXPAND_NORMAL);
35574 rtx mem = gen_rtx_MEM (BLKmode, addr);
35575 rtx value = gen_int_mode (16, mode);
35577 /* On Darwin, libgcc may be built to run on both G3 and G4/5.
35578 The unwinder still needs to know the size of Altivec registers. */
35580 for (i = FIRST_ALTIVEC_REGNO; i < LAST_ALTIVEC_REGNO+1; i++)
35582 int column = DWARF_REG_TO_UNWIND_COLUMN
35583 (DWARF2_FRAME_REG_OUT (DWARF_FRAME_REGNUM (i), true));
35584 HOST_WIDE_INT offset = column * GET_MODE_SIZE (mode);
35586 emit_move_insn (adjust_address (mem, mode, offset), value);
35591 /* Map internal gcc register numbers to debug format register numbers.
35592 FORMAT specifies the type of debug register number to use:
35593 0 -- debug information, except for frame-related sections
35594 1 -- DWARF .debug_frame section
35595 2 -- DWARF .eh_frame section */
35597 unsigned int
35598 rs6000_dbx_register_number (unsigned int regno, unsigned int format)
35600 /* Except for the above, we use the internal number for non-DWARF
35601 debug information, and also for .eh_frame. */
35602 if ((format == 0 && write_symbols != DWARF2_DEBUG) || format == 2)
35603 return regno;
35605 /* On some platforms, we use the standard DWARF register
35606 numbering for .debug_info and .debug_frame. */
35607 #ifdef RS6000_USE_DWARF_NUMBERING
35608 if (regno <= 63)
35609 return regno;
35610 if (regno == LR_REGNO)
35611 return 108;
35612 if (regno == CTR_REGNO)
35613 return 109;
35614 /* Special handling for CR for .debug_frame: rs6000_emit_prologue has
35615 translated any combination of CR2, CR3, CR4 saves to a save of CR2.
35616 The actual code emitted saves the whole of CR, so we map CR2_REGNO
35617 to the DWARF reg for CR. */
35618 if (format == 1 && regno == CR2_REGNO)
35619 return 64;
35620 if (CR_REGNO_P (regno))
35621 return regno - CR0_REGNO + 86;
35622 if (regno == CA_REGNO)
35623 return 101; /* XER */
35624 if (ALTIVEC_REGNO_P (regno))
35625 return regno - FIRST_ALTIVEC_REGNO + 1124;
35626 if (regno == VRSAVE_REGNO)
35627 return 356;
35628 if (regno == VSCR_REGNO)
35629 return 67;
35630 #endif
35631 return regno;
35634 /* target hook eh_return_filter_mode */
35635 static scalar_int_mode
35636 rs6000_eh_return_filter_mode (void)
35638 return TARGET_32BIT ? SImode : word_mode;
35641 /* Target hook for scalar_mode_supported_p. */
35642 static bool
35643 rs6000_scalar_mode_supported_p (scalar_mode mode)
35645 /* -m32 does not support TImode. This is the default, from
35646 default_scalar_mode_supported_p. For -m32 -mpowerpc64 we want the
35647 same ABI as for -m32. But default_scalar_mode_supported_p allows
35648 integer modes of precision 2 * BITS_PER_WORD, which matches TImode
35649 for -mpowerpc64. */
35650 if (TARGET_32BIT && mode == TImode)
35651 return false;
35653 if (DECIMAL_FLOAT_MODE_P (mode))
35654 return default_decimal_float_supported_p ();
35655 else if (TARGET_FLOAT128_TYPE && (mode == KFmode || mode == IFmode))
35656 return true;
35657 else
35658 return default_scalar_mode_supported_p (mode);
35661 /* Target hook for vector_mode_supported_p. */
35662 static bool
35663 rs6000_vector_mode_supported_p (machine_mode mode)
35665 /* There is no vector form for IEEE 128-bit. If we return true for IEEE
35666 128-bit, the compiler might try to widen IEEE 128-bit to IBM
35667 double-double. */
35668 if (VECTOR_MEM_ALTIVEC_OR_VSX_P (mode) && !FLOAT128_IEEE_P (mode))
35669 return true;
35671 else
35672 return false;
35675 /* Target hook for floatn_mode. */
35676 static opt_scalar_float_mode
35677 rs6000_floatn_mode (int n, bool extended)
35679 if (extended)
35681 switch (n)
35683 case 32:
35684 return DFmode;
35686 case 64:
35687 if (TARGET_FLOAT128_TYPE)
35688 return (FLOAT128_IEEE_P (TFmode)) ? TFmode : KFmode;
35689 else
35690 return opt_scalar_float_mode ();
35692 case 128:
35693 return opt_scalar_float_mode ();
35695 default:
35696 /* Those are the only valid _FloatNx types. */
35697 gcc_unreachable ();
35700 else
35702 switch (n)
35704 case 32:
35705 return SFmode;
35707 case 64:
35708 return DFmode;
35710 case 128:
35711 if (TARGET_FLOAT128_TYPE)
35712 return (FLOAT128_IEEE_P (TFmode)) ? TFmode : KFmode;
35713 else
35714 return opt_scalar_float_mode ();
35716 default:
35717 return opt_scalar_float_mode ();
35723 /* Target hook for c_mode_for_suffix. */
35724 static machine_mode
35725 rs6000_c_mode_for_suffix (char suffix)
35727 if (TARGET_FLOAT128_TYPE)
35729 if (suffix == 'q' || suffix == 'Q')
35730 return (FLOAT128_IEEE_P (TFmode)) ? TFmode : KFmode;
35732 /* At the moment, we are not defining a suffix for IBM extended double.
35733 If/when the default for -mabi=ieeelongdouble is changed, and we want
35734 to support __ibm128 constants in legacy library code, we may need to
35735 re-evaluate this decision. Currently, c-lex.c only supports 'w' and
35736 'q' as machine dependent suffixes. The x86_64 port uses 'w' for
35737 __float80 constants. */
35740 return VOIDmode;
35743 /* Target hook for invalid_arg_for_unprototyped_fn. */
35744 static const char *
35745 invalid_arg_for_unprototyped_fn (const_tree typelist, const_tree funcdecl, const_tree val)
35747 return (!rs6000_darwin64_abi
35748 && typelist == 0
35749 && TREE_CODE (TREE_TYPE (val)) == VECTOR_TYPE
35750 && (funcdecl == NULL_TREE
35751 || (TREE_CODE (funcdecl) == FUNCTION_DECL
35752 && DECL_BUILT_IN_CLASS (funcdecl) != BUILT_IN_MD)))
35753 ? N_("AltiVec argument passed to unprototyped function")
35754 : NULL;
35757 /* For TARGET_SECURE_PLT 32-bit PIC code we can avoid the PIC register
35758 setup by using the __stack_chk_fail_local hidden function instead of
35759 calling __stack_chk_fail directly. Otherwise it is better to call
35760 __stack_chk_fail directly. */
35762 static tree ATTRIBUTE_UNUSED
35763 rs6000_stack_protect_fail (void)
35765 return (DEFAULT_ABI == ABI_V4 && TARGET_SECURE_PLT && flag_pic)
35766 ? default_hidden_stack_protect_fail ()
35767 : default_external_stack_protect_fail ();
35770 /* Implement the TARGET_ASAN_SHADOW_OFFSET hook. */
35772 #if TARGET_ELF
35773 static unsigned HOST_WIDE_INT
35774 rs6000_asan_shadow_offset (void)
35776 return (unsigned HOST_WIDE_INT) 1 << (TARGET_64BIT ? 41 : 29);
35778 #endif
35780 /* Mask options that we want to support inside of attribute((target)) and
35781 #pragma GCC target operations. Note, we do not include things like
35782 64/32-bit, endianness, hard/soft floating point, etc. that would have
35783 different calling sequences. */
35785 struct rs6000_opt_mask {
35786 const char *name; /* option name */
35787 HOST_WIDE_INT mask; /* mask to set */
35788 bool invert; /* invert sense of mask */
35789 bool valid_target; /* option is a target option */
35792 static struct rs6000_opt_mask const rs6000_opt_masks[] =
35794 { "altivec", OPTION_MASK_ALTIVEC, false, true },
35795 { "cmpb", OPTION_MASK_CMPB, false, true },
35796 { "crypto", OPTION_MASK_CRYPTO, false, true },
35797 { "direct-move", OPTION_MASK_DIRECT_MOVE, false, true },
35798 { "dlmzb", OPTION_MASK_DLMZB, false, true },
35799 { "efficient-unaligned-vsx", OPTION_MASK_EFFICIENT_UNALIGNED_VSX,
35800 false, true },
35801 { "float128", OPTION_MASK_FLOAT128_KEYWORD, false, true },
35802 { "float128-hardware", OPTION_MASK_FLOAT128_HW, false, true },
35803 { "fprnd", OPTION_MASK_FPRND, false, true },
35804 { "hard-dfp", OPTION_MASK_DFP, false, true },
35805 { "htm", OPTION_MASK_HTM, false, true },
35806 { "isel", OPTION_MASK_ISEL, false, true },
35807 { "mfcrf", OPTION_MASK_MFCRF, false, true },
35808 { "mfpgpr", OPTION_MASK_MFPGPR, false, true },
35809 { "modulo", OPTION_MASK_MODULO, false, true },
35810 { "mulhw", OPTION_MASK_MULHW, false, true },
35811 { "multiple", OPTION_MASK_MULTIPLE, false, true },
35812 { "popcntb", OPTION_MASK_POPCNTB, false, true },
35813 { "popcntd", OPTION_MASK_POPCNTD, false, true },
35814 { "power8-fusion", OPTION_MASK_P8_FUSION, false, true },
35815 { "power8-fusion-sign", OPTION_MASK_P8_FUSION_SIGN, false, true },
35816 { "power8-vector", OPTION_MASK_P8_VECTOR, false, true },
35817 { "power9-fusion", OPTION_MASK_P9_FUSION, false, true },
35818 { "power9-minmax", OPTION_MASK_P9_MINMAX, false, true },
35819 { "power9-misc", OPTION_MASK_P9_MISC, false, true },
35820 { "power9-vector", OPTION_MASK_P9_VECTOR, false, true },
35821 { "powerpc-gfxopt", OPTION_MASK_PPC_GFXOPT, false, true },
35822 { "powerpc-gpopt", OPTION_MASK_PPC_GPOPT, false, true },
35823 { "quad-memory", OPTION_MASK_QUAD_MEMORY, false, true },
35824 { "quad-memory-atomic", OPTION_MASK_QUAD_MEMORY_ATOMIC, false, true },
35825 { "recip-precision", OPTION_MASK_RECIP_PRECISION, false, true },
35826 { "save-toc-indirect", OPTION_MASK_SAVE_TOC_INDIRECT, false, true },
35827 { "string", 0, false, true },
35828 { "toc-fusion", OPTION_MASK_TOC_FUSION, false, true },
35829 { "update", OPTION_MASK_NO_UPDATE, true , true },
35830 { "vsx", OPTION_MASK_VSX, false, true },
35831 #ifdef OPTION_MASK_64BIT
35832 #if TARGET_AIX_OS
35833 { "aix64", OPTION_MASK_64BIT, false, false },
35834 { "aix32", OPTION_MASK_64BIT, true, false },
35835 #else
35836 { "64", OPTION_MASK_64BIT, false, false },
35837 { "32", OPTION_MASK_64BIT, true, false },
35838 #endif
35839 #endif
35840 #ifdef OPTION_MASK_EABI
35841 { "eabi", OPTION_MASK_EABI, false, false },
35842 #endif
35843 #ifdef OPTION_MASK_LITTLE_ENDIAN
35844 { "little", OPTION_MASK_LITTLE_ENDIAN, false, false },
35845 { "big", OPTION_MASK_LITTLE_ENDIAN, true, false },
35846 #endif
35847 #ifdef OPTION_MASK_RELOCATABLE
35848 { "relocatable", OPTION_MASK_RELOCATABLE, false, false },
35849 #endif
35850 #ifdef OPTION_MASK_STRICT_ALIGN
35851 { "strict-align", OPTION_MASK_STRICT_ALIGN, false, false },
35852 #endif
35853 { "soft-float", OPTION_MASK_SOFT_FLOAT, false, false },
35854 { "string", 0, false, false },
35857 /* Builtin mask mapping for printing the flags. */
35858 static struct rs6000_opt_mask const rs6000_builtin_mask_names[] =
35860 { "altivec", RS6000_BTM_ALTIVEC, false, false },
35861 { "vsx", RS6000_BTM_VSX, false, false },
35862 { "fre", RS6000_BTM_FRE, false, false },
35863 { "fres", RS6000_BTM_FRES, false, false },
35864 { "frsqrte", RS6000_BTM_FRSQRTE, false, false },
35865 { "frsqrtes", RS6000_BTM_FRSQRTES, false, false },
35866 { "popcntd", RS6000_BTM_POPCNTD, false, false },
35867 { "cell", RS6000_BTM_CELL, false, false },
35868 { "power8-vector", RS6000_BTM_P8_VECTOR, false, false },
35869 { "power9-vector", RS6000_BTM_P9_VECTOR, false, false },
35870 { "power9-misc", RS6000_BTM_P9_MISC, false, false },
35871 { "crypto", RS6000_BTM_CRYPTO, false, false },
35872 { "htm", RS6000_BTM_HTM, false, false },
35873 { "hard-dfp", RS6000_BTM_DFP, false, false },
35874 { "hard-float", RS6000_BTM_HARD_FLOAT, false, false },
35875 { "long-double-128", RS6000_BTM_LDBL128, false, false },
35876 { "powerpc64", RS6000_BTM_POWERPC64, false, false },
35877 { "float128", RS6000_BTM_FLOAT128, false, false },
35878 { "float128-hw", RS6000_BTM_FLOAT128_HW,false, false },
35881 /* Option variables that we want to support inside attribute((target)) and
35882 #pragma GCC target operations. */
35884 struct rs6000_opt_var {
35885 const char *name; /* option name */
35886 size_t global_offset; /* offset of the option in global_options. */
35887 size_t target_offset; /* offset of the option in target options. */
35890 static struct rs6000_opt_var const rs6000_opt_vars[] =
35892 { "friz",
35893 offsetof (struct gcc_options, x_TARGET_FRIZ),
35894 offsetof (struct cl_target_option, x_TARGET_FRIZ), },
35895 { "avoid-indexed-addresses",
35896 offsetof (struct gcc_options, x_TARGET_AVOID_XFORM),
35897 offsetof (struct cl_target_option, x_TARGET_AVOID_XFORM) },
35898 { "longcall",
35899 offsetof (struct gcc_options, x_rs6000_default_long_calls),
35900 offsetof (struct cl_target_option, x_rs6000_default_long_calls), },
35901 { "optimize-swaps",
35902 offsetof (struct gcc_options, x_rs6000_optimize_swaps),
35903 offsetof (struct cl_target_option, x_rs6000_optimize_swaps), },
35904 { "allow-movmisalign",
35905 offsetof (struct gcc_options, x_TARGET_ALLOW_MOVMISALIGN),
35906 offsetof (struct cl_target_option, x_TARGET_ALLOW_MOVMISALIGN), },
35907 { "sched-groups",
35908 offsetof (struct gcc_options, x_TARGET_SCHED_GROUPS),
35909 offsetof (struct cl_target_option, x_TARGET_SCHED_GROUPS), },
35910 { "always-hint",
35911 offsetof (struct gcc_options, x_TARGET_ALWAYS_HINT),
35912 offsetof (struct cl_target_option, x_TARGET_ALWAYS_HINT), },
35913 { "align-branch-targets",
35914 offsetof (struct gcc_options, x_TARGET_ALIGN_BRANCH_TARGETS),
35915 offsetof (struct cl_target_option, x_TARGET_ALIGN_BRANCH_TARGETS), },
35916 { "tls-markers",
35917 offsetof (struct gcc_options, x_tls_markers),
35918 offsetof (struct cl_target_option, x_tls_markers), },
35919 { "sched-prolog",
35920 offsetof (struct gcc_options, x_TARGET_SCHED_PROLOG),
35921 offsetof (struct cl_target_option, x_TARGET_SCHED_PROLOG), },
35922 { "sched-epilog",
35923 offsetof (struct gcc_options, x_TARGET_SCHED_PROLOG),
35924 offsetof (struct cl_target_option, x_TARGET_SCHED_PROLOG), },
35925 { "speculate-indirect-jumps",
35926 offsetof (struct gcc_options, x_rs6000_speculate_indirect_jumps),
35927 offsetof (struct cl_target_option, x_rs6000_speculate_indirect_jumps), },
35930 /* Inner function to handle attribute((target("..."))) and #pragma GCC target
35931 parsing. Return true if there were no errors. */
35933 static bool
35934 rs6000_inner_target_options (tree args, bool attr_p)
35936 bool ret = true;
35938 if (args == NULL_TREE)
35941 else if (TREE_CODE (args) == STRING_CST)
35943 char *p = ASTRDUP (TREE_STRING_POINTER (args));
35944 char *q;
35946 while ((q = strtok (p, ",")) != NULL)
35948 bool error_p = false;
35949 bool not_valid_p = false;
35950 const char *cpu_opt = NULL;
35952 p = NULL;
35953 if (strncmp (q, "cpu=", 4) == 0)
35955 int cpu_index = rs6000_cpu_name_lookup (q+4);
35956 if (cpu_index >= 0)
35957 rs6000_cpu_index = cpu_index;
35958 else
35960 error_p = true;
35961 cpu_opt = q+4;
35964 else if (strncmp (q, "tune=", 5) == 0)
35966 int tune_index = rs6000_cpu_name_lookup (q+5);
35967 if (tune_index >= 0)
35968 rs6000_tune_index = tune_index;
35969 else
35971 error_p = true;
35972 cpu_opt = q+5;
35975 else
35977 size_t i;
35978 bool invert = false;
35979 char *r = q;
35981 error_p = true;
35982 if (strncmp (r, "no-", 3) == 0)
35984 invert = true;
35985 r += 3;
35988 for (i = 0; i < ARRAY_SIZE (rs6000_opt_masks); i++)
35989 if (strcmp (r, rs6000_opt_masks[i].name) == 0)
35991 HOST_WIDE_INT mask = rs6000_opt_masks[i].mask;
35993 if (!rs6000_opt_masks[i].valid_target)
35994 not_valid_p = true;
35995 else
35997 error_p = false;
35998 rs6000_isa_flags_explicit |= mask;
36000 /* VSX needs altivec, so -mvsx automagically sets
36001 altivec and disables -mavoid-indexed-addresses. */
36002 if (!invert)
36004 if (mask == OPTION_MASK_VSX)
36006 mask |= OPTION_MASK_ALTIVEC;
36007 TARGET_AVOID_XFORM = 0;
36011 if (rs6000_opt_masks[i].invert)
36012 invert = !invert;
36014 if (invert)
36015 rs6000_isa_flags &= ~mask;
36016 else
36017 rs6000_isa_flags |= mask;
36019 break;
36022 if (error_p && !not_valid_p)
36024 for (i = 0; i < ARRAY_SIZE (rs6000_opt_vars); i++)
36025 if (strcmp (r, rs6000_opt_vars[i].name) == 0)
36027 size_t j = rs6000_opt_vars[i].global_offset;
36028 *((int *) ((char *)&global_options + j)) = !invert;
36029 error_p = false;
36030 not_valid_p = false;
36031 break;
36036 if (error_p)
36038 const char *eprefix, *esuffix;
36040 ret = false;
36041 if (attr_p)
36043 eprefix = "__attribute__((__target__(";
36044 esuffix = ")))";
36046 else
36048 eprefix = "#pragma GCC target ";
36049 esuffix = "";
36052 if (cpu_opt)
36053 error ("invalid cpu %qs for %s%qs%s", cpu_opt, eprefix,
36054 q, esuffix);
36055 else if (not_valid_p)
36056 error ("%s%qs%s is not allowed", eprefix, q, esuffix);
36057 else
36058 error ("%s%qs%s is invalid", eprefix, q, esuffix);
36063 else if (TREE_CODE (args) == TREE_LIST)
36067 tree value = TREE_VALUE (args);
36068 if (value)
36070 bool ret2 = rs6000_inner_target_options (value, attr_p);
36071 if (!ret2)
36072 ret = false;
36074 args = TREE_CHAIN (args);
36076 while (args != NULL_TREE);
36079 else
36081 error ("attribute %<target%> argument not a string");
36082 return false;
36085 return ret;
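/* Illustrative inputs accepted by the parser above (hypothetical use):
   __attribute__((__target__("cpu=power9,no-vsx"))) first sets the cpu
   index, then strips the "no-" prefix, finds "vsx" in
   rs6000_opt_masks, and clears the mask; a name found only in
   rs6000_opt_vars, such as "avoid-indexed-addresses", instead toggles
   the corresponding variable in global_options.  */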
36088 /* Print out the target options as a list for -mdebug=target. */
36090 static void
36091 rs6000_debug_target_options (tree args, const char *prefix)
36093 if (args == NULL_TREE)
36094 fprintf (stderr, "%s<NULL>", prefix);
36096 else if (TREE_CODE (args) == STRING_CST)
36098 char *p = ASTRDUP (TREE_STRING_POINTER (args));
36099 char *q;
36101 while ((q = strtok (p, ",")) != NULL)
36103 p = NULL;
36104 fprintf (stderr, "%s\"%s\"", prefix, q);
36105 prefix = ", ";
36109 else if (TREE_CODE (args) == TREE_LIST)
36113 tree value = TREE_VALUE (args);
36114 if (value)
36116 rs6000_debug_target_options (value, prefix);
36117 prefix = ", ";
36119 args = TREE_CHAIN (args);
36121 while (args != NULL_TREE);
36124 else
36125 gcc_unreachable ();
36127 return;
36131 /* Hook to validate attribute((target("..."))). */
36133 static bool
36134 rs6000_valid_attribute_p (tree fndecl,
36135 tree ARG_UNUSED (name),
36136 tree args,
36137 int flags)
36139 struct cl_target_option cur_target;
36140 bool ret;
36141 tree old_optimize;
36142 tree new_target, new_optimize;
36143 tree func_optimize;
36145 gcc_assert ((fndecl != NULL_TREE) && (args != NULL_TREE));
36147 if (TARGET_DEBUG_TARGET)
36149 tree tname = DECL_NAME (fndecl);
36150 fprintf (stderr, "\n==================== rs6000_valid_attribute_p:\n");
36151 if (tname)
36152 fprintf (stderr, "function: %.*s\n",
36153 (int) IDENTIFIER_LENGTH (tname),
36154 IDENTIFIER_POINTER (tname));
36155 else
36156 fprintf (stderr, "function: unknown\n");
36158 fprintf (stderr, "args:");
36159 rs6000_debug_target_options (args, " ");
36160 fprintf (stderr, "\n");
36162 if (flags)
36163 fprintf (stderr, "flags: 0x%x\n", flags);
36165 fprintf (stderr, "--------------------\n");
36168 /* attribute((target("default"))) does nothing, beyond
36169 affecting multi-versioning. */
36170 if (TREE_VALUE (args)
36171 && TREE_CODE (TREE_VALUE (args)) == STRING_CST
36172 && TREE_CHAIN (args) == NULL_TREE
36173 && strcmp (TREE_STRING_POINTER (TREE_VALUE (args)), "default") == 0)
36174 return true;
36176 old_optimize = build_optimization_node (&global_options);
36177 func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl);
36179 /* If the function changed the optimization levels as well as setting target
36180 options, start with the optimizations specified. */
36181 if (func_optimize && func_optimize != old_optimize)
36182 cl_optimization_restore (&global_options,
36183 TREE_OPTIMIZATION (func_optimize));
36185 /* The target attributes may also change some optimization flags, so update
36186 the optimization options if necessary. */
36187 cl_target_option_save (&cur_target, &global_options);
36188 rs6000_cpu_index = rs6000_tune_index = -1;
36189 ret = rs6000_inner_target_options (args, true);
36191 /* Set up any additional state. */
36192 if (ret)
36194 ret = rs6000_option_override_internal (false);
36195 new_target = build_target_option_node (&global_options);
36197 else
36198 new_target = NULL;
36200 new_optimize = build_optimization_node (&global_options);
36202 if (!new_target)
36203 ret = false;
36205 else if (fndecl)
36207 DECL_FUNCTION_SPECIFIC_TARGET (fndecl) = new_target;
36209 if (old_optimize != new_optimize)
36210 DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl) = new_optimize;
36213 cl_target_option_restore (&global_options, &cur_target);
36215 if (old_optimize != new_optimize)
36216 cl_optimization_restore (&global_options,
36217 TREE_OPTIMIZATION (old_optimize));
36219 return ret;
36223 /* Hook to validate the current #pragma GCC target and set the state, and
36224 update the macros based on what was changed. If ARGS is NULL, then
36225 POP_TARGET is used to reset the options. */
36227 bool
36228 rs6000_pragma_target_parse (tree args, tree pop_target)
36230 tree prev_tree = build_target_option_node (&global_options);
36231 tree cur_tree;
36232 struct cl_target_option *prev_opt, *cur_opt;
36233 HOST_WIDE_INT prev_flags, cur_flags, diff_flags;
36234 HOST_WIDE_INT prev_bumask, cur_bumask, diff_bumask;
36236 if (TARGET_DEBUG_TARGET)
36238 fprintf (stderr, "\n==================== rs6000_pragma_target_parse\n");
36239 fprintf (stderr, "args:");
36240 rs6000_debug_target_options (args, " ");
36241 fprintf (stderr, "\n");
36243 if (pop_target)
36245 fprintf (stderr, "pop_target:\n");
36246 debug_tree (pop_target);
36248 else
36249 fprintf (stderr, "pop_target: <NULL>\n");
36251 fprintf (stderr, "--------------------\n");
36254 if (! args)
36256 cur_tree = ((pop_target)
36257 ? pop_target
36258 : target_option_default_node);
36259 cl_target_option_restore (&global_options,
36260 TREE_TARGET_OPTION (cur_tree));
36262 else
36264 rs6000_cpu_index = rs6000_tune_index = -1;
36265 if (!rs6000_inner_target_options (args, false)
36266 || !rs6000_option_override_internal (false)
36267 || (cur_tree = build_target_option_node (&global_options))
36268 == NULL_TREE)
36270 if (TARGET_DEBUG_BUILTIN || TARGET_DEBUG_TARGET)
36271 fprintf (stderr, "invalid pragma\n");
36273 return false;
36277 target_option_current_node = cur_tree;
36278 rs6000_activate_target_options (target_option_current_node);
36280 /* If we have the preprocessor linked in (i.e. C or C++ languages), possibly
36281 change the macros that are defined. */
36282 if (rs6000_target_modify_macros_ptr)
36284 prev_opt = TREE_TARGET_OPTION (prev_tree);
36285 prev_bumask = prev_opt->x_rs6000_builtin_mask;
36286 prev_flags = prev_opt->x_rs6000_isa_flags;
36288 cur_opt = TREE_TARGET_OPTION (cur_tree);
36289 cur_flags = cur_opt->x_rs6000_isa_flags;
36290 cur_bumask = cur_opt->x_rs6000_builtin_mask;
36292 diff_bumask = (prev_bumask ^ cur_bumask);
36293 diff_flags = (prev_flags ^ cur_flags);
36295 if ((diff_flags != 0) || (diff_bumask != 0))
36297 /* Delete old macros. */
36298 rs6000_target_modify_macros_ptr (false,
36299 prev_flags & diff_flags,
36300 prev_bumask & diff_bumask);
36302 /* Define new macros. */
36303 rs6000_target_modify_macros_ptr (true,
36304 cur_flags & diff_flags,
36305 cur_bumask & diff_bumask);
36309 return true;
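/* Illustrative macro update from the diffing above: if a pragma turns
   on VSX that was previously off, diff_flags contains OPTION_MASK_VSX;
   the first call (define_p false) undefines macros keyed to bits set
   only before, and the second defines those set only now, e.g.
   __VSX__.  */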
36313 /* Remember the last target of rs6000_set_current_function. */
36314 static GTY(()) tree rs6000_previous_fndecl;
36316 /* Restore target's globals from NEW_TREE and invalidate the
36317 rs6000_previous_fndecl cache. */
36319 void
36320 rs6000_activate_target_options (tree new_tree)
36322 cl_target_option_restore (&global_options, TREE_TARGET_OPTION (new_tree));
36323 if (TREE_TARGET_GLOBALS (new_tree))
36324 restore_target_globals (TREE_TARGET_GLOBALS (new_tree));
36325 else if (new_tree == target_option_default_node)
36326 restore_target_globals (&default_target_globals);
36327 else
36328 TREE_TARGET_GLOBALS (new_tree) = save_target_globals_default_opts ();
36329 rs6000_previous_fndecl = NULL_TREE;
36332 /* Establish appropriate back-end context for processing the function
36333 FNDECL. The argument might be NULL to indicate processing at top
36334 level, outside of any function scope. */
36335 static void
36336 rs6000_set_current_function (tree fndecl)
36338 if (TARGET_DEBUG_TARGET)
36340 fprintf (stderr, "\n==================== rs6000_set_current_function");
36342 if (fndecl)
36343 fprintf (stderr, ", fndecl %s (%p)",
36344 (DECL_NAME (fndecl)
36345 ? IDENTIFIER_POINTER (DECL_NAME (fndecl))
36346 : "<unknown>"), (void *)fndecl);
36348 if (rs6000_previous_fndecl)
36349 fprintf (stderr, ", prev_fndecl (%p)", (void *)rs6000_previous_fndecl);
36351 fprintf (stderr, "\n");
36354 /* Only change the context if the function changes. This hook is called
36355 several times in the course of compiling a function, and we don't want to
36356 slow things down too much or call target_reinit when it isn't safe. */
36357 if (fndecl == rs6000_previous_fndecl)
36358 return;
36360 tree old_tree;
36361 if (rs6000_previous_fndecl == NULL_TREE)
36362 old_tree = target_option_current_node;
36363 else if (DECL_FUNCTION_SPECIFIC_TARGET (rs6000_previous_fndecl))
36364 old_tree = DECL_FUNCTION_SPECIFIC_TARGET (rs6000_previous_fndecl);
36365 else
36366 old_tree = target_option_default_node;
36368 tree new_tree;
36369 if (fndecl == NULL_TREE)
36371 if (old_tree != target_option_current_node)
36372 new_tree = target_option_current_node;
36373 else
36374 new_tree = NULL_TREE;
36376 else
36378 new_tree = DECL_FUNCTION_SPECIFIC_TARGET (fndecl);
36379 if (new_tree == NULL_TREE)
36380 new_tree = target_option_default_node;
36383 if (TARGET_DEBUG_TARGET)
36385 if (new_tree)
36387 fprintf (stderr, "\nnew fndecl target specific options:\n");
36388 debug_tree (new_tree);
36391 if (old_tree)
36393 fprintf (stderr, "\nold fndecl target specific options:\n");
36394 debug_tree (old_tree);
36397 if (old_tree != NULL_TREE || new_tree != NULL_TREE)
36398 fprintf (stderr, "--------------------\n");
36401 if (new_tree && old_tree != new_tree)
36402 rs6000_activate_target_options (new_tree);
36404 if (fndecl)
36405 rs6000_previous_fndecl = fndecl;
36409 /* Save the current options */
36411 static void
36412 rs6000_function_specific_save (struct cl_target_option *ptr,
36413 struct gcc_options *opts)
36415 ptr->x_rs6000_isa_flags = opts->x_rs6000_isa_flags;
36416 ptr->x_rs6000_isa_flags_explicit = opts->x_rs6000_isa_flags_explicit;
36419 /* Restore the current options */
36421 static void
36422 rs6000_function_specific_restore (struct gcc_options *opts,
36423 struct cl_target_option *ptr)
36426 opts->x_rs6000_isa_flags = ptr->x_rs6000_isa_flags;
36427 opts->x_rs6000_isa_flags_explicit = ptr->x_rs6000_isa_flags_explicit;
36428 (void) rs6000_option_override_internal (false);
36431 /* Print the current options */
36433 static void
36434 rs6000_function_specific_print (FILE *file, int indent,
36435 struct cl_target_option *ptr)
36437 rs6000_print_isa_options (file, indent, "Isa options set",
36438 ptr->x_rs6000_isa_flags);
36440 rs6000_print_isa_options (file, indent, "Isa options explicit",
36441 ptr->x_rs6000_isa_flags_explicit);
36444 /* Helper function to print the current isa or misc options on a line. */
36446 static void
36447 rs6000_print_options_internal (FILE *file,
36448 int indent,
36449 const char *string,
36450 HOST_WIDE_INT flags,
36451 const char *prefix,
36452 const struct rs6000_opt_mask *opts,
36453 size_t num_elements)
36455 size_t i;
36456 size_t start_column = 0;
36457 size_t cur_column;
36458 size_t max_column = 120;
36459 size_t prefix_len = strlen (prefix);
36460 size_t comma_len = 0;
36461 const char *comma = "";
36463 if (indent)
36464 start_column += fprintf (file, "%*s", indent, "");
36466 if (!flags)
36468 fprintf (file, DEBUG_FMT_S, string, "<none>");
36469 return;
36472 start_column += fprintf (file, DEBUG_FMT_WX, string, flags);
36474 /* Print the various mask options. */
36475 cur_column = start_column;
36476 for (i = 0; i < num_elements; i++)
36478 bool invert = opts[i].invert;
36479 const char *name = opts[i].name;
36480 const char *no_str = "";
36481 HOST_WIDE_INT mask = opts[i].mask;
36482 size_t len = comma_len + prefix_len + strlen (name);
36484 if (!invert)
36486 if ((flags & mask) == 0)
36488 no_str = "no-";
36489 len += sizeof ("no-") - 1;
36492 flags &= ~mask;
36495 else
36497 if ((flags & mask) != 0)
36499 no_str = "no-";
36500 len += sizeof ("no-") - 1;
36503 flags |= mask;
36506 cur_column += len;
36507 if (cur_column > max_column)
36509 fprintf (file, ", \\\n%*s", (int)start_column, "");
36510 cur_column = start_column + len;
36511 comma = "";
36514 fprintf (file, "%s%s%s%s", comma, prefix, no_str, name);
36515 comma = ", ";
36516 comma_len = sizeof (", ") - 1;
36519 fputs ("\n", file);
36522 /* Helper function to print the current isa options on a line. */
36524 static void
36525 rs6000_print_isa_options (FILE *file, int indent, const char *string,
36526 HOST_WIDE_INT flags)
36528 rs6000_print_options_internal (file, indent, string, flags, "-m",
36529 &rs6000_opt_masks[0],
36530 ARRAY_SIZE (rs6000_opt_masks));
36533 static void
36534 rs6000_print_builtin_options (FILE *file, int indent, const char *string,
36535 HOST_WIDE_INT flags)
36537 rs6000_print_options_internal (file, indent, string, flags, "",
36538 &rs6000_builtin_mask_names[0],
36539 ARRAY_SIZE (rs6000_builtin_mask_names));
36542 /* If the user used -mno-vsx, we need to turn off all of the implicit ISA 2.06,
36543 2.07, and 3.0 options that relate to the vector unit (-mdirect-move,
36544 -mupper-regs-df, etc.).
36546 If the user used -mno-power8-vector, we need to turn off all of the implicit
36547 ISA 2.07 and 3.0 options that relate to the vector unit.
36549 If the user used -mno-power9-vector, we need to turn off all of the implicit
36550 ISA 3.0 options that relate to the vector unit.
36552 This function does not handle explicit options such as the user specifying
36553 -mdirect-move. These are handled in rs6000_option_override_internal, and
36554 the appropriate error is given if needed.
36556 We return a mask of all of the implicit options that should not be enabled
36557 by default. */
36559 static HOST_WIDE_INT
36560 rs6000_disable_incompatible_switches (void)
36562 HOST_WIDE_INT ignore_masks = rs6000_isa_flags_explicit;
36563 size_t i, j;
36565 static const struct {
36566 const HOST_WIDE_INT no_flag; /* flag explicitly turned off. */
36567 const HOST_WIDE_INT dep_flags; /* flags that depend on this option. */
36568 const char *const name; /* name of the switch. */
36569 } flags[] = {
36570 { OPTION_MASK_P9_VECTOR, OTHER_P9_VECTOR_MASKS, "power9-vector" },
36571 { OPTION_MASK_P8_VECTOR, OTHER_P8_VECTOR_MASKS, "power8-vector" },
36572 { OPTION_MASK_VSX, OTHER_VSX_VECTOR_MASKS, "vsx" },
36575 for (i = 0; i < ARRAY_SIZE (flags); i++)
36577 HOST_WIDE_INT no_flag = flags[i].no_flag;
36579 if ((rs6000_isa_flags & no_flag) == 0
36580 && (rs6000_isa_flags_explicit & no_flag) != 0)
36582 HOST_WIDE_INT dep_flags = flags[i].dep_flags;
36583 HOST_WIDE_INT set_flags = (rs6000_isa_flags_explicit
36584 & rs6000_isa_flags
36585 & dep_flags);
36587 if (set_flags)
36589 for (j = 0; j < ARRAY_SIZE (rs6000_opt_masks); j++)
36590 if ((set_flags & rs6000_opt_masks[j].mask) != 0)
36592 set_flags &= ~rs6000_opt_masks[j].mask;
36593 error ("%<-mno-%s%> turns off %<-m%s%>",
36594 flags[i].name,
36595 rs6000_opt_masks[j].name);
36598 gcc_assert (!set_flags);
36601 rs6000_isa_flags &= ~dep_flags;
36602 ignore_masks |= no_flag | dep_flags;
36606 return ignore_masks;
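/* Illustrative interaction (hypothetical command line): with
   -mno-vsx -mpower8-vector, "vsx" is explicitly off while a dependent
   flag is explicitly on, so the loop above reports
   "-mno-vsx turns off -mpower8-vector"; dependent flags that were
   merely implied are silently cleared and added to the returned
   ignore mask.  */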
36610 /* Helper function for printing the function name when debugging. */
36612 static const char *
36613 get_decl_name (tree fn)
36615 tree name;
36617 if (!fn)
36618 return "<null>";
36620 name = DECL_NAME (fn);
36621 if (!name)
36622 return "<no-name>";
36624 return IDENTIFIER_POINTER (name);
36627 /* Return the clone id of the target we are compiling code for in a target
36628 clone. The clone id is ordered from 0 (default) to CLONE_MAX-1 and gives
36629 the priority list for the target clones (ordered from lowest to
36630 highest). */
36632 static int
36633 rs6000_clone_priority (tree fndecl)
36635 tree fn_opts = DECL_FUNCTION_SPECIFIC_TARGET (fndecl);
36636 HOST_WIDE_INT isa_masks;
36637 int ret = CLONE_DEFAULT;
36638 tree attrs = lookup_attribute ("target", DECL_ATTRIBUTES (fndecl));
36639 const char *attrs_str = NULL;
36641 attrs = TREE_VALUE (TREE_VALUE (attrs));
36642 attrs_str = TREE_STRING_POINTER (attrs);
36644 /* Return priority zero for default function. Return the ISA needed for the
36645 function if it is not the default. */
36646 if (strcmp (attrs_str, "default") != 0)
36648 if (fn_opts == NULL_TREE)
36649 fn_opts = target_option_default_node;
36651 if (!fn_opts || !TREE_TARGET_OPTION (fn_opts))
36652 isa_masks = rs6000_isa_flags;
36653 else
36654 isa_masks = TREE_TARGET_OPTION (fn_opts)->x_rs6000_isa_flags;
36656 for (ret = CLONE_MAX - 1; ret != 0; ret--)
36657 if ((rs6000_clone_map[ret].isa_mask & isa_masks) != 0)
36658 break;
36661 if (TARGET_DEBUG_TARGET)
36662 fprintf (stderr, "rs6000_clone_priority (%s) => %d\n",
36663 get_decl_name (fndecl), ret);
36665 return ret;
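/* Editor's note: the priority scan above, reduced to a self-contained
   sketch (not part of rs6000.c; MAP and CLONE_MAX are hypothetical
   stand-ins for rs6000_clone_map and its size).  The highest clone id
   whose ISA mask intersects the function's flags wins; id 0 is the
   default.  */

static int
pick_clone (unsigned long isa_masks, const unsigned long *map, int clone_max)
{
  for (int ret = clone_max - 1; ret != 0; ret--)
    if ((map[ret] & isa_masks) != 0)
      return ret;
  return 0;	/* CLONE_DEFAULT */
}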
36668 /* This compares the priority of target features in function DECL1 and DECL2.
36669 It returns positive value if DECL1 is higher priority, negative value if
36670 DECL2 is higher priority and 0 if they are the same. Note, priorities are
36671 ordered from lowest (CLONE_DEFAULT) to highest (currently CLONE_ISA_3_0). */
36673 static int
36674 rs6000_compare_version_priority (tree decl1, tree decl2)
36676 int priority1 = rs6000_clone_priority (decl1);
36677 int priority2 = rs6000_clone_priority (decl2);
36678 int ret = priority1 - priority2;
36680 if (TARGET_DEBUG_TARGET)
36681 fprintf (stderr, "rs6000_compare_version_priority (%s, %s) => %d\n",
36682 get_decl_name (decl1), get_decl_name (decl2), ret);
36684 return ret;
36687 /* Make a dispatcher declaration for the multi-versioned function DECL.
36688 Calls to DECL function will be replaced with calls to the dispatcher
36689 by the front-end. Returns the decl of the dispatcher function. */
36691 static tree
36692 rs6000_get_function_versions_dispatcher (void *decl)
36694 tree fn = (tree) decl;
36695 struct cgraph_node *node = NULL;
36696 struct cgraph_node *default_node = NULL;
36697 struct cgraph_function_version_info *node_v = NULL;
36698 struct cgraph_function_version_info *first_v = NULL;
36700 tree dispatch_decl = NULL;
36702 struct cgraph_function_version_info *default_version_info = NULL;
36703 gcc_assert (fn != NULL && DECL_FUNCTION_VERSIONED (fn));
36705 if (TARGET_DEBUG_TARGET)
36706 fprintf (stderr, "rs6000_get_function_versions_dispatcher (%s)\n",
36707 get_decl_name (fn));
36709 node = cgraph_node::get (fn);
36710 gcc_assert (node != NULL);
36712 node_v = node->function_version ();
36713 gcc_assert (node_v != NULL);
36715 if (node_v->dispatcher_resolver != NULL)
36716 return node_v->dispatcher_resolver;
36718 /* Find the default version and make it the first node. */
36719 first_v = node_v;
36720 /* Go to the beginning of the chain. */
36721 while (first_v->prev != NULL)
36722 first_v = first_v->prev;
36724 default_version_info = first_v;
36725 while (default_version_info != NULL)
36727 const tree decl2 = default_version_info->this_node->decl;
36728 if (is_function_default_version (decl2))
36729 break;
36730 default_version_info = default_version_info->next;
36733 /* If there is no default node, just return NULL. */
36734 if (default_version_info == NULL)
36735 return NULL;
36737 /* Make default info the first node. */
36738 if (first_v != default_version_info)
36740 default_version_info->prev->next = default_version_info->next;
36741 if (default_version_info->next)
36742 default_version_info->next->prev = default_version_info->prev;
36743 first_v->prev = default_version_info;
36744 default_version_info->next = first_v;
36745 default_version_info->prev = NULL;
36748 default_node = default_version_info->this_node;
36750 #ifndef TARGET_LIBC_PROVIDES_HWCAP_IN_TCB
36751 error_at (DECL_SOURCE_LOCATION (default_node->decl),
36752 "target_clones attribute needs GLIBC (2.23 and newer) that "
36753 "exports hardware capability bits");
36754 #else
36756 if (targetm.has_ifunc_p ())
36758 struct cgraph_function_version_info *it_v = NULL;
36759 struct cgraph_node *dispatcher_node = NULL;
36760 struct cgraph_function_version_info *dispatcher_version_info = NULL;
36762 /* Right now, the dispatching is done via ifunc. */
36763 dispatch_decl = make_dispatcher_decl (default_node->decl);
36765 dispatcher_node = cgraph_node::get_create (dispatch_decl);
36766 gcc_assert (dispatcher_node != NULL);
36767 dispatcher_node->dispatcher_function = 1;
36768 dispatcher_version_info
36769 = dispatcher_node->insert_new_function_version ();
36770 dispatcher_version_info->next = default_version_info;
36771 dispatcher_node->definition = 1;
36773 /* Set the dispatcher for all the versions. */
36774 it_v = default_version_info;
36775 while (it_v != NULL)
36777 it_v->dispatcher_resolver = dispatch_decl;
36778 it_v = it_v->next;
36781 else
36783 error_at (DECL_SOURCE_LOCATION (default_node->decl),
36784 "multiversioning needs ifunc which is not supported "
36785 "on this target");
36787 #endif
36789 return dispatch_decl;
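/* Editor's note: an illustrative use of this machinery from user code (a
   sketch, not part of rs6000.c).  A target_clones function gets one clone
   per listed target plus an ifunc dispatcher; the clone names accepted on
   PowerPC come from rs6000_clone_map.  */

__attribute__ ((target_clones ("cpu=power9", "cpu=power8", "default")))
long
sum (const long *p, int n)
{
  long s = 0;
  for (int i = 0; i < n; i++)
    s += p[i];
  return s;
}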
36792 /* Make the resolver function decl to dispatch the versions of a multi-
36793 versioned function, DEFAULT_DECL. Create an empty basic block in the
36794 resolver and store the pointer in EMPTY_BB. Return the decl of the resolver
36795 function. */
36797 static tree
36798 make_resolver_func (const tree default_decl,
36799 const tree dispatch_decl,
36800 basic_block *empty_bb)
36802 /* Make the resolver function static. The resolver function returns
36803 void *. */
36804 tree decl_name = clone_function_name (default_decl, "resolver");
36805 const char *resolver_name = IDENTIFIER_POINTER (decl_name);
36806 tree type = build_function_type_list (ptr_type_node, NULL_TREE);
36807 tree decl = build_fn_decl (resolver_name, type);
36808 SET_DECL_ASSEMBLER_NAME (decl, decl_name);
36810 DECL_NAME (decl) = decl_name;
36811 TREE_USED (decl) = 1;
36812 DECL_ARTIFICIAL (decl) = 1;
36813 DECL_IGNORED_P (decl) = 0;
36814 TREE_PUBLIC (decl) = 0;
36815 DECL_UNINLINABLE (decl) = 1;
36817 /* Resolver is not external, body is generated. */
36818 DECL_EXTERNAL (decl) = 0;
36819 DECL_EXTERNAL (dispatch_decl) = 0;
36821 DECL_CONTEXT (decl) = NULL_TREE;
36822 DECL_INITIAL (decl) = make_node (BLOCK);
36823 DECL_STATIC_CONSTRUCTOR (decl) = 0;
36825 /* Build result decl and add to function_decl. */
36826 tree t = build_decl (UNKNOWN_LOCATION, RESULT_DECL, NULL_TREE, ptr_type_node);
36827 DECL_ARTIFICIAL (t) = 1;
36828 DECL_IGNORED_P (t) = 1;
36829 DECL_RESULT (decl) = t;
36831 gimplify_function_tree (decl);
36832 push_cfun (DECL_STRUCT_FUNCTION (decl));
36833 *empty_bb = init_lowered_empty_function (decl, false,
36834 profile_count::uninitialized ());
36836 cgraph_node::add_new_function (decl, true);
36837 symtab->call_cgraph_insertion_hooks (cgraph_node::get_create (decl));
36839 pop_cfun ();
36841 /* Mark dispatch_decl as "ifunc" with resolver as resolver_name. */
36842 DECL_ATTRIBUTES (dispatch_decl)
36843 = make_attribute ("ifunc", resolver_name, DECL_ATTRIBUTES (dispatch_decl));
36845 cgraph_node::create_same_body_alias (dispatch_decl, decl);
36847 return decl;
36850 /* This adds a condition to the basic_block NEW_BB in function FUNCTION_DECL to
36851 return a pointer to VERSION_DECL if we are running on a machine that
36852 supports the hardware architecture bits indexed by CLONE_ISA. This function will
36853 be called during version dispatch to decide which function version to
36854 execute. It returns the basic block at the end, to which more conditions
36855 can be added. */
36857 static basic_block
36858 add_condition_to_bb (tree function_decl, tree version_decl,
36859 int clone_isa, basic_block new_bb)
36861 push_cfun (DECL_STRUCT_FUNCTION (function_decl));
36863 gcc_assert (new_bb != NULL);
36864 gimple_seq gseq = bb_seq (new_bb);
36867 tree convert_expr = build1 (CONVERT_EXPR, ptr_type_node,
36868 build_fold_addr_expr (version_decl));
36869 tree result_var = create_tmp_var (ptr_type_node);
36870 gimple *convert_stmt = gimple_build_assign (result_var, convert_expr);
36871 gimple *return_stmt = gimple_build_return (result_var);
36873 if (clone_isa == CLONE_DEFAULT)
36875 gimple_seq_add_stmt (&gseq, convert_stmt);
36876 gimple_seq_add_stmt (&gseq, return_stmt);
36877 set_bb_seq (new_bb, gseq);
36878 gimple_set_bb (convert_stmt, new_bb);
36879 gimple_set_bb (return_stmt, new_bb);
36880 pop_cfun ();
36881 return new_bb;
36884 tree bool_zero = build_int_cst (bool_int_type_node, 0);
36885 tree cond_var = create_tmp_var (bool_int_type_node);
36886 tree predicate_decl = rs6000_builtin_decls [(int) RS6000_BUILTIN_CPU_SUPPORTS];
36887 const char *arg_str = rs6000_clone_map[clone_isa].name;
36888 tree predicate_arg = build_string_literal (strlen (arg_str) + 1, arg_str);
36889 gimple *call_cond_stmt = gimple_build_call (predicate_decl, 1, predicate_arg);
36890 gimple_call_set_lhs (call_cond_stmt, cond_var);
36892 gimple_set_block (call_cond_stmt, DECL_INITIAL (function_decl));
36893 gimple_set_bb (call_cond_stmt, new_bb);
36894 gimple_seq_add_stmt (&gseq, call_cond_stmt);
36896 gimple *if_else_stmt = gimple_build_cond (NE_EXPR, cond_var, bool_zero,
36897 NULL_TREE, NULL_TREE);
36898 gimple_set_block (if_else_stmt, DECL_INITIAL (function_decl));
36899 gimple_set_bb (if_else_stmt, new_bb);
36900 gimple_seq_add_stmt (&gseq, if_else_stmt);
36902 gimple_seq_add_stmt (&gseq, convert_stmt);
36903 gimple_seq_add_stmt (&gseq, return_stmt);
36904 set_bb_seq (new_bb, gseq);
36906 basic_block bb1 = new_bb;
36907 edge e12 = split_block (bb1, if_else_stmt);
36908 basic_block bb2 = e12->dest;
36909 e12->flags &= ~EDGE_FALLTHRU;
36910 e12->flags |= EDGE_TRUE_VALUE;
36912 edge e23 = split_block (bb2, return_stmt);
36913 gimple_set_bb (convert_stmt, bb2);
36914 gimple_set_bb (return_stmt, bb2);
36916 basic_block bb3 = e23->dest;
36917 make_edge (bb1, bb3, EDGE_FALSE_VALUE);
36919 remove_edge (e23);
36920 make_edge (bb2, EXIT_BLOCK_PTR_FOR_FN (cfun), 0);
36922 pop_cfun ();
36923 return bb3;
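/* Editor's note: the GIMPLE emitted above corresponds roughly to this C
   sketch of one resolver body (function names hypothetical; the predicate
   strings come from rs6000_clone_map and are tested from the highest
   priority clone down to the default):

	void *sum_resolver (void)
	{
	  if (__builtin_cpu_supports ("arch_3_00"))
	    return (void *) sum_power9;
	  if (__builtin_cpu_supports ("arch_2_07"))
	    return (void *) sum_power8;
	  return (void *) sum_default;
	}
*/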
36926 /* This function generates the dispatch function for multi-versioned functions.
36927 DISPATCH_DECL is the function which will contain the dispatch logic.
36928 FNDECLS are the function choices for dispatch, and is a tree chain.
36929 EMPTY_BB is the basic block pointer in DISPATCH_DECL in which the dispatch
36930 code is generated. */
36932 static int
36933 dispatch_function_versions (tree dispatch_decl,
36934 void *fndecls_p,
36935 basic_block *empty_bb)
36937 int ix;
36938 tree ele;
36939 vec<tree> *fndecls;
36940 tree clones[CLONE_MAX];
36942 if (TARGET_DEBUG_TARGET)
36943 fputs ("dispatch_function_versions, top\n", stderr);
36945 gcc_assert (dispatch_decl != NULL
36946 && fndecls_p != NULL
36947 && empty_bb != NULL);
36949 /* fndecls_p is actually a vector. */
36950 fndecls = static_cast<vec<tree> *> (fndecls_p);
36952 /* At least one more version other than the default. */
36953 gcc_assert (fndecls->length () >= 2);
36955 /* The first version in the vector is the default decl. */
36956 memset ((void *) clones, '\0', sizeof (clones));
36957 clones[CLONE_DEFAULT] = (*fndecls)[0];
36959 /* On the PowerPC, we do not need to call __builtin_cpu_init, which is a NOP
36960 on the PowerPC (on the x86_64, it is not a NOP). The builtin function
36961 __builtin_cpu_supports ensures that the TOC fields are set up by requiring a
36962 recent glibc. If we ever need to call __builtin_cpu_init, we would need
36963 to insert the code here to do the call. */
36965 for (ix = 1; fndecls->iterate (ix, &ele); ++ix)
36967 int priority = rs6000_clone_priority (ele);
36968 if (!clones[priority])
36969 clones[priority] = ele;
36972 for (ix = CLONE_MAX - 1; ix >= 0; ix--)
36973 if (clones[ix])
36975 if (TARGET_DEBUG_TARGET)
36976 fprintf (stderr, "dispatch_function_versions, clone %d, %s\n",
36977 ix, get_decl_name (clones[ix]));
36979 *empty_bb = add_condition_to_bb (dispatch_decl, clones[ix], ix,
36980 *empty_bb);
36983 return 0;
36986 /* Generate the dispatching code body to dispatch multi-versioned function
36987 DECL. The target hook is called to process the "target" attributes and
36988 provide the code to dispatch the right function at run-time. NODE points
36989 to the dispatcher decl whose body will be created. */
36991 static tree
36992 rs6000_generate_version_dispatcher_body (void *node_p)
36994 tree resolver;
36995 basic_block empty_bb;
36996 struct cgraph_node *node = (cgraph_node *) node_p;
36997 struct cgraph_function_version_info *ninfo = node->function_version ();
36999 if (ninfo->dispatcher_resolver)
37000 return ninfo->dispatcher_resolver;
37002 /* node is going to be an alias, so remove the finalized bit. */
37003 node->definition = false;
37005 /* The first version in the chain corresponds to the default version. */
37006 ninfo->dispatcher_resolver = resolver
37007 = make_resolver_func (ninfo->next->this_node->decl, node->decl, &empty_bb);
37009 if (TARGET_DEBUG_TARGET)
37010 fprintf (stderr, "rs6000_generate_version_dispatcher_body, %s\n",
37011 get_decl_name (resolver));
37013 push_cfun (DECL_STRUCT_FUNCTION (resolver));
37014 auto_vec<tree, 2> fn_ver_vec;
37016 for (struct cgraph_function_version_info *vinfo = ninfo->next;
37017 vinfo;
37018 vinfo = vinfo->next)
37020 struct cgraph_node *version = vinfo->this_node;
37021 /* Check for virtual functions here again, as by this time it should
37022 have been determined if this function needs a vtable index or
37023 not. This happens for methods in derived classes that override
37024 virtual methods in base classes but are not explicitly marked as
37025 virtual. */
37026 if (DECL_VINDEX (version->decl))
37027 sorry ("Virtual function multiversioning not supported");
37029 fn_ver_vec.safe_push (version->decl);
37032 dispatch_function_versions (resolver, &fn_ver_vec, &empty_bb);
37033 cgraph_edge::rebuild_edges ();
37034 pop_cfun ();
37035 return resolver;
37039 /* Hook to determine if one function can safely inline another. */
37041 static bool
37042 rs6000_can_inline_p (tree caller, tree callee)
37044 bool ret = false;
37045 tree caller_tree = DECL_FUNCTION_SPECIFIC_TARGET (caller);
37046 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (callee);
37048 /* If callee has no option attributes, then it is ok to inline. */
37049 if (!callee_tree)
37050 ret = true;
37052 /* If caller has no option attributes, but callee does then it is not ok to
37053 inline. */
37054 else if (!caller_tree)
37055 ret = false;
37057 else
37059 struct cl_target_option *caller_opts = TREE_TARGET_OPTION (caller_tree);
37060 struct cl_target_option *callee_opts = TREE_TARGET_OPTION (callee_tree);
37062 /* Callee's options should be a subset of the caller's, i.e. a vsx function
37063 can inline an altivec function but a non-vsx function can't inline a
37064 vsx function. */
37065 if ((caller_opts->x_rs6000_isa_flags & callee_opts->x_rs6000_isa_flags)
37066 == callee_opts->x_rs6000_isa_flags)
37067 ret = true;
37070 if (TARGET_DEBUG_TARGET)
37071 fprintf (stderr, "rs6000_can_inline_p, caller %s, callee %s, %s inline\n",
37072 get_decl_name (caller), get_decl_name (callee),
37073 (ret ? "can" : "cannot"));
37075 return ret;
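/* Editor's note: the subset test above in isolation (a sketch, not part
   of rs6000.c; the mask names are hypothetical).  A caller whose ISA
   flags are a superset of the callee's can inline it; the reverse
   cannot.  */

static int
isa_subset_p (unsigned long caller_isa, unsigned long callee_isa)
{
  /* Every ISA bit the callee needs must be present in the caller.  */
  return (caller_isa & callee_isa) == callee_isa;
}

/* isa_subset_p (VSX | ALTIVEC, ALTIVEC) is 1, so inlining is allowed;
   isa_subset_p (ALTIVEC, VSX | ALTIVEC) is 0, so it is rejected.  */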
37078 /* Allocate a stack temp and fix up the address so it meets the particular
37079 memory requirements (either offsettable or REG+REG addressing). */
37081 rtx
37082 rs6000_allocate_stack_temp (machine_mode mode,
37083 bool offsettable_p,
37084 bool reg_reg_p)
37086 rtx stack = assign_stack_temp (mode, GET_MODE_SIZE (mode));
37087 rtx addr = XEXP (stack, 0);
37088 int strict_p = reload_completed;
37090 if (!legitimate_indirect_address_p (addr, strict_p))
37092 if (offsettable_p
37093 && !rs6000_legitimate_offset_address_p (mode, addr, strict_p, true))
37094 stack = replace_equiv_address (stack, copy_addr_to_reg (addr));
37096 else if (reg_reg_p && !legitimate_indexed_address_p (addr, strict_p))
37097 stack = replace_equiv_address (stack, copy_addr_to_reg (addr));
37100 return stack;
37103 /* Given a memory reference, if it does not use reg or reg+reg addressing,
37104 convert it to such a form to deal with memory reference instructions like
37105 STFIWX that only take reg+reg addressing. */
37107 rtx
37108 rs6000_address_for_fpconvert (rtx x)
37110 rtx addr;
37112 gcc_assert (MEM_P (x));
37113 addr = XEXP (x, 0);
37114 if (can_create_pseudo_p ()
37115 && ! legitimate_indirect_address_p (addr, reload_completed)
37116 && ! legitimate_indexed_address_p (addr, reload_completed))
37118 if (GET_CODE (addr) == PRE_INC || GET_CODE (addr) == PRE_DEC)
37120 rtx reg = XEXP (addr, 0);
37121 HOST_WIDE_INT size = GET_MODE_SIZE (GET_MODE (x));
37122 rtx size_rtx = GEN_INT ((GET_CODE (addr) == PRE_DEC) ? -size : size);
37123 gcc_assert (REG_P (reg));
37124 emit_insn (gen_add3_insn (reg, reg, size_rtx));
37125 addr = reg;
37127 else if (GET_CODE (addr) == PRE_MODIFY)
37129 rtx reg = XEXP (addr, 0);
37130 rtx expr = XEXP (addr, 1);
37131 gcc_assert (REG_P (reg));
37132 gcc_assert (GET_CODE (expr) == PLUS);
37133 emit_insn (gen_add3_insn (reg, XEXP (expr, 0), XEXP (expr, 1)));
37134 addr = reg;
37137 x = replace_equiv_address (x, copy_addr_to_reg (addr));
37140 return x;
37143 /* Implement TARGET_LEGITIMATE_CONSTANT_P.
37145 On the RS/6000, all integer constants are acceptable, though most won't be
37146 valid for particular insns. Only easy FP constants are acceptable. */
37148 static bool
37149 rs6000_legitimate_constant_p (machine_mode mode, rtx x)
37151 if (TARGET_ELF && tls_referenced_p (x))
37152 return false;
37154 return ((GET_CODE (x) != CONST_DOUBLE && GET_CODE (x) != CONST_VECTOR)
37155 || GET_MODE (x) == VOIDmode
37156 || (TARGET_POWERPC64 && mode == DImode)
37157 || easy_fp_constant (x, mode)
37158 || easy_vector_constant (x, mode));
37162 /* Return TRUE iff the sequence ending in LAST sets the static chain. */
37164 static bool
37165 chain_already_loaded (rtx_insn *last)
37167 for (; last != NULL; last = PREV_INSN (last))
37169 if (NONJUMP_INSN_P (last))
37171 rtx patt = PATTERN (last);
37173 if (GET_CODE (patt) == SET)
37175 rtx lhs = XEXP (patt, 0);
37177 if (REG_P (lhs) && REGNO (lhs) == STATIC_CHAIN_REGNUM)
37178 return true;
37182 return false;
37185 /* Expand code to perform a call under the AIX or ELFv2 ABI. */
37187 void
37188 rs6000_call_aix (rtx value, rtx func_desc, rtx flag, rtx cookie)
37190 const bool direct_call_p
37191 = GET_CODE (func_desc) == SYMBOL_REF && SYMBOL_REF_FUNCTION_P (func_desc);
37192 rtx toc_reg = gen_rtx_REG (Pmode, TOC_REGNUM);
37193 rtx toc_load = NULL_RTX;
37194 rtx toc_restore = NULL_RTX;
37195 rtx func_addr;
37196 rtx abi_reg = NULL_RTX;
37197 rtx call[4];
37198 int n_call;
37199 rtx insn;
37201 /* Handle longcall attributes. */
37202 if (INTVAL (cookie) & CALL_LONG)
37203 func_desc = rs6000_longcall_ref (func_desc);
37205 /* Handle indirect calls. */
37206 if (GET_CODE (func_desc) != SYMBOL_REF
37207 || (DEFAULT_ABI == ABI_AIX && !SYMBOL_REF_FUNCTION_P (func_desc)))
37209 /* Save the TOC into its reserved slot before the call,
37210 and prepare to restore it after the call. */
37211 rtx stack_ptr = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
37212 rtx stack_toc_offset = GEN_INT (RS6000_TOC_SAVE_SLOT);
37213 rtx stack_toc_mem = gen_frame_mem (Pmode,
37214 gen_rtx_PLUS (Pmode, stack_ptr,
37215 stack_toc_offset));
37216 rtx stack_toc_unspec = gen_rtx_UNSPEC (Pmode,
37217 gen_rtvec (1, stack_toc_offset),
37218 UNSPEC_TOCSLOT);
37219 toc_restore = gen_rtx_SET (toc_reg, stack_toc_unspec);
37221 /* Can we optimize saving the TOC in the prologue or
37222 do we need to do it at every call? */
37223 if (TARGET_SAVE_TOC_INDIRECT && !cfun->calls_alloca)
37224 cfun->machine->save_toc_in_prologue = true;
37225 else
37227 MEM_VOLATILE_P (stack_toc_mem) = 1;
37228 emit_move_insn (stack_toc_mem, toc_reg);
37231 if (DEFAULT_ABI == ABI_ELFv2)
37233 /* A function pointer in the ELFv2 ABI is just a plain address, but
37234 the ABI requires it to be loaded into r12 before the call. */
37235 func_addr = gen_rtx_REG (Pmode, 12);
37236 emit_move_insn (func_addr, func_desc);
37237 abi_reg = func_addr;
37239 else
37241 /* A function pointer under AIX is a pointer to a data area whose
37242 first word contains the actual address of the function, whose
37243 second word contains a pointer to its TOC, and whose third word
37244 contains a value to place in the static chain register (r11).
37245 Note that if we load the static chain, our "trampoline" need
37246 not have any executable code. */
37248 /* Load up address of the actual function. */
37249 func_desc = force_reg (Pmode, func_desc);
37250 func_addr = gen_reg_rtx (Pmode);
37251 emit_move_insn (func_addr, gen_rtx_MEM (Pmode, func_desc));
37253 /* Prepare to load the TOC of the called function. Note that the
37254 TOC load must happen immediately before the actual call so
37255 that unwinding the TOC registers works correctly. See the
37256 comment in frob_update_context. */
37257 rtx func_toc_offset = GEN_INT (GET_MODE_SIZE (Pmode));
37258 rtx func_toc_mem = gen_rtx_MEM (Pmode,
37259 gen_rtx_PLUS (Pmode, func_desc,
37260 func_toc_offset));
37261 toc_load = gen_rtx_USE (VOIDmode, func_toc_mem);
37263 /* If we have a static chain, load it up. But, if the call was
37264 originally direct, the 3rd word has not been written since no
37265 trampoline has been built, so we ought not to load it, lest we
37266 override a static chain value. */
37267 if (!direct_call_p
37268 && TARGET_POINTERS_TO_NESTED_FUNCTIONS
37269 && !chain_already_loaded (get_current_sequence ()->next->last))
37271 rtx sc_reg = gen_rtx_REG (Pmode, STATIC_CHAIN_REGNUM);
37272 rtx func_sc_offset = GEN_INT (2 * GET_MODE_SIZE (Pmode));
37273 rtx func_sc_mem = gen_rtx_MEM (Pmode,
37274 gen_rtx_PLUS (Pmode, func_desc,
37275 func_sc_offset));
37276 emit_move_insn (sc_reg, func_sc_mem);
37277 abi_reg = sc_reg;
37281 else
37283 /* Direct calls use the TOC: for local calls, the callee will
37284 assume the TOC register is set; for non-local calls, the
37285 PLT stub needs the TOC register. */
37286 abi_reg = toc_reg;
37287 func_addr = func_desc;
37290 /* Create the call. */
37291 call[0] = gen_rtx_CALL (VOIDmode, gen_rtx_MEM (SImode, func_addr), flag);
37292 if (value != NULL_RTX)
37293 call[0] = gen_rtx_SET (value, call[0]);
37294 n_call = 1;
37296 if (toc_load)
37297 call[n_call++] = toc_load;
37298 if (toc_restore)
37299 call[n_call++] = toc_restore;
37301 call[n_call++] = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (Pmode, LR_REGNO));
37303 insn = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (n_call, call));
37304 insn = emit_call_insn (insn);
37306 /* Mention all registers defined by the ABI to hold information
37307 as uses in CALL_INSN_FUNCTION_USAGE. */
37308 if (abi_reg)
37309 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), abi_reg);
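/* Editor's note: a C picture (illustrative only, not part of rs6000.c) of
   the three-word AIX function descriptor described in the comment above;
   each field is a Pmode-sized word.  */

struct aix_func_desc
{
  void *entry;		/* word 0: address of the function's code.  */
  void *toc;		/* word 1: TOC pointer for the callee.  */
  void *static_chain;	/* word 2: value loaded into r11 if needed.  */
};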
37312 /* Expand code to perform a sibling call under the AIX or ELFv2 ABI. */
37314 void
37315 rs6000_sibcall_aix (rtx value, rtx func_desc, rtx flag, rtx cookie)
37317 rtx call[2];
37318 rtx insn;
37320 gcc_assert (INTVAL (cookie) == 0);
37322 /* Create the call. */
37323 call[0] = gen_rtx_CALL (VOIDmode, gen_rtx_MEM (SImode, func_desc), flag);
37324 if (value != NULL_RTX)
37325 call[0] = gen_rtx_SET (value, call[0]);
37327 call[1] = simple_return_rtx;
37329 insn = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (2, call));
37330 insn = emit_call_insn (insn);
37332 /* Note use of the TOC register. */
37333 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), gen_rtx_REG (Pmode, TOC_REGNUM));
37336 /* Return whether we need to always update the saved TOC pointer when we update
37337 the stack pointer. */
37339 static bool
37340 rs6000_save_toc_in_prologue_p (void)
37342 return (cfun && cfun->machine && cfun->machine->save_toc_in_prologue);
37345 #ifdef HAVE_GAS_HIDDEN
37346 # define USE_HIDDEN_LINKONCE 1
37347 #else
37348 # define USE_HIDDEN_LINKONCE 0
37349 #endif
37351 /* Fills in the label name that should be used for a 476 link stack thunk. */
37353 void
37354 get_ppc476_thunk_name (char name[32])
37356 gcc_assert (TARGET_LINK_STACK);
37358 if (USE_HIDDEN_LINKONCE)
37359 sprintf (name, "__ppc476.get_thunk");
37360 else
37361 ASM_GENERATE_INTERNAL_LABEL (name, "LPPC476_", 0);
37364 /* This function emits the simple thunk routine that is used to preserve
37365 the link stack on the 476 cpu. */
37367 static void rs6000_code_end (void) ATTRIBUTE_UNUSED;
37368 static void
37369 rs6000_code_end (void)
37371 char name[32];
37372 tree decl;
37374 if (!TARGET_LINK_STACK)
37375 return;
37377 get_ppc476_thunk_name (name);
37379 decl = build_decl (BUILTINS_LOCATION, FUNCTION_DECL, get_identifier (name),
37380 build_function_type_list (void_type_node, NULL_TREE));
37381 DECL_RESULT (decl) = build_decl (BUILTINS_LOCATION, RESULT_DECL,
37382 NULL_TREE, void_type_node);
37383 TREE_PUBLIC (decl) = 1;
37384 TREE_STATIC (decl) = 1;
37386 #if RS6000_WEAK
37387 if (USE_HIDDEN_LINKONCE && !TARGET_XCOFF)
37389 cgraph_node::create (decl)->set_comdat_group (DECL_ASSEMBLER_NAME (decl));
37390 targetm.asm_out.unique_section (decl, 0);
37391 switch_to_section (get_named_section (decl, NULL, 0));
37392 DECL_WEAK (decl) = 1;
37393 ASM_WEAKEN_DECL (asm_out_file, decl, name, 0);
37394 targetm.asm_out.globalize_label (asm_out_file, name);
37395 targetm.asm_out.assemble_visibility (decl, VISIBILITY_HIDDEN);
37396 ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
37398 else
37399 #endif
37401 switch_to_section (text_section);
37402 ASM_OUTPUT_LABEL (asm_out_file, name);
37405 DECL_INITIAL (decl) = make_node (BLOCK);
37406 current_function_decl = decl;
37407 allocate_struct_function (decl, false);
37408 init_function_start (decl);
37409 first_function_block_is_cold = false;
37410 /* Make sure unwind info is emitted for the thunk if needed. */
37411 final_start_function (emit_barrier (), asm_out_file, 1);
37413 fputs ("\tblr\n", asm_out_file);
37415 final_end_function ();
37416 init_insn_lengths ();
37417 free_after_compilation (cfun);
37418 set_cfun (NULL);
37419 current_function_decl = NULL;
37422 /* Add r30 to hard reg set if the prologue sets it up and it is not
37423 pic_offset_table_rtx. */
37425 static void
37426 rs6000_set_up_by_prologue (struct hard_reg_set_container *set)
37428 if (!TARGET_SINGLE_PIC_BASE
37429 && TARGET_TOC
37430 && TARGET_MINIMAL_TOC
37431 && !constant_pool_empty_p ())
37432 add_to_hard_reg_set (&set->set, Pmode, RS6000_PIC_OFFSET_TABLE_REGNUM);
37433 if (cfun->machine->split_stack_argp_used)
37434 add_to_hard_reg_set (&set->set, Pmode, 12);
37436 /* Make sure the hard reg set doesn't include r2, which was possibly added
37437 via PIC_OFFSET_TABLE_REGNUM. */
37438 if (TARGET_TOC)
37439 remove_from_hard_reg_set (&set->set, Pmode, TOC_REGNUM);
37443 /* Helper function for rs6000_split_logical to emit a logical instruction after
37444 splitting the operation into single GPR registers.
37446 DEST is the destination register.
37447 OP1 and OP2 are the input source registers.
37448 CODE is the base operation (AND, IOR, XOR, NOT).
37449 MODE is the machine mode.
37450 If COMPLEMENT_FINAL_P is true, wrap the whole operation with NOT.
37451 If COMPLEMENT_OP1_P is true, wrap operand1 with NOT.
37452 If COMPLEMENT_OP2_P is true, wrap operand2 with NOT. */
37454 static void
37455 rs6000_split_logical_inner (rtx dest,
37456 rtx op1,
37457 rtx op2,
37458 enum rtx_code code,
37459 machine_mode mode,
37460 bool complement_final_p,
37461 bool complement_op1_p,
37462 bool complement_op2_p)
37464 rtx bool_rtx;
37466 /* Optimize AND of 0/0xffffffff and IOR/XOR of 0. */
37467 if (op2 && GET_CODE (op2) == CONST_INT
37468 && (mode == SImode || (mode == DImode && TARGET_POWERPC64))
37469 && !complement_final_p && !complement_op1_p && !complement_op2_p)
37471 HOST_WIDE_INT mask = GET_MODE_MASK (mode);
37472 HOST_WIDE_INT value = INTVAL (op2) & mask;
37474 /* Optimize AND of 0 to just set 0. Optimize AND of -1 to be a move. */
37475 if (code == AND)
37477 if (value == 0)
37479 emit_insn (gen_rtx_SET (dest, const0_rtx));
37480 return;
37483 else if (value == mask)
37485 if (!rtx_equal_p (dest, op1))
37486 emit_insn (gen_rtx_SET (dest, op1));
37487 return;
37491 /* Optimize IOR/XOR of 0 to be a simple move. Split large operations
37492 into separate ORI/ORIS or XORI/XORIS instructions. */
37493 else if (code == IOR || code == XOR)
37495 if (value == 0)
37497 if (!rtx_equal_p (dest, op1))
37498 emit_insn (gen_rtx_SET (dest, op1));
37499 return;
37504 if (code == AND && mode == SImode
37505 && !complement_final_p && !complement_op1_p && !complement_op2_p)
37507 emit_insn (gen_andsi3 (dest, op1, op2));
37508 return;
37511 if (complement_op1_p)
37512 op1 = gen_rtx_NOT (mode, op1);
37514 if (complement_op2_p)
37515 op2 = gen_rtx_NOT (mode, op2);
37517 /* For canonical RTL, if only one arm is inverted it is the first. */
37518 if (!complement_op1_p && complement_op2_p)
37519 std::swap (op1, op2);
37521 bool_rtx = ((code == NOT)
37522 ? gen_rtx_NOT (mode, op1)
37523 : gen_rtx_fmt_ee (code, mode, op1, op2));
37525 if (complement_final_p)
37526 bool_rtx = gen_rtx_NOT (mode, bool_rtx);
37528 emit_insn (gen_rtx_SET (dest, bool_rtx));
37531 /* Split a DImode AND/IOR/XOR with a constant on a 32-bit system. These
37532 operations are split immediately during RTL generation to allow for more
37533 optimizations of the AND/IOR/XOR.
37535 OPERANDS is an array containing the destination and two input operands.
37536 CODE is the base operation (AND, IOR, XOR, NOT).
37538 If COMPLEMENT_FINAL_P is true, wrap the whole operation with NOT.
37539 If COMPLEMENT_OP1_P is true, wrap operand1 with NOT.
37540 If COMPLEMENT_OP2_P is true, wrap operand2 with NOT. */
37544 static void
37545 rs6000_split_logical_di (rtx operands[3],
37546 enum rtx_code code,
37547 bool complement_final_p,
37548 bool complement_op1_p,
37549 bool complement_op2_p)
37551 const HOST_WIDE_INT lower_32bits = HOST_WIDE_INT_C(0xffffffff);
37552 const HOST_WIDE_INT upper_32bits = ~ lower_32bits;
37553 const HOST_WIDE_INT sign_bit = HOST_WIDE_INT_C(0x80000000);
37554 enum hi_lo { hi = 0, lo = 1 };
37555 rtx op0_hi_lo[2], op1_hi_lo[2], op2_hi_lo[2];
37556 size_t i;
37558 op0_hi_lo[hi] = gen_highpart (SImode, operands[0]);
37559 op1_hi_lo[hi] = gen_highpart (SImode, operands[1]);
37560 op0_hi_lo[lo] = gen_lowpart (SImode, operands[0]);
37561 op1_hi_lo[lo] = gen_lowpart (SImode, operands[1]);
37563 if (code == NOT)
37564 op2_hi_lo[hi] = op2_hi_lo[lo] = NULL_RTX;
37565 else
37567 if (GET_CODE (operands[2]) != CONST_INT)
37569 op2_hi_lo[hi] = gen_highpart_mode (SImode, DImode, operands[2]);
37570 op2_hi_lo[lo] = gen_lowpart (SImode, operands[2]);
37572 else
37574 HOST_WIDE_INT value = INTVAL (operands[2]);
37575 HOST_WIDE_INT value_hi_lo[2];
37577 gcc_assert (!complement_final_p);
37578 gcc_assert (!complement_op1_p);
37579 gcc_assert (!complement_op2_p);
37581 value_hi_lo[hi] = value >> 32;
37582 value_hi_lo[lo] = value & lower_32bits;
37584 for (i = 0; i < 2; i++)
37586 HOST_WIDE_INT sub_value = value_hi_lo[i];
37588 if (sub_value & sign_bit)
37589 sub_value |= upper_32bits;
37591 op2_hi_lo[i] = GEN_INT (sub_value);
37593 /* If this is an AND instruction, check to see if we need to load
37594 the value in a register. */
37595 if (code == AND && sub_value != -1 && sub_value != 0
37596 && !and_operand (op2_hi_lo[i], SImode))
37597 op2_hi_lo[i] = force_reg (SImode, op2_hi_lo[i]);
37602 for (i = 0; i < 2; i++)
37604 /* Split large IOR/XOR operations. */
37605 if ((code == IOR || code == XOR)
37606 && GET_CODE (op2_hi_lo[i]) == CONST_INT
37607 && !complement_final_p
37608 && !complement_op1_p
37609 && !complement_op2_p
37610 && !logical_const_operand (op2_hi_lo[i], SImode))
37612 HOST_WIDE_INT value = INTVAL (op2_hi_lo[i]);
37613 HOST_WIDE_INT hi_16bits = value & HOST_WIDE_INT_C(0xffff0000);
37614 HOST_WIDE_INT lo_16bits = value & HOST_WIDE_INT_C(0x0000ffff);
37615 rtx tmp = gen_reg_rtx (SImode);
37617 /* Make sure the constant is sign extended. */
37618 if ((hi_16bits & sign_bit) != 0)
37619 hi_16bits |= upper_32bits;
37621 rs6000_split_logical_inner (tmp, op1_hi_lo[i], GEN_INT (hi_16bits),
37622 code, SImode, false, false, false);
37624 rs6000_split_logical_inner (op0_hi_lo[i], tmp, GEN_INT (lo_16bits),
37625 code, SImode, false, false, false);
37627 else
37628 rs6000_split_logical_inner (op0_hi_lo[i], op1_hi_lo[i], op2_hi_lo[i],
37629 code, SImode, complement_final_p,
37630 complement_op1_p, complement_op2_p);
37633 return;
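/* Editor's note: an illustrative split performed by the loop above.  On a
   32-bit target, an SImode half-operation such as  r3 |= 0x12345678  is
   not a single logical_const_operand, so it is emitted as two
   instructions (register numbers hypothetical):

	oris 3,3,0x1234		# IOR with the high 16 bits
	ori  3,3,0x5678		# IOR with the low 16 bits
*/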
37636 /* Split the insns that make up boolean operations operating on multiple GPR
37637 registers. The boolean MD patterns ensure that the inputs either are
37638 exactly the same as the output registers, or there is no overlap.
37640 OPERANDS is an array containing the destination and two input operands.
37641 CODE is the base operation (AND, IOR, XOR, NOT).
37642 If COMPLEMENT_FINAL_P is true, wrap the whole operation with NOT.
37643 If COMPLEMENT_OP1_P is true, wrap operand1 with NOT.
37644 If COMPLEMENT_OP2_P is true, wrap operand2 with NOT. */
37646 void
37647 rs6000_split_logical (rtx operands[3],
37648 enum rtx_code code,
37649 bool complement_final_p,
37650 bool complement_op1_p,
37651 bool complement_op2_p)
37653 machine_mode mode = GET_MODE (operands[0]);
37654 machine_mode sub_mode;
37655 rtx op0, op1, op2;
37656 int sub_size, regno0, regno1, nregs, i;
37658 /* If this is DImode, use the specialized version that can run before
37659 register allocation. */
37660 if (mode == DImode && !TARGET_POWERPC64)
37662 rs6000_split_logical_di (operands, code, complement_final_p,
37663 complement_op1_p, complement_op2_p);
37664 return;
37667 op0 = operands[0];
37668 op1 = operands[1];
37669 op2 = (code == NOT) ? NULL_RTX : operands[2];
37670 sub_mode = (TARGET_POWERPC64) ? DImode : SImode;
37671 sub_size = GET_MODE_SIZE (sub_mode);
37672 regno0 = REGNO (op0);
37673 regno1 = REGNO (op1);
37675 gcc_assert (reload_completed);
37676 gcc_assert (IN_RANGE (regno0, FIRST_GPR_REGNO, LAST_GPR_REGNO));
37677 gcc_assert (IN_RANGE (regno1, FIRST_GPR_REGNO, LAST_GPR_REGNO));
37679 nregs = rs6000_hard_regno_nregs[(int)mode][regno0];
37680 gcc_assert (nregs > 1);
37682 if (op2 && REG_P (op2))
37683 gcc_assert (IN_RANGE (REGNO (op2), FIRST_GPR_REGNO, LAST_GPR_REGNO));
37685 for (i = 0; i < nregs; i++)
37687 int offset = i * sub_size;
37688 rtx sub_op0 = simplify_subreg (sub_mode, op0, mode, offset);
37689 rtx sub_op1 = simplify_subreg (sub_mode, op1, mode, offset);
37690 rtx sub_op2 = ((code == NOT)
37691 ? NULL_RTX
37692 : simplify_subreg (sub_mode, op2, mode, offset));
37694 rs6000_split_logical_inner (sub_op0, sub_op1, sub_op2, code, sub_mode,
37695 complement_final_p, complement_op1_p,
37696 complement_op2_p);
37699 return;
37703 /* Return true if the peephole2 can combine a load involving a combination of
37704 an addis instruction and a load with an offset that can be fused together on
37705 a power8. */
37707 bool
37708 fusion_gpr_load_p (rtx addis_reg, /* register set via addis. */
37709 rtx addis_value, /* addis value. */
37710 rtx target, /* target register that is loaded. */
37711 rtx mem) /* bottom part of the memory addr. */
37713 rtx addr;
37714 rtx base_reg;
37716 /* Validate arguments. */
37717 if (!base_reg_operand (addis_reg, GET_MODE (addis_reg)))
37718 return false;
37720 if (!base_reg_operand (target, GET_MODE (target)))
37721 return false;
37723 if (!fusion_gpr_addis (addis_value, GET_MODE (addis_value)))
37724 return false;
37726 /* Allow sign/zero extension. */
37727 if (GET_CODE (mem) == ZERO_EXTEND
37728 || (GET_CODE (mem) == SIGN_EXTEND && TARGET_P8_FUSION_SIGN))
37729 mem = XEXP (mem, 0);
37731 if (!MEM_P (mem))
37732 return false;
37734 if (!fusion_gpr_mem_load (mem, GET_MODE (mem)))
37735 return false;
37737 addr = XEXP (mem, 0); /* either PLUS or LO_SUM. */
37738 if (GET_CODE (addr) != PLUS && GET_CODE (addr) != LO_SUM)
37739 return false;
37741 /* Validate that the register used to load the high value is either the
37742 register being loaded, or we can safely replace its use.
37744 This function is only called from the peephole2 pass and we assume that
37745 there are 2 instructions in the peephole (addis and load), so we want to
37746 check if the target register was not used in the memory address and the
37747 register to hold the addis result is dead after the peephole. */
37748 if (REGNO (addis_reg) != REGNO (target))
37750 if (reg_mentioned_p (target, mem))
37751 return false;
37753 if (!peep2_reg_dead_p (2, addis_reg))
37754 return false;
37756 /* If the target register being loaded is the stack pointer, we must
37757 avoid loading any other value into it, even temporarily. */
37758 if (REG_P (target) && REGNO (target) == STACK_POINTER_REGNUM)
37759 return false;
37762 base_reg = XEXP (addr, 0);
37763 return REGNO (addis_reg) == REGNO (base_reg);
37766 /* During the peephole2 pass, adjust and expand the insns for a load fusion
37767 sequence. We adjust the addis register to use the target register. If the
37768 load sign extends, we adjust the code to do the zero extending load, and an
37769 explicit sign extension later since the fusion only covers zero extending
37770 loads.
37772 The operands are:
37773 operands[0] register set with addis (to be replaced with target)
37774 operands[1] value set via addis
37775 operands[2] target register being loaded
37776 operands[3] D-form memory reference using operands[0]. */
37778 void
37779 expand_fusion_gpr_load (rtx *operands)
37781 rtx addis_value = operands[1];
37782 rtx target = operands[2];
37783 rtx orig_mem = operands[3];
37784 rtx new_addr, new_mem, orig_addr, offset;
37785 enum rtx_code plus_or_lo_sum;
37786 machine_mode target_mode = GET_MODE (target);
37787 machine_mode extend_mode = target_mode;
37788 machine_mode ptr_mode = Pmode;
37789 enum rtx_code extend = UNKNOWN;
37791 if (GET_CODE (orig_mem) == ZERO_EXTEND
37792 || (TARGET_P8_FUSION_SIGN && GET_CODE (orig_mem) == SIGN_EXTEND))
37794 extend = GET_CODE (orig_mem);
37795 orig_mem = XEXP (orig_mem, 0);
37796 target_mode = GET_MODE (orig_mem);
37799 gcc_assert (MEM_P (orig_mem));
37801 orig_addr = XEXP (orig_mem, 0);
37802 plus_or_lo_sum = GET_CODE (orig_addr);
37803 gcc_assert (plus_or_lo_sum == PLUS || plus_or_lo_sum == LO_SUM);
37805 offset = XEXP (orig_addr, 1);
37806 new_addr = gen_rtx_fmt_ee (plus_or_lo_sum, ptr_mode, addis_value, offset);
37807 new_mem = replace_equiv_address_nv (orig_mem, new_addr, false);
37809 if (extend != UNKNOWN)
37810 new_mem = gen_rtx_fmt_e (ZERO_EXTEND, extend_mode, new_mem);
37812 new_mem = gen_rtx_UNSPEC (extend_mode, gen_rtvec (1, new_mem),
37813 UNSPEC_FUSION_GPR);
37814 emit_insn (gen_rtx_SET (target, new_mem));
37816 if (extend == SIGN_EXTEND)
37818 int sub_off = ((BYTES_BIG_ENDIAN)
37819 ? GET_MODE_SIZE (extend_mode) - GET_MODE_SIZE (target_mode)
37820 : 0);
37821 rtx sign_reg
37822 = simplify_subreg (target_mode, target, extend_mode, sub_off);
37824 emit_insn (gen_rtx_SET (target,
37825 gen_rtx_SIGN_EXTEND (extend_mode, sign_reg)));
37828 return;
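/* Editor's note: for a sign-extending load, the expansion above first
   emits a zero-extending fused load and then an explicit sign extension,
   roughly (a sketch; register numbers and modes hypothetical):

	(set (reg:DI 9) (unspec:DI [(zero_extend:DI (mem:HI ...))]
				   UNSPEC_FUSION_GPR))
	(set (reg:DI 9) (sign_extend:DI (subreg:HI (reg:DI 9) ...)))
*/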
37831 /* Emit the addis instruction that will be part of a fused instruction
37832 sequence. */
37834 void
37835 emit_fusion_addis (rtx target, rtx addis_value)
37837 rtx fuse_ops[10];
37838 const char *addis_str = NULL;
37840 /* Emit the addis instruction. */
37841 fuse_ops[0] = target;
37842 if (satisfies_constraint_L (addis_value))
37844 fuse_ops[1] = addis_value;
37845 addis_str = "lis %0,%v1";
37848 else if (GET_CODE (addis_value) == PLUS)
37850 rtx op0 = XEXP (addis_value, 0);
37851 rtx op1 = XEXP (addis_value, 1);
37853 if (REG_P (op0) && CONST_INT_P (op1)
37854 && satisfies_constraint_L (op1))
37856 fuse_ops[1] = op0;
37857 fuse_ops[2] = op1;
37858 addis_str = "addis %0,%1,%v2";
37862 else if (GET_CODE (addis_value) == HIGH)
37864 rtx value = XEXP (addis_value, 0);
37865 if (GET_CODE (value) == UNSPEC && XINT (value, 1) == UNSPEC_TOCREL)
37867 fuse_ops[1] = XVECEXP (value, 0, 0); /* symbol ref. */
37868 fuse_ops[2] = XVECEXP (value, 0, 1); /* TOC register. */
37869 if (TARGET_ELF)
37870 addis_str = "addis %0,%2,%1@toc@ha";
37872 else if (TARGET_XCOFF)
37873 addis_str = "addis %0,%1@u(%2)";
37875 else
37876 gcc_unreachable ();
37879 else if (GET_CODE (value) == PLUS)
37881 rtx op0 = XEXP (value, 0);
37882 rtx op1 = XEXP (value, 1);
37884 if (GET_CODE (op0) == UNSPEC
37885 && XINT (op0, 1) == UNSPEC_TOCREL
37886 && CONST_INT_P (op1))
37888 fuse_ops[1] = XVECEXP (op0, 0, 0); /* symbol ref. */
37889 fuse_ops[2] = XVECEXP (op0, 0, 1); /* TOC register. */
37890 fuse_ops[3] = op1;
37891 if (TARGET_ELF)
37892 addis_str = "addis %0,%2,%1+%3@toc@ha";
37894 else if (TARGET_XCOFF)
37895 addis_str = "addis %0,%1+%3@u(%2)";
37897 else
37898 gcc_unreachable ();
37902 else if (satisfies_constraint_L (value))
37904 fuse_ops[1] = value;
37905 addis_str = "lis %0,%v1";
37908 else if (TARGET_ELF && !TARGET_POWERPC64 && CONSTANT_P (value))
37910 fuse_ops[1] = value;
37911 addis_str = "lis %0,%1@ha";
37915 if (!addis_str)
37916 fatal_insn ("Could not generate addis value for fusion", addis_value);
37918 output_asm_insn (addis_str, fuse_ops);
37921 /* Emit a D-form load or store instruction that is the second instruction
37922 of a fusion sequence. */
37924 void
37925 emit_fusion_load_store (rtx load_store_reg, rtx addis_reg, rtx offset,
37926 const char *insn_str)
37928 rtx fuse_ops[10];
37929 char insn_template[80];
37931 fuse_ops[0] = load_store_reg;
37932 fuse_ops[1] = addis_reg;
37934 if (CONST_INT_P (offset) && satisfies_constraint_I (offset))
37936 sprintf (insn_template, "%s %%0,%%2(%%1)", insn_str);
37937 fuse_ops[2] = offset;
37938 output_asm_insn (insn_template, fuse_ops);
37941 else if (GET_CODE (offset) == UNSPEC
37942 && XINT (offset, 1) == UNSPEC_TOCREL)
37944 if (TARGET_ELF)
37945 sprintf (insn_template, "%s %%0,%%2@toc@l(%%1)", insn_str);
37947 else if (TARGET_XCOFF)
37948 sprintf (insn_template, "%s %%0,%%2@l(%%1)", insn_str);
37950 else
37951 gcc_unreachable ();
37953 fuse_ops[2] = XVECEXP (offset, 0, 0);
37954 output_asm_insn (insn_template, fuse_ops);
37957 else if (GET_CODE (offset) == PLUS
37958 && GET_CODE (XEXP (offset, 0)) == UNSPEC
37959 && XINT (XEXP (offset, 0), 1) == UNSPEC_TOCREL
37960 && CONST_INT_P (XEXP (offset, 1)))
37962 rtx tocrel_unspec = XEXP (offset, 0);
37963 if (TARGET_ELF)
37964 sprintf (insn_template, "%s %%0,%%2+%%3@toc@l(%%1)", insn_str);
37966 else if (TARGET_XCOFF)
37967 sprintf (insn_template, "%s %%0,%%2+%%3@l(%%1)", insn_str);
37969 else
37970 gcc_unreachable ();
37972 fuse_ops[2] = XVECEXP (tocrel_unspec, 0, 0);
37973 fuse_ops[3] = XEXP (offset, 1);
37974 output_asm_insn (insn_template, fuse_ops);
37977 else if (TARGET_ELF && !TARGET_POWERPC64 && CONSTANT_P (offset))
37979 sprintf (insn_template, "%s %%0,%%2@l(%%1)", insn_str);
37981 fuse_ops[2] = offset;
37982 output_asm_insn (insn_template, fuse_ops);
37985 else
37986 fatal_insn ("Unable to generate load/store offset for fusion", offset);
37988 return;
37991 /* Wrap a TOC address that can be fused to indicate that special fusion
37992 processing is needed. */
37994 static rtx
37995 fusion_wrap_memory_address (rtx old_mem)
37997 rtx old_addr = XEXP (old_mem, 0);
37998 rtvec v = gen_rtvec (1, old_addr);
37999 rtx new_addr = gen_rtx_UNSPEC (Pmode, v, UNSPEC_FUSION_ADDIS);
38000 return replace_equiv_address_nv (old_mem, new_addr, false);
38003 /* Given an address, convert it into the addis and load offset parts. Addresses
38004 created during the peephole2 process look like:
38005 (lo_sum (high (unspec [(sym)] UNSPEC_TOCREL))
38006 (unspec [(...)] UNSPEC_TOCREL))
38008 Addresses created via toc fusion look like:
38009 (unspec [(unspec [(...)] UNSPEC_TOCREL)] UNSPEC_FUSION_ADDIS)) */
38011 static void
38012 fusion_split_address (rtx addr, rtx *p_hi, rtx *p_lo)
38014 rtx hi, lo;
38016 if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_FUSION_ADDIS)
38018 lo = XVECEXP (addr, 0, 0);
38019 hi = gen_rtx_HIGH (Pmode, lo);
38021 else if (GET_CODE (addr) == PLUS || GET_CODE (addr) == LO_SUM)
38023 hi = XEXP (addr, 0);
38024 lo = XEXP (addr, 1);
38026 else
38027 gcc_unreachable ();
38029 *p_hi = hi;
38030 *p_lo = lo;
38033 /* Return a string to fuse an addis instruction with a gpr load into the same
38034 register that the addis instruction set. The address that is used
38035 is the logical address that was formed during peephole2:
38036 (lo_sum (high) (low-part))
38038 Or the address is the TOC address that is wrapped before register allocation:
38039 (unspec [(addr) (toc-reg)] UNSPEC_FUSION_ADDIS)
38041 The code is complicated, so we call output_asm_insn directly, and just
38042 return "". */
38044 const char *
38045 emit_fusion_gpr_load (rtx target, rtx mem)
38047 rtx addis_value;
38048 rtx addr;
38049 rtx load_offset;
38050 const char *load_str = NULL;
38051 machine_mode mode;
38053 if (GET_CODE (mem) == ZERO_EXTEND)
38054 mem = XEXP (mem, 0);
38056 gcc_assert (REG_P (target) && MEM_P (mem));
38058 addr = XEXP (mem, 0);
38059 fusion_split_address (addr, &addis_value, &load_offset);
38061 /* Now emit the load instruction to the same register. */
38062 mode = GET_MODE (mem);
38063 switch (mode)
38065 case E_QImode:
38066 load_str = "lbz";
38067 break;
38069 case E_HImode:
38070 load_str = "lhz";
38071 break;
38073 case E_SImode:
38074 case E_SFmode:
38075 load_str = "lwz";
38076 break;
38078 case E_DImode:
38079 case E_DFmode:
38080 gcc_assert (TARGET_POWERPC64);
38081 load_str = "ld";
38082 break;
38084 default:
38085 fatal_insn ("Bad GPR fusion", gen_rtx_SET (target, mem));
38088 /* Emit the addis instruction. */
38089 emit_fusion_addis (target, addis_value);
38091 /* Emit the D-form load instruction. */
38092 emit_fusion_load_store (target, target, load_offset, load_str);
38094 return "";
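/* Editor's note: typical assembly produced by the two calls above for a
   power8 fused GPR load (symbol and register numbers hypothetical); the
   addis and the load target the same register:

	addis 9,2,var@toc@ha	# emit_fusion_addis
	lwz 9,var@toc@l(9)	# emit_fusion_load_store
*/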
38098 /* Return true if the peephole2 can combine a load/store involving a
38099 combination of an addis instruction and the memory operation. This was
38100 added to the ISA 3.0 (power9) hardware. */
38102 bool
38103 fusion_p9_p (rtx addis_reg, /* register set via addis. */
38104 rtx addis_value, /* addis value. */
38105 rtx dest, /* destination (memory or register). */
38106 rtx src) /* source (register or memory). */
38108 rtx addr, mem, offset;
38109 machine_mode mode = GET_MODE (src);
38111 /* Validate arguments. */
38112 if (!base_reg_operand (addis_reg, GET_MODE (addis_reg)))
38113 return false;
38115 if (!fusion_gpr_addis (addis_value, GET_MODE (addis_value)))
38116 return false;
38118 /* Ignore extend operations that are part of the load. */
38119 if (GET_CODE (src) == FLOAT_EXTEND || GET_CODE (src) == ZERO_EXTEND)
38120 src = XEXP (src, 0);
38122 /* Test for memory<-register or register<-memory. */
38123 if (fpr_reg_operand (src, mode) || int_reg_operand (src, mode))
38125 if (!MEM_P (dest))
38126 return false;
38128 mem = dest;
38131 else if (MEM_P (src))
38133 if (!fpr_reg_operand (dest, mode) && !int_reg_operand (dest, mode))
38134 return false;
38136 mem = src;
38139 else
38140 return false;
38142 addr = XEXP (mem, 0); /* either PLUS or LO_SUM. */
38143 if (GET_CODE (addr) == PLUS)
38145 if (!rtx_equal_p (addis_reg, XEXP (addr, 0)))
38146 return false;
38148 return satisfies_constraint_I (XEXP (addr, 1));
38151 else if (GET_CODE (addr) == LO_SUM)
38153 if (!rtx_equal_p (addis_reg, XEXP (addr, 0)))
38154 return false;
38156 offset = XEXP (addr, 1);
38157 if (TARGET_XCOFF || (TARGET_ELF && TARGET_POWERPC64))
38158 return small_toc_ref (offset, GET_MODE (offset));
38160 else if (TARGET_ELF && !TARGET_POWERPC64)
38161 return CONSTANT_P (offset);
38164 return false;
38167 /* During the peephole2 pass, adjust and expand the insns for an extended fusion
38168 load sequence.
38170 The operands are:
38171 operands[0] register set with addis
38172 operands[1] value set via addis
38173 operands[2] target register being loaded
38174 operands[3] D-form memory reference using operands[0].
38176 This is similar to the fusion introduced with power8, except it scales to
38177 both loads/stores and does not require the result register to be the same as
38178 the base register. At the moment, we only do this if the register set by
38179 the addis is dead. */
38181 void
38182 expand_fusion_p9_load (rtx *operands)
38184 rtx tmp_reg = operands[0];
38185 rtx addis_value = operands[1];
38186 rtx target = operands[2];
38187 rtx orig_mem = operands[3];
38188 rtx new_addr, new_mem, orig_addr, offset, set, clobber, insn;
38189 enum rtx_code plus_or_lo_sum;
38190 machine_mode target_mode = GET_MODE (target);
38191 machine_mode extend_mode = target_mode;
38192 machine_mode ptr_mode = Pmode;
38193 enum rtx_code extend = UNKNOWN;
38195 if (GET_CODE (orig_mem) == FLOAT_EXTEND || GET_CODE (orig_mem) == ZERO_EXTEND)
38197 extend = GET_CODE (orig_mem);
38198 orig_mem = XEXP (orig_mem, 0);
38199 target_mode = GET_MODE (orig_mem);
38202 gcc_assert (MEM_P (orig_mem));
38204 orig_addr = XEXP (orig_mem, 0);
38205 plus_or_lo_sum = GET_CODE (orig_addr);
38206 gcc_assert (plus_or_lo_sum == PLUS || plus_or_lo_sum == LO_SUM);
38208 offset = XEXP (orig_addr, 1);
38209 new_addr = gen_rtx_fmt_ee (plus_or_lo_sum, ptr_mode, addis_value, offset);
38210 new_mem = replace_equiv_address_nv (orig_mem, new_addr, false);
38212 if (extend != UNKNOWN)
38213 new_mem = gen_rtx_fmt_e (extend, extend_mode, new_mem);
38215 new_mem = gen_rtx_UNSPEC (extend_mode, gen_rtvec (1, new_mem),
38216 UNSPEC_FUSION_P9);
38218 set = gen_rtx_SET (target, new_mem);
38219 clobber = gen_rtx_CLOBBER (VOIDmode, tmp_reg);
38220 insn = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, set, clobber));
38221 emit_insn (insn);
38223 return;
38226 /* During the peephole2 pass, adjust and expand the insns for an extended fusion
38227 store sequence.
38229 The operands are:
38230 operands[0] register set with addis
38231 operands[1] value set via addis
38232 operands[2] target D-form memory being stored to
38233 operands[3] register being stored
38235 This is similar to the fusion introduced with power8, except it scales to
38236 both loads/stores and does not require the result register to be the same as
38237 the base register. At the moment, we only do this if the register set by
38238 the addis is dead. */
38240 void
38241 expand_fusion_p9_store (rtx *operands)
38243 rtx tmp_reg = operands[0];
38244 rtx addis_value = operands[1];
38245 rtx orig_mem = operands[2];
38246 rtx src = operands[3];
38247 rtx new_addr, new_mem, orig_addr, offset, set, clobber, insn, new_src;
38248 enum rtx_code plus_or_lo_sum;
38249 machine_mode target_mode = GET_MODE (orig_mem);
38250 machine_mode ptr_mode = Pmode;
38252 gcc_assert (MEM_P (orig_mem));
38254 orig_addr = XEXP (orig_mem, 0);
38255 plus_or_lo_sum = GET_CODE (orig_addr);
38256 gcc_assert (plus_or_lo_sum == PLUS || plus_or_lo_sum == LO_SUM);
38258 offset = XEXP (orig_addr, 1);
38259 new_addr = gen_rtx_fmt_ee (plus_or_lo_sum, ptr_mode, addis_value, offset);
38260 new_mem = replace_equiv_address_nv (orig_mem, new_addr, false);
38262 new_src = gen_rtx_UNSPEC (target_mode, gen_rtvec (1, src),
38263 UNSPEC_FUSION_P9);
38265 set = gen_rtx_SET (new_mem, new_src);
38266 clobber = gen_rtx_CLOBBER (VOIDmode, tmp_reg);
38267 insn = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, set, clobber));
38268 emit_insn (insn);
38270 return;
38273 /* Return a string to fuse an addis instruction with a load using extended
38274 fusion. The address that is used is the logical address that was formed
38275 during peephole2: (lo_sum (high) (low-part))
38277 The code is complicated, so we call output_asm_insn directly, and just
38278 return "". */
38280 const char *
38281 emit_fusion_p9_load (rtx reg, rtx mem, rtx tmp_reg)
38283 machine_mode mode = GET_MODE (reg);
38284 rtx hi;
38285 rtx lo;
38286 rtx addr;
38287 const char *load_string;
38288 int r;
38290 if (GET_CODE (mem) == FLOAT_EXTEND || GET_CODE (mem) == ZERO_EXTEND)
38292 mem = XEXP (mem, 0);
38293 mode = GET_MODE (mem);
38296 if (GET_CODE (reg) == SUBREG)
38298 gcc_assert (SUBREG_BYTE (reg) == 0);
38299 reg = SUBREG_REG (reg);
38302 if (!REG_P (reg))
38303 fatal_insn ("emit_fusion_p9_load, bad reg #1", reg);
38305 r = REGNO (reg);
38306 if (FP_REGNO_P (r))
38308 if (mode == SFmode)
38309 load_string = "lfs";
38310 else if (mode == DFmode || mode == DImode)
38311 load_string = "lfd";
38312 else
38313 gcc_unreachable ();
38315 else if (ALTIVEC_REGNO_P (r) && TARGET_P9_VECTOR)
38317 if (mode == SFmode)
38318 load_string = "lxssp";
38319 else if (mode == DFmode || mode == DImode)
38320 load_string = "lxsd";
38321 else
38322 gcc_unreachable ();
38324 else if (INT_REGNO_P (r))
38326 switch (mode)
38328 case E_QImode:
38329 load_string = "lbz";
38330 break;
38331 case E_HImode:
38332 load_string = "lhz";
38333 break;
38334 case E_SImode:
38335 case E_SFmode:
38336 load_string = "lwz";
38337 break;
38338 case E_DImode:
38339 case E_DFmode:
38340 if (!TARGET_POWERPC64)
38341 gcc_unreachable ();
38342 load_string = "ld";
38343 break;
38344 default:
38345 gcc_unreachable ();
38348 else
38349 fatal_insn ("emit_fusion_p9_load, bad reg #2", reg);
38351 if (!MEM_P (mem))
38352 fatal_insn ("emit_fusion_p9_load not MEM", mem);
38354 addr = XEXP (mem, 0);
38355 fusion_split_address (addr, &hi, &lo);
38357 /* Emit the addis instruction. */
38358 emit_fusion_addis (tmp_reg, hi);
38360 /* Emit the D-form load instruction. */
38361 emit_fusion_load_store (reg, tmp_reg, lo, load_string);
38363 return "";
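/* Editor's note: unlike the power8 form above, the power9 fusion may use
   a scratch base register distinct from the destination, which also
   allows FPR and Altivec destinations, e.g. (registers and symbol
   hypothetical):

	addis 11,2,val@toc@ha	# tmp_reg holds the high part
	lfd 1,val@toc@l(11)	# load into an FP register
*/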
38366 /* Return a string to fuse an addis instruction with a store using extended
38367 fusion. The address that is used is the logical address that was formed
38368 during peephole2: (lo_sum (high) (low-part))
38370 The code is complicated, so we call output_asm_insn directly, and just
38371 return "". */
38373 const char *
38374 emit_fusion_p9_store (rtx mem, rtx reg, rtx tmp_reg)
38376 machine_mode mode = GET_MODE (reg);
38377 rtx hi;
38378 rtx lo;
38379 rtx addr;
38380 const char *store_string;
38381 int r;
38383 if (GET_CODE (reg) == SUBREG)
38385 gcc_assert (SUBREG_BYTE (reg) == 0);
38386 reg = SUBREG_REG (reg);
38389 if (!REG_P (reg))
38390 fatal_insn ("emit_fusion_p9_store, bad reg #1", reg);
38392 r = REGNO (reg);
38393 if (FP_REGNO_P (r))
38395 if (mode == SFmode)
38396 store_string = "stfs";
38397 else if (mode == DFmode)
38398 store_string = "stfd";
38399 else
38400 gcc_unreachable ();
38402 else if (ALTIVEC_REGNO_P (r) && TARGET_P9_VECTOR)
38404 if (mode == SFmode)
38405 store_string = "stxssp";
38406 else if (mode == DFmode || mode == DImode)
38407 store_string = "stxsd";
38408 else
38409 gcc_unreachable ();
38411 else if (INT_REGNO_P (r))
38413 switch (mode)
38415 case E_QImode:
38416 store_string = "stb";
38417 break;
38418 case E_HImode:
38419 store_string = "sth";
38420 break;
38421 case E_SImode:
38422 case E_SFmode:
38423 store_string = "stw";
38424 break;
38425 case E_DImode:
38426 case E_DFmode:
38427 if (!TARGET_POWERPC64)
38428 gcc_unreachable ();
38429 store_string = "std";
38430 break;
38431 default:
38432 gcc_unreachable ();
38435 else
38436 fatal_insn ("emit_fusion_p9_store, bad reg #2", reg);
38438 if (!MEM_P (mem))
38439 fatal_insn ("emit_fusion_p9_store not MEM", mem);
38441 addr = XEXP (mem, 0);
38442 fusion_split_address (addr, &hi, &lo);
38444 /* Emit the addis instruction. */
38445 emit_fusion_addis (tmp_reg, hi);
38447 /* Emit the D-form store instruction. */
38448 emit_fusion_load_store (reg, tmp_reg, lo, store_string);
38450 return "";
38453 #ifdef RS6000_GLIBC_ATOMIC_FENV
38454 /* Function declarations for rs6000_atomic_assign_expand_fenv. */
38455 static tree atomic_hold_decl, atomic_clear_decl, atomic_update_decl;
38456 #endif
38458 /* Implement TARGET_ATOMIC_ASSIGN_EXPAND_FENV hook. */
static void
rs6000_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update)
{
  if (!TARGET_HARD_FLOAT)
    {
#ifdef RS6000_GLIBC_ATOMIC_FENV
      if (atomic_hold_decl == NULL_TREE)
	{
	  atomic_hold_decl
	    = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
			  get_identifier ("__atomic_feholdexcept"),
			  build_function_type_list (void_type_node,
						    double_ptr_type_node,
						    NULL_TREE));
	  TREE_PUBLIC (atomic_hold_decl) = 1;
	  DECL_EXTERNAL (atomic_hold_decl) = 1;
	}

      if (atomic_clear_decl == NULL_TREE)
	{
	  atomic_clear_decl
	    = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
			  get_identifier ("__atomic_feclearexcept"),
			  build_function_type_list (void_type_node,
						    NULL_TREE));
	  TREE_PUBLIC (atomic_clear_decl) = 1;
	  DECL_EXTERNAL (atomic_clear_decl) = 1;
	}

      tree const_double = build_qualified_type (double_type_node,
						TYPE_QUAL_CONST);
      tree const_double_ptr = build_pointer_type (const_double);
      if (atomic_update_decl == NULL_TREE)
	{
	  atomic_update_decl
	    = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
			  get_identifier ("__atomic_feupdateenv"),
			  build_function_type_list (void_type_node,
						    const_double_ptr,
						    NULL_TREE));
	  TREE_PUBLIC (atomic_update_decl) = 1;
	  DECL_EXTERNAL (atomic_update_decl) = 1;
	}

      tree fenv_var = create_tmp_var_raw (double_type_node);
      TREE_ADDRESSABLE (fenv_var) = 1;
      tree fenv_addr = build1 (ADDR_EXPR, double_ptr_type_node, fenv_var);

      *hold = build_call_expr (atomic_hold_decl, 1, fenv_addr);
      *clear = build_call_expr (atomic_clear_decl, 0);
      *update = build_call_expr (atomic_update_decl, 1,
				 fold_convert (const_double_ptr, fenv_addr));
#endif
      return;
    }
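  /* Hard-float targets have no library helpers to call; build the
     hold/clear/update sequences inline from the mffs and mtfsf
     built-ins.  */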
  tree mffs = rs6000_builtin_decls[RS6000_BUILTIN_MFFS];
  tree mtfsf = rs6000_builtin_decls[RS6000_BUILTIN_MTFSF];
  tree call_mffs = build_call_expr (mffs, 0);

  /* Generates the equivalent of feholdexcept (&fenv_var):

	*fenv_var = __builtin_mffs ();
	double fenv_hold;
	*(uint64_t*)&fenv_hold = *(uint64_t*)fenv_var & 0xffffffff00000007LL;
	__builtin_mtfsf (0xff, fenv_hold);  */

  /* Mask to clear everything except for the rounding modes and non-IEEE
     arithmetic flag.  */
  const unsigned HOST_WIDE_INT hold_exception_mask =
    HOST_WIDE_INT_C (0xffffffff00000007);
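  /* In the FPSCR image (the low 32 bits of the value mffs returns), the
     two least-significant bits are the rounding mode RN and the next bit
     is the non-IEEE mode bit NI, so 0x7 selects exactly the fields named
     in the comment above.  */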
  tree fenv_var = create_tmp_var_raw (double_type_node);

  tree hold_mffs = build2 (MODIFY_EXPR, void_type_node, fenv_var, call_mffs);

  tree fenv_llu = build1 (VIEW_CONVERT_EXPR, uint64_type_node, fenv_var);
  tree fenv_llu_and = build2 (BIT_AND_EXPR, uint64_type_node, fenv_llu,
			      build_int_cst (uint64_type_node,
					     hold_exception_mask));

  tree fenv_hold_mtfsf = build1 (VIEW_CONVERT_EXPR, double_type_node,
				 fenv_llu_and);

  tree hold_mtfsf = build_call_expr (mtfsf, 2,
				     build_int_cst (unsigned_type_node, 0xff),
				     fenv_hold_mtfsf);

  *hold = build2 (COMPOUND_EXPR, void_type_node, hold_mffs, hold_mtfsf);
  /* Generates the equivalent of feclearexcept (FE_ALL_EXCEPT):

	double fenv_clear = __builtin_mffs ();
	*(uint64_t*)&fenv_clear &= 0xffffffff00000000LL;
	__builtin_mtfsf (0xff, fenv_clear);  */

  /* Mask that clears the entire 32-bit FPSCR image in the low word, and
     with it all of the exception bits.  */
  const unsigned HOST_WIDE_INT clear_exception_mask =
    HOST_WIDE_INT_C (0xffffffff00000000);
  tree fenv_clear = create_tmp_var_raw (double_type_node);

  tree clear_mffs = build2 (MODIFY_EXPR, void_type_node, fenv_clear, call_mffs);

  tree fenv_clean_llu = build1 (VIEW_CONVERT_EXPR, uint64_type_node, fenv_clear);
  tree fenv_clear_llu_and = build2 (BIT_AND_EXPR, uint64_type_node,
				    fenv_clean_llu,
				    build_int_cst (uint64_type_node,
						   clear_exception_mask));

  tree fenv_clear_mtfsf = build1 (VIEW_CONVERT_EXPR, double_type_node,
				  fenv_clear_llu_and);

  tree clear_mtfsf = build_call_expr (mtfsf, 2,
				      build_int_cst (unsigned_type_node, 0xff),
				      fenv_clear_mtfsf);

  *clear = build2 (COMPOUND_EXPR, void_type_node, clear_mffs, clear_mtfsf);
  /* Generates the equivalent of feupdateenv (&fenv_var):

	double old_fenv = __builtin_mffs ();
	double fenv_update;
	*(uint64_t*)&fenv_update = (*(uint64_t*)&old_fenv & 0xffffffff1fffff00LL) |
				   (*(uint64_t*)fenv_var & 0x1ff80fff);
	__builtin_mtfsf (0xff, fenv_update);  */

  const unsigned HOST_WIDE_INT update_exception_mask =
    HOST_WIDE_INT_C (0xffffffff1fffff00);
  const unsigned HOST_WIDE_INT new_exception_mask =
    HOST_WIDE_INT_C (0x1ff80fff);
  tree old_fenv = create_tmp_var_raw (double_type_node);
  tree update_mffs = build2 (MODIFY_EXPR, void_type_node, old_fenv, call_mffs);

  tree old_llu = build1 (VIEW_CONVERT_EXPR, uint64_type_node, old_fenv);
  tree old_llu_and = build2 (BIT_AND_EXPR, uint64_type_node, old_llu,
			     build_int_cst (uint64_type_node,
					    update_exception_mask));

  tree new_llu_and = build2 (BIT_AND_EXPR, uint64_type_node, fenv_llu,
			     build_int_cst (uint64_type_node,
					    new_exception_mask));

  tree new_llu_mask = build2 (BIT_IOR_EXPR, uint64_type_node,
			      old_llu_and, new_llu_and);

  tree fenv_update_mtfsf = build1 (VIEW_CONVERT_EXPR, double_type_node,
				   new_llu_mask);

  tree update_mtfsf = build_call_expr (mtfsf, 2,
				       build_int_cst (unsigned_type_node, 0xff),
				       fenv_update_mtfsf);

  *update = build2 (COMPOUND_EXPR, void_type_node, update_mffs, update_mtfsf);
}

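/* Convert the elements of the two V2DF sources SRC1 and SRC2 to single
   precision and merge the four results into the V4SF destination DST,
   with the element order corrected for the target's endianness.  */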
void
rs6000_generate_float2_double_code (rtx dst, rtx src1, rtx src2)
{
  rtx rtx_tmp0, rtx_tmp1, rtx_tmp2, rtx_tmp3;

  rtx_tmp0 = gen_reg_rtx (V2DFmode);
  rtx_tmp1 = gen_reg_rtx (V2DFmode);

  /* The destination of the vmrgew instruction layout is:
     rtx_tmp2[0] rtx_tmp3[0] rtx_tmp2[1] rtx_tmp3[1].
     Set up rtx_tmp0 and rtx_tmp1 to ensure the order of the elements after
     the vmrgew instruction will be correct.  */
  if (BYTES_BIG_ENDIAN)
    {
      emit_insn (gen_vsx_xxpermdi_v2df_be (rtx_tmp0, src1, src2,
					   GEN_INT (0)));
      emit_insn (gen_vsx_xxpermdi_v2df_be (rtx_tmp1, src1, src2,
					   GEN_INT (3)));
    }
  else
    {
      emit_insn (gen_vsx_xxpermdi_v2df (rtx_tmp0, src1, src2, GEN_INT (3)));
      emit_insn (gen_vsx_xxpermdi_v2df (rtx_tmp1, src1, src2, GEN_INT (0)));
    }

  rtx_tmp2 = gen_reg_rtx (V4SFmode);
  rtx_tmp3 = gen_reg_rtx (V4SFmode);

  emit_insn (gen_vsx_xvcdpsp (rtx_tmp2, rtx_tmp0));
  emit_insn (gen_vsx_xvcdpsp (rtx_tmp3, rtx_tmp1));

  if (BYTES_BIG_ENDIAN)
    emit_insn (gen_p8_vmrgew_v4sf (dst, rtx_tmp2, rtx_tmp3));
  else
    emit_insn (gen_p8_vmrgew_v4sf (dst, rtx_tmp3, rtx_tmp2));
}

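/* Like rs6000_generate_float2_double_code, but the V2DI sources SRC1 and
   SRC2 hold 64-bit integers, converted to single precision as signed
   values if SIGNED_CONVERT is true and as unsigned values otherwise.  */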
void
rs6000_generate_float2_code (bool signed_convert, rtx dst, rtx src1, rtx src2)
{
  rtx rtx_tmp0, rtx_tmp1, rtx_tmp2, rtx_tmp3;

  rtx_tmp0 = gen_reg_rtx (V2DImode);
  rtx_tmp1 = gen_reg_rtx (V2DImode);

  /* The destination of the vmrgew instruction layout is:
     rtx_tmp2[0] rtx_tmp3[0] rtx_tmp2[1] rtx_tmp3[1].
     Set up rtx_tmp0 and rtx_tmp1 to ensure the order of the elements after
     the vmrgew instruction will be correct.  */
  if (BYTES_BIG_ENDIAN)
    {
      emit_insn (gen_vsx_xxpermdi_v2di_be (rtx_tmp0, src1, src2, GEN_INT (0)));
      emit_insn (gen_vsx_xxpermdi_v2di_be (rtx_tmp1, src1, src2, GEN_INT (3)));
    }
  else
    {
      emit_insn (gen_vsx_xxpermdi_v2di (rtx_tmp0, src1, src2, GEN_INT (3)));
      emit_insn (gen_vsx_xxpermdi_v2di (rtx_tmp1, src1, src2, GEN_INT (0)));
    }

  rtx_tmp2 = gen_reg_rtx (V4SFmode);
  rtx_tmp3 = gen_reg_rtx (V4SFmode);

  if (signed_convert)
    {
      emit_insn (gen_vsx_xvcvsxdsp (rtx_tmp2, rtx_tmp0));
      emit_insn (gen_vsx_xvcvsxdsp (rtx_tmp3, rtx_tmp1));
    }
  else
    {
      emit_insn (gen_vsx_xvcvuxdsp (rtx_tmp2, rtx_tmp0));
      emit_insn (gen_vsx_xvcvuxdsp (rtx_tmp3, rtx_tmp1));
    }

  if (BYTES_BIG_ENDIAN)
    emit_insn (gen_p8_vmrgew_v4sf (dst, rtx_tmp2, rtx_tmp3));
  else
    emit_insn (gen_p8_vmrgew_v4sf (dst, rtx_tmp3, rtx_tmp2));
}

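/* Convert the elements of the two V2DF sources SRC1 and SRC2 to 32-bit
   integers (signed if SIGNED_CONVERT, unsigned otherwise) and merge the
   four results into the V4SI destination DST.  */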
void
rs6000_generate_vsigned2_code (bool signed_convert, rtx dst, rtx src1,
			       rtx src2)
{
  rtx rtx_tmp0, rtx_tmp1, rtx_tmp2, rtx_tmp3;

  rtx_tmp0 = gen_reg_rtx (V2DFmode);
  rtx_tmp1 = gen_reg_rtx (V2DFmode);

  emit_insn (gen_vsx_xxpermdi_v2df (rtx_tmp0, src1, src2, GEN_INT (0)));
  emit_insn (gen_vsx_xxpermdi_v2df (rtx_tmp1, src1, src2, GEN_INT (3)));

  rtx_tmp2 = gen_reg_rtx (V4SImode);
  rtx_tmp3 = gen_reg_rtx (V4SImode);

  if (signed_convert)
    {
      emit_insn (gen_vsx_xvcvdpsxws (rtx_tmp2, rtx_tmp0));
      emit_insn (gen_vsx_xvcvdpsxws (rtx_tmp3, rtx_tmp1));
    }
  else
    {
      emit_insn (gen_vsx_xvcvdpuxws (rtx_tmp2, rtx_tmp0));
      emit_insn (gen_vsx_xvcvdpuxws (rtx_tmp3, rtx_tmp1));
    }

  emit_insn (gen_p8_vmrgew_v4si (dst, rtx_tmp2, rtx_tmp3));
}

/* Implement the TARGET_OPTAB_SUPPORTED_P hook.  */

static bool
rs6000_optab_supported_p (int op, machine_mode mode1, machine_mode,
			  optimization_type opt_type)
{
  switch (op)
    {
    case rsqrt_optab:
      return (opt_type == OPTIMIZE_FOR_SPEED
	      && RS6000_RECIP_AUTO_RSQRTE_P (mode1));

    default:
      return true;
    }
}

/* Implement TARGET_CONSTANT_ALIGNMENT.  */

static HOST_WIDE_INT
rs6000_constant_alignment (const_tree exp, HOST_WIDE_INT align)
{
  if (TREE_CODE (exp) == STRING_CST
      && (STRICT_ALIGNMENT || !optimize_size))
    return MAX (align, BITS_PER_WORD);
  return align;
}

/* Implement TARGET_STARTING_FRAME_OFFSET.  */

static HOST_WIDE_INT
rs6000_starting_frame_offset (void)
{
  if (FRAME_GROWS_DOWNWARD)
    return 0;
  return RS6000_STARTING_FRAME_OFFSET;
}

/* Create an alias for a mangled name where we have changed the mangling (in
   GCC 8.1, we used U10__float128, and now we use u9__ieee128).  This is
   called via the target hook TARGET_ASM_GLOBALIZE_DECL_NAME.  */
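/* For instance (symbol names here are illustrative only), for a function
   whose GCC 8.1 mangling of __float128 differed from the current one, the
   code below would emit:

	.weak _Z1fU10__float128
	.set _Z1fU10__float128,_Z1fu9__ieee128

   so that objects built against the old mangled name continue to link.  */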
#if TARGET_ELF && RS6000_WEAK
static void
rs6000_globalize_decl_name (FILE * stream, tree decl)
{
  const char *name = XSTR (XEXP (DECL_RTL (decl), 0), 0);

  targetm.asm_out.globalize_label (stream, name);

  if (rs6000_passes_ieee128 && name[0] == '_' && name[1] == 'Z')
    {
      tree save_asm_name = DECL_ASSEMBLER_NAME (decl);
      const char *old_name;

      ieee128_mangling_gcc_8_1 = true;
      lang_hooks.set_decl_assembler_name (decl);
      old_name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
      SET_DECL_ASSEMBLER_NAME (decl, save_asm_name);
      ieee128_mangling_gcc_8_1 = false;

      if (strcmp (name, old_name) != 0)
	{
	  fprintf (stream, "\t.weak %s\n", old_name);
	  fprintf (stream, "\t.set %s,%s\n", old_name, name);
	}
    }
}
#endif

struct gcc_target targetm = TARGET_INITIALIZER;

#include "gt-rs6000.h"